vfs_default.c revision 120514
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 120514 2003-09-27 12:53:33Z phk $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/poll.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_nostrategy(struct vop_strategy_args *);

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 */

vop_t **default_vnodeop_p;
static struct vnodeopv_entry_desc default_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_advlock_desc,		(vop_t *) vop_einval },
	{ &vop_bmap_desc,		(vop_t *) vop_stdbmap },
	{ &vop_close_desc,		(vop_t *) vop_null },
	{ &vop_createvobject_desc,	(vop_t *) vop_stdcreatevobject },
	{ &vop_destroyvobject_desc,	(vop_t *) vop_stddestroyvobject },
	{ &vop_fsync_desc,		(vop_t *) vop_null },
	{ &vop_getpages_desc,		(vop_t *) vop_stdgetpages },
	{ &vop_getvobject_desc,		(vop_t *) vop_stdgetvobject },
	{ &vop_inactive_desc,		(vop_t *) vop_stdinactive },
	{ &vop_ioctl_desc,		(vop_t *) vop_enotty },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lease_desc,		(vop_t *) vop_null },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_lookup_desc,		(vop_t *) vop_nolookup },
	{ &vop_open_desc,		(vop_t *) vop_null },
	{ &vop_pathconf_desc,		(vop_t *) vop_einval },
	{ &vop_poll_desc,		(vop_t *) vop_nopoll },
	{ &vop_putpages_desc,		(vop_t *) vop_stdputpages },
	{ &vop_readlink_desc,		(vop_t *) vop_einval },
	{ &vop_revoke_desc,		(vop_t *) vop_revoke },
	{ &vop_specstrategy_desc,	(vop_t *) vop_panic },
	{ &vop_strategy_desc,		(vop_t *) vop_nostrategy },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ NULL, NULL }
};

static struct vnodeopv_desc default_vnodeop_opv_desc =
	{ &default_vnodeop_p, default_vnodeop_entries };

VNODEOP_SET(default_vnodeop_opv_desc);
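
/*
 * Illustrative sketch (kept in a comment, not compiled): a filesystem
 * registers its own operations vector the same way, and any operation it
 * does not list falls back to this default table via vop_defaultop below.
 * The "myfs_*" names here are hypothetical.
 *
 *	vop_t **myfs_vnodeop_p;
 *	static struct vnodeopv_entry_desc myfs_vnodeop_entries[] = {
 *		{ &vop_default_desc,	(vop_t *) vop_defaultop },
 *		{ &vop_read_desc,	(vop_t *) myfs_read },
 *		{ &vop_write_desc,	(vop_t *) myfs_write },
 *		{ NULL, NULL }
 *	};
 *	static struct vnodeopv_desc myfs_vnodeop_opv_desc =
 *		{ &myfs_vnodeop_p, myfs_vnodeop_entries };
 *	VNODEOP_SET(myfs_vnodeop_opv_desc);
 */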

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Used to make a defined VOP fall back to the default VOP.
 */
int
vop_defaultop(struct vop_generic_args *ap)
{

	return (VOCALL(default_vnodeop_p, ap->a_desc->vdesc_offset, ap));
}
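
/*
 * For reference, VOCALL() simply indexes the operations vector at the
 * descriptor's offset and invokes the function found there, roughly:
 *
 *	(*(default_vnodeop_p[ap->a_desc->vdesc_offset]))(ap)
 *
 * so vop_defaultop re-dispatches the unmodified argument structure against
 * the default vector declared above.
 */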

/*
 * Helper function used to panic on certain VOPs which a filesystem
 * should never reach.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * Where documentation for an operation exists, it is found in the
 * corresponding VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup.
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 *	vop_nostrategy:
 *
 *	Strategy routine for VFS devices that have none.
 *
 *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 *	routine; this is typically done for a BIO_READ strategy call.  Prior
 *	to a write, B_INVAL is assumed to already be clear and should not be
 *	cleared manually unless the buffer was just invalidated.  BIO_ERROR
 *	should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
	    __func__, ap->a_vp, ap->a_bp->b_vp));
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	vprint("device vnode", ap->a_bp->b_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}
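
/*
 * Hypothetical sketch (kept in a comment, not compiled) of a per-filesystem
 * override that reports a smaller limit and defers everything else to the
 * standard implementation; "myfs_pathconf" and "MYFS_LINK_MAX" are made-up
 * names:
 *
 *	static int
 *	myfs_pathconf(struct vop_pathconf_args *ap)
 *	{
 *		if (ap->a_name == _PC_LINK_MAX) {
 *			*ap->a_retval = MYFS_LINK_MAX;
 *			return (0);
 *		}
 *		return (vop_stdpathconf(ap));
 *	}
 */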

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

#ifndef	DEBUG_LOCKS
	return (lockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp), ap->a_td));
#else
	return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    ap->a_td, "vop_stdlock", vp->filename, vp->line));
#endif
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp),
	    ap->a_td));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock, ap->a_td));
}

/* Mark the vnode inactive. */
int
vop_stdinactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{

	VOP_UNLOCK(ap->a_vp, 0, ap->a_td);
	return (0);
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	/*
	 * Return true for read/write.  If the user asked for something
	 * special, return POLLNVAL, so that clients have a way of
	 * determining reliably whether or not the extended
	 * functionality is present without hard-coding knowledge
	 * of specific filesystem implementations.
	 * Stay in sync with kern_conf.c::no_poll().
	 */
	if (ap->a_events & ~POLLSTANDARD)
		return (POLLNVAL);

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}
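
/*
 * Example of the contract above: a caller polling such a vnode for POLLIN
 * always sees it reported as ready, while a request including one of the
 * extended events outside POLLSTANDARD (POLLEXTEND, POLLATTRIB, etc.) gets
 * POLLNVAL back, which identifies the missing functionality.
 */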

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_sharedlock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
#ifdef DEBUG_VFS_LOCKS
		/*
		 * Normally, we use shared locks here, but that confuses
		 * the locking assertions.
		 */
		vnflags = LK_EXCLUSIVE;
		break;
#endif
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
	}
	vnflags |= flags & (LK_INTERLOCK | LK_EXTFLG_MASK);
#ifndef	DEBUG_LOCKS
	return (lockmgr(vp->v_vnlock, vnflags, VI_MTX(vp), ap->a_td));
#else
	return (debuglockmgr(vp->v_vnlock, vnflags, VI_MTX(vp), ap->a_td,
	    "vop_sharedlock", vp->filename, vp->line));
#endif
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	vnflags |= flags & (LK_INTERLOCK | LK_EXTFLG_MASK);
	return (lockmgr(vp->v_vnlock, vnflags, VI_MTX(vp), ap->a_td));
#else /* for now */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		VI_UNLOCK(ap->a_vp);
	return (0);
#endif
}


/*
 * Do the inverse of vop_nolock, handling the interlock in a compatible way.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		VI_UNLOCK(ap->a_vp);
	return (0);
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{

	return (0);
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{

	*(ap->a_mpp) = ap->a_vp->v_mount;
	return (0);
}

/* Create the VM system backing object for this vnode. */
int
vop_stdcreatevobject(ap)
	struct vop_createvobject_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct ucred *cred = ap->a_cred;
	struct thread *td = ap->a_td;
	struct vattr vat;
	vm_object_t object;
	int error = 0;

	GIANT_REQUIRED;

	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
		return (0);

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG || vp->v_type == VDIR) {
			if ((error = VOP_GETATTR(vp, &vat, cred, td)) != 0)
				goto retn;
			object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
		} else if (devsw(vp->v_rdev) != NULL) {
			/*
			 * This simply allocates the biggest object possible
			 * for a disk vnode.  This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
		} else {
			goto retn;
		}
		/*
		 * Dereference the reference we just created.  This assumes
		 * that the object is associated with the vp.
		 */
		VM_OBJECT_LOCK(object);
		object->ref_count--;
		VM_OBJECT_UNLOCK(object);
		vrele(vp);
	} else {
		VM_OBJECT_LOCK(object);
		if (object->flags & OBJ_DEAD) {
			VOP_UNLOCK(vp, 0, td);
			msleep(object, VM_OBJECT_MTX(object), PDROP | PVM,
			    "vodead", 0);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
			goto retry;
		}
		VM_OBJECT_UNLOCK(object);
	}

	KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
	vp->v_vflag |= VV_OBJBUF;

retn:
	return (error);
}

/* Destroy the VM system object associated with this vnode. */
int
vop_stddestroyvobject(ap)
	struct vop_destroyvobject_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	vm_object_t obj = vp->v_object;

	GIANT_REQUIRED;

	if (obj == NULL)
		return (0);
	VM_OBJECT_LOCK(obj);
	if (obj->ref_count == 0) {
		/*
		 * vclean() may be called twice.  The first time
		 * removes the primary reference to the object,
		 * the second time goes one further and is a
		 * special-case to terminate the object.
		 *
		 * Don't double-terminate the object.
		 */
		if ((obj->flags & OBJ_DEAD) == 0)
			vm_object_terminate(obj);
		else
			VM_OBJECT_UNLOCK(obj);
	} else {
		/*
		 * Woe to the process that tries to page now :-).
		 */
		vm_pager_deallocate(obj);
		VM_OBJECT_UNLOCK(obj);
	}
	return (0);
}

/*
 * Return the underlying VM object.  This routine may be called with or
 * without the vnode interlock held.  If called without, the returned
 * object is not guaranteed to be valid.  The syncer typically gets the
 * object without holding the interlock in order to quickly test whether
 * it might be dirty before going heavy-weight.  vm_objects use zalloc
 * and thus stable storage, so this is safe.
 */
int
vop_stdgetvobject(ap)
	struct vop_getvobject_args /* {
		struct vnode *a_vp;
		struct vm_object **a_objpp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vm_object **objpp = ap->a_objpp;

	if (objpp)
		*objpp = vp->v_object;
	return (vp->v_object ? 0 : EINVAL);
}

/*
 * Default bmap implementation: translate the file-relative logical block
 * number into the same vnode's block address expressed in DEV_BSIZE units,
 * assuming a linear layout at the filesystem's I/O block size, and report
 * no read-ahead or read-behind runs.
 *
 * XXX A VOP_BMAP(9) manpage is still needed.
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}
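
/*
 * Worked example of the mapping above, assuming an f_iosize of 8192 bytes
 * and the usual 512-byte DEV_BSIZE: btodb(8192) is 16, so logical block 5
 * comes back as block 80 in DEV_BSIZE units on the same vnode, with
 * zero-length run-forward and run-backward hints.
 */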

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct buf *nbp;
	int s, error = 0;
	int maxretry = 100;	/* large, arbitrarily chosen */

	VI_LOCK(vp);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	s = splbio();
	TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}
	splx(s);

	/*
	 * Flush all dirty buffers associated with a block device.
	 */
loop2:
	s = splbio();
	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
		nbp = TAILQ_NEXT(bp, b_vnbufs);
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		VI_UNLOCK(vp);
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
			splx(s);
		} else {
			bremfree(bp);
			splx(s);
			bawrite(bp);
		}
		VI_LOCK(vp);
		goto loop2;
	}

	/*
	 * If synchronous, the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		while (vp->v_numoutput) {
			vp->v_iflag |= VI_BWAIT;
			msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
			    PRIBIO + 1, "fsync", 0);
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0) {
				splx(s);
				goto loop1;
			}
			vprint("fsync: giving up on dirty", vp);
			error = EAGAIN;
		}
	}
	VI_UNLOCK(vp);
	splx(s);

	return (error);
}

/*
 * Default getpages implementation: defer to the generic vnode pager.
 *
 * XXX More information is still needed in the VOP_GETPAGES(9) manpage.
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}

/*
 * Default putpages implementation: defer to the generic vnode pager.
 *
 * XXX More information is still needed in the VOP_PUTPAGES(9) manpage.
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

/*
 * VFS default ops: used to fill the VFS function table to get reasonable
 * default return values.
 */
int
vfs_stdroot (mp, vpp)
	struct mount *mp;
	struct vnode **vpp;
{
	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp, td)
	struct mount *mp;
	struct statfs *sbp;
	struct thread *td;
{
	return (EOPNOTSUPP);
}

int
vfs_stdvptofh (vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	return (EOPNOTSUPP);
}

int
vfs_stdstart (mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	return (0);
}

int
vfs_stdquotactl (mp, cmds, uid, arg, td)
	struct mount *mp;
	int cmds;
	uid_t uid;
	caddr_t arg;
	struct thread *td;
{
	return (EOPNOTSUPP);
}

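/*
 * Flush dirty buffers on every vnode of the given mount point.  The mount
 * vnode list mutex is dropped around each VOP_FSYNC() call, so the list can
 * change underneath us; that is why the scan re-checks v_mount and restarts
 * from the head whenever the saved next pointer is no longer valid.
 */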
int
vfs_stdsync(mp, waitfor, cred, td)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct thread *td;
{
	struct vnode *vp, *nvp;
	int error, lockreq, allerror = 0;

	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	mtx_lock(&mntvnode_mtx);
loop:
	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		nvp = TAILQ_NEXT(vp, v_nmntvnodes);

		VI_LOCK(vp);
		if (TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			VI_UNLOCK(vp);
			continue;
		}
		mtx_unlock(&mntvnode_mtx);

		if ((error = vget(vp, lockreq, td)) != 0) {
			mtx_lock(&mntvnode_mtx);
			if (error == ENOENT)
				goto loop;
			continue;
		}
		error = VOP_FSYNC(vp, cred, waitfor, td);
		if (error)
			allerror = error;

		mtx_lock(&mntvnode_mtx);
		if (nvp != TAILQ_NEXT(vp, v_nmntvnodes)) {
			vput(vp);
			goto loop;
		}
		vput(vp);
	}
	mtx_unlock(&mntvnode_mtx);
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor, cred, td)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct thread *td;
{
	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{
	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{
	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, td)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
	struct thread *td;
{
	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0, td);
	return (EOPNOTSUPP);
}

/* end of vfs default ops */