ffs_vfsops.c revision 177493
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 177493 2008-03-22 09:15:16Z jeff $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/priv.h>
44#include <sys/proc.h>
45#include <sys/kernel.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <security/mac/mac_framework.h>
56
57#include <ufs/ufs/extattr.h>
58#include <ufs/ufs/gjournal.h>
59#include <ufs/ufs/quota.h>
60#include <ufs/ufs/ufsmount.h>
61#include <ufs/ufs/inode.h>
62#include <ufs/ufs/ufs_extern.h>
63
64#include <ufs/ffs/fs.h>
65#include <ufs/ffs/ffs_extern.h>
66
67#include <vm/vm.h>
68#include <vm/uma.h>
69#include <vm/vm_page.h>
70
71#include <geom/geom.h>
72#include <geom/geom_vfs.h>
73
74static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
75
76static int	ffs_reload(struct mount *, struct thread *);
77static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
78static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
79		    ufs2_daddr_t);
80static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
81static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
82static vfs_init_t ffs_init;
83static vfs_uninit_t ffs_uninit;
84static vfs_extattrctl_t ffs_extattrctl;
85static vfs_cmount_t ffs_cmount;
86static vfs_unmount_t ffs_unmount;
87static vfs_mount_t ffs_mount;
88static vfs_statfs_t ffs_statfs;
89static vfs_fhtovp_t ffs_fhtovp;
90static vfs_sync_t ffs_sync;
91
92static struct vfsops ufs_vfsops = {
93	.vfs_extattrctl =	ffs_extattrctl,
94	.vfs_fhtovp =		ffs_fhtovp,
95	.vfs_init =		ffs_init,
96	.vfs_mount =		ffs_mount,
97	.vfs_cmount =		ffs_cmount,
98	.vfs_quotactl =		ufs_quotactl,
99	.vfs_root =		ufs_root,
100	.vfs_statfs =		ffs_statfs,
101	.vfs_sync =		ffs_sync,
102	.vfs_uninit =		ffs_uninit,
103	.vfs_unmount =		ffs_unmount,
104	.vfs_vget =		ffs_vget,
105};
106
107VFS_SET(ufs_vfsops, ufs, 0);
108MODULE_VERSION(ufs, 1);
109
110static b_strategy_t ffs_geom_strategy;
111static b_write_t ffs_bufwrite;
112
113static struct buf_ops ffs_ops = {
114	.bop_name =	"FFS",
115	.bop_write =	ffs_bufwrite,
116	.bop_strategy =	ffs_geom_strategy,
117	.bop_sync =	bufsync,
118#ifdef NO_FFS_SNAPSHOT
119	.bop_bdflush =	bufbdflush,
120#else
121	.bop_bdflush =	ffs_bdflush,
122#endif
123};
124
/*
 * Mount option names handed to vfs_filteropt() by ffs_mount().
 * The list is NULL terminated.
 */
static const char *ffs_opts[] = {
	"acls",
	"async",
	"atime",
	"clusterr",
	"clusterw",
	"exec",
	"export",
	"force",
	"from",
	"multilabel",
	"snapshot",
	"suid",
	"suiddir",
	"symfollow",
	"sync",
	"union",
	NULL
};
129
130static int
131ffs_mount(struct mount *mp, struct thread *td)
132{
133	struct vnode *devvp;
134	struct ufsmount *ump = 0;
135	struct fs *fs;
136	int error, flags;
137	u_int mntorflags, mntandnotflags;
138	mode_t accessmode;
139	struct nameidata ndp;
140	char *fspec;
141
142	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
143		return (EINVAL);
144	if (uma_inode == NULL) {
145		uma_inode = uma_zcreate("FFS inode",
146		    sizeof(struct inode), NULL, NULL, NULL, NULL,
147		    UMA_ALIGN_PTR, 0);
148		uma_ufs1 = uma_zcreate("FFS1 dinode",
149		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
150		    UMA_ALIGN_PTR, 0);
151		uma_ufs2 = uma_zcreate("FFS2 dinode",
152		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
153		    UMA_ALIGN_PTR, 0);
154	}
155
156	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
157	if (error)
158		return (error);
159
160	mntorflags = 0;
161	mntandnotflags = 0;
162	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
163		mntorflags |= MNT_ACLS;
164
165	if (vfs_getopt(mp->mnt_optnew, "async", NULL, NULL) == 0)
166		mntorflags |= MNT_ASYNC;
167
168	if (vfs_getopt(mp->mnt_optnew, "force", NULL, NULL) == 0)
169		mntorflags |= MNT_FORCE;
170
171	if (vfs_getopt(mp->mnt_optnew, "multilabel", NULL, NULL) == 0)
172		mntorflags |= MNT_MULTILABEL;
173
174	if (vfs_getopt(mp->mnt_optnew, "noasync", NULL, NULL) == 0)
175		mntandnotflags |= MNT_ASYNC;
176
177	if (vfs_getopt(mp->mnt_optnew, "noatime", NULL, NULL) == 0)
178		mntorflags |= MNT_NOATIME;
179
180	if (vfs_getopt(mp->mnt_optnew, "noclusterr", NULL, NULL) == 0)
181		mntorflags |= MNT_NOCLUSTERR;
182
183	if (vfs_getopt(mp->mnt_optnew, "noclusterw", NULL, NULL) == 0)
184		mntorflags |= MNT_NOCLUSTERW;
185
186	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0)
187		mntorflags |= MNT_SNAPSHOT;
188
189	MNT_ILOCK(mp);
190	mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
191	MNT_IUNLOCK(mp);
192	/*
193	 * If updating, check whether changing from read-only to
194	 * read/write; if there is no device name, that's all we do.
195	 */
196	if (mp->mnt_flag & MNT_UPDATE) {
197		ump = VFSTOUFS(mp);
198		fs = ump->um_fs;
199		devvp = ump->um_devvp;
200		if (fs->fs_ronly == 0 &&
201		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
202			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
203				return (error);
204			/*
205			 * Flush any dirty data.
206			 */
207			if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
208				vn_finished_write(mp);
209				return (error);
210			}
211			/*
212			 * Check for and optionally get rid of files open
213			 * for writing.
214			 */
215			flags = WRITECLOSE;
216			if (mp->mnt_flag & MNT_FORCE)
217				flags |= FORCECLOSE;
218			if (mp->mnt_flag & MNT_SOFTDEP) {
219				error = softdep_flushfiles(mp, flags, td);
220			} else {
221				error = ffs_flushfiles(mp, flags, td);
222			}
223			if (error) {
224				vn_finished_write(mp);
225				return (error);
226			}
227			if (fs->fs_pendingblocks != 0 ||
228			    fs->fs_pendinginodes != 0) {
229				printf("%s: %s: blocks %jd files %d\n",
230				    fs->fs_fsmnt, "update error",
231				    (intmax_t)fs->fs_pendingblocks,
232				    fs->fs_pendinginodes);
233				fs->fs_pendingblocks = 0;
234				fs->fs_pendinginodes = 0;
235			}
236			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
237				fs->fs_clean = 1;
238			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
239				fs->fs_ronly = 0;
240				fs->fs_clean = 0;
241				vn_finished_write(mp);
242				return (error);
243			}
244			vn_finished_write(mp);
245			DROP_GIANT();
246			g_topology_lock();
247			g_access(ump->um_cp, 0, -1, 0);
248			g_topology_unlock();
249			PICKUP_GIANT();
250			fs->fs_ronly = 1;
251			MNT_ILOCK(mp);
252			mp->mnt_flag |= MNT_RDONLY;
253			MNT_IUNLOCK(mp);
254		}
255		if ((mp->mnt_flag & MNT_RELOAD) &&
256		    (error = ffs_reload(mp, td)) != 0)
257			return (error);
258		if (fs->fs_ronly &&
259		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
260			/*
261			 * If upgrade to read-write by non-root, then verify
262			 * that user has necessary permissions on the device.
263			 */
264			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
265			error = VOP_ACCESS(devvp, VREAD | VWRITE,
266			    td->td_ucred, td);
267			if (error)
268				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
269			if (error) {
270				VOP_UNLOCK(devvp, 0);
271				return (error);
272			}
273			VOP_UNLOCK(devvp, 0);
274			fs->fs_flags &= ~FS_UNCLEAN;
275			if (fs->fs_clean == 0) {
276				fs->fs_flags |= FS_UNCLEAN;
277				if ((mp->mnt_flag & MNT_FORCE) ||
278				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
279				     (fs->fs_flags & FS_DOSOFTDEP))) {
280					printf("WARNING: %s was not %s\n",
281					   fs->fs_fsmnt, "properly dismounted");
282				} else {
283					printf(
284"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
285					    fs->fs_fsmnt);
286					return (EPERM);
287				}
288			}
289			DROP_GIANT();
290			g_topology_lock();
291			/*
292			 * If we're the root device, we may not have an E count
293			 * yet, get it now.
294			 */
295			if (ump->um_cp->ace == 0)
296				error = g_access(ump->um_cp, 0, 1, 1);
297			else
298				error = g_access(ump->um_cp, 0, 1, 0);
299			g_topology_unlock();
300			PICKUP_GIANT();
301			if (error)
302				return (error);
303			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
304				return (error);
305			fs->fs_ronly = 0;
306			MNT_ILOCK(mp);
307			mp->mnt_flag &= ~MNT_RDONLY;
308			MNT_IUNLOCK(mp);
309			fs->fs_clean = 0;
310			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
311				vn_finished_write(mp);
312				return (error);
313			}
314			/* check to see if we need to start softdep */
315			if ((fs->fs_flags & FS_DOSOFTDEP) &&
316			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
317				vn_finished_write(mp);
318				return (error);
319			}
320			if (fs->fs_snapinum[0] != 0)
321				ffs_snapshot_mount(mp);
322			vn_finished_write(mp);
323		}
324		/*
325		 * Soft updates is incompatible with "async",
326		 * so if we are doing softupdates stop the user
327		 * from setting the async flag in an update.
328		 * Softdep_mount() clears it in an initial mount
329		 * or ro->rw remount.
330		 */
331		if (mp->mnt_flag & MNT_SOFTDEP) {
332			/* XXX: Reset too late ? */
333			MNT_ILOCK(mp);
334			mp->mnt_flag &= ~MNT_ASYNC;
335			MNT_IUNLOCK(mp);
336		}
337		/*
338		 * Keep MNT_ACLS flag if it is stored in superblock.
339		 */
340		if ((fs->fs_flags & FS_ACLS) != 0) {
341			/* XXX: Set too late ? */
342			MNT_ILOCK(mp);
343			mp->mnt_flag |= MNT_ACLS;
344			MNT_IUNLOCK(mp);
345		}
346
347		/*
348		 * If this is a snapshot request, take the snapshot.
349		 */
350		if (mp->mnt_flag & MNT_SNAPSHOT)
351			return (ffs_snapshot(mp, fspec));
352	}
353
354	/*
355	 * Not an update, or updating the name: look up the name
356	 * and verify that it refers to a sensible disk device.
357	 */
358	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
359	if ((error = namei(&ndp)) != 0)
360		return (error);
361	NDFREE(&ndp, NDF_ONLY_PNBUF);
362	devvp = ndp.ni_vp;
363	if (!vn_isdisk(devvp, &error)) {
364		vput(devvp);
365		return (error);
366	}
367
368	/*
369	 * If mount by non-root, then verify that user has necessary
370	 * permissions on the device.
371	 */
372	accessmode = VREAD;
373	if ((mp->mnt_flag & MNT_RDONLY) == 0)
374		accessmode |= VWRITE;
375	error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td);
376	if (error)
377		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
378	if (error) {
379		vput(devvp);
380		return (error);
381	}
382
383	if (mp->mnt_flag & MNT_UPDATE) {
384		/*
385		 * Update only
386		 *
387		 * If it's not the same vnode, or at least the same device
388		 * then it's not correct.
389		 */
390
391		if (devvp->v_rdev != ump->um_devvp->v_rdev)
392			error = EINVAL;	/* needs translation */
393		vput(devvp);
394		if (error)
395			return (error);
396	} else {
397		/*
398		 * New mount
399		 *
400		 * We need the name for the mount point (also used for
401		 * "last mounted on") copied in. If an error occurs,
402		 * the mount point is discarded by the upper level code.
403		 * Note that vfs_mount() populates f_mntonname for us.
404		 */
405		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
406			vrele(devvp);
407			return (error);
408		}
409	}
410	vfs_mountedfrom(mp, fspec);
411	return (0);
412}
413
414/*
415 * Compatibility with old mount system call.
416 */
417
418static int
419ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
420{
421	struct ufs_args args;
422	int error;
423
424	if (data == NULL)
425		return (EINVAL);
426	error = copyin(data, &args, sizeof args);
427	if (error)
428		return (error);
429
430	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
431	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
432	error = kernel_mount(ma, flags);
433
434	return (error);
435}
436
437/*
438 * Reload all incore data for a filesystem (used after running fsck on
439 * the root filesystem and finding things to fix). The filesystem must
440 * be mounted read-only.
441 *
442 * Things to do to update the mount:
443 *	1) invalidate all cached meta-data.
444 *	2) re-read superblock from disk.
445 *	3) re-read summary information from disk.
446 *	4) invalidate all inactive vnodes.
447 *	5) invalidate all cached file data.
448 *	6) re-read inode data for all active vnodes.
449 */
450static int
451ffs_reload(struct mount *mp, struct thread *td)
452{
453	struct vnode *vp, *mvp, *devvp;
454	struct inode *ip;
455	void *space;
456	struct buf *bp;
457	struct fs *fs, *newfs;
458	struct ufsmount *ump;
459	ufs2_daddr_t sblockloc;
460	int i, blks, size, error;
461	int32_t *lp;
462
463	if ((mp->mnt_flag & MNT_RDONLY) == 0)
464		return (EINVAL);
465	ump = VFSTOUFS(mp);
466	/*
467	 * Step 1: invalidate all cached meta-data.
468	 */
469	devvp = VFSTOUFS(mp)->um_devvp;
470	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
471	if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
472		panic("ffs_reload: dirty1");
473	VOP_UNLOCK(devvp, 0);
474
475	/*
476	 * Step 2: re-read superblock from disk.
477	 */
478	fs = VFSTOUFS(mp)->um_fs;
479	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
480	    NOCRED, &bp)) != 0)
481		return (error);
482	newfs = (struct fs *)bp->b_data;
483	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
484	     newfs->fs_magic != FS_UFS2_MAGIC) ||
485	    newfs->fs_bsize > MAXBSIZE ||
486	    newfs->fs_bsize < sizeof(struct fs)) {
487			brelse(bp);
488			return (EIO);		/* XXX needs translation */
489	}
490	/*
491	 * Copy pointer fields back into superblock before copying in	XXX
492	 * new superblock. These should really be in the ufsmount.	XXX
493	 * Note that important parameters (eg fs_ncg) are unchanged.
494	 */
495	newfs->fs_csp = fs->fs_csp;
496	newfs->fs_maxcluster = fs->fs_maxcluster;
497	newfs->fs_contigdirs = fs->fs_contigdirs;
498	newfs->fs_active = fs->fs_active;
499	/* The file system is still read-only. */
500	newfs->fs_ronly = 1;
501	sblockloc = fs->fs_sblockloc;
502	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
503	brelse(bp);
504	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
505	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
506	UFS_LOCK(ump);
507	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
508		printf("%s: reload pending error: blocks %jd files %d\n",
509		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
510		    fs->fs_pendinginodes);
511		fs->fs_pendingblocks = 0;
512		fs->fs_pendinginodes = 0;
513	}
514	UFS_UNLOCK(ump);
515
516	/*
517	 * Step 3: re-read summary information from disk.
518	 */
519	blks = howmany(fs->fs_cssize, fs->fs_fsize);
520	space = fs->fs_csp;
521	for (i = 0; i < blks; i += fs->fs_frag) {
522		size = fs->fs_bsize;
523		if (i + fs->fs_frag > blks)
524			size = (blks - i) * fs->fs_fsize;
525		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
526		    NOCRED, &bp);
527		if (error)
528			return (error);
529		bcopy(bp->b_data, space, (u_int)size);
530		space = (char *)space + size;
531		brelse(bp);
532	}
533	/*
534	 * We no longer know anything about clusters per cylinder group.
535	 */
536	if (fs->fs_contigsumsize > 0) {
537		lp = fs->fs_maxcluster;
538		for (i = 0; i < fs->fs_ncg; i++)
539			*lp++ = fs->fs_contigsumsize;
540	}
541
542loop:
543	MNT_ILOCK(mp);
544	MNT_VNODE_FOREACH(vp, mp, mvp) {
545		VI_LOCK(vp);
546		if (vp->v_iflag & VI_DOOMED) {
547			VI_UNLOCK(vp);
548			continue;
549		}
550		MNT_IUNLOCK(mp);
551		/*
552		 * Step 4: invalidate all cached file data.
553		 */
554		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
555			MNT_VNODE_FOREACH_ABORT(mp, mvp);
556			goto loop;
557		}
558		if (vinvalbuf(vp, 0, td, 0, 0))
559			panic("ffs_reload: dirty2");
560		/*
561		 * Step 5: re-read inode data for all active vnodes.
562		 */
563		ip = VTOI(vp);
564		error =
565		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
566		    (int)fs->fs_bsize, NOCRED, &bp);
567		if (error) {
568			VOP_UNLOCK(vp, 0);
569			vrele(vp);
570			MNT_VNODE_FOREACH_ABORT(mp, mvp);
571			return (error);
572		}
573		ffs_load_inode(bp, ip, fs, ip->i_number);
574		ip->i_effnlink = ip->i_nlink;
575		brelse(bp);
576		VOP_UNLOCK(vp, 0);
577		vrele(vp);
578		MNT_ILOCK(mp);
579	}
580	MNT_IUNLOCK(mp);
581	return (0);
582}
583
584/*
585 * Possible superblock locations ordered from most to least likely.
586 */
587static int sblock_try[] = SBLOCKSEARCH;
588
589/*
590 * Common code for mount and mountroot
591 */
592static int
593ffs_mountfs(devvp, mp, td)
594	struct vnode *devvp;
595	struct mount *mp;
596	struct thread *td;
597{
598	struct ufsmount *ump;
599	struct buf *bp;
600	struct fs *fs;
601	struct cdev *dev;
602	void *space;
603	ufs2_daddr_t sblockloc;
604	int error, i, blks, size, ronly;
605	int32_t *lp;
606	struct ucred *cred;
607	struct g_consumer *cp;
608	struct mount *nmp;
609
610	dev = devvp->v_rdev;
611	cred = td ? td->td_ucred : NOCRED;
612
613	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
614	DROP_GIANT();
615	g_topology_lock();
616	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
617
618	/*
619	 * If we are a root mount, drop the E flag so fsck can do its magic.
620	 * We will pick it up again when we remount R/W.
621	 */
622	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
623		error = g_access(cp, 0, 0, -1);
624	g_topology_unlock();
625	PICKUP_GIANT();
626	VOP_UNLOCK(devvp, 0);
627	if (error)
628		return (error);
629	if (devvp->v_rdev->si_iosize_max != 0)
630		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
631	if (mp->mnt_iosize_max > MAXPHYS)
632		mp->mnt_iosize_max = MAXPHYS;
633
634	devvp->v_bufobj.bo_private = cp;
635	devvp->v_bufobj.bo_ops = &ffs_ops;
636
637	bp = NULL;
638	ump = NULL;
639	fs = NULL;
640	sblockloc = 0;
641	/*
642	 * Try reading the superblock in each of its possible locations.
643	 */
644	for (i = 0; sblock_try[i] != -1; i++) {
645		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
646			error = EINVAL;
647			vfs_mount_error(mp,
648			    "Invalid sectorsize %d for superblock size %d",
649			    cp->provider->sectorsize, SBLOCKSIZE);
650			goto out;
651		}
652		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
653		    cred, &bp)) != 0)
654			goto out;
655		fs = (struct fs *)bp->b_data;
656		sblockloc = sblock_try[i];
657		if ((fs->fs_magic == FS_UFS1_MAGIC ||
658		     (fs->fs_magic == FS_UFS2_MAGIC &&
659		      (fs->fs_sblockloc == sblockloc ||
660		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
661		    fs->fs_bsize <= MAXBSIZE &&
662		    fs->fs_bsize >= sizeof(struct fs))
663			break;
664		brelse(bp);
665		bp = NULL;
666	}
667	if (sblock_try[i] == -1) {
668		error = EINVAL;		/* XXX needs translation */
669		goto out;
670	}
671	fs->fs_fmod = 0;
672	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
673	fs->fs_flags &= ~FS_UNCLEAN;
674	if (fs->fs_clean == 0) {
675		fs->fs_flags |= FS_UNCLEAN;
676		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
677		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
678		     (fs->fs_flags & FS_DOSOFTDEP))) {
679			printf(
680"WARNING: %s was not properly dismounted\n",
681			    fs->fs_fsmnt);
682		} else {
683			printf(
684"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
685			    fs->fs_fsmnt);
686			error = EPERM;
687			goto out;
688		}
689		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
690		    (mp->mnt_flag & MNT_FORCE)) {
691			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
692			    (intmax_t)fs->fs_pendingblocks,
693			    fs->fs_pendinginodes);
694			fs->fs_pendingblocks = 0;
695			fs->fs_pendinginodes = 0;
696		}
697	}
698	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
699		printf("%s: mount pending error: blocks %jd files %d\n",
700		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
701		    fs->fs_pendinginodes);
702		fs->fs_pendingblocks = 0;
703		fs->fs_pendinginodes = 0;
704	}
705	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
706#ifdef UFS_GJOURNAL
707		/*
708		 * Get journal provider name.
709		 */
710		size = 1024;
711		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
712		if (g_io_getattr("GJOURNAL::provider", cp, &size,
713		    mp->mnt_gjprovider) == 0) {
714			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
715			    M_UFSMNT, M_WAITOK);
716			MNT_ILOCK(mp);
717			mp->mnt_flag |= MNT_GJOURNAL;
718			MNT_IUNLOCK(mp);
719		} else {
720			printf(
721"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
722			    mp->mnt_stat.f_mntonname);
723			free(mp->mnt_gjprovider, M_UFSMNT);
724			mp->mnt_gjprovider = NULL;
725		}
726#else
727		printf(
728"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
729		    mp->mnt_stat.f_mntonname);
730#endif
731	} else {
732		mp->mnt_gjprovider = NULL;
733	}
734	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
735	ump->um_cp = cp;
736	ump->um_bo = &devvp->v_bufobj;
737	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
738	if (fs->fs_magic == FS_UFS1_MAGIC) {
739		ump->um_fstype = UFS1;
740		ump->um_balloc = ffs_balloc_ufs1;
741	} else {
742		ump->um_fstype = UFS2;
743		ump->um_balloc = ffs_balloc_ufs2;
744	}
745	ump->um_blkatoff = ffs_blkatoff;
746	ump->um_truncate = ffs_truncate;
747	ump->um_update = ffs_update;
748	ump->um_valloc = ffs_valloc;
749	ump->um_vfree = ffs_vfree;
750	ump->um_ifree = ffs_ifree;
751	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
752	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
753	if (fs->fs_sbsize < SBLOCKSIZE)
754		bp->b_flags |= B_INVAL | B_NOCACHE;
755	brelse(bp);
756	bp = NULL;
757	fs = ump->um_fs;
758	ffs_oldfscompat_read(fs, ump, sblockloc);
759	fs->fs_ronly = ronly;
760	size = fs->fs_cssize;
761	blks = howmany(size, fs->fs_fsize);
762	if (fs->fs_contigsumsize > 0)
763		size += fs->fs_ncg * sizeof(int32_t);
764	size += fs->fs_ncg * sizeof(u_int8_t);
765	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
766	fs->fs_csp = space;
767	for (i = 0; i < blks; i += fs->fs_frag) {
768		size = fs->fs_bsize;
769		if (i + fs->fs_frag > blks)
770			size = (blks - i) * fs->fs_fsize;
771		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
772		    cred, &bp)) != 0) {
773			free(fs->fs_csp, M_UFSMNT);
774			goto out;
775		}
776		bcopy(bp->b_data, space, (u_int)size);
777		space = (char *)space + size;
778		brelse(bp);
779		bp = NULL;
780	}
781	if (fs->fs_contigsumsize > 0) {
782		fs->fs_maxcluster = lp = space;
783		for (i = 0; i < fs->fs_ncg; i++)
784			*lp++ = fs->fs_contigsumsize;
785		space = lp;
786	}
787	size = fs->fs_ncg * sizeof(u_int8_t);
788	fs->fs_contigdirs = (u_int8_t *)space;
789	bzero(fs->fs_contigdirs, size);
790	fs->fs_active = NULL;
791	mp->mnt_data = ump;
792	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
793	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
794	nmp = NULL;
795	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
796	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
797		if (nmp)
798			vfs_rel(nmp);
799		vfs_getnewfsid(mp);
800	}
801	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
802	MNT_ILOCK(mp);
803	mp->mnt_flag |= MNT_LOCAL;
804	MNT_IUNLOCK(mp);
805	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
806#ifdef MAC
807		MNT_ILOCK(mp);
808		mp->mnt_flag |= MNT_MULTILABEL;
809		MNT_IUNLOCK(mp);
810#else
811		printf(
812"WARNING: %s: multilabel flag on fs but no MAC support\n",
813		    mp->mnt_stat.f_mntonname);
814#endif
815	}
816	if ((fs->fs_flags & FS_ACLS) != 0) {
817#ifdef UFS_ACL
818		MNT_ILOCK(mp);
819		mp->mnt_flag |= MNT_ACLS;
820		MNT_IUNLOCK(mp);
821#else
822		printf(
823"WARNING: %s: ACLs flag on fs but no ACLs support\n",
824		    mp->mnt_stat.f_mntonname);
825#endif
826	}
827	ump->um_mountp = mp;
828	ump->um_dev = dev;
829	ump->um_devvp = devvp;
830	ump->um_nindir = fs->fs_nindir;
831	ump->um_bptrtodb = fs->fs_fsbtodb;
832	ump->um_seqinc = fs->fs_frag;
833	for (i = 0; i < MAXQUOTAS; i++)
834		ump->um_quotas[i] = NULLVP;
835#ifdef UFS_EXTATTR
836	ufs_extattr_uepm_init(&ump->um_extattr);
837#endif
838	/*
839	 * Set FS local "last mounted on" information (NULL pad)
840	 */
841	bzero(fs->fs_fsmnt, MAXMNTLEN);
842	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
843
844	if( mp->mnt_flag & MNT_ROOTFS) {
845		/*
846		 * Root mount; update timestamp in mount structure.
847		 * this will be used by the common root mount code
848		 * to update the system clock.
849		 */
850		mp->mnt_time = fs->fs_time;
851	}
852
853	if (ronly == 0) {
854		if ((fs->fs_flags & FS_DOSOFTDEP) &&
855		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
856			free(fs->fs_csp, M_UFSMNT);
857			goto out;
858		}
859		if (fs->fs_snapinum[0] != 0)
860			ffs_snapshot_mount(mp);
861		fs->fs_fmod = 1;
862		fs->fs_clean = 0;
863		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
864	}
865	/*
866	 * Initialize filesystem stat information in mount struct.
867	 */
868	MNT_ILOCK(mp);
869	mp->mnt_kern_flag |= MNTK_MPSAFE;
870	MNT_IUNLOCK(mp);
871#ifdef UFS_EXTATTR
872#ifdef UFS_EXTATTR_AUTOSTART
873	/*
874	 *
875	 * Auto-starting does the following:
876	 *	- check for /.attribute in the fs, and extattr_start if so
877	 *	- for each file in .attribute, enable that file with
878	 * 	  an attribute of the same name.
879	 * Not clear how to report errors -- probably eat them.
880	 * This would all happen while the filesystem was busy/not
881	 * available, so would effectively be "atomic".
882	 */
883	mp->mnt_stat.f_iosize = fs->fs_bsize;
884	(void) ufs_extattr_autostart(mp, td);
885#endif /* !UFS_EXTATTR_AUTOSTART */
886#endif /* !UFS_EXTATTR */
887	return (0);
888out:
889	if (bp)
890		brelse(bp);
891	if (cp != NULL) {
892		DROP_GIANT();
893		g_topology_lock();
894		g_vfs_close(cp, td);
895		g_topology_unlock();
896		PICKUP_GIANT();
897	}
898	if (ump) {
899		mtx_destroy(UFS_MTX(ump));
900		if (mp->mnt_gjprovider != NULL) {
901			free(mp->mnt_gjprovider, M_UFSMNT);
902			mp->mnt_gjprovider = NULL;
903		}
904		free(ump->um_fs, M_UFSMNT);
905		free(ump, M_UFSMNT);
906		mp->mnt_data = NULL;
907	}
908	return (error);
909}
910
911#include <sys/sysctl.h>
912static int bigcgs = 0;
913SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
914
915/*
916 * Sanity checks for loading old filesystem superblocks.
917 * See ffs_oldfscompat_write below for unwound actions.
918 *
919 * XXX - Parts get retired eventually.
920 * Unfortunately new bits get added.
921 */
922static void
923ffs_oldfscompat_read(fs, ump, sblockloc)
924	struct fs *fs;
925	struct ufsmount *ump;
926	ufs2_daddr_t sblockloc;
927{
928	off_t maxfilesize;
929
930	/*
931	 * If not yet done, update fs_flags location and value of fs_sblockloc.
932	 */
933	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
934		fs->fs_flags = fs->fs_old_flags;
935		fs->fs_old_flags |= FS_FLAGS_UPDATED;
936		fs->fs_sblockloc = sblockloc;
937	}
938	/*
939	 * If not yet done, update UFS1 superblock with new wider fields.
940	 */
941	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
942		fs->fs_maxbsize = fs->fs_bsize;
943		fs->fs_time = fs->fs_old_time;
944		fs->fs_size = fs->fs_old_size;
945		fs->fs_dsize = fs->fs_old_dsize;
946		fs->fs_csaddr = fs->fs_old_csaddr;
947		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
948		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
949		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
950		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
951	}
952	if (fs->fs_magic == FS_UFS1_MAGIC &&
953	    fs->fs_old_inodefmt < FS_44INODEFMT) {
954		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
955		fs->fs_qbmask = ~fs->fs_bmask;
956		fs->fs_qfmask = ~fs->fs_fmask;
957	}
958	if (fs->fs_magic == FS_UFS1_MAGIC) {
959		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
960		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
961		if (fs->fs_maxfilesize > maxfilesize)
962			fs->fs_maxfilesize = maxfilesize;
963	}
964	/* Compatibility for old filesystems */
965	if (fs->fs_avgfilesize <= 0)
966		fs->fs_avgfilesize = AVFILESIZ;
967	if (fs->fs_avgfpdir <= 0)
968		fs->fs_avgfpdir = AFPDIR;
969	if (bigcgs) {
970		fs->fs_save_cgsize = fs->fs_cgsize;
971		fs->fs_cgsize = fs->fs_bsize;
972	}
973}
974
975/*
976 * Unwinding superblock updates for old filesystems.
977 * See ffs_oldfscompat_read above for details.
978 *
979 * XXX - Parts get retired eventually.
980 * Unfortunately new bits get added.
981 */
982static void
983ffs_oldfscompat_write(fs, ump)
984	struct fs *fs;
985	struct ufsmount *ump;
986{
987
988	/*
989	 * Copy back UFS2 updated fields that UFS1 inspects.
990	 */
991	if (fs->fs_magic == FS_UFS1_MAGIC) {
992		fs->fs_old_time = fs->fs_time;
993		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
994		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
995		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
996		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
997		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
998	}
999	if (bigcgs) {
1000		fs->fs_cgsize = fs->fs_save_cgsize;
1001		fs->fs_save_cgsize = 0;
1002	}
1003}
1004
1005/*
1006 * unmount system call
1007 */
1008static int
1009ffs_unmount(mp, mntflags, td)
1010	struct mount *mp;
1011	int mntflags;
1012	struct thread *td;
1013{
1014	struct ufsmount *ump = VFSTOUFS(mp);
1015	struct fs *fs;
1016	int error, flags;
1017
1018	flags = 0;
1019	if (mntflags & MNT_FORCE) {
1020		flags |= FORCECLOSE;
1021	}
1022#ifdef UFS_EXTATTR
1023	if ((error = ufs_extattr_stop(mp, td))) {
1024		if (error != EOPNOTSUPP)
1025			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1026			    error);
1027	} else {
1028		ufs_extattr_uepm_destroy(&ump->um_extattr);
1029	}
1030#endif
1031	if (mp->mnt_flag & MNT_SOFTDEP) {
1032		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
1033			return (error);
1034	} else {
1035		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
1036			return (error);
1037	}
1038	fs = ump->um_fs;
1039	UFS_LOCK(ump);
1040	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1041		printf("%s: unmount pending error: blocks %jd files %d\n",
1042		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1043		    fs->fs_pendinginodes);
1044		fs->fs_pendingblocks = 0;
1045		fs->fs_pendinginodes = 0;
1046	}
1047	UFS_UNLOCK(ump);
1048	if (fs->fs_ronly == 0) {
1049		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1050		error = ffs_sbupdate(ump, MNT_WAIT, 0);
1051		if (error) {
1052			fs->fs_clean = 0;
1053			return (error);
1054		}
1055	}
1056	DROP_GIANT();
1057	g_topology_lock();
1058	g_vfs_close(ump->um_cp, td);
1059	g_topology_unlock();
1060	PICKUP_GIANT();
1061	vrele(ump->um_devvp);
1062	mtx_destroy(UFS_MTX(ump));
1063	if (mp->mnt_gjprovider != NULL) {
1064		free(mp->mnt_gjprovider, M_UFSMNT);
1065		mp->mnt_gjprovider = NULL;
1066	}
1067	free(fs->fs_csp, M_UFSMNT);
1068	free(fs, M_UFSMNT);
1069	free(ump, M_UFSMNT);
1070	mp->mnt_data = NULL;
1071	MNT_ILOCK(mp);
1072	mp->mnt_flag &= ~MNT_LOCAL;
1073	MNT_IUNLOCK(mp);
1074	return (error);
1075}
1076
1077/*
1078 * Flush out all the files in a filesystem.
1079 */
1080int
1081ffs_flushfiles(mp, flags, td)
1082	struct mount *mp;
1083	int flags;
1084	struct thread *td;
1085{
1086	struct ufsmount *ump;
1087	int error;
1088
1089	ump = VFSTOUFS(mp);
1090#ifdef QUOTA
1091	if (mp->mnt_flag & MNT_QUOTA) {
1092		int i;
1093		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1094		if (error)
1095			return (error);
1096		for (i = 0; i < MAXQUOTAS; i++) {
1097			quotaoff(td, mp, i);
1098		}
1099		/*
1100		 * Here we fall through to vflush again to ensure
1101		 * that we have gotten rid of all the system vnodes.
1102		 */
1103	}
1104#endif
1105	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1106	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1107		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1108			return (error);
1109		ffs_snapshot_unmount(mp);
1110		flags |= FORCECLOSE;
1111		/*
1112		 * Here we fall through to vflush again to ensure
1113		 * that we have gotten rid of all the system vnodes.
1114		 */
1115	}
1116        /*
1117	 * Flush all the files.
1118	 */
1119	if ((error = vflush(mp, 0, flags, td)) != 0)
1120		return (error);
1121	/*
1122	 * Flush filesystem metadata.
1123	 */
1124	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1125	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1126	VOP_UNLOCK(ump->um_devvp, 0);
1127	return (error);
1128}
1129
1130/*
1131 * Get filesystem statistics.
1132 */
1133static int
1134ffs_statfs(mp, sbp, td)
1135	struct mount *mp;
1136	struct statfs *sbp;
1137	struct thread *td;
1138{
1139	struct ufsmount *ump;
1140	struct fs *fs;
1141
1142	ump = VFSTOUFS(mp);
1143	fs = ump->um_fs;
1144	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1145		panic("ffs_statfs");
1146	sbp->f_version = STATFS_VERSION;
1147	sbp->f_bsize = fs->fs_fsize;
1148	sbp->f_iosize = fs->fs_bsize;
1149	sbp->f_blocks = fs->fs_dsize;
1150	UFS_LOCK(ump);
1151	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1152	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1153	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1154	    dbtofsb(fs, fs->fs_pendingblocks);
1155	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1156	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1157	UFS_UNLOCK(ump);
1158	sbp->f_namemax = NAME_MAX;
1159	return (0);
1160}
1161
1162/*
1163 * Go through the disk queues to initiate sandbagged IO;
1164 * go through the inodes to write those that have been modified;
1165 * initiate the writing of the super block if it has been modified.
1166 *
1167 * Note: we are always called with the filesystem marked `MPBUSY'.
1168 */
1169static int
1170ffs_sync(mp, waitfor, td)
1171	struct mount *mp;
1172	int waitfor;
1173	struct thread *td;
1174{
1175	struct vnode *mvp, *vp, *devvp;
1176	struct inode *ip;
1177	struct ufsmount *ump = VFSTOUFS(mp);
1178	struct fs *fs;
1179	int error, count, wait, lockreq, allerror = 0;
1180	int suspend;
1181	int suspended;
1182	int secondary_writes;
1183	int secondary_accwrites;
1184	int softdep_deps;
1185	int softdep_accdeps;
1186	struct bufobj *bo;
1187
1188	fs = ump->um_fs;
1189	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1190		printf("fs = %s\n", fs->fs_fsmnt);
1191		panic("ffs_sync: rofs mod");
1192	}
1193	/*
1194	 * Write back each (modified) inode.
1195	 */
1196	wait = 0;
1197	suspend = 0;
1198	suspended = 0;
1199	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1200	if (waitfor == MNT_SUSPEND) {
1201		suspend = 1;
1202		waitfor = MNT_WAIT;
1203	}
1204	if (waitfor == MNT_WAIT) {
1205		wait = 1;
1206		lockreq = LK_EXCLUSIVE;
1207	}
1208	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1209	MNT_ILOCK(mp);
1210loop:
1211	/* Grab snapshot of secondary write counts */
1212	secondary_writes = mp->mnt_secondary_writes;
1213	secondary_accwrites = mp->mnt_secondary_accwrites;
1214
1215	/* Grab snapshot of softdep dependency counts */
1216	MNT_IUNLOCK(mp);
1217	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1218	MNT_ILOCK(mp);
1219
1220	MNT_VNODE_FOREACH(vp, mp, mvp) {
1221		/*
1222		 * Depend on the mntvnode_slock to keep things stable enough
1223		 * for a quick test.  Since there might be hundreds of
1224		 * thousands of vnodes, we cannot afford even a subroutine
1225		 * call unless there's a good chance that we have work to do.
1226		 */
1227		VI_LOCK(vp);
1228		if (vp->v_iflag & VI_DOOMED) {
1229			VI_UNLOCK(vp);
1230			continue;
1231		}
1232		ip = VTOI(vp);
1233		if (vp->v_type == VNON || ((ip->i_flag &
1234		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1235		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1236			VI_UNLOCK(vp);
1237			continue;
1238		}
1239		MNT_IUNLOCK(mp);
1240		if ((error = vget(vp, lockreq, td)) != 0) {
1241			MNT_ILOCK(mp);
1242			if (error == ENOENT || error == ENOLCK) {
1243				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1244				goto loop;
1245			}
1246			continue;
1247		}
1248		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1249			allerror = error;
1250		vput(vp);
1251		MNT_ILOCK(mp);
1252	}
1253	MNT_IUNLOCK(mp);
1254	/*
1255	 * Force stale filesystem control information to be flushed.
1256	 */
1257	if (waitfor == MNT_WAIT) {
1258		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1259			allerror = error;
1260		/* Flushed work items may create new vnodes to clean */
1261		if (allerror == 0 && count) {
1262			MNT_ILOCK(mp);
1263			goto loop;
1264		}
1265	}
1266#ifdef QUOTA
1267	qsync(mp);
1268#endif
1269	devvp = ump->um_devvp;
1270	bo = &devvp->v_bufobj;
1271	BO_LOCK(bo);
1272	if (waitfor != MNT_LAZY &&
1273	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1274		BO_UNLOCK(bo);
1275		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1276		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1277			allerror = error;
1278		VOP_UNLOCK(devvp, 0);
1279		if (allerror == 0 && waitfor == MNT_WAIT) {
1280			MNT_ILOCK(mp);
1281			goto loop;
1282		}
1283	} else if (suspend != 0) {
1284		if (softdep_check_suspend(mp,
1285					  devvp,
1286					  softdep_deps,
1287					  softdep_accdeps,
1288					  secondary_writes,
1289					  secondary_accwrites) != 0)
1290			goto loop;	/* More work needed */
1291		mtx_assert(MNT_MTX(mp), MA_OWNED);
1292		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1293		MNT_IUNLOCK(mp);
1294		suspended = 1;
1295	} else
1296		BO_UNLOCK(bo);
1297	/*
1298	 * Write back modified superblock.
1299	 */
1300	if (fs->fs_fmod != 0 &&
1301	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1302		allerror = error;
1303	return (allerror);
1304}
1305
1306int
1307ffs_vget(mp, ino, flags, vpp)
1308	struct mount *mp;
1309	ino_t ino;
1310	int flags;
1311	struct vnode **vpp;
1312{
1313	struct fs *fs;
1314	struct inode *ip;
1315	struct ufsmount *ump;
1316	struct buf *bp;
1317	struct vnode *vp;
1318	struct cdev *dev;
1319	int error;
1320	struct thread *td;
1321
1322	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1323	if (error || *vpp != NULL)
1324		return (error);
1325
1326	/*
1327	 * We must promote to an exclusive lock for vnode creation.  This
1328	 * can happen if lookup is passed LOCKSHARED.
1329 	 */
1330	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1331		flags &= ~LK_TYPE_MASK;
1332		flags |= LK_EXCLUSIVE;
1333	}
1334
1335	/*
1336	 * We do not lock vnode creation as it is believed to be too
1337	 * expensive for such rare case as simultaneous creation of vnode
1338	 * for same ino by different processes. We just allow them to race
1339	 * and check later to decide who wins. Let the race begin!
1340	 */
1341
1342	ump = VFSTOUFS(mp);
1343	dev = ump->um_dev;
1344	fs = ump->um_fs;
1345
1346	/*
1347	 * If this MALLOC() is performed after the getnewvnode()
1348	 * it might block, leaving a vnode with a NULL v_data to be
1349	 * found by ffs_sync() if a sync happens to fire right then,
1350	 * which will cause a panic because ffs_sync() blindly
1351	 * dereferences vp->v_data (as well it should).
1352	 */
1353	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1354
1355	/* Allocate a new vnode/inode. */
1356	if (fs->fs_magic == FS_UFS1_MAGIC)
1357		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
1358	else
1359		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
1360	if (error) {
1361		*vpp = NULL;
1362		uma_zfree(uma_inode, ip);
1363		return (error);
1364	}
1365	/*
1366	 * FFS supports recursive and shared locking.
1367	 */
1368	VN_LOCK_AREC(vp);
1369	VN_LOCK_ASHARE(vp);
1370	vp->v_data = ip;
1371	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1372	ip->i_vnode = vp;
1373	ip->i_ump = ump;
1374	ip->i_fs = fs;
1375	ip->i_dev = dev;
1376	ip->i_number = ino;
1377#ifdef QUOTA
1378	{
1379		int i;
1380		for (i = 0; i < MAXQUOTAS; i++)
1381			ip->i_dquot[i] = NODQUOT;
1382	}
1383#endif
1384
1385	td = curthread;
1386	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1387	error = insmntque(vp, mp);
1388	if (error != 0) {
1389		uma_zfree(uma_inode, ip);
1390		*vpp = NULL;
1391		return (error);
1392	}
1393	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
1394	if (error || *vpp != NULL)
1395		return (error);
1396
1397	/* Read in the disk contents for the inode, copy into the inode. */
1398	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1399	    (int)fs->fs_bsize, NOCRED, &bp);
1400	if (error) {
1401		/*
1402		 * The inode does not contain anything useful, so it would
1403		 * be misleading to leave it on its hash chain. With mode
1404		 * still zero, it will be unlinked and returned to the free
1405		 * list by vput().
1406		 */
1407		brelse(bp);
1408		vput(vp);
1409		*vpp = NULL;
1410		return (error);
1411	}
1412	if (ip->i_ump->um_fstype == UFS1)
1413		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1414	else
1415		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1416	ffs_load_inode(bp, ip, fs, ino);
1417	if (DOINGSOFTDEP(vp))
1418		softdep_load_inodeblock(ip);
1419	else
1420		ip->i_effnlink = ip->i_nlink;
1421	bqrelse(bp);
1422
1423	/*
1424	 * Initialize the vnode from the inode, check for aliases.
1425	 * Note that the underlying vnode may have changed.
1426	 */
1427	if (ip->i_ump->um_fstype == UFS1)
1428		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
1429	else
1430		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
1431	if (error) {
1432		vput(vp);
1433		*vpp = NULL;
1434		return (error);
1435	}
1436
1437	/*
1438	 * Finish inode initialization.
1439	 */
1440
1441	/*
1442	 * Set up a generation number for this inode if it does not
1443	 * already have one. This should only happen on old filesystems.
1444	 */
1445	if (ip->i_gen == 0) {
1446		ip->i_gen = arc4random() / 2 + 1;
1447		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1448			ip->i_flag |= IN_MODIFIED;
1449			DIP_SET(ip, i_gen, ip->i_gen);
1450		}
1451	}
1452	/*
1453	 * Ensure that uid and gid are correct. This is a temporary
1454	 * fix until fsck has been changed to do the update.
1455	 */
1456	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1457	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1458		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1459		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1460	}						/* XXX */
1461
1462#ifdef MAC
1463	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1464		/*
1465		 * If this vnode is already allocated, and we're running
1466		 * multi-label, attempt to perform a label association
1467		 * from the extended attributes on the inode.
1468		 */
1469		error = mac_vnode_associate_extattr(mp, vp);
1470		if (error) {
1471			/* ufs_inactive will release ip->i_devvp ref. */
1472			vput(vp);
1473			*vpp = NULL;
1474			return (error);
1475		}
1476	}
1477#endif
1478
1479	*vpp = vp;
1480	return (0);
1481}
1482
1483/*
1484 * File handle to vnode
1485 *
1486 * Have to be really careful about stale file handles:
1487 * - check that the inode number is valid
1488 * - call ffs_vget() to get the locked inode
1489 * - check for an unallocated inode (i_mode == 0)
1490 * - check that the given client host has export rights and return
1491 *   those rights via. exflagsp and credanonp
1492 */
1493static int
1494ffs_fhtovp(mp, fhp, vpp)
1495	struct mount *mp;
1496	struct fid *fhp;
1497	struct vnode **vpp;
1498{
1499	struct ufid *ufhp;
1500	struct fs *fs;
1501
1502	ufhp = (struct ufid *)fhp;
1503	fs = VFSTOUFS(mp)->um_fs;
1504	if (ufhp->ufid_ino < ROOTINO ||
1505	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1506		return (ESTALE);
1507	return (ufs_fhtovp(mp, ufhp, vpp));
1508}
1509
/*
 * Initialize the filesystem.
 *
 * Runs the soft updates initialization first, then ufs_init(), whose
 * return value is passed back to the caller.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1521
/*
 * Undo the work of ffs_init().
 *
 * The two steps run in the reverse order of ffs_init(): ufs_uninit()
 * first, then the soft updates teardown.  The result of ufs_uninit()
 * is returned.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1535
1536/*
1537 * Write a superblock and associated information back to disk.
1538 */
1539int
1540ffs_sbupdate(mp, waitfor, suspended)
1541	struct ufsmount *mp;
1542	int waitfor;
1543	int suspended;
1544{
1545	struct fs *fs = mp->um_fs;
1546	struct buf *sbbp;
1547	struct buf *bp;
1548	int blks;
1549	void *space;
1550	int i, size, error, allerror = 0;
1551
1552	if (fs->fs_ronly == 1 &&
1553	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1554	    (MNT_RDONLY | MNT_UPDATE))
1555		panic("ffs_sbupdate: write read-only filesystem");
1556	/*
1557	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1558	 */
1559	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1560	    0, 0, 0);
1561	/*
1562	 * First write back the summary information.
1563	 */
1564	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1565	space = fs->fs_csp;
1566	for (i = 0; i < blks; i += fs->fs_frag) {
1567		size = fs->fs_bsize;
1568		if (i + fs->fs_frag > blks)
1569			size = (blks - i) * fs->fs_fsize;
1570		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1571		    size, 0, 0, 0);
1572		bcopy(space, bp->b_data, (u_int)size);
1573		space = (char *)space + size;
1574		if (suspended)
1575			bp->b_flags |= B_VALIDSUSPWRT;
1576		if (waitfor != MNT_WAIT)
1577			bawrite(bp);
1578		else if ((error = bwrite(bp)) != 0)
1579			allerror = error;
1580	}
1581	/*
1582	 * Now write back the superblock itself. If any errors occurred
1583	 * up to this point, then fail so that the superblock avoids
1584	 * being written out as clean.
1585	 */
1586	if (allerror) {
1587		brelse(sbbp);
1588		return (allerror);
1589	}
1590	bp = sbbp;
1591	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1592	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1593		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1594		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1595		fs->fs_sblockloc = SBLOCK_UFS1;
1596	}
1597	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1598	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1599		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1600		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1601		fs->fs_sblockloc = SBLOCK_UFS2;
1602	}
1603	fs->fs_fmod = 0;
1604	fs->fs_time = time_second;
1605	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1606	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1607	if (suspended)
1608		bp->b_flags |= B_VALIDSUSPWRT;
1609	if (waitfor != MNT_WAIT)
1610		bawrite(bp);
1611	else if ((error = bwrite(bp)) != 0)
1612		allerror = error;
1613	return (allerror);
1614}
1615
/*
 * Extended attribute control entry point.  Forwards the request to the
 * UFS implementation when the kernel is built with UFS_EXTATTR and to
 * vfs_stdextattrctl() otherwise, returning whatever the callee returns.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1629
1630static void
1631ffs_ifree(struct ufsmount *ump, struct inode *ip)
1632{
1633
1634	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1635		uma_zfree(uma_ufs1, ip->i_din1);
1636	else if (ip->i_din2 != NULL)
1637		uma_zfree(uma_ufs2, ip->i_din2);
1638	uma_zfree(uma_inode, ip);
1639}
1640
1641static int dobkgrdwrite = 1;
1642SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
1643    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1644
1645/*
1646 * Complete a background write started from bwrite.
1647 */
1648static void
1649ffs_backgroundwritedone(struct buf *bp)
1650{
1651	struct bufobj *bufobj;
1652	struct buf *origbp;
1653
1654	/*
1655	 * Find the original buffer that we are writing.
1656	 */
1657	bufobj = bp->b_bufobj;
1658	BO_LOCK(bufobj);
1659	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1660		panic("backgroundwritedone: lost buffer");
1661	/* Grab an extra reference to be dropped by the bufdone() below. */
1662	bufobj_wrefl(bufobj);
1663	BO_UNLOCK(bufobj);
1664	/*
1665	 * Process dependencies then return any unfinished ones.
1666	 */
1667	if (!LIST_EMPTY(&bp->b_dep))
1668		buf_complete(bp);
1669#ifdef SOFTUPDATES
1670	if (!LIST_EMPTY(&bp->b_dep))
1671		softdep_move_dependencies(bp, origbp);
1672#endif
1673	/*
1674	 * This buffer is marked B_NOCACHE so when it is released
1675	 * by biodone it will be tossed.
1676	 */
1677	bp->b_flags |= B_NOCACHE;
1678	bp->b_flags &= ~B_CACHE;
1679	bufdone(bp);
1680	BO_LOCK(bufobj);
1681	/*
1682	 * Clear the BV_BKGRDINPROG flag in the original buffer
1683	 * and awaken it if it is waiting for the write to complete.
1684	 * If BV_BKGRDINPROG is not set in the original buffer it must
1685	 * have been released and re-instantiated - which is not legal.
1686	 */
1687	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1688	    ("backgroundwritedone: lost buffer2"));
1689	origbp->b_vflags &= ~BV_BKGRDINPROG;
1690	if (origbp->b_vflags & BV_BKGRDWAIT) {
1691		origbp->b_vflags &= ~BV_BKGRDWAIT;
1692		wakeup(&origbp->b_xflags);
1693	}
1694	BO_UNLOCK(bufobj);
1695}
1696
1697
1698/*
1699 * Write, release buffer on completion.  (Done by iodone
1700 * if async).  Do not bother writing anything if the buffer
1701 * is invalid.
1702 *
1703 * Note that we set B_CACHE here, indicating that buffer is
1704 * fully valid and thus cacheable.  This is true even of NFS
1705 * now so we set it generally.  This could be set either here
1706 * or in biodone() since the I/O is synchronous.  We put it
1707 * here.
1708 */
1709static int
1710ffs_bufwrite(struct buf *bp)
1711{
1712	int oldflags, s;
1713	struct buf *newbp;
1714
1715	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1716	if (bp->b_flags & B_INVAL) {
1717		brelse(bp);
1718		return (0);
1719	}
1720
1721	oldflags = bp->b_flags;
1722
1723	if (!BUF_ISLOCKED(bp))
1724		panic("bufwrite: buffer is not busy???");
1725	s = splbio();
1726	/*
1727	 * If a background write is already in progress, delay
1728	 * writing this block if it is asynchronous. Otherwise
1729	 * wait for the background write to complete.
1730	 */
1731	BO_LOCK(bp->b_bufobj);
1732	if (bp->b_vflags & BV_BKGRDINPROG) {
1733		if (bp->b_flags & B_ASYNC) {
1734			BO_UNLOCK(bp->b_bufobj);
1735			splx(s);
1736			bdwrite(bp);
1737			return (0);
1738		}
1739		bp->b_vflags |= BV_BKGRDWAIT;
1740		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1741		if (bp->b_vflags & BV_BKGRDINPROG)
1742			panic("bufwrite: still writing");
1743	}
1744	BO_UNLOCK(bp->b_bufobj);
1745
1746	/* Mark the buffer clean */
1747	bundirty(bp);
1748
1749	/*
1750	 * If this buffer is marked for background writing and we
1751	 * do not have to wait for it, make a copy and write the
1752	 * copy so as to leave this buffer ready for further use.
1753	 *
1754	 * This optimization eats a lot of memory.  If we have a page
1755	 * or buffer shortfall we can't do it.
1756	 */
1757	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1758	    (bp->b_flags & B_ASYNC) &&
1759	    !vm_page_count_severe() &&
1760	    !buf_dirty_count_severe()) {
1761		KASSERT(bp->b_iodone == NULL,
1762		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1763
1764		/* get a new block */
1765		newbp = geteblk(bp->b_bufsize);
1766
1767		/*
1768		 * set it to be identical to the old block.  We have to
1769		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
1770		 * to avoid confusing the splay tree and gbincore().
1771		 */
1772		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1773		newbp->b_lblkno = bp->b_lblkno;
1774		newbp->b_xflags |= BX_BKGRDMARKER;
1775		BO_LOCK(bp->b_bufobj);
1776		bp->b_vflags |= BV_BKGRDINPROG;
1777		bgetvp(bp->b_vp, newbp);
1778		BO_UNLOCK(bp->b_bufobj);
1779		newbp->b_bufobj = &bp->b_vp->v_bufobj;
1780		newbp->b_blkno = bp->b_blkno;
1781		newbp->b_offset = bp->b_offset;
1782		newbp->b_iodone = ffs_backgroundwritedone;
1783		newbp->b_flags |= B_ASYNC;
1784		newbp->b_flags &= ~B_INVAL;
1785
1786#ifdef SOFTUPDATES
1787		/* move over the dependencies */
1788		if (!LIST_EMPTY(&bp->b_dep))
1789			softdep_move_dependencies(bp, newbp);
1790#endif
1791
1792		/*
1793		 * Initiate write on the copy, release the original to
1794		 * the B_LOCKED queue so that it cannot go away until
1795		 * the background write completes. If not locked it could go
1796		 * away and then be reconstituted while it was being written.
1797		 * If the reconstituted buffer were written, we could end up
1798		 * with two background copies being written at the same time.
1799		 */
1800		bqrelse(bp);
1801		bp = newbp;
1802	}
1803
1804	/* Let the normal bufwrite do the rest for us */
1805	return (bufwrite(bp));
1806}
1807
1808
1809static void
1810ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1811{
1812	struct vnode *vp;
1813	int error;
1814	struct buf *tbp;
1815
1816	vp = bo->__bo_vnode;
1817	if (bp->b_iocmd == BIO_WRITE) {
1818		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1819		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1820		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1821			panic("ffs_geom_strategy: bad I/O");
1822		bp->b_flags &= ~B_VALIDSUSPWRT;
1823		if ((vp->v_vflag & VV_COPYONWRITE) &&
1824		    vp->v_rdev->si_snapdata != NULL) {
1825			if ((bp->b_flags & B_CLUSTER) != 0) {
1826				runningbufwakeup(bp);
1827				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1828					      b_cluster.cluster_entry) {
1829					error = ffs_copyonwrite(vp, tbp);
1830					if (error != 0 &&
1831					    error != EOPNOTSUPP) {
1832						bp->b_error = error;
1833						bp->b_ioflags |= BIO_ERROR;
1834						bufdone(bp);
1835						return;
1836					}
1837				}
1838				bp->b_runningbufspace = bp->b_bufsize;
1839				atomic_add_int(&runningbufspace,
1840					       bp->b_runningbufspace);
1841			} else {
1842				error = ffs_copyonwrite(vp, bp);
1843				if (error != 0 && error != EOPNOTSUPP) {
1844					bp->b_error = error;
1845					bp->b_ioflags |= BIO_ERROR;
1846					bufdone(bp);
1847					return;
1848				}
1849			}
1850		}
1851#ifdef SOFTUPDATES
1852		if ((bp->b_flags & B_CLUSTER) != 0) {
1853			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1854				      b_cluster.cluster_entry) {
1855				if (!LIST_EMPTY(&tbp->b_dep))
1856					buf_start(tbp);
1857			}
1858		} else {
1859			if (!LIST_EMPTY(&bp->b_dep))
1860				buf_start(bp);
1861		}
1862
1863#endif
1864	}
1865	g_vfs_strategy(bo, bp);
1866}
1867