ffs_vfsops.c revision 188956
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 188956 2009-02-23 21:09:28Z trasz $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39#include "opt_ddb.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/namei.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/kernel.h>
47#include <sys/vnode.h>
48#include <sys/mount.h>
49#include <sys/bio.h>
50#include <sys/buf.h>
51#include <sys/conf.h>
52#include <sys/fcntl.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55
56#include <security/mac/mac_framework.h>
57
58#include <ufs/ufs/extattr.h>
59#include <ufs/ufs/gjournal.h>
60#include <ufs/ufs/quota.h>
61#include <ufs/ufs/ufsmount.h>
62#include <ufs/ufs/inode.h>
63#include <ufs/ufs/ufs_extern.h>
64
65#include <ufs/ffs/fs.h>
66#include <ufs/ffs/ffs_extern.h>
67
68#include <vm/vm.h>
69#include <vm/uma.h>
70#include <vm/vm_page.h>
71
72#include <geom/geom.h>
73#include <geom/geom_vfs.h>
74
75#include <ddb/ddb.h>
76
77static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
78
79static int	ffs_reload(struct mount *, struct thread *);
80static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
81static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
82		    ufs2_daddr_t);
83static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
84static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
85static vfs_init_t ffs_init;
86static vfs_uninit_t ffs_uninit;
87static vfs_extattrctl_t ffs_extattrctl;
88static vfs_cmount_t ffs_cmount;
89static vfs_unmount_t ffs_unmount;
90static vfs_mount_t ffs_mount;
91static vfs_statfs_t ffs_statfs;
92static vfs_fhtovp_t ffs_fhtovp;
93static vfs_sync_t ffs_sync;
94
95static struct vfsops ufs_vfsops = {
96	.vfs_extattrctl =	ffs_extattrctl,
97	.vfs_fhtovp =		ffs_fhtovp,
98	.vfs_init =		ffs_init,
99	.vfs_mount =		ffs_mount,
100	.vfs_cmount =		ffs_cmount,
101	.vfs_quotactl =		ufs_quotactl,
102	.vfs_root =		ufs_root,
103	.vfs_statfs =		ffs_statfs,
104	.vfs_sync =		ffs_sync,
105	.vfs_uninit =		ffs_uninit,
106	.vfs_unmount =		ffs_unmount,
107	.vfs_vget =		ffs_vget,
108	.vfs_susp_clean =	process_deferred_inactive,
109};
110
111VFS_SET(ufs_vfsops, ufs, 0);
112MODULE_VERSION(ufs, 1);
113
114static b_strategy_t ffs_geom_strategy;
115static b_write_t ffs_bufwrite;
116
117static struct buf_ops ffs_ops = {
118	.bop_name =	"FFS",
119	.bop_write =	ffs_bufwrite,
120	.bop_strategy =	ffs_geom_strategy,
121	.bop_sync =	bufsync,
122#ifdef NO_FFS_SNAPSHOT
123	.bop_bdflush =	bufbdflush,
124#else
125	.bop_bdflush =	ffs_bdflush,
126#endif
127};
128
129static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
130    "noclusterw", "noexec", "export", "force", "from", "multilabel",
131    "snapshot", "nosuid", "suiddir", "nosymfollow", "sync",
132    "union", NULL };
133
134static int
135ffs_mount(struct mount *mp, struct thread *td)
136{
137	struct vnode *devvp;
138	struct ufsmount *ump = 0;
139	struct fs *fs;
140	int error, flags;
141	u_int mntorflags, mntandnotflags;
142	accmode_t accmode;
143	struct nameidata ndp;
144	char *fspec;
145
146	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
147		return (EINVAL);
148	if (uma_inode == NULL) {
149		uma_inode = uma_zcreate("FFS inode",
150		    sizeof(struct inode), NULL, NULL, NULL, NULL,
151		    UMA_ALIGN_PTR, 0);
152		uma_ufs1 = uma_zcreate("FFS1 dinode",
153		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
154		    UMA_ALIGN_PTR, 0);
155		uma_ufs2 = uma_zcreate("FFS2 dinode",
156		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
157		    UMA_ALIGN_PTR, 0);
158	}
159
160	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
161	if (error)
162		return (error);
163
164	mntorflags = 0;
165	mntandnotflags = 0;
166	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
167		mntorflags |= MNT_ACLS;
168
169	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
170		mntorflags |= MNT_SNAPSHOT;
171		/*
172		 * Once we have set the MNT_SNAPSHOT flag, do not
173		 * persist "snapshot" in the options list.
174		 */
175		vfs_deleteopt(mp->mnt_optnew, "snapshot");
176		vfs_deleteopt(mp->mnt_opt, "snapshot");
177	}
178
179	MNT_ILOCK(mp);
180	mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
181	MNT_IUNLOCK(mp);
182	/*
183	 * If updating, check whether changing from read-only to
184	 * read/write; if there is no device name, that's all we do.
185	 */
186	if (mp->mnt_flag & MNT_UPDATE) {
187		ump = VFSTOUFS(mp);
188		fs = ump->um_fs;
189		devvp = ump->um_devvp;
190		if (fs->fs_ronly == 0 &&
191		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
192			/*
193			 * Flush any dirty data and suspend filesystem.
194			 */
195			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
196				return (error);
197			for (;;) {
198				vn_finished_write(mp);
199				if ((error = vfs_write_suspend(mp)) != 0)
200					return (error);
201				MNT_ILOCK(mp);
202				if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
203					/*
204					 * Allow the secondary writes
205					 * to proceed.
206					 */
207					mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
208					    MNTK_SUSPEND2);
209					wakeup(&mp->mnt_flag);
210					MNT_IUNLOCK(mp);
211					/*
212					 * Allow the curthread to
213					 * ignore the suspension to
214					 * synchronize on-disk state.
215					 */
216					curthread->td_pflags |= TDP_IGNSUSP;
217					break;
218				}
219				MNT_IUNLOCK(mp);
220				vn_start_write(NULL, &mp, V_WAIT);
221			}
222			/*
223			 * Check for and optionally get rid of files open
224			 * for writing.
225			 */
226			flags = WRITECLOSE;
227			if (mp->mnt_flag & MNT_FORCE)
228				flags |= FORCECLOSE;
229			if (mp->mnt_flag & MNT_SOFTDEP) {
230				error = softdep_flushfiles(mp, flags, td);
231			} else {
232				error = ffs_flushfiles(mp, flags, td);
233			}
234			if (error) {
235				vfs_write_resume(mp);
236				return (error);
237			}
238			if (fs->fs_pendingblocks != 0 ||
239			    fs->fs_pendinginodes != 0) {
240				printf("%s: %s: blocks %jd files %d\n",
241				    fs->fs_fsmnt, "update error",
242				    (intmax_t)fs->fs_pendingblocks,
243				    fs->fs_pendinginodes);
244				fs->fs_pendingblocks = 0;
245				fs->fs_pendinginodes = 0;
246			}
247			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
248				fs->fs_clean = 1;
249			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
250				fs->fs_ronly = 0;
251				fs->fs_clean = 0;
252				vfs_write_resume(mp);
253				return (error);
254			}
255			DROP_GIANT();
256			g_topology_lock();
257			g_access(ump->um_cp, 0, -1, 0);
258			g_topology_unlock();
259			PICKUP_GIANT();
260			fs->fs_ronly = 1;
261			MNT_ILOCK(mp);
262			mp->mnt_flag |= MNT_RDONLY;
263			MNT_IUNLOCK(mp);
264			/*
265			 * Allow the writers to note that filesystem
266			 * is ro now.
267			 */
268			vfs_write_resume(mp);
269		}
270		if ((mp->mnt_flag & MNT_RELOAD) &&
271		    (error = ffs_reload(mp, td)) != 0)
272			return (error);
273		if (fs->fs_ronly &&
274		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
275			/*
276			 * If upgrade to read-write by non-root, then verify
277			 * that user has necessary permissions on the device.
278			 */
279			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
280			error = VOP_ACCESS(devvp, VREAD | VWRITE,
281			    td->td_ucred, td);
282			if (error)
283				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
284			if (error) {
285				VOP_UNLOCK(devvp, 0);
286				return (error);
287			}
288			VOP_UNLOCK(devvp, 0);
289			fs->fs_flags &= ~FS_UNCLEAN;
290			if (fs->fs_clean == 0) {
291				fs->fs_flags |= FS_UNCLEAN;
292				if ((mp->mnt_flag & MNT_FORCE) ||
293				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
294				     (fs->fs_flags & FS_DOSOFTDEP))) {
295					printf("WARNING: %s was not %s\n",
296					   fs->fs_fsmnt, "properly dismounted");
297				} else {
298					printf(
299"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
300					    fs->fs_fsmnt);
301					return (EPERM);
302				}
303			}
304			DROP_GIANT();
305			g_topology_lock();
306			/*
307			 * If we're the root device, we may not have an E count
308			 * yet, get it now.
309			 */
310			if (ump->um_cp->ace == 0)
311				error = g_access(ump->um_cp, 0, 1, 1);
312			else
313				error = g_access(ump->um_cp, 0, 1, 0);
314			g_topology_unlock();
315			PICKUP_GIANT();
316			if (error)
317				return (error);
318			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
319				return (error);
320			fs->fs_ronly = 0;
321			MNT_ILOCK(mp);
322			mp->mnt_flag &= ~MNT_RDONLY;
323			MNT_IUNLOCK(mp);
324			fs->fs_clean = 0;
325			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
326				vn_finished_write(mp);
327				return (error);
328			}
329			/* check to see if we need to start softdep */
330			if ((fs->fs_flags & FS_DOSOFTDEP) &&
331			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
332				vn_finished_write(mp);
333				return (error);
334			}
335			if (fs->fs_snapinum[0] != 0)
336				ffs_snapshot_mount(mp);
337			vn_finished_write(mp);
338		}
339		/*
340		 * Soft updates is incompatible with "async",
341		 * so if we are doing softupdates stop the user
342		 * from setting the async flag in an update.
343		 * Softdep_mount() clears it in an initial mount
344		 * or ro->rw remount.
345		 */
346		if (mp->mnt_flag & MNT_SOFTDEP) {
347			/* XXX: Reset too late ? */
348			MNT_ILOCK(mp);
349			mp->mnt_flag &= ~MNT_ASYNC;
350			MNT_IUNLOCK(mp);
351		}
352		/*
353		 * Keep MNT_ACLS flag if it is stored in superblock.
354		 */
355		if ((fs->fs_flags & FS_ACLS) != 0) {
356			/* XXX: Set too late ? */
357			MNT_ILOCK(mp);
358			mp->mnt_flag |= MNT_ACLS;
359			MNT_IUNLOCK(mp);
360		}
361
362		/*
363		 * If this is a snapshot request, take the snapshot.
364		 */
365		if (mp->mnt_flag & MNT_SNAPSHOT)
366			return (ffs_snapshot(mp, fspec));
367	}
368
369	/*
370	 * Not an update, or updating the name: look up the name
371	 * and verify that it refers to a sensible disk device.
372	 */
373	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
374	if ((error = namei(&ndp)) != 0)
375		return (error);
376	NDFREE(&ndp, NDF_ONLY_PNBUF);
377	devvp = ndp.ni_vp;
378	if (!vn_isdisk(devvp, &error)) {
379		vput(devvp);
380		return (error);
381	}
382
383	/*
384	 * If mount by non-root, then verify that user has necessary
385	 * permissions on the device.
386	 */
387	accmode = VREAD;
388	if ((mp->mnt_flag & MNT_RDONLY) == 0)
389		accmode |= VWRITE;
390	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
391	if (error)
392		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
393	if (error) {
394		vput(devvp);
395		return (error);
396	}
397
398	if (mp->mnt_flag & MNT_UPDATE) {
399		/*
400		 * Update only
401		 *
402		 * If it's not the same vnode, or at least the same device
403		 * then it's not correct.
404		 */
405
406		if (devvp->v_rdev != ump->um_devvp->v_rdev)
407			error = EINVAL;	/* needs translation */
408		vput(devvp);
409		if (error)
410			return (error);
411	} else {
412		/*
413		 * New mount
414		 *
415		 * We need the name for the mount point (also used for
416		 * "last mounted on") copied in. If an error occurs,
417		 * the mount point is discarded by the upper level code.
418		 * Note that vfs_mount() populates f_mntonname for us.
419		 */
420		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
421			vrele(devvp);
422			return (error);
423		}
424	}
425	vfs_mountedfrom(mp, fspec);
426	return (0);
427}
428
429/*
430 * Compatibility with old mount system call.
431 */
432
433static int
434ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
435{
436	struct ufs_args args;
437	int error;
438
439	if (data == NULL)
440		return (EINVAL);
441	error = copyin(data, &args, sizeof args);
442	if (error)
443		return (error);
444
445	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
446	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
447	error = kernel_mount(ma, flags);
448
449	return (error);
450}
451
452/*
453 * Reload all incore data for a filesystem (used after running fsck on
454 * the root filesystem and finding things to fix). The filesystem must
455 * be mounted read-only.
456 *
457 * Things to do to update the mount:
458 *	1) invalidate all cached meta-data.
459 *	2) re-read superblock from disk.
460 *	3) re-read summary information from disk.
461 *	4) invalidate all inactive vnodes.
462 *	5) invalidate all cached file data.
463 *	6) re-read inode data for all active vnodes.
464 */
465static int
466ffs_reload(struct mount *mp, struct thread *td)
467{
468	struct vnode *vp, *mvp, *devvp;
469	struct inode *ip;
470	void *space;
471	struct buf *bp;
472	struct fs *fs, *newfs;
473	struct ufsmount *ump;
474	ufs2_daddr_t sblockloc;
475	int i, blks, size, error;
476	int32_t *lp;
477
478	if ((mp->mnt_flag & MNT_RDONLY) == 0)
479		return (EINVAL);
480	ump = VFSTOUFS(mp);
481	/*
482	 * Step 1: invalidate all cached meta-data.
483	 */
484	devvp = VFSTOUFS(mp)->um_devvp;
485	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
486	if (vinvalbuf(devvp, 0, 0, 0) != 0)
487		panic("ffs_reload: dirty1");
488	VOP_UNLOCK(devvp, 0);
489
490	/*
491	 * Step 2: re-read superblock from disk.
492	 */
493	fs = VFSTOUFS(mp)->um_fs;
494	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
495	    NOCRED, &bp)) != 0)
496		return (error);
497	newfs = (struct fs *)bp->b_data;
498	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
499	     newfs->fs_magic != FS_UFS2_MAGIC) ||
500	    newfs->fs_bsize > MAXBSIZE ||
501	    newfs->fs_bsize < sizeof(struct fs)) {
502			brelse(bp);
503			return (EIO);		/* XXX needs translation */
504	}
505	/*
506	 * Copy pointer fields back into superblock before copying in	XXX
507	 * new superblock. These should really be in the ufsmount.	XXX
508	 * Note that important parameters (eg fs_ncg) are unchanged.
509	 */
510	newfs->fs_csp = fs->fs_csp;
511	newfs->fs_maxcluster = fs->fs_maxcluster;
512	newfs->fs_contigdirs = fs->fs_contigdirs;
513	newfs->fs_active = fs->fs_active;
514	/* The file system is still read-only. */
515	newfs->fs_ronly = 1;
516	sblockloc = fs->fs_sblockloc;
517	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
518	brelse(bp);
519	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
520	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
521	UFS_LOCK(ump);
522	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
523		printf("%s: reload pending error: blocks %jd files %d\n",
524		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
525		    fs->fs_pendinginodes);
526		fs->fs_pendingblocks = 0;
527		fs->fs_pendinginodes = 0;
528	}
529	UFS_UNLOCK(ump);
530
531	/*
532	 * Step 3: re-read summary information from disk.
533	 */
534	blks = howmany(fs->fs_cssize, fs->fs_fsize);
535	space = fs->fs_csp;
536	for (i = 0; i < blks; i += fs->fs_frag) {
537		size = fs->fs_bsize;
538		if (i + fs->fs_frag > blks)
539			size = (blks - i) * fs->fs_fsize;
540		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
541		    NOCRED, &bp);
542		if (error)
543			return (error);
544		bcopy(bp->b_data, space, (u_int)size);
545		space = (char *)space + size;
546		brelse(bp);
547	}
548	/*
549	 * We no longer know anything about clusters per cylinder group.
550	 */
551	if (fs->fs_contigsumsize > 0) {
552		lp = fs->fs_maxcluster;
553		for (i = 0; i < fs->fs_ncg; i++)
554			*lp++ = fs->fs_contigsumsize;
555	}
556
557loop:
558	MNT_ILOCK(mp);
559	MNT_VNODE_FOREACH(vp, mp, mvp) {
560		VI_LOCK(vp);
561		if (vp->v_iflag & VI_DOOMED) {
562			VI_UNLOCK(vp);
563			continue;
564		}
565		MNT_IUNLOCK(mp);
566		/*
567		 * Step 4: invalidate all cached file data.
568		 */
569		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
570			MNT_VNODE_FOREACH_ABORT(mp, mvp);
571			goto loop;
572		}
573		if (vinvalbuf(vp, 0, 0, 0))
574			panic("ffs_reload: dirty2");
575		/*
576		 * Step 5: re-read inode data for all active vnodes.
577		 */
578		ip = VTOI(vp);
579		error =
580		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
581		    (int)fs->fs_bsize, NOCRED, &bp);
582		if (error) {
583			VOP_UNLOCK(vp, 0);
584			vrele(vp);
585			MNT_VNODE_FOREACH_ABORT(mp, mvp);
586			return (error);
587		}
588		ffs_load_inode(bp, ip, fs, ip->i_number);
589		ip->i_effnlink = ip->i_nlink;
590		brelse(bp);
591		VOP_UNLOCK(vp, 0);
592		vrele(vp);
593		MNT_ILOCK(mp);
594	}
595	MNT_IUNLOCK(mp);
596	return (0);
597}
598
599/*
600 * Possible superblock locations ordered from most to least likely.
601 */
602static int sblock_try[] = SBLOCKSEARCH;
603
604/*
605 * Common code for mount and mountroot
606 */
607static int
608ffs_mountfs(devvp, mp, td)
609	struct vnode *devvp;
610	struct mount *mp;
611	struct thread *td;
612{
613	struct ufsmount *ump;
614	struct buf *bp;
615	struct fs *fs;
616	struct cdev *dev;
617	void *space;
618	ufs2_daddr_t sblockloc;
619	int error, i, blks, size, ronly;
620	int32_t *lp;
621	struct ucred *cred;
622	struct g_consumer *cp;
623	struct mount *nmp;
624
625	bp = NULL;
626	ump = NULL;
627	cred = td ? td->td_ucred : NOCRED;
628	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
629
630	dev = devvp->v_rdev;
631	dev_ref(dev);
632	DROP_GIANT();
633	g_topology_lock();
634	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
635
636	/*
637	 * If we are a root mount, drop the E flag so fsck can do its magic.
638	 * We will pick it up again when we remount R/W.
639	 */
640	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
641		error = g_access(cp, 0, 0, -1);
642	g_topology_unlock();
643	PICKUP_GIANT();
644	VOP_UNLOCK(devvp, 0);
645	if (error)
646		goto out;
647	if (devvp->v_rdev->si_iosize_max != 0)
648		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
649	if (mp->mnt_iosize_max > MAXPHYS)
650		mp->mnt_iosize_max = MAXPHYS;
651
652	devvp->v_bufobj.bo_private = cp;
653	devvp->v_bufobj.bo_ops = &ffs_ops;
654
655	fs = NULL;
656	sblockloc = 0;
657	/*
658	 * Try reading the superblock in each of its possible locations.
659	 */
660	for (i = 0; sblock_try[i] != -1; i++) {
661		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
662			error = EINVAL;
663			vfs_mount_error(mp,
664			    "Invalid sectorsize %d for superblock size %d",
665			    cp->provider->sectorsize, SBLOCKSIZE);
666			goto out;
667		}
668		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
669		    cred, &bp)) != 0)
670			goto out;
671		fs = (struct fs *)bp->b_data;
672		sblockloc = sblock_try[i];
673		if ((fs->fs_magic == FS_UFS1_MAGIC ||
674		     (fs->fs_magic == FS_UFS2_MAGIC &&
675		      (fs->fs_sblockloc == sblockloc ||
676		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
677		    fs->fs_bsize <= MAXBSIZE &&
678		    fs->fs_bsize >= sizeof(struct fs))
679			break;
680		brelse(bp);
681		bp = NULL;
682	}
683	if (sblock_try[i] == -1) {
684		error = EINVAL;		/* XXX needs translation */
685		goto out;
686	}
687	fs->fs_fmod = 0;
688	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
689	fs->fs_flags &= ~FS_UNCLEAN;
690	if (fs->fs_clean == 0) {
691		fs->fs_flags |= FS_UNCLEAN;
692		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
693		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
694		     (fs->fs_flags & FS_DOSOFTDEP))) {
695			printf(
696"WARNING: %s was not properly dismounted\n",
697			    fs->fs_fsmnt);
698		} else {
699			printf(
700"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
701			    fs->fs_fsmnt);
702			error = EPERM;
703			goto out;
704		}
705		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
706		    (mp->mnt_flag & MNT_FORCE)) {
707			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
708			    (intmax_t)fs->fs_pendingblocks,
709			    fs->fs_pendinginodes);
710			fs->fs_pendingblocks = 0;
711			fs->fs_pendinginodes = 0;
712		}
713	}
714	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
715		printf("%s: mount pending error: blocks %jd files %d\n",
716		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
717		    fs->fs_pendinginodes);
718		fs->fs_pendingblocks = 0;
719		fs->fs_pendinginodes = 0;
720	}
721	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
722#ifdef UFS_GJOURNAL
723		/*
724		 * Get journal provider name.
725		 */
726		size = 1024;
727		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
728		if (g_io_getattr("GJOURNAL::provider", cp, &size,
729		    mp->mnt_gjprovider) == 0) {
730			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
731			    M_UFSMNT, M_WAITOK);
732			MNT_ILOCK(mp);
733			mp->mnt_flag |= MNT_GJOURNAL;
734			MNT_IUNLOCK(mp);
735		} else {
736			printf(
737"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
738			    mp->mnt_stat.f_mntonname);
739			free(mp->mnt_gjprovider, M_UFSMNT);
740			mp->mnt_gjprovider = NULL;
741		}
742#else
743		printf(
744"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
745		    mp->mnt_stat.f_mntonname);
746#endif
747	} else {
748		mp->mnt_gjprovider = NULL;
749	}
750	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
751	ump->um_cp = cp;
752	ump->um_bo = &devvp->v_bufobj;
753	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
754	if (fs->fs_magic == FS_UFS1_MAGIC) {
755		ump->um_fstype = UFS1;
756		ump->um_balloc = ffs_balloc_ufs1;
757	} else {
758		ump->um_fstype = UFS2;
759		ump->um_balloc = ffs_balloc_ufs2;
760	}
761	ump->um_blkatoff = ffs_blkatoff;
762	ump->um_truncate = ffs_truncate;
763	ump->um_update = ffs_update;
764	ump->um_valloc = ffs_valloc;
765	ump->um_vfree = ffs_vfree;
766	ump->um_ifree = ffs_ifree;
767	ump->um_rdonly = ffs_rdonly;
768	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
769	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
770	if (fs->fs_sbsize < SBLOCKSIZE)
771		bp->b_flags |= B_INVAL | B_NOCACHE;
772	brelse(bp);
773	bp = NULL;
774	fs = ump->um_fs;
775	ffs_oldfscompat_read(fs, ump, sblockloc);
776	fs->fs_ronly = ronly;
777	size = fs->fs_cssize;
778	blks = howmany(size, fs->fs_fsize);
779	if (fs->fs_contigsumsize > 0)
780		size += fs->fs_ncg * sizeof(int32_t);
781	size += fs->fs_ncg * sizeof(u_int8_t);
782	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
783	fs->fs_csp = space;
784	for (i = 0; i < blks; i += fs->fs_frag) {
785		size = fs->fs_bsize;
786		if (i + fs->fs_frag > blks)
787			size = (blks - i) * fs->fs_fsize;
788		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
789		    cred, &bp)) != 0) {
790			free(fs->fs_csp, M_UFSMNT);
791			goto out;
792		}
793		bcopy(bp->b_data, space, (u_int)size);
794		space = (char *)space + size;
795		brelse(bp);
796		bp = NULL;
797	}
798	if (fs->fs_contigsumsize > 0) {
799		fs->fs_maxcluster = lp = space;
800		for (i = 0; i < fs->fs_ncg; i++)
801			*lp++ = fs->fs_contigsumsize;
802		space = lp;
803	}
804	size = fs->fs_ncg * sizeof(u_int8_t);
805	fs->fs_contigdirs = (u_int8_t *)space;
806	bzero(fs->fs_contigdirs, size);
807	fs->fs_active = NULL;
808	mp->mnt_data = ump;
809	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
810	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
811	nmp = NULL;
812	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
813	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
814		if (nmp)
815			vfs_rel(nmp);
816		vfs_getnewfsid(mp);
817	}
818	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
819	MNT_ILOCK(mp);
820	mp->mnt_flag |= MNT_LOCAL;
821	MNT_IUNLOCK(mp);
822	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
823#ifdef MAC
824		MNT_ILOCK(mp);
825		mp->mnt_flag |= MNT_MULTILABEL;
826		MNT_IUNLOCK(mp);
827#else
828		printf(
829"WARNING: %s: multilabel flag on fs but no MAC support\n",
830		    mp->mnt_stat.f_mntonname);
831#endif
832	}
833	if ((fs->fs_flags & FS_ACLS) != 0) {
834#ifdef UFS_ACL
835		MNT_ILOCK(mp);
836		mp->mnt_flag |= MNT_ACLS;
837		MNT_IUNLOCK(mp);
838#else
839		printf(
840"WARNING: %s: ACLs flag on fs but no ACLs support\n",
841		    mp->mnt_stat.f_mntonname);
842#endif
843	}
844	ump->um_mountp = mp;
845	ump->um_dev = dev;
846	ump->um_devvp = devvp;
847	ump->um_nindir = fs->fs_nindir;
848	ump->um_bptrtodb = fs->fs_fsbtodb;
849	ump->um_seqinc = fs->fs_frag;
850	for (i = 0; i < MAXQUOTAS; i++)
851		ump->um_quotas[i] = NULLVP;
852#ifdef UFS_EXTATTR
853	ufs_extattr_uepm_init(&ump->um_extattr);
854#endif
855	/*
856	 * Set FS local "last mounted on" information (NULL pad)
857	 */
858	bzero(fs->fs_fsmnt, MAXMNTLEN);
859	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
860
861	if( mp->mnt_flag & MNT_ROOTFS) {
862		/*
863		 * Root mount; update timestamp in mount structure.
864		 * this will be used by the common root mount code
865		 * to update the system clock.
866		 */
867		mp->mnt_time = fs->fs_time;
868	}
869
870	if (ronly == 0) {
871		if ((fs->fs_flags & FS_DOSOFTDEP) &&
872		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
873			free(fs->fs_csp, M_UFSMNT);
874			goto out;
875		}
876		if (fs->fs_snapinum[0] != 0)
877			ffs_snapshot_mount(mp);
878		fs->fs_fmod = 1;
879		fs->fs_clean = 0;
880		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
881	}
882	/*
883	 * Initialize filesystem stat information in mount struct.
884	 */
885	MNT_ILOCK(mp);
886	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED;
887	MNT_IUNLOCK(mp);
888#ifdef UFS_EXTATTR
889#ifdef UFS_EXTATTR_AUTOSTART
890	/*
891	 *
892	 * Auto-starting does the following:
893	 *	- check for /.attribute in the fs, and extattr_start if so
894	 *	- for each file in .attribute, enable that file with
895	 * 	  an attribute of the same name.
896	 * Not clear how to report errors -- probably eat them.
897	 * This would all happen while the filesystem was busy/not
898	 * available, so would effectively be "atomic".
899	 */
900	mp->mnt_stat.f_iosize = fs->fs_bsize;
901	(void) ufs_extattr_autostart(mp, td);
902#endif /* !UFS_EXTATTR_AUTOSTART */
903#endif /* !UFS_EXTATTR */
904	return (0);
905out:
906	if (bp)
907		brelse(bp);
908	if (cp != NULL) {
909		DROP_GIANT();
910		g_topology_lock();
911		g_vfs_close(cp);
912		g_topology_unlock();
913		PICKUP_GIANT();
914	}
915	if (ump) {
916		mtx_destroy(UFS_MTX(ump));
917		if (mp->mnt_gjprovider != NULL) {
918			free(mp->mnt_gjprovider, M_UFSMNT);
919			mp->mnt_gjprovider = NULL;
920		}
921		free(ump->um_fs, M_UFSMNT);
922		free(ump, M_UFSMNT);
923		mp->mnt_data = NULL;
924	}
925	dev_rel(dev);
926	return (error);
927}
928
929#include <sys/sysctl.h>
930static int bigcgs = 0;
931SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
932
933/*
934 * Sanity checks for loading old filesystem superblocks.
935 * See ffs_oldfscompat_write below for unwound actions.
936 *
937 * XXX - Parts get retired eventually.
938 * Unfortunately new bits get added.
939 */
940static void
941ffs_oldfscompat_read(fs, ump, sblockloc)
942	struct fs *fs;
943	struct ufsmount *ump;
944	ufs2_daddr_t sblockloc;
945{
946	off_t maxfilesize;
947
948	/*
949	 * If not yet done, update fs_flags location and value of fs_sblockloc.
950	 */
951	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
952		fs->fs_flags = fs->fs_old_flags;
953		fs->fs_old_flags |= FS_FLAGS_UPDATED;
954		fs->fs_sblockloc = sblockloc;
955	}
956	/*
957	 * If not yet done, update UFS1 superblock with new wider fields.
958	 */
959	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
960		fs->fs_maxbsize = fs->fs_bsize;
961		fs->fs_time = fs->fs_old_time;
962		fs->fs_size = fs->fs_old_size;
963		fs->fs_dsize = fs->fs_old_dsize;
964		fs->fs_csaddr = fs->fs_old_csaddr;
965		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
966		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
967		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
968		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
969	}
970	if (fs->fs_magic == FS_UFS1_MAGIC &&
971	    fs->fs_old_inodefmt < FS_44INODEFMT) {
972		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
973		fs->fs_qbmask = ~fs->fs_bmask;
974		fs->fs_qfmask = ~fs->fs_fmask;
975	}
976	if (fs->fs_magic == FS_UFS1_MAGIC) {
977		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
978		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
979		if (fs->fs_maxfilesize > maxfilesize)
980			fs->fs_maxfilesize = maxfilesize;
981	}
982	/* Compatibility for old filesystems */
983	if (fs->fs_avgfilesize <= 0)
984		fs->fs_avgfilesize = AVFILESIZ;
985	if (fs->fs_avgfpdir <= 0)
986		fs->fs_avgfpdir = AFPDIR;
987	if (bigcgs) {
988		fs->fs_save_cgsize = fs->fs_cgsize;
989		fs->fs_cgsize = fs->fs_bsize;
990	}
991}
992
993/*
994 * Unwinding superblock updates for old filesystems.
995 * See ffs_oldfscompat_read above for details.
996 *
997 * XXX - Parts get retired eventually.
998 * Unfortunately new bits get added.
999 */
1000static void
1001ffs_oldfscompat_write(fs, ump)
1002	struct fs *fs;
1003	struct ufsmount *ump;
1004{
1005
1006	/*
1007	 * Copy back UFS2 updated fields that UFS1 inspects.
1008	 */
1009	if (fs->fs_magic == FS_UFS1_MAGIC) {
1010		fs->fs_old_time = fs->fs_time;
1011		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1012		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1013		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1014		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1015		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1016	}
1017	if (bigcgs) {
1018		fs->fs_cgsize = fs->fs_save_cgsize;
1019		fs->fs_save_cgsize = 0;
1020	}
1021}
1022
1023/*
1024 * unmount system call
1025 */
1026static int
1027ffs_unmount(mp, mntflags, td)
1028	struct mount *mp;
1029	int mntflags;
1030	struct thread *td;
1031{
1032	struct ufsmount *ump = VFSTOUFS(mp);
1033	struct fs *fs;
1034	int error, flags, susp;
1035#ifdef UFS_EXTATTR
1036	int e_restart;
1037#endif
1038
1039	flags = 0;
1040	fs = ump->um_fs;
1041	if (mntflags & MNT_FORCE) {
1042		flags |= FORCECLOSE;
1043		susp = fs->fs_ronly != 0;
1044	} else
1045		susp = 0;
1046#ifdef UFS_EXTATTR
1047	if ((error = ufs_extattr_stop(mp, td))) {
1048		if (error != EOPNOTSUPP)
1049			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1050			    error);
1051		e_restart = 0;
1052	} else {
1053		ufs_extattr_uepm_destroy(&ump->um_extattr);
1054		e_restart = 1;
1055	}
1056#endif
1057	if (susp) {
1058		/*
1059		 * dounmount already called vn_start_write().
1060		 */
1061		for (;;) {
1062			vn_finished_write(mp);
1063			if ((error = vfs_write_suspend(mp)) != 0)
1064				return (error);
1065			MNT_ILOCK(mp);
1066			if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
1067				mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
1068				    MNTK_SUSPEND2);
1069				wakeup(&mp->mnt_flag);
1070				MNT_IUNLOCK(mp);
1071				curthread->td_pflags |= TDP_IGNSUSP;
1072				break;
1073			}
1074			MNT_IUNLOCK(mp);
1075			vn_start_write(NULL, &mp, V_WAIT);
1076		}
1077	}
1078	if (mp->mnt_flag & MNT_SOFTDEP)
1079		error = softdep_flushfiles(mp, flags, td);
1080	else
1081		error = ffs_flushfiles(mp, flags, td);
1082	if (error != 0 && error != ENXIO)
1083		goto fail;
1084
1085	UFS_LOCK(ump);
1086	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1087		printf("%s: unmount pending error: blocks %jd files %d\n",
1088		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1089		    fs->fs_pendinginodes);
1090		fs->fs_pendingblocks = 0;
1091		fs->fs_pendinginodes = 0;
1092	}
1093	UFS_UNLOCK(ump);
1094	if (fs->fs_ronly == 0) {
1095		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1096		error = ffs_sbupdate(ump, MNT_WAIT, 0);
1097		if (error && error != ENXIO) {
1098			fs->fs_clean = 0;
1099			goto fail;
1100		}
1101	}
1102	if (susp) {
1103		vfs_write_resume(mp);
1104		vn_start_write(NULL, &mp, V_WAIT);
1105	}
1106	DROP_GIANT();
1107	g_topology_lock();
1108	g_vfs_close(ump->um_cp);
1109	g_topology_unlock();
1110	PICKUP_GIANT();
1111	vrele(ump->um_devvp);
1112	dev_rel(ump->um_dev);
1113	mtx_destroy(UFS_MTX(ump));
1114	if (mp->mnt_gjprovider != NULL) {
1115		free(mp->mnt_gjprovider, M_UFSMNT);
1116		mp->mnt_gjprovider = NULL;
1117	}
1118	free(fs->fs_csp, M_UFSMNT);
1119	free(fs, M_UFSMNT);
1120	free(ump, M_UFSMNT);
1121	mp->mnt_data = NULL;
1122	MNT_ILOCK(mp);
1123	mp->mnt_flag &= ~MNT_LOCAL;
1124	MNT_IUNLOCK(mp);
1125	return (error);
1126
1127fail:
1128	if (susp) {
1129		vfs_write_resume(mp);
1130		vn_start_write(NULL, &mp, V_WAIT);
1131	}
1132#ifdef UFS_EXTATTR
1133	if (e_restart) {
1134		ufs_extattr_uepm_init(&ump->um_extattr);
1135#ifdef UFS_EXTATTR_AUTOSTART
1136		(void) ufs_extattr_autostart(mp, td);
1137#endif
1138	}
1139#endif
1140
1141	return (error);
1142}
1143
1144/*
1145 * Flush out all the files in a filesystem.
1146 */
1147int
1148ffs_flushfiles(mp, flags, td)
1149	struct mount *mp;
1150	int flags;
1151	struct thread *td;
1152{
1153	struct ufsmount *ump;
1154	int error;
1155
1156	ump = VFSTOUFS(mp);
1157#ifdef QUOTA
1158	if (mp->mnt_flag & MNT_QUOTA) {
1159		int i;
1160		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1161		if (error)
1162			return (error);
1163		for (i = 0; i < MAXQUOTAS; i++) {
1164			quotaoff(td, mp, i);
1165		}
1166		/*
1167		 * Here we fall through to vflush again to ensure
1168		 * that we have gotten rid of all the system vnodes.
1169		 */
1170	}
1171#endif
1172	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1173	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1174		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1175			return (error);
1176		ffs_snapshot_unmount(mp);
1177		flags |= FORCECLOSE;
1178		/*
1179		 * Here we fall through to vflush again to ensure
1180		 * that we have gotten rid of all the system vnodes.
1181		 */
1182	}
1183        /*
1184	 * Flush all the files.
1185	 */
1186	if ((error = vflush(mp, 0, flags, td)) != 0)
1187		return (error);
1188	/*
1189	 * Flush filesystem metadata.
1190	 */
1191	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1192	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1193	VOP_UNLOCK(ump->um_devvp, 0);
1194	return (error);
1195}
1196
1197/*
1198 * Get filesystem statistics.
1199 */
1200static int
1201ffs_statfs(mp, sbp, td)
1202	struct mount *mp;
1203	struct statfs *sbp;
1204	struct thread *td;
1205{
1206	struct ufsmount *ump;
1207	struct fs *fs;
1208
1209	ump = VFSTOUFS(mp);
1210	fs = ump->um_fs;
1211	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1212		panic("ffs_statfs");
1213	sbp->f_version = STATFS_VERSION;
1214	sbp->f_bsize = fs->fs_fsize;
1215	sbp->f_iosize = fs->fs_bsize;
1216	sbp->f_blocks = fs->fs_dsize;
1217	UFS_LOCK(ump);
1218	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1219	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1220	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1221	    dbtofsb(fs, fs->fs_pendingblocks);
1222	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1223	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1224	UFS_UNLOCK(ump);
1225	sbp->f_namemax = NAME_MAX;
1226	return (0);
1227}
1228
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * waitfor is compared against MNT_WAIT, MNT_LAZY and MNT_SUSPEND.
 * MNT_SUSPEND is handled as MNT_WAIT plus an attempt, once everything
 * has been flushed, to mark the mount suspended.
 *
 * Returns 0, or the most recently recorded error from syncing a vnode,
 * flushing the softdep worklist, fsync'ing the device vnode or writing
 * the superblock.  Panics if a read-only filesystem has fs_fmod set.
 */
static int
ffs_sync(mp, waitfor, td)
	struct mount *mp;
	int waitfor;
	struct thread *td;
{
	struct vnode *mvp, *vp, *devvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;
	int suspend;
	int suspended;
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	/* NOTE: `wait' is assigned below but never read in this function. */
	wait = 0;
	suspend = 0;
	suspended = 0;
	/* By default do not sleep for a vnode lock ... */
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
	/* ... but a waiting sync may block for it. */
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
	MNT_ILOCK(mp);
	/*
	 * Restart point for a full rescan.  Every `goto loop' in this
	 * function that takes the mount interlock itself does so before
	 * jumping; the jump after softdep_check_suspend() relies on that
	 * routine's return state (see below).
	 */
loop:
	/* Grab snapshot of secondary write counts */
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;

	/* Grab snapshot of softdep dependency counts */
	MNT_IUNLOCK(mp);
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
	MNT_ILOCK(mp);

	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/*
		 * Depend on the mntvnode_slock to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		/* Skip vnodes with no pending inode flags and no dirty bufs. */
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		/* lockreq carries LK_INTERLOCK: the vnode interlock is held. */
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			/*
			 * ENOENT/ENOLCK: abandon this pass and rescan from
			 * the start.  Any other failure just skips the vnode.
			 */
			if (error == ENOENT || error == ENOLCK) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	bo = &devvp->v_bufobj;
	BO_LOCK(bo);
	if (waitfor != MNT_LAZY &&
	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
		/* The device still has I/O outstanding or dirty buffers. */
		BO_UNLOCK(bo);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0);
		/* A waiting sync rescans after the device was flushed. */
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else if (suspend != 0) {
		/*
		 * Called with the bufobj lock still held.
		 * NOTE(review): presumably softdep_check_suspend() drops the
		 * bufobj lock and returns with the mount interlock held in
		 * both outcomes (the goto and the mtx_assert below depend on
		 * it) -- confirm against its definition.
		 */
		if (softdep_check_suspend(mp,
					  devvp,
					  softdep_deps,
					  softdep_accdeps,
					  secondary_writes,
					  secondary_accwrites) != 0)
			goto loop;	/* More work needed */
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		BO_UNLOCK(bo);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	return (allerror);
}
1372
1373int
1374ffs_vget(mp, ino, flags, vpp)
1375	struct mount *mp;
1376	ino_t ino;
1377	int flags;
1378	struct vnode **vpp;
1379{
1380	return (ffs_vgetf(mp, ino, flags, vpp, 0));
1381}
1382
/*
 * Find or create the in-core vnode for inode number `ino' on `mp'.
 *
 * The vnode hash is consulted first; when it yields a vnode or an error
 * that result is returned directly.  Otherwise a new inode and vnode are
 * allocated, entered on the mount's vnode list and in the hash, filled
 * from the on-disk inode and initialized.
 *
 * flags:     lock flags handed to the vnode hash routines; a LK_SHARED
 *            request is upgraded to LK_EXCLUSIVE for the creation path.
 * vpp:       receives the vnode; set to NULL on the failure paths below.
 * ffs_flags: FFSV_FORCEINSMQ sets VV_FORCEINSMQ on the new vnode for the
 *            duration of insmntque().
 *
 * Returns 0 on success or an errno value.
 */
int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	/* Fast path: the vnode may already be in the hash. */
	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this malloc() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive and shared locking.
	 */
	VN_LOCK_AREC(vp);
	VN_LOCK_ASHARE(vp);
	/* Tie the inode and vnode together before anyone can find them. */
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	/* Lock the new vnode before publishing it on the mount list. */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	error = insmntque(vp, mp);
	if (error != 0) {
		/*
		 * Only the inode is released here.
		 * NOTE(review): presumably insmntque() has already disposed
		 * of vp on failure -- confirm.
		 */
		uma_zfree(uma_inode, ip);
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	/*
	 * Enter the vnode in the hash.  A non-NULL *vpp here means a vnode
	 * for this inode was already present (the creation race described
	 * above was lost); that vnode is returned instead.
	 * NOTE(review): presumably vfs_hash_insert() disposes of our vp in
	 * that case and on error -- confirm.
	 */
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* Allocate the in-core dinode matching the filesystem format. */
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		/* Only schedule the new value for write-back when writable. */
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
1561
1562/*
1563 * File handle to vnode
1564 *
1565 * Have to be really careful about stale file handles:
1566 * - check that the inode number is valid
1567 * - call ffs_vget() to get the locked inode
1568 * - check for an unallocated inode (i_mode == 0)
1569 * - check that the given client host has export rights and return
1570 *   those rights via. exflagsp and credanonp
1571 */
1572static int
1573ffs_fhtovp(mp, fhp, vpp)
1574	struct mount *mp;
1575	struct fid *fhp;
1576	struct vnode **vpp;
1577{
1578	struct ufid *ufhp;
1579	struct fs *fs;
1580
1581	ufhp = (struct ufid *)fhp;
1582	fs = VFSTOUFS(mp)->um_fs;
1583	if (ufhp->ufid_ino < ROOTINO ||
1584	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1585		return (ESTALE);
1586	return (ufs_fhtovp(mp, ufhp, vpp));
1587}
1588
/*
 * Initialize the filesystem.
 *
 * Sets up soft updates first, then performs the generic UFS
 * initialization and returns its result.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1600
/*
 * Undo the work of ffs_init().
 *
 * Tears down in the reverse order: generic UFS state first, soft
 * updates second.  The value returned is that of ufs_uninit().
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1614
1615/*
1616 * Write a superblock and associated information back to disk.
1617 */
1618int
1619ffs_sbupdate(mp, waitfor, suspended)
1620	struct ufsmount *mp;
1621	int waitfor;
1622	int suspended;
1623{
1624	struct fs *fs = mp->um_fs;
1625	struct buf *sbbp;
1626	struct buf *bp;
1627	int blks;
1628	void *space;
1629	int i, size, error, allerror = 0;
1630
1631	if (fs->fs_ronly == 1 &&
1632	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1633	    (MNT_RDONLY | MNT_UPDATE))
1634		panic("ffs_sbupdate: write read-only filesystem");
1635	/*
1636	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1637	 */
1638	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1639	    0, 0, 0);
1640	/*
1641	 * First write back the summary information.
1642	 */
1643	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1644	space = fs->fs_csp;
1645	for (i = 0; i < blks; i += fs->fs_frag) {
1646		size = fs->fs_bsize;
1647		if (i + fs->fs_frag > blks)
1648			size = (blks - i) * fs->fs_fsize;
1649		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1650		    size, 0, 0, 0);
1651		bcopy(space, bp->b_data, (u_int)size);
1652		space = (char *)space + size;
1653		if (suspended)
1654			bp->b_flags |= B_VALIDSUSPWRT;
1655		if (waitfor != MNT_WAIT)
1656			bawrite(bp);
1657		else if ((error = bwrite(bp)) != 0)
1658			allerror = error;
1659	}
1660	/*
1661	 * Now write back the superblock itself. If any errors occurred
1662	 * up to this point, then fail so that the superblock avoids
1663	 * being written out as clean.
1664	 */
1665	if (allerror) {
1666		brelse(sbbp);
1667		return (allerror);
1668	}
1669	bp = sbbp;
1670	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1671	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1672		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1673		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1674		fs->fs_sblockloc = SBLOCK_UFS1;
1675	}
1676	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1677	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1678		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1679		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1680		fs->fs_sblockloc = SBLOCK_UFS2;
1681	}
1682	fs->fs_fmod = 0;
1683	fs->fs_time = time_second;
1684	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1685	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1686	if (suspended)
1687		bp->b_flags |= B_VALIDSUSPWRT;
1688	if (waitfor != MNT_WAIT)
1689		bawrite(bp);
1690	else if ((error = bwrite(bp)) != 0)
1691		allerror = error;
1692	return (allerror);
1693}
1694
/*
 * Extended attribute control entry point.  Handled by the UFS
 * implementation when UFS_EXTATTR is compiled in and by the VFS default
 * otherwise; all arguments are passed through unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1708
1709static void
1710ffs_ifree(struct ufsmount *ump, struct inode *ip)
1711{
1712
1713	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1714		uma_zfree(uma_ufs1, ip->i_din1);
1715	else if (ip->i_din2 != NULL)
1716		uma_zfree(uma_ufs2, ip->i_din2);
1717	uma_zfree(uma_inode, ip);
1718}
1719
/*
 * Switch consulted by ffs_bufwrite(): while non-zero (the initial value)
 * a buffer flagged BX_BKGRDWRITE may be written through a background
 * copy.  Registered read-write as the integer "dobkgrdwrite" under the
 * _debug sysctl parent.
 */
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1723
/*
 * Complete a background write started from bwrite.
 *
 * Installed by ffs_bufwrite() as the b_iodone handler of the copy it
 * writes.  `bp' is that copy; the original buffer is looked up in the
 * same bufobj by logical block number.  Outstanding dependencies are
 * handed to the original, the copy is discarded, and the original's
 * BV_BKGRDINPROG flag is cleared, waking a waiter if there is one.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

	/*
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");
	/* Grab an extra reference to be dropped by the bufdone() below. */
	bufobj_wrefl(bufobj);
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (!LIST_EMPTY(&bp->b_dep))
		buf_complete(bp);
#ifdef SOFTUPDATES
	/* Whatever is still attached goes back to the original buffer. */
	if (!LIST_EMPTY(&bp->b_dep))
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~B_CACHE;
	bufdone(bp);
	/* bp must not be touched past this point; only origbp is used. */
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		/* Matches the msleep() on &bp->b_xflags in ffs_bufwrite(). */
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}
1775
1776
/*
 * Write, release buffer on completion.  (Done by iodone
 * if async).  Do not bother writing anything if the buffer
 * is invalid.
 *
 * Note that we set B_CACHE here, indicating that buffer is
 * fully valid and thus cacheable.  This is true even of NFS
 * now so we set it generally.  This could be set either here
 * or in biodone() since the I/O is synchronous.  We put it
 * here.
 *
 * FFS-specific part: when the buffer is flagged for background writing
 * and conditions allow, a private copy is written instead so that the
 * original stays available; the generic bufwrite() performs the actual
 * write in every case.
 *
 * Returns 0 when the buffer was invalid or the write was deferred with
 * bdwrite(); otherwise the return value of bufwrite().
 */
static int
ffs_bufwrite(struct buf *bp)
{
	int oldflags, s;
	struct buf *newbp;

	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}

	/* NOTE: oldflags is saved here but not read again in this function. */
	oldflags = bp->b_flags;

	if (!BUF_ISLOCKED(bp))
		panic("bufwrite: buffer is not busy???");
	/* NOTE: only the early B_ASYNC return below pairs this with splx(). */
	s = splbio();
	/*
	 * If a background write is already in progress, delay
	 * writing this block if it is asynchronous. Otherwise
	 * wait for the background write to complete.
	 */
	BO_LOCK(bp->b_bufobj);
	if (bp->b_vflags & BV_BKGRDINPROG) {
		if (bp->b_flags & B_ASYNC) {
			BO_UNLOCK(bp->b_bufobj);
			splx(s);
			bdwrite(bp);
			return (0);
		}
		/* Sleep until ffs_backgroundwritedone() wakes us. */
		bp->b_vflags |= BV_BKGRDWAIT;
		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
		if (bp->b_vflags & BV_BKGRDINPROG)
			panic("bufwrite: still writing");
	}
	BO_UNLOCK(bp->b_bufobj);

	/* Mark the buffer clean */
	bundirty(bp);

	/*
	 * If this buffer is marked for background writing and we
	 * do not have to wait for it, make a copy and write the
	 * copy so as to leave this buffer ready for further use.
	 *
	 * This optimization eats a lot of memory.  If we have a page
	 * or buffer shortfall we can't do it.
	 */
	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
	    (bp->b_flags & B_ASYNC) &&
	    !vm_page_count_severe() &&
	    !buf_dirty_count_severe()) {
		KASSERT(bp->b_iodone == NULL,
		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

		/* get a new block */
		newbp = geteblk(bp->b_bufsize);

		/*
		 * set it to be identical to the old block.  We have to
		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
		 * to avoid confusing the splay tree and gbincore().
		 */
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		newbp->b_lblkno = bp->b_lblkno;
		newbp->b_xflags |= BX_BKGRDMARKER;
		BO_LOCK(bp->b_bufobj);
		/* Mark the original busy with a background write. */
		bp->b_vflags |= BV_BKGRDINPROG;
		bgetvp(bp->b_vp, newbp);
		BO_UNLOCK(bp->b_bufobj);
		newbp->b_bufobj = &bp->b_vp->v_bufobj;
		newbp->b_blkno = bp->b_blkno;
		newbp->b_offset = bp->b_offset;
		/* Completion of the copy is handled by our own callback. */
		newbp->b_iodone = ffs_backgroundwritedone;
		newbp->b_flags |= B_ASYNC;
		newbp->b_flags &= ~B_INVAL;

#ifdef SOFTUPDATES
		/* move over the dependencies */
		if (!LIST_EMPTY(&bp->b_dep))
			softdep_move_dependencies(bp, newbp);
#endif

		/*
		 * Initiate write on the copy, release the original to
		 * the B_LOCKED queue so that it cannot go away until
		 * the background write completes. If not locked it could go
		 * away and then be reconstituted while it was being written.
		 * If the reconstituted buffer were written, we could end up
		 * with two background copies being written at the same time.
		 */
		bqrelse(bp);
		/* From here on the copy is the buffer being written. */
		bp = newbp;
	}

	/* Let the normal bufwrite do the rest for us */
	return (bufwrite(bp));
}
1886
1887
1888static void
1889ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1890{
1891	struct vnode *vp;
1892	int error;
1893	struct buf *tbp;
1894
1895	vp = bo->__bo_vnode;
1896	if (bp->b_iocmd == BIO_WRITE) {
1897		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1898		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1899		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1900			panic("ffs_geom_strategy: bad I/O");
1901		bp->b_flags &= ~B_VALIDSUSPWRT;
1902		if ((vp->v_vflag & VV_COPYONWRITE) &&
1903		    vp->v_rdev->si_snapdata != NULL) {
1904			if ((bp->b_flags & B_CLUSTER) != 0) {
1905				runningbufwakeup(bp);
1906				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1907					      b_cluster.cluster_entry) {
1908					error = ffs_copyonwrite(vp, tbp);
1909					if (error != 0 &&
1910					    error != EOPNOTSUPP) {
1911						bp->b_error = error;
1912						bp->b_ioflags |= BIO_ERROR;
1913						bufdone(bp);
1914						return;
1915					}
1916				}
1917				bp->b_runningbufspace = bp->b_bufsize;
1918				atomic_add_int(&runningbufspace,
1919					       bp->b_runningbufspace);
1920			} else {
1921				error = ffs_copyonwrite(vp, bp);
1922				if (error != 0 && error != EOPNOTSUPP) {
1923					bp->b_error = error;
1924					bp->b_ioflags |= BIO_ERROR;
1925					bufdone(bp);
1926					return;
1927				}
1928			}
1929		}
1930#ifdef SOFTUPDATES
1931		if ((bp->b_flags & B_CLUSTER) != 0) {
1932			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1933				      b_cluster.cluster_entry) {
1934				if (!LIST_EMPTY(&tbp->b_dep))
1935					buf_start(tbp);
1936			}
1937		} else {
1938			if (!LIST_EMPTY(&bp->b_dep))
1939				buf_start(bp);
1940		}
1941
1942#endif
1943	}
1944	g_vfs_strategy(bo, bp);
1945}
1946
1947#ifdef	DDB
1948
1949static void
1950db_print_ffs(struct ufsmount *ump)
1951{
1952	db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
1953		  "su_req %d\n",
1954	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
1955	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
1956	    ump->softdep_on_worklist_inprogress, ump->softdep_deps,
1957	    ump->softdep_req);
1958}
1959
1960DB_SHOW_COMMAND(ffs, db_show_ffs)
1961{
1962	struct mount *mp;
1963	struct ufsmount *ump;
1964
1965	if (have_addr) {
1966		ump = VFSTOUFS((struct mount *)addr);
1967		db_print_ffs(ump);
1968		return;
1969	}
1970
1971	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
1972		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
1973			db_print_ffs(VFSTOUFS(mp));
1974	}
1975}
1976
1977#endif	/* DDB */
1978