/*-
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 193511 2009-06-05 14:55:22Z rwatson $");

#include "opt_quota.h"
#include "opt_ufs.h"
#include "opt_ffs.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <security/mac/mac_framework.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/gjournal.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/uma.h>
#include <vm/vm_page.h>

#include <geom/geom.h>
#include <geom/geom_vfs.h>

#include <ddb/ddb.h>

static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

static int	ffs_reload(struct mount *, struct thread *);
static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
static vfs_cmount_t ffs_cmount;
static vfs_unmount_t ffs_unmount;
static vfs_mount_t ffs_mount;
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_sync_t ffs_sync;

static struct vfsops ufs_vfsops = {
	.vfs_extattrctl =	ffs_extattrctl,
	.vfs_fhtovp =		ffs_fhtovp,
	.vfs_init =		ffs_init,
	.vfs_mount =		ffs_mount,
	.vfs_cmount =		ffs_cmount,
	.vfs_quotactl =		ufs_quotactl,
	.vfs_root =		ufs_root,
	.vfs_statfs =		ffs_statfs,
	.vfs_sync =		ffs_sync,
	.vfs_uninit =		ffs_uninit,
	.vfs_unmount =		ffs_unmount,
	.vfs_vget =		ffs_vget,
	.vfs_susp_clean =	process_deferred_inactive,
};

VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);

static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

static struct buf_ops ffs_ops = {
	.bop_name =	"FFS",
	.bop_write =	ffs_bufwrite,
	.bop_strategy =	ffs_geom_strategy,
	.bop_sync =	bufsync,
#ifdef NO_FFS_SNAPSHOT
	.bop_bdflush =	bufbdflush,
#else
	.bop_bdflush =	ffs_bdflush,
#endif
};

static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "multilabel",
    "snapshot", "nosuid", "suiddir", "nosymfollow", "sync",
    "union", NULL };
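
/*
 * Any mount option not named in the ffs_opts table above is rejected
 * by the vfs_filteropt() check at the top of ffs_mount() with EINVAL.
 */
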
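/*
 * ffs_mount() serves both new mounts and MNT_UPDATE requests.  As an
 * illustration (example command lines, not taken from this file), the
 * update paths below correspond to operations such as:
 *
 *	mount -u -o ro /dev/da0s1a /mnt		(downgrade to read-only)
 *	mount -u -o rw /dev/da0s1a /mnt		(upgrade to read/write)
 *	mount -u -o snapshot /mnt/.snap/s1 /mnt	(take a snapshot)
 */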
static int
ffs_mount(struct mount *mp)
{
	struct vnode *devvp;
	struct thread *td;
	struct ufsmount *ump = NULL;
	struct fs *fs;
	int error, flags;
	u_int mntorflags, mntandnotflags;
	accmode_t accmode;
	struct nameidata ndp;
	char *fspec;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
	}

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	mntorflags = 0;
	mntandnotflags = 0;
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
		mntorflags |= MNT_SNAPSHOT;
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
		vfs_deleteopt(mp->mnt_opt, "snapshot");
	}

	MNT_ILOCK(mp);
	mp->mnt_flag = (mp->mnt_flag | mntorflags) & ~mntandnotflags;
	MNT_IUNLOCK(mp);
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * Flush any dirty data and suspend filesystem.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			for (;;) {
				vn_finished_write(mp);
				if ((error = vfs_write_suspend(mp)) != 0)
					return (error);
				MNT_ILOCK(mp);
				if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
					/*
					 * Allow the secondary writes
					 * to proceed.
					 */
					mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
					    MNTK_SUSPEND2);
					wakeup(&mp->mnt_flag);
					MNT_IUNLOCK(mp);
					/*
					 * Allow the curthread to
					 * ignore the suspension to
					 * synchronize on-disk state.
					 */
					td->td_pflags |= TDP_IGNSUSP;
					break;
				}
				MNT_IUNLOCK(mp);
				vn_start_write(NULL, &mp, V_WAIT);
			}
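			/*
			 * Note: the loop above retries because suspension
			 * can race with other writers; we proceed only
			 * once MNTK_SUSPENDED has been observed while
			 * holding the mount interlock.  ffs_unmount()
			 * uses the same pattern.
			 */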
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				vfs_write_resume(mp);
				return (error);
			}
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("%s: %s: blocks %jd files %d\n",
				    fs->fs_fsmnt, "update error",
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				vfs_write_resume(mp);
				return (error);
			}
			DROP_GIANT();
			g_topology_lock();
			g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			fs->fs_ronly = 1;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/*
			 * Allow the writers to note that filesystem
			 * is ro now.
			 */
			vfs_write_resume(mp);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, td)) != 0)
			return (error);
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
				     (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not %s\n",
					   fs->fs_fsmnt, "properly dismounted");
				} else {
					printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
					    fs->fs_fsmnt);
					return (EPERM);
				}
			}
			DROP_GIANT();
			g_topology_lock();
			/*
			 * If we're the root device, we may not have an E count
			 * yet, get it now.
			 */
			if (ump->um_cp->ace == 0)
				error = g_access(ump->um_cp, 0, 1, 1);
			else
				error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error)
				return (error);
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			fs->fs_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				vn_finished_write(mp);
				return (error);
			}
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))) {
				vn_finished_write(mp);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vn_finished_write(mp);
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (mp->mnt_flag & MNT_SOFTDEP) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
			return (ffs_snapshot(mp, fspec));
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(&ndp)) != 0)
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;
	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update only
		 *
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
		 */

		if (devvp->v_rdev != ump->um_devvp->v_rdev)
			error = EINVAL;	/* needs translation */
		vput(devvp);
		if (error)
			return (error);
	} else {
		/*
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
		 * Note that vfs_mount() populates f_mntonname for us.
		 */
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
			vrele(devvp);
			return (error);
		}
	}
	vfs_mountedfrom(mp, fspec);
	return (0);
}

/*
 * Compatibility with old mount system call.
 */

static int
ffs_cmount(struct mntarg *ma, void *data, int flags)
{
	struct ufs_args args;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
	error = kernel_mount(ma, flags);

	return (error);
}

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 */
static int
ffs_reload(struct mount *mp, struct thread *td)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
	int i, blks, size, error;
	int32_t *lp;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	ump = VFSTOUFS(mp);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp, 0);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
	    NOCRED, &bp)) != 0)
		return (error);
	newfs = (struct fs *)bp->b_data;
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
		brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_active = fs->fs_active;
	/* The file system is still read-only. */
	newfs->fs_ronly = 1;
	sblockloc = fs->fs_sblockloc;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: reload pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}

loop:
	MNT_ILOCK(mp);
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		/*
		 * Step 4: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 5: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			return (error);
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (0);
}

/*
 * Possible superblock locations ordered from most to least likely.
 */
static int sblock_try[] = SBLOCKSEARCH;
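/*
 * For reference (values from <ufs/ffs/fs.h>, believed current as of
 * this revision): SBLOCKSEARCH expands to { SBLOCK_UFS2 (65536),
 * SBLOCK_UFS1 (8192), SBLOCK_FLOPPY (0), SBLOCK_PIGGY (262144), -1 }.
 */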

/*
 * Common code for mount and mountroot
 */
static int
ffs_mountfs(devvp, mp, td)
	struct vnode *devvp;
	struct mount *mp;
	struct thread *td;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	struct cdev *dev;
	void *space;
	ufs2_daddr_t sblockloc;
	int error, i, blks, size, ronly;
	int32_t *lp;
	struct ucred *cred;
	struct g_consumer *cp;
	struct mount *nmp;

	bp = NULL;
	ump = NULL;
	cred = td ? td->td_ucred : NOCRED;
	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;

	dev = devvp->v_rdev;
	dev_ref(dev);
	DROP_GIANT();
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);

	/*
	 * If we are a root mount, drop the E flag so fsck can do its magic.
	 * We will pick it up again when we remount R/W.
	 */
	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
		error = g_access(cp, 0, 0, -1);
	g_topology_unlock();
	PICKUP_GIANT();
	VOP_UNLOCK(devvp, 0);
	if (error)
		goto out;
	if (devvp->v_rdev->si_iosize_max != 0)
		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
	if (mp->mnt_iosize_max > MAXPHYS)
		mp->mnt_iosize_max = MAXPHYS;

	devvp->v_bufobj.bo_private = cp;
	devvp->v_bufobj.bo_ops = &ffs_ops;

	fs = NULL;
	sblockloc = 0;
	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; sblock_try[i] != -1; i++) {
		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
			error = EINVAL;
			vfs_mount_error(mp,
			    "Invalid sectorsize %d for superblock size %d",
			    cp->provider->sectorsize, SBLOCKSIZE);
			goto out;
		}
		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
		    cred, &bp)) != 0)
			goto out;
		fs = (struct fs *)bp->b_data;
		sblockloc = sblock_try[i];
		if ((fs->fs_magic == FS_UFS1_MAGIC ||
		     (fs->fs_magic == FS_UFS2_MAGIC &&
		      (fs->fs_sblockloc == sblockloc ||
		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
		    fs->fs_bsize <= MAXBSIZE &&
		    fs->fs_bsize >= sizeof(struct fs))
			break;
		brelse(bp);
		bp = NULL;
	}
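	/*
	 * Note on the acceptance test above: a UFS2 superblock whose
	 * recorded fs_sblockloc does not match the location it was read
	 * from is accepted only when the filesystem predates
	 * FS_FLAGS_UPDATED; this keeps a backup copy of the superblock
	 * from being mistaken for the primary.
	 */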
	if (sblock_try[i] == -1) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	fs->fs_fmod = 0;
	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
	fs->fs_flags &= ~FS_UNCLEAN;
	if (fs->fs_clean == 0) {
		fs->fs_flags |= FS_UNCLEAN;
		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
		     (fs->fs_flags & FS_DOSOFTDEP))) {
			printf(
"WARNING: %s was not properly dismounted\n",
			    fs->fs_fsmnt);
		} else {
			printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
			    fs->fs_fsmnt);
			error = EPERM;
			goto out;
		}
		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
		    (mp->mnt_flag & MNT_FORCE)) {
			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
			    (intmax_t)fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
			fs->fs_pendingblocks = 0;
			fs->fs_pendinginodes = 0;
		}
	}
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: mount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
#ifdef UFS_GJOURNAL
		/*
		 * Get journal provider name.
		 */
		size = 1024;
		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
		if (g_io_getattr("GJOURNAL::provider", cp, &size,
		    mp->mnt_gjprovider) == 0) {
			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
			    M_UFSMNT, M_WAITOK);
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_GJOURNAL;
			MNT_IUNLOCK(mp);
		} else {
			printf(
"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
			    mp->mnt_stat.f_mntonname);
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
#else
		printf(
"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	} else {
		mp->mnt_gjprovider = NULL;
	}
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
	ump->um_cp = cp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_fstype = UFS1;
		ump->um_balloc = ffs_balloc_ufs1;
	} else {
		ump->um_fstype = UFS2;
		ump->um_balloc = ffs_balloc_ufs2;
	}
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	ump->um_ifree = ffs_ifree;
	ump->um_rdonly = ffs_rdonly;
	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBLOCKSIZE)
		bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	ffs_oldfscompat_read(fs, ump, sblockloc);
	fs->fs_ronly = ronly;
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
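	/*
	 * A single allocation backs three consecutive regions: the
	 * cylinder group summary area (fs_csp), the per-cg cluster
	 * summaries (fs_maxcluster, present when fs_contigsumsize > 0),
	 * and the per-cg directory usage counts (fs_contigdirs).  The
	 * code below carves the buffer up in that order.
	 */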
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	fs->fs_active = NULL;
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	nmp = NULL;
	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
		if (nmp)
			vfs_rel(nmp);
		vfs_getnewfsid(mp);
	}
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
#ifdef MAC
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_MULTILABEL;
		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: multilabel flag on fs but no MAC support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_ACLS;
		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
#ifdef UFS_EXTATTR
	ufs_extattr_uepm_init(&ump->um_extattr);
#endif
	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	bzero(fs->fs_fsmnt, MAXMNTLEN);
	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);

	if (mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * This will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	if (ronly == 0) {
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		if (fs->fs_snapinum[0] != 0)
			ffs_snapshot_mount(mp);
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
	}
	/*
	 * Initialize filesystem stat information in mount struct.
	 */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
	    MNTK_EXTENDED_SHARED;
	MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
	/*
	 * Auto-starting does the following:
	 *	- check for /.attribute in the fs, and extattr_start if so
	 *	- for each file in .attribute, enable that file with
	 * 	  an attribute of the same name.
	 * Not clear how to report errors -- probably eat them.
	 * This would all happen while the filesystem was busy/not
	 * available, so would effectively be "atomic".
	 */
	mp->mnt_stat.f_iosize = fs->fs_bsize;
	(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
	return (0);
out:
	if (bp)
		brelse(bp);
	if (cp != NULL) {
		DROP_GIANT();
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
		PICKUP_GIANT();
	}
	if (ump) {
		mtx_destroy(UFS_MTX(ump));
		if (mp->mnt_gjprovider != NULL) {
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
		free(ump->um_fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	dev_rel(dev);
	return (error);
}

#include <sys/sysctl.h>
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
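/*
 * Debug knob: when set, ffs_oldfscompat_read() temporarily inflates
 * fs_cgsize to a full fs_bsize, and ffs_oldfscompat_write() restores it.
 */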

/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_read(fs, ump, sblockloc)
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
{
	off_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
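	/*
	 * UFS1 uses signed 32-bit block pointers, so the largest byte
	 * offset reachable through them is 0x80000000 * fs_bsize - 1.
	 * Clamp fs_maxfilesize accordingly, saving the on-disk value so
	 * that ffs_oldfscompat_write() can restore it.
	 */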
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
}

/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_write(fs, ump)
	struct fs *fs;
	struct ufsmount *ump;
{

	/*
	 * Copy back UFS2 updated fields that UFS1 inspects.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		fs->fs_old_time = fs->fs_time;
		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
	}
	if (bigcgs) {
		fs->fs_cgsize = fs->fs_save_cgsize;
		fs->fs_save_cgsize = 0;
	}
}

/*
 * unmount system call
 */
static int
ffs_unmount(mp, mntflags)
	struct mount *mp;
	int mntflags;
{
	struct thread *td;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, flags, susp;
#ifdef UFS_EXTATTR
	int e_restart;
#endif

	flags = 0;
	td = curthread;
	fs = ump->um_fs;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		susp = fs->fs_ronly != 0;
	} else
		susp = 0;
#ifdef UFS_EXTATTR
	if ((error = ufs_extattr_stop(mp, td))) {
		if (error != EOPNOTSUPP)
			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
			    error);
		e_restart = 0;
	} else {
		ufs_extattr_uepm_destroy(&ump->um_extattr);
		e_restart = 1;
	}
#endif
	if (susp) {
		/*
		 * dounmount already called vn_start_write().
		 */
		for (;;) {
			vn_finished_write(mp);
			if ((error = vfs_write_suspend(mp)) != 0)
				return (error);
			MNT_ILOCK(mp);
			if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
				mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
				    MNTK_SUSPEND2);
				wakeup(&mp->mnt_flag);
				MNT_IUNLOCK(mp);
				td->td_pflags |= TDP_IGNSUSP;
				break;
			}
			MNT_IUNLOCK(mp);
			vn_start_write(NULL, &mp, V_WAIT);
		}
	}
	if (mp->mnt_flag & MNT_SOFTDEP)
		error = softdep_flushfiles(mp, flags, td);
	else
		error = ffs_flushfiles(mp, flags, td);
	if (error != 0 && error != ENXIO)
		goto fail;

	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: unmount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	if (fs->fs_ronly == 0) {
		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
		error = ffs_sbupdate(ump, MNT_WAIT, 0);
		if (error && error != ENXIO) {
			fs->fs_clean = 0;
			goto fail;
		}
	}
	if (susp) {
		vfs_write_resume(mp);
		vn_start_write(NULL, &mp, V_WAIT);
	}
	DROP_GIANT();
	g_topology_lock();
	g_vfs_close(ump->um_cp);
	g_topology_unlock();
	PICKUP_GIANT();
	vrele(ump->um_devvp);
	dev_rel(ump->um_dev);
	mtx_destroy(UFS_MTX(ump));
	if (mp->mnt_gjprovider != NULL) {
		free(mp->mnt_gjprovider, M_UFSMNT);
		mp->mnt_gjprovider = NULL;
	}
	free(fs->fs_csp, M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = NULL;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
	return (error);

fail:
	if (susp) {
		vfs_write_resume(mp);
		vn_start_write(NULL, &mp, V_WAIT);
	}
#ifdef UFS_EXTATTR
	if (e_restart) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		(void) ufs_extattr_autostart(mp, td);
#endif
	}
#endif

	return (error);
}

/*
 * Flush out all the files in a filesystem.
 */
int
ffs_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct ufsmount *ump;
	int error;

	ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			quotaoff(td, mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
	/*
	 * Flush all the files.
	 */
	if ((error = vflush(mp, 0, flags, td)) != 0)
		return (error);
	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp, 0);
	return (error);
}

/*
 * Get filesystem statistics.
 */
static int
ffs_statfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_statfs");
	sbp->f_version = STATFS_VERSION;
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	UFS_LOCK(ump);
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
	    dbtofsb(fs, fs->fs_pendingblocks);
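	/*
	 * The f_bavail figure above can legitimately go negative once
	 * space beyond the minfree reserve is in use; utilities such
	 * as df(1) then report a negative "Avail" value.
	 */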
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	UFS_UNLOCK(ump);
	sbp->f_namemax = NAME_MAX;
	return (0);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
static int
ffs_sync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *mvp, *vp, *devvp;
	struct thread *td;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;
	int suspend;
	int suspended;
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	td = curthread;
	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	wait = 0;
	suspend = 0;
	suspended = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
	MNT_ILOCK(mp);
loop:
	/* Grab snapshot of secondary write counts */
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;

	/* Grab snapshot of softdep dependency counts */
	MNT_IUNLOCK(mp);
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
	MNT_ILOCK(mp);

	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/*
		 * Depend on the mntvnode_slock to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT || error == ENOLCK) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	bo = &devvp->v_bufobj;
	BO_LOCK(bo);
	if (waitfor != MNT_LAZY &&
	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
		BO_UNLOCK(bo);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else if (suspend != 0) {
		if (softdep_check_suspend(mp,
					  devvp,
					  softdep_deps,
					  softdep_accdeps,
					  secondary_writes,
					  secondary_accwrites) != 0)
			goto loop;	/* More work needed */
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		BO_UNLOCK(bo);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	return (allerror);
}

int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	return (ffs_vgetf(mp, ino, flags, vpp, 0));
}

int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this malloc() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive locking.
	 */
	VN_LOCK_AREC(vp);
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	ip->i_ea_refs = 0;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	error = insmntque(vp, mp);
	if (error != 0) {
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);
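	/*
	 * A non-error return with *vpp set means another thread won the
	 * creation race; vfs_hash_insert() has already disposed of our
	 * vnode and handed back the winner.
	 */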

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */
	if (vp->v_type != VFIFO) {
		/* FFS supports shared locking for all files except fifos. */
		VN_LOCK_ASHARE(vp);
	}

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
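		/* arc4random() / 2 + 1 guarantees a nonzero generation. */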
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call ffs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the given client host has export rights and return
 *   those rights via exflagsp and credanonp
 */
static int
ffs_fhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	struct ufid *ufhp;
	struct fs *fs;

	ufhp = (struct ufid *)fhp;
	fs = VFSTOUFS(mp)->um_fs;
	if (ufhp->ufid_ino < ROOTINO ||
	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
		return (ESTALE);
	return (ufs_fhtovp(mp, ufhp, vpp));
}

/*
 * Initialize the filesystem.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{

	softdep_initialize();
	return (ufs_init(vfsp));
}

/*
 * Undo the work of ffs_init().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int ret;

	ret = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (ret);
}

/*
 * Write a superblock and associated information back to disk.
 */
int
ffs_sbupdate(mp, waitfor, suspended)
	struct ufsmount *mp;
	int waitfor;
	int suspended;
{
	struct fs *fs = mp->um_fs;
	struct buf *sbbp;
	struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;

	if (fs->fs_ronly == 1 &&
	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
	    (MNT_RDONLY | MNT_UPDATE))
		panic("ffs_sbupdate: write read-only filesystem");
	/*
	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
	 */
	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
	    0, 0, 0);
	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
		    size, 0, 0, 0);
		bcopy(space, bp->b_data, (u_int)size);
		space = (char *)space + size;
		if (suspended)
			bp->b_flags |= B_VALIDSUSPWRT;
		if (waitfor != MNT_WAIT)
			bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror) {
		brelse(sbbp);
		return (allerror);
	}
	bp = sbbp;
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
		fs->fs_sblockloc = SBLOCK_UFS1;
	}
	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
		fs->fs_sblockloc = SBLOCK_UFS2;
	}
	fs->fs_fmod = 0;
	fs->fs_time = time_second;
	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
	if (suspended)
		bp->b_flags |= B_VALIDSUSPWRT;
	if (waitfor != MNT_WAIT)
		bawrite(bp);
	else if ((error = bwrite(bp)) != 0)
		allerror = error;
	return (allerror);
}

static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname)
{

#ifdef UFS_EXTATTR
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#else
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#endif
}

static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
		uma_zfree(uma_ufs1, ip->i_din1);
	else if (ip->i_din2 != NULL)
		uma_zfree(uma_ufs2, ip->i_din2);
	uma_zfree(uma_inode, ip);
}

static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");

/*
 * Complete a background write started from bwrite.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

	/*
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");
	/* Grab an extra reference to be dropped by the bufdone() below. */
	bufobj_wrefl(bufobj);
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (!LIST_EMPTY(&bp->b_dep))
		buf_complete(bp);
#ifdef SOFTUPDATES
	if (!LIST_EMPTY(&bp->b_dep))
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~B_CACHE;
	bufdone(bp);
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}

/*
 * Write, release buffer on completion.  (Done by iodone
 * if async).  Do not bother writing anything if the buffer
 * is invalid.
 *
 * Note that we set B_CACHE here, indicating that buffer is
 * fully valid and thus cacheable.  This is true even of NFS
 * now so we set it generally.  This could be set either here
 * or in biodone() since the I/O is synchronous.  We put it
 * here.
 */
static int
ffs_bufwrite(struct buf *bp)
{
	int oldflags, s;
	struct buf *newbp;

	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}

	oldflags = bp->b_flags;

	if (!BUF_ISLOCKED(bp))
		panic("bufwrite: buffer is not busy???");
	s = splbio();
	/*
	 * If a background write is already in progress, delay
	 * writing this block if it is asynchronous. Otherwise
	 * wait for the background write to complete.
	 */
	BO_LOCK(bp->b_bufobj);
	if (bp->b_vflags & BV_BKGRDINPROG) {
		if (bp->b_flags & B_ASYNC) {
			BO_UNLOCK(bp->b_bufobj);
			splx(s);
			bdwrite(bp);
			return (0);
		}
		bp->b_vflags |= BV_BKGRDWAIT;
		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
		if (bp->b_vflags & BV_BKGRDINPROG)
			panic("bufwrite: still writing");
	}
	BO_UNLOCK(bp->b_bufobj);

	/* Mark the buffer clean */
	bundirty(bp);

	/*
	 * If this buffer is marked for background writing and we
	 * do not have to wait for it, make a copy and write the
	 * copy so as to leave this buffer ready for further use.
	 *
	 * This optimization eats a lot of memory.  If we have a page
	 * or buffer shortfall we can't do it.
	 */
	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
	    (bp->b_flags & B_ASYNC) &&
	    !vm_page_count_severe() &&
	    !buf_dirty_count_severe()) {
		KASSERT(bp->b_iodone == NULL,
		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

		/* get a new block */
		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
		if (newbp == NULL)
			goto normal_write;

		/*
		 * set it to be identical to the old block.  We have to
		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
		 * to avoid confusing the splay tree and gbincore().
		 */
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		newbp->b_lblkno = bp->b_lblkno;
		newbp->b_xflags |= BX_BKGRDMARKER;
		BO_LOCK(bp->b_bufobj);
		bp->b_vflags |= BV_BKGRDINPROG;
		bgetvp(bp->b_vp, newbp);
		BO_UNLOCK(bp->b_bufobj);
		newbp->b_bufobj = &bp->b_vp->v_bufobj;
		newbp->b_blkno = bp->b_blkno;
		newbp->b_offset = bp->b_offset;
		newbp->b_iodone = ffs_backgroundwritedone;
		newbp->b_flags |= B_ASYNC;
		newbp->b_flags &= ~B_INVAL;

#ifdef SOFTUPDATES
		/* move over the dependencies */
		if (!LIST_EMPTY(&bp->b_dep))
			softdep_move_dependencies(bp, newbp);
#endif

		/*
		 * Initiate write on the copy, release the original to
		 * the B_LOCKED queue so that it cannot go away until
		 * the background write completes. If not locked it could go
		 * away and then be reconstituted while it was being written.
		 * If the reconstituted buffer were written, we could end up
		 * with two background copies being written at the same time.
		 */
		bqrelse(bp);
		bp = newbp;
	}

	/* Let the normal bufwrite do the rest for us */
normal_write:
	return (bufwrite(bp));
}

static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
	struct vnode *vp;
	int error;
	struct buf *tbp;

	vp = bo->__bo_vnode;
	if (bp->b_iocmd == BIO_WRITE) {
		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
			panic("ffs_geom_strategy: bad I/O");
		bp->b_flags &= ~B_VALIDSUSPWRT;
		if ((vp->v_vflag & VV_COPYONWRITE) &&
		    vp->v_rdev->si_snapdata != NULL) {
			if ((bp->b_flags & B_CLUSTER) != 0) {
				runningbufwakeup(bp);
				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
					      b_cluster.cluster_entry) {
					error = ffs_copyonwrite(vp, tbp);
					if (error != 0 &&
					    error != EOPNOTSUPP) {
						bp->b_error = error;
						bp->b_ioflags |= BIO_ERROR;
						bufdone(bp);
						return;
					}
				}
				bp->b_runningbufspace = bp->b_bufsize;
				atomic_add_long(&runningbufspace,
					       bp->b_runningbufspace);
			} else {
				error = ffs_copyonwrite(vp, bp);
				if (error != 0 && error != EOPNOTSUPP) {
					bp->b_error = error;
					bp->b_ioflags |= BIO_ERROR;
					bufdone(bp);
					return;
				}
			}
		}
#ifdef SOFTUPDATES
		if ((bp->b_flags & B_CLUSTER) != 0) {
			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
				      b_cluster.cluster_entry) {
				if (!LIST_EMPTY(&tbp->b_dep))
					buf_start(tbp);
			}
		} else {
			if (!LIST_EMPTY(&bp->b_dep))
				buf_start(bp);
		}

#endif
	}
	g_vfs_strategy(bo, bp);
}

#ifdef	DDB

static void
db_print_ffs(struct ufsmount *ump)
{
	db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
		  "su_req %d\n",
	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
	    ump->softdep_on_worklist_inprogress, ump->softdep_deps,
	    ump->softdep_req);
}

DB_SHOW_COMMAND(ffs, db_show_ffs)
{
	struct mount *mp;
	struct ufsmount *ump;

	if (have_addr) {
		ump = VFSTOUFS((struct mount *)addr);
		db_print_ffs(ump);
		return;
	}

	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
			db_print_ffs(VFSTOUFS(mp));
	}
}

#endif	/* DDB */