1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1991, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#include "opt_quota.h"
34#include "opt_ufs.h"
35#include "opt_ffs.h"
36#include "opt_ddb.h"
37
38#include <sys/param.h>
39#include <sys/gsb_crc32.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/priv.h>
43#include <sys/proc.h>
44#include <sys/taskqueue.h>
45#include <sys/kernel.h>
46#include <sys/ktr.h>
47#include <sys/vnode.h>
48#include <sys/mount.h>
49#include <sys/bio.h>
50#include <sys/buf.h>
51#include <sys/conf.h>
52#include <sys/fcntl.h>
53#include <sys/ioccom.h>
54#include <sys/malloc.h>
55#include <sys/mutex.h>
56#include <sys/rwlock.h>
57#include <sys/sysctl.h>
58#include <sys/vmmeter.h>
59
60#include <security/mac/mac_framework.h>
61
62#include <ufs/ufs/dir.h>
63#include <ufs/ufs/extattr.h>
64#include <ufs/ufs/gjournal.h>
65#include <ufs/ufs/quota.h>
66#include <ufs/ufs/ufsmount.h>
67#include <ufs/ufs/inode.h>
68#include <ufs/ufs/ufs_extern.h>
69
70#include <ufs/ffs/fs.h>
71#include <ufs/ffs/ffs_extern.h>
72
73#include <vm/vm.h>
74#include <vm/uma.h>
75#include <vm/vm_page.h>
76
77#include <geom/geom.h>
78#include <geom/geom_vfs.h>
79
80#include <ddb/ddb.h>
81
82static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
83VFS_SMR_DECLARE;
84
85static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
86static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
87		    ufs2_daddr_t);
88static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
89static int	ffs_sync_lazy(struct mount *mp);
90static int	ffs_use_bread(void *devfd, off_t loc, void **bufp, int size);
91static int	ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size);
92
93static vfs_init_t ffs_init;
94static vfs_uninit_t ffs_uninit;
95static vfs_extattrctl_t ffs_extattrctl;
96static vfs_cmount_t ffs_cmount;
97static vfs_unmount_t ffs_unmount;
98static vfs_mount_t ffs_mount;
99static vfs_statfs_t ffs_statfs;
100static vfs_fhtovp_t ffs_fhtovp;
101static vfs_sync_t ffs_sync;
102
103static struct vfsops ufs_vfsops = {
104	.vfs_extattrctl =	ffs_extattrctl,
105	.vfs_fhtovp =		ffs_fhtovp,
106	.vfs_init =		ffs_init,
107	.vfs_mount =		ffs_mount,
108	.vfs_cmount =		ffs_cmount,
109	.vfs_quotactl =		ufs_quotactl,
110	.vfs_root =		vfs_cache_root,
111	.vfs_cachedroot =	ufs_root,
112	.vfs_statfs =		ffs_statfs,
113	.vfs_sync =		ffs_sync,
114	.vfs_uninit =		ffs_uninit,
115	.vfs_unmount =		ffs_unmount,
116	.vfs_vget =		ffs_vget,
117	.vfs_susp_clean =	process_deferred_inactive,
118};
119
120VFS_SET(ufs_vfsops, ufs, 0);
121MODULE_VERSION(ufs, 1);
122
123static b_strategy_t ffs_geom_strategy;
124static b_write_t ffs_bufwrite;
125
126static struct buf_ops ffs_ops = {
127	.bop_name =	"FFS",
128	.bop_write =	ffs_bufwrite,
129	.bop_strategy =	ffs_geom_strategy,
130	.bop_sync =	bufsync,
131#ifdef NO_FFS_SNAPSHOT
132	.bop_bdflush =	bufbdflush,
133#else
134	.bop_bdflush =	ffs_bdflush,
135#endif
136};
137
138/*
139 * Note that userquota and groupquota options are not currently used
140 * by UFS/FFS code and generally mount(8) does not pass those options
141 * from userland, but they can be passed by loader(8) via
142 * vfs.root.mountfrom.options.
143 */
144static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
145    "noclusterw", "noexec", "export", "force", "from", "groupquota",
146    "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir",
147    "nosymfollow", "sync", "union", "userquota", "untrusted", NULL };
148
149static int ffs_enxio_enable = 1;
150SYSCTL_DECL(_vfs_ffs);
151SYSCTL_INT(_vfs_ffs, OID_AUTO, enxio_enable, CTLFLAG_RWTUN,
152    &ffs_enxio_enable, 0,
153    "enable mapping of other disk I/O errors to ENXIO");
154
155/*
156 * Return buffer with the contents of block "offset" from the beginning of
157 * directory "ip".  If "res" is non-zero, fill it in with a pointer to the
158 * remaining space in the directory.
159 */
160static int
161ffs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp)
162{
163	struct inode *ip;
164	struct fs *fs;
165	struct buf *bp;
166	ufs_lbn_t lbn;
167	int bsize, error;
168
169	ip = VTOI(vp);
170	fs = ITOFS(ip);
171	lbn = lblkno(fs, offset);
172	bsize = blksize(fs, ip, lbn);
173
174	*bpp = NULL;
175	error = bread(vp, lbn, bsize, NOCRED, &bp);
176	if (error) {
177		return (error);
178	}
179	if (res)
180		*res = (char *)bp->b_data + blkoff(fs, offset);
181	*bpp = bp;
182	return (0);
183}
184
185/*
186 * Load up the contents of an inode and copy the appropriate pieces
187 * to the incore copy.
188 */
189static int
190ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
191{
192	struct ufs1_dinode *dip1;
193	struct ufs2_dinode *dip2;
194	int error;
195
196	if (I_IS_UFS1(ip)) {
197		dip1 = ip->i_din1;
198		*dip1 =
199		    *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
200		ip->i_mode = dip1->di_mode;
201		ip->i_nlink = dip1->di_nlink;
202		ip->i_effnlink = dip1->di_nlink;
203		ip->i_size = dip1->di_size;
204		ip->i_flags = dip1->di_flags;
205		ip->i_gen = dip1->di_gen;
206		ip->i_uid = dip1->di_uid;
207		ip->i_gid = dip1->di_gid;
208		return (0);
209	}
210	dip2 = ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
211	if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0 &&
212	    !ffs_fsfail_cleanup(ITOUMP(ip), error)) {
213		printf("%s: inode %jd: check-hash failed\n", fs->fs_fsmnt,
214		    (intmax_t)ino);
215		return (error);
216	}
217	*ip->i_din2 = *dip2;
218	dip2 = ip->i_din2;
219	ip->i_mode = dip2->di_mode;
220	ip->i_nlink = dip2->di_nlink;
221	ip->i_effnlink = dip2->di_nlink;
222	ip->i_size = dip2->di_size;
223	ip->i_flags = dip2->di_flags;
224	ip->i_gen = dip2->di_gen;
225	ip->i_uid = dip2->di_uid;
226	ip->i_gid = dip2->di_gid;
227	return (0);
228}
229
230/*
231 * Verify that a filesystem block number is a valid data block.
232 * This routine is only called on untrusted filesystems.
233 */
234static int
235ffs_check_blkno(struct mount *mp, ino_t inum, ufs2_daddr_t daddr, int blksize)
236{
237	struct fs *fs;
238	struct ufsmount *ump;
239	ufs2_daddr_t end_daddr;
240	int cg, havemtx;
241
242	KASSERT((mp->mnt_flag & MNT_UNTRUSTED) != 0,
243	    ("ffs_check_blkno called on a trusted file system"));
244	ump = VFSTOUFS(mp);
245	fs = ump->um_fs;
246	cg = dtog(fs, daddr);
247	end_daddr = daddr + numfrags(fs, blksize);
248	/*
249	 * Verify that the block number is a valid data block. Also check
250	 * that it does not point to an inode block or a superblock. Accept
251	 * blocks that are unalloacted (0) or part of snapshot metadata
252	 * (BLK_NOCOPY or BLK_SNAP).
253	 *
254	 * Thus, the block must be in a valid range for the filesystem and
255	 * either in the space before a backup superblock (except the first
256	 * cylinder group where that space is used by the bootstrap code) or
257	 * after the inode blocks and before the end of the cylinder group.
258	 */
259	if ((uint64_t)daddr <= BLK_SNAP ||
260	    ((uint64_t)end_daddr <= fs->fs_size &&
261	    ((cg > 0 && end_daddr <= cgsblock(fs, cg)) ||
262	    (daddr >= cgdmin(fs, cg) &&
263	    end_daddr <= cgbase(fs, cg) + fs->fs_fpg))))
264		return (0);
265	if ((havemtx = mtx_owned(UFS_MTX(ump))) == 0)
266		UFS_LOCK(ump);
267	if (ppsratecheck(&ump->um_last_integritymsg,
268	    &ump->um_secs_integritymsg, 1)) {
269		UFS_UNLOCK(ump);
270		uprintf("\n%s: inode %jd, out-of-range indirect block "
271		    "number %jd\n", mp->mnt_stat.f_mntonname, inum, daddr);
272		if (havemtx)
273			UFS_LOCK(ump);
274	} else if (!havemtx)
275		UFS_UNLOCK(ump);
276	return (EINTEGRITY);
277}
278
279/*
280 * On first ENXIO error, initiate an asynchronous forcible unmount.
281 * Used to unmount filesystems whose underlying media has gone away.
282 *
283 * Return true if a cleanup is in progress.
284 */
285int
286ffs_fsfail_cleanup(struct ufsmount *ump, int error)
287{
288	int retval;
289
290	UFS_LOCK(ump);
291	retval = ffs_fsfail_cleanup_locked(ump, error);
292	UFS_UNLOCK(ump);
293	return (retval);
294}
295
296int
297ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error)
298{
299	mtx_assert(UFS_MTX(ump), MA_OWNED);
300	if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) {
301		ump->um_flags |= UM_FSFAIL_CLEANUP;
302		if (ump->um_mountp == rootvnode->v_mount)
303			panic("UFS: root fs would be forcibly unmounted");
304
305		/*
306		 * Queue an async forced unmount.
307		 */
308		vfs_ref(ump->um_mountp);
309		dounmount(ump->um_mountp,
310		    MNT_FORCE | MNT_RECURSE | MNT_DEFERRED, curthread);
311		printf("UFS: forcibly unmounting %s from %s\n",
312		    ump->um_mountp->mnt_stat.f_mntfromname,
313		    ump->um_mountp->mnt_stat.f_mntonname);
314	}
315	return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0);
316}
317
318/*
319 * Wrapper used during ENXIO cleanup to allocate empty buffers when
320 * the kernel is unable to read the real one. They are needed so that
321 * the soft updates code can use them to unwind its dependencies.
322 */
323int
324ffs_breadz(struct ufsmount *ump, struct vnode *vp, daddr_t lblkno,
325    daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt,
326    struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *),
327    struct buf **bpp)
328{
329	int error;
330
331	flags |= GB_CVTENXIO;
332	error = breadn_flags(vp, lblkno, dblkno, size, rablkno, rabsize, cnt,
333	    cred, flags, ckhashfunc, bpp);
334	if (error != 0 && ffs_fsfail_cleanup(ump, error)) {
335		error = getblkx(vp, lblkno, dblkno, size, 0, 0, flags, bpp);
336		KASSERT(error == 0, ("getblkx failed"));
337		vfs_bio_bzero_buf(*bpp, 0, size);
338	}
339	return (error);
340}
341
342static int
343ffs_mount(struct mount *mp)
344{
345	struct vnode *devvp, *odevvp;
346	struct thread *td;
347	struct ufsmount *ump = NULL;
348	struct fs *fs;
349	int error, flags;
350	int error1 __diagused;
351	uint64_t mntorflags, saved_mnt_flag;
352	accmode_t accmode;
353	struct nameidata ndp;
354	char *fspec;
355	bool mounted_softdep;
356
357	td = curthread;
358	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
359		return (EINVAL);
360	if (uma_inode == NULL) {
361		uma_inode = uma_zcreate("FFS inode",
362		    sizeof(struct inode), NULL, NULL, NULL, NULL,
363		    UMA_ALIGN_PTR, 0);
364		uma_ufs1 = uma_zcreate("FFS1 dinode",
365		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
366		    UMA_ALIGN_PTR, 0);
367		uma_ufs2 = uma_zcreate("FFS2 dinode",
368		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
369		    UMA_ALIGN_PTR, 0);
370		VFS_SMR_ZONE_SET(uma_inode);
371	}
372
373	vfs_deleteopt(mp->mnt_optnew, "groupquota");
374	vfs_deleteopt(mp->mnt_optnew, "userquota");
375
376	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
377	if (error)
378		return (error);
379
380	mntorflags = 0;
381	if (vfs_getopt(mp->mnt_optnew, "untrusted", NULL, NULL) == 0)
382		mntorflags |= MNT_UNTRUSTED;
383
384	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
385		mntorflags |= MNT_ACLS;
386
387	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
388		mntorflags |= MNT_SNAPSHOT;
389		/*
390		 * Once we have set the MNT_SNAPSHOT flag, do not
391		 * persist "snapshot" in the options list.
392		 */
393		vfs_deleteopt(mp->mnt_optnew, "snapshot");
394		vfs_deleteopt(mp->mnt_opt, "snapshot");
395	}
396
397	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
398		if (mntorflags & MNT_ACLS) {
399			vfs_mount_error(mp,
400			    "\"acls\" and \"nfsv4acls\" options "
401			    "are mutually exclusive");
402			return (EINVAL);
403		}
404		mntorflags |= MNT_NFS4ACLS;
405	}
406
407	MNT_ILOCK(mp);
408	mp->mnt_kern_flag &= ~MNTK_FPLOOKUP;
409	mp->mnt_flag |= mntorflags;
410	MNT_IUNLOCK(mp);
411
412	/*
413	 * If this is a snapshot request, take the snapshot.
414	 */
415	if (mp->mnt_flag & MNT_SNAPSHOT) {
416		if ((mp->mnt_flag & MNT_UPDATE) == 0)
417			return (EINVAL);
418		return (ffs_snapshot(mp, fspec));
419	}
420
421	/*
422	 * Must not call namei() while owning busy ref.
423	 */
424	if (mp->mnt_flag & MNT_UPDATE)
425		vfs_unbusy(mp);
426
427	/*
428	 * Not an update, or updating the name: look up the name
429	 * and verify that it refers to a sensible disk device.
430	 */
431	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec);
432	error = namei(&ndp);
433	if ((mp->mnt_flag & MNT_UPDATE) != 0) {
434		/*
435		 * Unmount does not start if MNT_UPDATE is set.  Mount
436		 * update busies mp before setting MNT_UPDATE.  We
437		 * must be able to retain our busy ref successfully,
438		 * without sleep.
439		 */
440		error1 = vfs_busy(mp, MBF_NOWAIT);
441		MPASS(error1 == 0);
442	}
443	if (error != 0)
444		return (error);
445	NDFREE_PNBUF(&ndp);
446	if (!vn_isdisk_error(ndp.ni_vp, &error)) {
447		vput(ndp.ni_vp);
448		return (error);
449	}
450
451	/*
452	 * If mount by non-root, then verify that user has necessary
453	 * permissions on the device.
454	 */
455	accmode = VREAD;
456	if ((mp->mnt_flag & MNT_RDONLY) == 0)
457		accmode |= VWRITE;
458	error = VOP_ACCESS(ndp.ni_vp, accmode, td->td_ucred, td);
459	if (error)
460		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
461	if (error) {
462		vput(ndp.ni_vp);
463		return (error);
464	}
465
466	/*
467	 * New mount
468	 *
469	 * We need the name for the mount point (also used for
470	 * "last mounted on") copied in. If an error occurs,
471	 * the mount point is discarded by the upper level code.
472	 * Note that vfs_mount_alloc() populates f_mntonname for us.
473	 */
474	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
475		if ((error = ffs_mountfs(ndp.ni_vp, mp, td)) != 0) {
476			vrele(ndp.ni_vp);
477			return (error);
478		}
479	} else {
480		/*
481		 * When updating, check whether changing from read-only to
482		 * read/write; if there is no device name, that's all we do.
483		 */
484		ump = VFSTOUFS(mp);
485		fs = ump->um_fs;
486		odevvp = ump->um_odevvp;
487		devvp = ump->um_devvp;
488
489		/*
490		 * If it's not the same vnode, or at least the same device
491		 * then it's not correct.
492		 */
493		if (ndp.ni_vp->v_rdev != ump->um_odevvp->v_rdev)
494			error = EINVAL; /* needs translation */
495		vput(ndp.ni_vp);
496		if (error)
497			return (error);
498		if (fs->fs_ronly == 0 &&
499		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
500			/*
501			 * Flush any dirty data and suspend filesystem.
502			 */
503			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
504				return (error);
505			error = vfs_write_suspend_umnt(mp);
506			if (error != 0)
507				return (error);
508
509			fs->fs_ronly = 1;
510			if (MOUNTEDSOFTDEP(mp)) {
511				MNT_ILOCK(mp);
512				mp->mnt_flag &= ~MNT_SOFTDEP;
513				MNT_IUNLOCK(mp);
514				mounted_softdep = true;
515			} else
516				mounted_softdep = false;
517
518			/*
519			 * Check for and optionally get rid of files open
520			 * for writing.
521			 */
522			flags = WRITECLOSE;
523			if (mp->mnt_flag & MNT_FORCE)
524				flags |= FORCECLOSE;
525			if (mounted_softdep) {
526				error = softdep_flushfiles(mp, flags, td);
527			} else {
528				error = ffs_flushfiles(mp, flags, td);
529			}
530			if (error) {
531				fs->fs_ronly = 0;
532				if (mounted_softdep) {
533					MNT_ILOCK(mp);
534					mp->mnt_flag |= MNT_SOFTDEP;
535					MNT_IUNLOCK(mp);
536				}
537				vfs_write_resume(mp, 0);
538				return (error);
539			}
540
541			if (fs->fs_pendingblocks != 0 ||
542			    fs->fs_pendinginodes != 0) {
543				printf("WARNING: %s Update error: blocks %jd "
544				    "files %d\n", fs->fs_fsmnt,
545				    (intmax_t)fs->fs_pendingblocks,
546				    fs->fs_pendinginodes);
547				fs->fs_pendingblocks = 0;
548				fs->fs_pendinginodes = 0;
549			}
550			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
551				fs->fs_clean = 1;
552			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
553				fs->fs_ronly = 0;
554				fs->fs_clean = 0;
555				if (mounted_softdep) {
556					MNT_ILOCK(mp);
557					mp->mnt_flag |= MNT_SOFTDEP;
558					MNT_IUNLOCK(mp);
559				}
560				vfs_write_resume(mp, 0);
561				return (error);
562			}
563			if (mounted_softdep)
564				softdep_unmount(mp);
565			g_topology_lock();
566			/*
567			 * Drop our write and exclusive access.
568			 */
569			g_access(ump->um_cp, 0, -1, -1);
570			g_topology_unlock();
571			MNT_ILOCK(mp);
572			mp->mnt_flag |= MNT_RDONLY;
573			MNT_IUNLOCK(mp);
574			/*
575			 * Allow the writers to note that filesystem
576			 * is ro now.
577			 */
578			vfs_write_resume(mp, 0);
579		}
580		if ((mp->mnt_flag & MNT_RELOAD) &&
581		    (error = ffs_reload(mp, 0)) != 0)
582			return (error);
583		if (fs->fs_ronly &&
584		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
585			/*
586			 * If upgrade to read-write by non-root, then verify
587			 * that user has necessary permissions on the device.
588			 */
589			vn_lock(odevvp, LK_EXCLUSIVE | LK_RETRY);
590			error = VOP_ACCESS(odevvp, VREAD | VWRITE,
591			    td->td_ucred, td);
592			if (error)
593				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
594			VOP_UNLOCK(odevvp);
595			if (error) {
596				return (error);
597			}
598			fs->fs_flags &= ~FS_UNCLEAN;
599			if (fs->fs_clean == 0) {
600				fs->fs_flags |= FS_UNCLEAN;
601				if ((mp->mnt_flag & MNT_FORCE) ||
602				    ((fs->fs_flags &
603				     (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
604				     (fs->fs_flags & FS_DOSOFTDEP))) {
605					printf("WARNING: %s was not properly "
606					   "dismounted\n",
607					   mp->mnt_stat.f_mntonname);
608				} else {
609					vfs_mount_error(mp,
610					   "R/W mount of %s denied. %s.%s",
611					   mp->mnt_stat.f_mntonname,
612					   "Filesystem is not clean - run fsck",
613					   (fs->fs_flags & FS_SUJ) == 0 ? "" :
614					   " Forced mount will invalidate"
615					   " journal contents");
616					return (EPERM);
617				}
618			}
619			g_topology_lock();
620			/*
621			 * Request exclusive write access.
622			 */
623			error = g_access(ump->um_cp, 0, 1, 1);
624			g_topology_unlock();
625			if (error)
626				return (error);
627			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
628				return (error);
629			error = vfs_write_suspend_umnt(mp);
630			if (error != 0)
631				return (error);
632			fs->fs_ronly = 0;
633			MNT_ILOCK(mp);
634			saved_mnt_flag = MNT_RDONLY;
635			if (MOUNTEDSOFTDEP(mp) && (mp->mnt_flag &
636			    MNT_ASYNC) != 0)
637				saved_mnt_flag |= MNT_ASYNC;
638			mp->mnt_flag &= ~saved_mnt_flag;
639			MNT_IUNLOCK(mp);
640			fs->fs_mtime = time_second;
641			/* check to see if we need to start softdep */
642			if ((fs->fs_flags & FS_DOSOFTDEP) &&
643			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
644				fs->fs_ronly = 1;
645				MNT_ILOCK(mp);
646				mp->mnt_flag |= saved_mnt_flag;
647				MNT_IUNLOCK(mp);
648				vfs_write_resume(mp, 0);
649				return (error);
650			}
651			fs->fs_clean = 0;
652			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
653				fs->fs_ronly = 1;
654				if ((fs->fs_flags & FS_DOSOFTDEP) != 0)
655					softdep_unmount(mp);
656				MNT_ILOCK(mp);
657				mp->mnt_flag |= saved_mnt_flag;
658				MNT_IUNLOCK(mp);
659				vfs_write_resume(mp, 0);
660				return (error);
661			}
662			if (fs->fs_snapinum[0] != 0)
663				ffs_snapshot_mount(mp);
664			vfs_write_resume(mp, 0);
665		}
666		/*
667		 * Soft updates is incompatible with "async",
668		 * so if we are doing softupdates stop the user
669		 * from setting the async flag in an update.
670		 * Softdep_mount() clears it in an initial mount
671		 * or ro->rw remount.
672		 */
673		if (MOUNTEDSOFTDEP(mp)) {
674			/* XXX: Reset too late ? */
675			MNT_ILOCK(mp);
676			mp->mnt_flag &= ~MNT_ASYNC;
677			MNT_IUNLOCK(mp);
678		}
679		/*
680		 * Keep MNT_ACLS flag if it is stored in superblock.
681		 */
682		if ((fs->fs_flags & FS_ACLS) != 0) {
683			/* XXX: Set too late ? */
684			MNT_ILOCK(mp);
685			mp->mnt_flag |= MNT_ACLS;
686			MNT_IUNLOCK(mp);
687		}
688
689		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
690			/* XXX: Set too late ? */
691			MNT_ILOCK(mp);
692			mp->mnt_flag |= MNT_NFS4ACLS;
693			MNT_IUNLOCK(mp);
694		}
695
696	}
697
698	MNT_ILOCK(mp);
699	/*
700	 * This is racy versus lookup, see ufs_fplookup_vexec for details.
701	 */
702	if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) != 0)
703		panic("MNTK_FPLOOKUP set on mount %p when it should not be", mp);
704	if ((mp->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS | MNT_UNION)) == 0)
705		mp->mnt_kern_flag |= MNTK_FPLOOKUP;
706	MNT_IUNLOCK(mp);
707
708	vfs_mountedfrom(mp, fspec);
709	return (0);
710}
711
712/*
713 * Compatibility with old mount system call.
714 */
715
716static int
717ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
718{
719	struct ufs_args args;
720	int error;
721
722	if (data == NULL)
723		return (EINVAL);
724	error = copyin(data, &args, sizeof args);
725	if (error)
726		return (error);
727
728	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
729	ma = mount_arg(ma, "export", &args.export, sizeof(args.export));
730	error = kernel_mount(ma, flags);
731
732	return (error);
733}
734
735/*
736 * Reload all incore data for a filesystem (used after running fsck on
737 * the root filesystem and finding things to fix). If the 'force' flag
738 * is 0, the filesystem must be mounted read-only.
739 *
740 * Things to do to update the mount:
741 *	1) invalidate all cached meta-data.
742 *	2) re-read superblock from disk.
743 *	3) re-read summary information from disk.
744 *	4) invalidate all inactive vnodes.
745 *	5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
746 *	   writers, if requested.
747 *	6) invalidate all cached file data.
748 *	7) re-read inode data for all active vnodes.
749 */
750int
751ffs_reload(struct mount *mp, int flags)
752{
753	struct vnode *vp, *mvp, *devvp;
754	struct inode *ip;
755	void *space;
756	struct buf *bp;
757	struct fs *fs, *newfs;
758	struct ufsmount *ump;
759	ufs2_daddr_t sblockloc;
760	int i, blks, error;
761	uint64_t size;
762	int32_t *lp;
763
764	ump = VFSTOUFS(mp);
765
766	MNT_ILOCK(mp);
767	if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
768		MNT_IUNLOCK(mp);
769		return (EINVAL);
770	}
771	MNT_IUNLOCK(mp);
772
773	/*
774	 * Step 1: invalidate all cached meta-data.
775	 */
776	devvp = VFSTOUFS(mp)->um_devvp;
777	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
778	if (vinvalbuf(devvp, 0, 0, 0) != 0)
779		panic("ffs_reload: dirty1");
780	VOP_UNLOCK(devvp);
781
782	/*
783	 * Step 2: re-read superblock from disk.
784	 */
785	fs = VFSTOUFS(mp)->um_fs;
786	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
787	    NOCRED, &bp)) != 0)
788		return (error);
789	newfs = (struct fs *)bp->b_data;
790	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
791	     newfs->fs_magic != FS_UFS2_MAGIC) ||
792	    newfs->fs_bsize > MAXBSIZE ||
793	    newfs->fs_bsize < sizeof(struct fs)) {
794			brelse(bp);
795			return (EINTEGRITY);
796	}
797	/*
798	 * Preserve the summary information, read-only status, and
799	 * superblock location by copying these fields into our new
800	 * superblock before using it to update the existing superblock.
801	 */
802	newfs->fs_si = fs->fs_si;
803	newfs->fs_ronly = fs->fs_ronly;
804	sblockloc = fs->fs_sblockloc;
805	bcopy(newfs, fs, (uint64_t)fs->fs_sbsize);
806	brelse(bp);
807	ump->um_bsize = fs->fs_bsize;
808	ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
809	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
810	UFS_LOCK(ump);
811	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
812		printf("WARNING: %s: reload pending error: blocks %jd "
813		    "files %d\n", mp->mnt_stat.f_mntonname,
814		    (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes);
815		fs->fs_pendingblocks = 0;
816		fs->fs_pendinginodes = 0;
817	}
818	UFS_UNLOCK(ump);
819
820	/*
821	 * Step 3: re-read summary information from disk.
822	 */
823	size = fs->fs_cssize;
824	blks = howmany(size, fs->fs_fsize);
825	if (fs->fs_contigsumsize > 0)
826		size += fs->fs_ncg * sizeof(int32_t);
827	size += fs->fs_ncg * sizeof(uint8_t);
828	free(fs->fs_csp, M_UFSMNT);
829	space = malloc(size, M_UFSMNT, M_WAITOK);
830	fs->fs_csp = space;
831	for (i = 0; i < blks; i += fs->fs_frag) {
832		size = fs->fs_bsize;
833		if (i + fs->fs_frag > blks)
834			size = (blks - i) * fs->fs_fsize;
835		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
836		    NOCRED, &bp);
837		if (error)
838			return (error);
839		bcopy(bp->b_data, space, (uint64_t)size);
840		space = (char *)space + size;
841		brelse(bp);
842	}
843	/*
844	 * We no longer know anything about clusters per cylinder group.
845	 */
846	if (fs->fs_contigsumsize > 0) {
847		fs->fs_maxcluster = lp = space;
848		for (i = 0; i < fs->fs_ncg; i++)
849			*lp++ = fs->fs_contigsumsize;
850		space = lp;
851	}
852	size = fs->fs_ncg * sizeof(uint8_t);
853	fs->fs_contigdirs = (uint8_t *)space;
854	bzero(fs->fs_contigdirs, size);
855	if ((flags & FFSR_UNSUSPEND) != 0) {
856		MNT_ILOCK(mp);
857		mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
858		wakeup(&mp->mnt_flag);
859		MNT_IUNLOCK(mp);
860	}
861
862loop:
863	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
864		/*
865		 * Skip syncer vnode.
866		 */
867		if (vp->v_type == VNON) {
868			VI_UNLOCK(vp);
869			continue;
870		}
871		/*
872		 * Step 4: invalidate all cached file data.
873		 */
874		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
875			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
876			goto loop;
877		}
878		if (vinvalbuf(vp, 0, 0, 0))
879			panic("ffs_reload: dirty2");
880		/*
881		 * Step 5: re-read inode data for all active vnodes.
882		 */
883		ip = VTOI(vp);
884		error =
885		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
886		    (int)fs->fs_bsize, NOCRED, &bp);
887		if (error) {
888			vput(vp);
889			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
890			return (error);
891		}
892		if ((error = ffs_load_inode(bp, ip, fs, ip->i_number)) != 0) {
893			brelse(bp);
894			vput(vp);
895			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
896			return (error);
897		}
898		ip->i_effnlink = ip->i_nlink;
899		brelse(bp);
900		vput(vp);
901	}
902	return (0);
903}
904
905/*
906 * Common code for mount and mountroot
907 */
908static int
909ffs_mountfs(struct vnode *odevvp, struct mount *mp, struct thread *td)
910{
911	struct ufsmount *ump;
912	struct fs *fs;
913	struct cdev *dev;
914	int error, i, len, ronly;
915	struct ucred *cred;
916	struct g_consumer *cp;
917	struct mount *nmp;
918	struct vnode *devvp;
919	int candelete, canspeedup;
920
921	fs = NULL;
922	ump = NULL;
923	cred = td ? td->td_ucred : NOCRED;
924	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
925
926	devvp = mntfs_allocvp(mp, odevvp);
927	KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
928	dev = devvp->v_rdev;
929	KASSERT(dev->si_snapdata == NULL, ("non-NULL snapshot data"));
930	if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
931	    (uintptr_t)mp) == 0) {
932		mntfs_freevp(devvp);
933		return (EBUSY);
934	}
935	g_topology_lock();
936	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
937	g_topology_unlock();
938	if (error != 0) {
939		atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
940		mntfs_freevp(devvp);
941		return (error);
942	}
943	dev_ref(dev);
944	devvp->v_bufobj.bo_ops = &ffs_ops;
945	BO_LOCK(&odevvp->v_bufobj);
946	odevvp->v_bufobj.bo_flag |= BO_NOBUFS;
947	BO_UNLOCK(&odevvp->v_bufobj);
948	VOP_UNLOCK(devvp);
949	if (dev->si_iosize_max != 0)
950		mp->mnt_iosize_max = dev->si_iosize_max;
951	if (mp->mnt_iosize_max > maxphys)
952		mp->mnt_iosize_max = maxphys;
953	if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
954		error = EINVAL;
955		vfs_mount_error(mp,
956		    "Invalid sectorsize %d for superblock size %d",
957		    cp->provider->sectorsize, SBLOCKSIZE);
958		goto out;
959	}
960	/* fetch the superblock and summary information */
961	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_FORCE)) != 0)
962		error = ffs_sbsearch(devvp, &fs, 0, M_UFSMNT, ffs_use_bread);
963	else
964		error = ffs_sbget(devvp, &fs, UFS_STDSB, 0, M_UFSMNT,
965		    ffs_use_bread);
966	if (error != 0)
967		goto out;
968	fs->fs_flags &= ~FS_UNCLEAN;
969	if (fs->fs_clean == 0) {
970		fs->fs_flags |= FS_UNCLEAN;
971		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
972		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
973		     (fs->fs_flags & FS_DOSOFTDEP))) {
974			printf("WARNING: %s was not properly dismounted\n",
975			    mp->mnt_stat.f_mntonname);
976		} else {
977			vfs_mount_error(mp, "R/W mount on %s denied. "
978			    "Filesystem is not clean - run fsck.%s",
979			    mp->mnt_stat.f_mntonname,
980			    (fs->fs_flags & FS_SUJ) == 0 ? "" :
981			    " Forced mount will invalidate journal contents");
982			error = EPERM;
983			goto out;
984		}
985		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
986		    (mp->mnt_flag & MNT_FORCE)) {
987			printf("WARNING: %s: lost blocks %jd files %d\n",
988			    mp->mnt_stat.f_mntonname,
989			    (intmax_t)fs->fs_pendingblocks,
990			    fs->fs_pendinginodes);
991			fs->fs_pendingblocks = 0;
992			fs->fs_pendinginodes = 0;
993		}
994	}
995	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
996		printf("WARNING: %s: mount pending error: blocks %jd "
997		    "files %d\n", mp->mnt_stat.f_mntonname,
998		    (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes);
999		fs->fs_pendingblocks = 0;
1000		fs->fs_pendinginodes = 0;
1001	}
1002	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
1003#ifdef UFS_GJOURNAL
1004		/*
1005		 * Get journal provider name.
1006		 */
1007		len = 1024;
1008		mp->mnt_gjprovider = malloc((uint64_t)len, M_UFSMNT, M_WAITOK);
1009		if (g_io_getattr("GJOURNAL::provider", cp, &len,
1010		    mp->mnt_gjprovider) == 0) {
1011			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
1012			    M_UFSMNT, M_WAITOK);
1013			MNT_ILOCK(mp);
1014			mp->mnt_flag |= MNT_GJOURNAL;
1015			MNT_IUNLOCK(mp);
1016		} else {
1017			if ((mp->mnt_flag & MNT_RDONLY) == 0)
1018				printf("WARNING: %s: GJOURNAL flag on fs "
1019				    "but no gjournal provider below\n",
1020				    mp->mnt_stat.f_mntonname);
1021			free(mp->mnt_gjprovider, M_UFSMNT);
1022			mp->mnt_gjprovider = NULL;
1023		}
1024#else
1025		printf("WARNING: %s: GJOURNAL flag on fs but no "
1026		    "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname);
1027#endif
1028	} else {
1029		mp->mnt_gjprovider = NULL;
1030	}
1031	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
1032	ump->um_cp = cp;
1033	ump->um_bo = &devvp->v_bufobj;
1034	ump->um_fs = fs;
1035	if (fs->fs_magic == FS_UFS1_MAGIC) {
1036		ump->um_fstype = UFS1;
1037		ump->um_balloc = ffs_balloc_ufs1;
1038	} else {
1039		ump->um_fstype = UFS2;
1040		ump->um_balloc = ffs_balloc_ufs2;
1041	}
1042	ump->um_blkatoff = ffs_blkatoff;
1043	ump->um_truncate = ffs_truncate;
1044	ump->um_update = ffs_update;
1045	ump->um_valloc = ffs_valloc;
1046	ump->um_vfree = ffs_vfree;
1047	ump->um_ifree = ffs_ifree;
1048	ump->um_rdonly = ffs_rdonly;
1049	ump->um_snapgone = ffs_snapgone;
1050	if ((mp->mnt_flag & MNT_UNTRUSTED) != 0)
1051		ump->um_check_blkno = ffs_check_blkno;
1052	else
1053		ump->um_check_blkno = NULL;
1054	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
1055	sx_init(&ump->um_checkpath_lock, "uchpth");
1056	ffs_oldfscompat_read(fs, ump, fs->fs_sblockloc);
1057	fs->fs_ronly = ronly;
1058	fs->fs_active = NULL;
1059	mp->mnt_data = ump;
1060	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
1061	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
1062	nmp = NULL;
1063	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
1064	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
1065		if (nmp)
1066			vfs_rel(nmp);
1067		vfs_getnewfsid(mp);
1068	}
1069	ump->um_bsize = fs->fs_bsize;
1070	ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1071	MNT_ILOCK(mp);
1072	mp->mnt_flag |= MNT_LOCAL;
1073	MNT_IUNLOCK(mp);
1074	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
1075#ifdef MAC
1076		MNT_ILOCK(mp);
1077		mp->mnt_flag |= MNT_MULTILABEL;
1078		MNT_IUNLOCK(mp);
1079#else
1080		printf("WARNING: %s: multilabel flag on fs but "
1081		    "no MAC support\n", mp->mnt_stat.f_mntonname);
1082#endif
1083	}
1084	if ((fs->fs_flags & FS_ACLS) != 0) {
1085#ifdef UFS_ACL
1086		MNT_ILOCK(mp);
1087
1088		if (mp->mnt_flag & MNT_NFS4ACLS)
1089			printf("WARNING: %s: ACLs flag on fs conflicts with "
1090			    "\"nfsv4acls\" mount option; option ignored\n",
1091			    mp->mnt_stat.f_mntonname);
1092		mp->mnt_flag &= ~MNT_NFS4ACLS;
1093		mp->mnt_flag |= MNT_ACLS;
1094
1095		MNT_IUNLOCK(mp);
1096#else
1097		printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
1098		    mp->mnt_stat.f_mntonname);
1099#endif
1100	}
1101	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
1102#ifdef UFS_ACL
1103		MNT_ILOCK(mp);
1104
1105		if (mp->mnt_flag & MNT_ACLS)
1106			printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts "
1107			    "with \"acls\" mount option; option ignored\n",
1108			    mp->mnt_stat.f_mntonname);
1109		mp->mnt_flag &= ~MNT_ACLS;
1110		mp->mnt_flag |= MNT_NFS4ACLS;
1111
1112		MNT_IUNLOCK(mp);
1113#else
1114		printf("WARNING: %s: NFSv4 ACLs flag on fs but no "
1115		    "ACLs support\n", mp->mnt_stat.f_mntonname);
1116#endif
1117	}
1118	if ((fs->fs_flags & FS_TRIM) != 0) {
1119		len = sizeof(int);
1120		if (g_io_getattr("GEOM::candelete", cp, &len,
1121		    &candelete) == 0) {
1122			if (candelete)
1123				ump->um_flags |= UM_CANDELETE;
1124			else
1125				printf("WARNING: %s: TRIM flag on fs but disk "
1126				    "does not support TRIM\n",
1127				    mp->mnt_stat.f_mntonname);
1128		} else {
1129			printf("WARNING: %s: TRIM flag on fs but disk does "
1130			    "not confirm that it supports TRIM\n",
1131			    mp->mnt_stat.f_mntonname);
1132		}
1133		if (((ump->um_flags) & UM_CANDELETE) != 0) {
1134			ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
1135			    taskqueue_thread_enqueue, &ump->um_trim_tq);
1136			taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
1137			    "%s trim", mp->mnt_stat.f_mntonname);
1138			ump->um_trimhash = hashinit(MAXTRIMIO, M_TRIM,
1139			    &ump->um_trimlisthashsize);
1140		}
1141	}
1142
1143	len = sizeof(int);
1144	if (g_io_getattr("GEOM::canspeedup", cp, &len, &canspeedup) == 0) {
1145		if (canspeedup)
1146			ump->um_flags |= UM_CANSPEEDUP;
1147	}
1148
1149	ump->um_mountp = mp;
1150	ump->um_dev = dev;
1151	ump->um_devvp = devvp;
1152	ump->um_odevvp = odevvp;
1153	ump->um_nindir = fs->fs_nindir;
1154	ump->um_bptrtodb = fs->fs_fsbtodb;
1155	ump->um_seqinc = fs->fs_frag;
1156	for (i = 0; i < MAXQUOTAS; i++)
1157		ump->um_quotas[i] = NULLVP;
1158#ifdef UFS_EXTATTR
1159	ufs_extattr_uepm_init(&ump->um_extattr);
1160#endif
1161	/*
1162	 * Set FS local "last mounted on" information (NULL pad)
1163	 */
1164	bzero(fs->fs_fsmnt, MAXMNTLEN);
1165	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
1166	mp->mnt_stat.f_iosize = fs->fs_bsize;
1167
1168	if (mp->mnt_flag & MNT_ROOTFS) {
1169		/*
1170		 * Root mount; update timestamp in mount structure.
1171		 * this will be used by the common root mount code
1172		 * to update the system clock.
1173		 */
1174		mp->mnt_time = fs->fs_time;
1175	}
1176
1177	if (ronly == 0) {
1178		fs->fs_mtime = time_second;
1179		if ((fs->fs_flags & FS_DOSOFTDEP) &&
1180		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
1181			ffs_flushfiles(mp, FORCECLOSE, td);
1182			goto out;
1183		}
1184		if (fs->fs_snapinum[0] != 0)
1185			ffs_snapshot_mount(mp);
1186		fs->fs_fmod = 1;
1187		fs->fs_clean = 0;
1188		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
1189	}
1190	/*
1191	 * Initialize filesystem state information in mount struct.
1192	 */
1193	MNT_ILOCK(mp);
1194	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
1195	    MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
1196	MNT_IUNLOCK(mp);
1197#ifdef UFS_EXTATTR
1198#ifdef UFS_EXTATTR_AUTOSTART
1199	/*
1200	 *
1201	 * Auto-starting does the following:
1202	 *	- check for /.attribute in the fs, and extattr_start if so
1203	 *	- for each file in .attribute, enable that file with
1204	 * 	  an attribute of the same name.
1205	 * Not clear how to report errors -- probably eat them.
1206	 * This would all happen while the filesystem was busy/not
1207	 * available, so would effectively be "atomic".
1208	 */
1209	(void) ufs_extattr_autostart(mp, td);
1210#endif /* !UFS_EXTATTR_AUTOSTART */
1211#endif /* !UFS_EXTATTR */
1212	return (0);
1213out:
1214	if (fs != NULL) {
1215		free(fs->fs_csp, M_UFSMNT);
1216		free(fs->fs_si, M_UFSMNT);
1217		free(fs, M_UFSMNT);
1218	}
1219	if (cp != NULL) {
1220		g_topology_lock();
1221		g_vfs_close(cp);
1222		g_topology_unlock();
1223	}
1224	if (ump != NULL) {
1225		mtx_destroy(UFS_MTX(ump));
1226		sx_destroy(&ump->um_checkpath_lock);
1227		if (mp->mnt_gjprovider != NULL) {
1228			free(mp->mnt_gjprovider, M_UFSMNT);
1229			mp->mnt_gjprovider = NULL;
1230		}
1231		MPASS(ump->um_softdep == NULL);
1232		free(ump, M_UFSMNT);
1233		mp->mnt_data = NULL;
1234	}
1235	BO_LOCK(&odevvp->v_bufobj);
1236	odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
1237	BO_UNLOCK(&odevvp->v_bufobj);
1238	atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
1239	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1240	mntfs_freevp(devvp);
1241	dev_rel(dev);
1242	return (error);
1243}
1244
1245/*
1246 * A read function for use by filesystem-layer routines.
1247 */
1248static int
1249ffs_use_bread(void *devfd, off_t loc, void **bufp, int size)
1250{
1251	struct buf *bp;
1252	int error;
1253
1254	KASSERT(*bufp == NULL, ("ffs_use_bread: non-NULL *bufp %p\n", *bufp));
1255	*bufp = malloc(size, M_UFSMNT, M_WAITOK);
1256	if ((error = bread((struct vnode *)devfd, btodb(loc), size, NOCRED,
1257	    &bp)) != 0)
1258		return (error);
1259	bcopy(bp->b_data, *bufp, size);
1260	bp->b_flags |= B_INVAL | B_NOCACHE;
1261	brelse(bp);
1262	return (0);
1263}
1264
1265static int bigcgs = 0;
1266SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
1267
1268/*
1269 * Sanity checks for loading old filesystem superblocks.
1270 * See ffs_oldfscompat_write below for unwound actions.
1271 *
1272 * XXX - Parts get retired eventually.
1273 * Unfortunately new bits get added.
1274 */
1275static void
1276ffs_oldfscompat_read(struct fs *fs,
1277	struct ufsmount *ump,
1278	ufs2_daddr_t sblockloc)
1279{
1280	off_t maxfilesize;
1281
1282	/*
1283	 * If not yet done, update fs_flags location and value of fs_sblockloc.
1284	 */
1285	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
1286		fs->fs_flags = fs->fs_old_flags;
1287		fs->fs_old_flags |= FS_FLAGS_UPDATED;
1288		fs->fs_sblockloc = sblockloc;
1289	}
1290	/*
1291	 * If not yet done, update UFS1 superblock with new wider fields.
1292	 */
1293	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
1294		fs->fs_maxbsize = fs->fs_bsize;
1295		fs->fs_time = fs->fs_old_time;
1296		fs->fs_size = fs->fs_old_size;
1297		fs->fs_dsize = fs->fs_old_dsize;
1298		fs->fs_csaddr = fs->fs_old_csaddr;
1299		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1300		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1301		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1302		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1303	}
1304	if (fs->fs_magic == FS_UFS1_MAGIC &&
1305	    fs->fs_old_inodefmt < FS_44INODEFMT) {
1306		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
1307		fs->fs_qbmask = ~fs->fs_bmask;
1308		fs->fs_qfmask = ~fs->fs_fmask;
1309	}
1310	if (fs->fs_magic == FS_UFS1_MAGIC) {
1311		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
1312		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
1313		if (fs->fs_maxfilesize > maxfilesize)
1314			fs->fs_maxfilesize = maxfilesize;
1315	}
1316	/* Compatibility for old filesystems */
1317	if (fs->fs_avgfilesize <= 0)
1318		fs->fs_avgfilesize = AVFILESIZ;
1319	if (fs->fs_avgfpdir <= 0)
1320		fs->fs_avgfpdir = AFPDIR;
1321	if (bigcgs) {
1322		fs->fs_save_cgsize = fs->fs_cgsize;
1323		fs->fs_cgsize = fs->fs_bsize;
1324	}
1325}
1326
1327/*
1328 * Unwinding superblock updates for old filesystems.
1329 * See ffs_oldfscompat_read above for details.
1330 *
1331 * XXX - Parts get retired eventually.
1332 * Unfortunately new bits get added.
1333 */
1334void
1335ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1336{
1337
1338	/*
1339	 * Copy back UFS2 updated fields that UFS1 inspects.
1340	 */
1341	if (fs->fs_magic == FS_UFS1_MAGIC) {
1342		fs->fs_old_time = fs->fs_time;
1343		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1344		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1345		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1346		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1347		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1348	}
1349	if (bigcgs) {
1350		fs->fs_cgsize = fs->fs_save_cgsize;
1351		fs->fs_save_cgsize = 0;
1352	}
1353}
1354
1355/*
1356 * unmount system call
1357 */
1358static int
1359ffs_unmount(struct mount *mp, int mntflags)
1360{
1361	struct thread *td;
1362	struct ufsmount *ump = VFSTOUFS(mp);
1363	struct fs *fs;
1364	int error, flags, susp;
1365#ifdef UFS_EXTATTR
1366	int e_restart;
1367#endif
1368
1369	flags = 0;
1370	td = curthread;
1371	fs = ump->um_fs;
1372	if (mntflags & MNT_FORCE)
1373		flags |= FORCECLOSE;
1374	susp = fs->fs_ronly == 0;
1375#ifdef UFS_EXTATTR
1376	if ((error = ufs_extattr_stop(mp, td))) {
1377		if (error != EOPNOTSUPP)
1378			printf("WARNING: unmount %s: ufs_extattr_stop "
1379			    "returned errno %d\n", mp->mnt_stat.f_mntonname,
1380			    error);
1381		e_restart = 0;
1382	} else {
1383		ufs_extattr_uepm_destroy(&ump->um_extattr);
1384		e_restart = 1;
1385	}
1386#endif
1387	if (susp) {
1388		error = vfs_write_suspend_umnt(mp);
1389		if (error != 0)
1390			goto fail1;
1391	}
1392	if (MOUNTEDSOFTDEP(mp))
1393		error = softdep_flushfiles(mp, flags, td);
1394	else
1395		error = ffs_flushfiles(mp, flags, td);
1396	if (error != 0 && !ffs_fsfail_cleanup(ump, error))
1397		goto fail;
1398
1399	UFS_LOCK(ump);
1400	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1401		printf("WARNING: unmount %s: pending error: blocks %jd "
1402		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1403		    fs->fs_pendinginodes);
1404		fs->fs_pendingblocks = 0;
1405		fs->fs_pendinginodes = 0;
1406	}
1407	UFS_UNLOCK(ump);
1408	if (MOUNTEDSOFTDEP(mp))
1409		softdep_unmount(mp);
1410	MPASS(ump->um_softdep == NULL);
1411	if (fs->fs_ronly == 0) {
1412		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1413		error = ffs_sbupdate(ump, MNT_WAIT, 0);
1414		if (ffs_fsfail_cleanup(ump, error))
1415			error = 0;
1416		if (error != 0 && !ffs_fsfail_cleanup(ump, error)) {
1417			fs->fs_clean = 0;
1418			goto fail;
1419		}
1420	}
1421	if (susp)
1422		vfs_write_resume(mp, VR_START_WRITE);
1423	if (ump->um_trim_tq != NULL) {
1424		MPASS(ump->um_trim_inflight == 0);
1425		taskqueue_free(ump->um_trim_tq);
1426		free (ump->um_trimhash, M_TRIM);
1427	}
1428	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1429	g_topology_lock();
1430	g_vfs_close(ump->um_cp);
1431	g_topology_unlock();
1432	BO_LOCK(&ump->um_odevvp->v_bufobj);
1433	ump->um_odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
1434	BO_UNLOCK(&ump->um_odevvp->v_bufobj);
1435	atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
1436	mntfs_freevp(ump->um_devvp);
1437	vrele(ump->um_odevvp);
1438	dev_rel(ump->um_dev);
1439	mtx_destroy(UFS_MTX(ump));
1440	sx_destroy(&ump->um_checkpath_lock);
1441	if (mp->mnt_gjprovider != NULL) {
1442		free(mp->mnt_gjprovider, M_UFSMNT);
1443		mp->mnt_gjprovider = NULL;
1444	}
1445	free(fs->fs_csp, M_UFSMNT);
1446	free(fs->fs_si, M_UFSMNT);
1447	free(fs, M_UFSMNT);
1448	free(ump, M_UFSMNT);
1449	mp->mnt_data = NULL;
1450	if (td->td_su == mp) {
1451		td->td_su = NULL;
1452		vfs_rel(mp);
1453	}
1454	return (error);
1455
1456fail:
1457	if (susp)
1458		vfs_write_resume(mp, VR_START_WRITE);
1459fail1:
1460#ifdef UFS_EXTATTR
1461	if (e_restart) {
1462		ufs_extattr_uepm_init(&ump->um_extattr);
1463#ifdef UFS_EXTATTR_AUTOSTART
1464		(void) ufs_extattr_autostart(mp, td);
1465#endif
1466	}
1467#endif
1468
1469	return (error);
1470}
1471
1472/*
1473 * Flush out all the files in a filesystem.
1474 */
1475int
1476ffs_flushfiles(struct mount *mp, int flags, struct thread *td)
1477{
1478	struct ufsmount *ump;
1479	int qerror, error;
1480
1481	ump = VFSTOUFS(mp);
1482	qerror = 0;
1483#ifdef QUOTA
1484	if (mp->mnt_flag & MNT_QUOTA) {
1485		int i;
1486		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1487		if (error)
1488			return (error);
1489		for (i = 0; i < MAXQUOTAS; i++) {
1490			error = quotaoff(td, mp, i);
1491			if (error != 0) {
1492				if ((flags & EARLYFLUSH) == 0)
1493					return (error);
1494				else
1495					qerror = error;
1496			}
1497		}
1498
1499		/*
1500		 * Here we fall through to vflush again to ensure that
1501		 * we have gotten rid of all the system vnodes, unless
1502		 * quotas must not be closed.
1503		 */
1504	}
1505#endif
1506	/* devvp is not locked there */
1507	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1508		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1509			return (error);
1510		ffs_snapshot_unmount(mp);
1511		flags |= FORCECLOSE;
1512		/*
1513		 * Here we fall through to vflush again to ensure
1514		 * that we have gotten rid of all the system vnodes.
1515		 */
1516	}
1517
1518	/*
1519	 * Do not close system files if quotas were not closed, to be
1520	 * able to sync the remaining dquots.  The freeblks softupdate
1521	 * workitems might hold a reference on a dquot, preventing
1522	 * quotaoff() from completing.  Next round of
1523	 * softdep_flushworklist() iteration should process the
1524	 * blockers, allowing the next run of quotaoff() to finally
1525	 * flush held dquots.
1526	 *
1527	 * Otherwise, flush all the files.
1528	 */
1529	if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
1530		return (error);
1531
1532	/*
1533	 * If this is a forcible unmount and there were any files that
1534	 * were unlinked but still open, then vflush() will have
1535	 * truncated and freed those files, which might have started
1536	 * some trim work.  Wait here for any trims to complete
1537	 * and process the blkfrees which follow the trims.
1538	 * This may create more dirty devvp buffers and softdep deps.
1539	 */
1540	if (ump->um_trim_tq != NULL) {
1541		while (ump->um_trim_inflight != 0)
1542			pause("ufsutr", hz);
1543		taskqueue_drain_all(ump->um_trim_tq);
1544	}
1545
1546	/*
1547	 * Flush filesystem metadata.
1548	 */
1549	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1550	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1551	VOP_UNLOCK(ump->um_devvp);
1552	return (error);
1553}
1554
1555/*
1556 * Get filesystem statistics.
1557 */
1558static int
1559ffs_statfs(struct mount *mp, struct statfs *sbp)
1560{
1561	struct ufsmount *ump;
1562	struct fs *fs;
1563
1564	ump = VFSTOUFS(mp);
1565	fs = ump->um_fs;
1566	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1567		panic("ffs_statfs");
1568	sbp->f_version = STATFS_VERSION;
1569	sbp->f_bsize = fs->fs_fsize;
1570	sbp->f_iosize = fs->fs_bsize;
1571	sbp->f_blocks = fs->fs_dsize;
1572	UFS_LOCK(ump);
1573	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1574	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1575	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1576	    dbtofsb(fs, fs->fs_pendingblocks);
1577	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO;
1578	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1579	UFS_UNLOCK(ump);
1580	sbp->f_namemax = UFS_MAXNAMLEN;
1581	return (0);
1582}
1583
1584static bool
1585sync_doupdate(struct inode *ip)
1586{
1587
1588	return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
1589	    IN_UPDATE)) != 0);
1590}
1591
1592static int
1593ffs_sync_lazy_filter(struct vnode *vp, void *arg __unused)
1594{
1595	struct inode *ip;
1596
1597	/*
1598	 * Flags are safe to access because ->v_data invalidation
1599	 * is held off by listmtx.
1600	 */
1601	if (vp->v_type == VNON)
1602		return (false);
1603	ip = VTOI(vp);
1604	if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0)
1605		return (false);
1606	return (true);
1607}
1608
1609/*
1610 * For a lazy sync, we only care about access times, quotas and the
1611 * superblock.  Other filesystem changes are already converted to
1612 * cylinder group blocks or inode blocks updates and are written to
1613 * disk by syncer.
1614 */
1615static int
1616ffs_sync_lazy(struct mount *mp)
1617{
1618	struct vnode *mvp, *vp;
1619	struct inode *ip;
1620	int allerror, error;
1621
1622	allerror = 0;
1623	if ((mp->mnt_flag & MNT_NOATIME) != 0) {
1624#ifdef QUOTA
1625		qsync(mp);
1626#endif
1627		goto sbupdate;
1628	}
1629	MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, ffs_sync_lazy_filter, NULL) {
1630		if (vp->v_type == VNON) {
1631			VI_UNLOCK(vp);
1632			continue;
1633		}
1634		ip = VTOI(vp);
1635
1636		/*
1637		 * The IN_ACCESS flag is converted to IN_MODIFIED by
1638		 * ufs_close() and ufs_getattr() by the calls to
1639		 * ufs_itimes_locked(), without subsequent UFS_UPDATE().
1640		 * Test also all the other timestamp flags too, to pick up
1641		 * any other cases that could be missed.
1642		 */
1643		if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
1644			VI_UNLOCK(vp);
1645			continue;
1646		}
1647		if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK)) != 0)
1648			continue;
1649#ifdef QUOTA
1650		qsyncvp(vp);
1651#endif
1652		if (sync_doupdate(ip))
1653			error = ffs_update(vp, 0);
1654		if (error != 0)
1655			allerror = error;
1656		vput(vp);
1657	}
1658sbupdate:
1659	if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 &&
1660	    (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0)
1661		allerror = error;
1662	return (allerror);
1663}
1664
1665/*
1666 * Go through the disk queues to initiate sandbagged IO;
1667 * go through the inodes to write those that have been modified;
1668 * initiate the writing of the super block if it has been modified.
1669 *
1670 * Note: we are always called with the filesystem marked busy using
1671 * vfs_busy().
1672 */
1673static int
1674ffs_sync(struct mount *mp, int waitfor)
1675{
1676	struct vnode *mvp, *vp, *devvp;
1677	struct thread *td;
1678	struct inode *ip;
1679	struct ufsmount *ump = VFSTOUFS(mp);
1680	struct fs *fs;
1681	int error, count, lockreq, allerror = 0;
1682	int suspend;
1683	int suspended;
1684	int secondary_writes;
1685	int secondary_accwrites;
1686	int softdep_deps;
1687	int softdep_accdeps;
1688	struct bufobj *bo;
1689
1690	suspend = 0;
1691	suspended = 0;
1692	td = curthread;
1693	fs = ump->um_fs;
1694	if (fs->fs_fmod != 0 && fs->fs_ronly != 0)
1695		panic("%s: ffs_sync: modification on read-only filesystem",
1696		    fs->fs_fsmnt);
1697	if (waitfor == MNT_LAZY) {
1698		if (!rebooting)
1699			return (ffs_sync_lazy(mp));
1700		waitfor = MNT_NOWAIT;
1701	}
1702
1703	/*
1704	 * Write back each (modified) inode.
1705	 */
1706	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1707	if (waitfor == MNT_SUSPEND) {
1708		suspend = 1;
1709		waitfor = MNT_WAIT;
1710	}
1711	if (waitfor == MNT_WAIT)
1712		lockreq = LK_EXCLUSIVE;
1713	lockreq |= LK_INTERLOCK;
1714loop:
1715	/* Grab snapshot of secondary write counts */
1716	MNT_ILOCK(mp);
1717	secondary_writes = mp->mnt_secondary_writes;
1718	secondary_accwrites = mp->mnt_secondary_accwrites;
1719	MNT_IUNLOCK(mp);
1720
1721	/* Grab snapshot of softdep dependency counts */
1722	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1723
1724	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1725		/*
1726		 * Depend on the vnode interlock to keep things stable enough
1727		 * for a quick test.  Since there might be hundreds of
1728		 * thousands of vnodes, we cannot afford even a subroutine
1729		 * call unless there's a good chance that we have work to do.
1730		 */
1731		if (vp->v_type == VNON) {
1732			VI_UNLOCK(vp);
1733			continue;
1734		}
1735		ip = VTOI(vp);
1736		if ((ip->i_flag &
1737		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1738		    vp->v_bufobj.bo_dirty.bv_cnt == 0) {
1739			VI_UNLOCK(vp);
1740			continue;
1741		}
1742		if ((error = vget(vp, lockreq)) != 0) {
1743			if (error == ENOENT) {
1744				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1745				goto loop;
1746			}
1747			continue;
1748		}
1749#ifdef QUOTA
1750		qsyncvp(vp);
1751#endif
1752		for (;;) {
1753			error = ffs_syncvnode(vp, waitfor, 0);
1754			if (error == ERELOOKUP)
1755				continue;
1756			if (error != 0)
1757				allerror = error;
1758			break;
1759		}
1760		vput(vp);
1761	}
1762	/*
1763	 * Force stale filesystem control information to be flushed.
1764	 */
1765	if (waitfor == MNT_WAIT || rebooting) {
1766		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1767			allerror = error;
1768		if (ffs_fsfail_cleanup(ump, allerror))
1769			allerror = 0;
1770		/* Flushed work items may create new vnodes to clean */
1771		if (allerror == 0 && count)
1772			goto loop;
1773	}
1774
1775	devvp = ump->um_devvp;
1776	bo = &devvp->v_bufobj;
1777	BO_LOCK(bo);
1778	if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
1779		BO_UNLOCK(bo);
1780		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1781		error = VOP_FSYNC(devvp, waitfor, td);
1782		VOP_UNLOCK(devvp);
1783		if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
1784			error = ffs_sbupdate(ump, waitfor, 0);
1785		if (error != 0)
1786			allerror = error;
1787		if (ffs_fsfail_cleanup(ump, allerror))
1788			allerror = 0;
1789		if (allerror == 0 && waitfor == MNT_WAIT)
1790			goto loop;
1791	} else if (suspend != 0) {
1792		if (softdep_check_suspend(mp,
1793					  devvp,
1794					  softdep_deps,
1795					  softdep_accdeps,
1796					  secondary_writes,
1797					  secondary_accwrites) != 0) {
1798			MNT_IUNLOCK(mp);
1799			goto loop;	/* More work needed */
1800		}
1801		mtx_assert(MNT_MTX(mp), MA_OWNED);
1802		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1803		MNT_IUNLOCK(mp);
1804		suspended = 1;
1805	} else
1806		BO_UNLOCK(bo);
1807	/*
1808	 * Write back modified superblock.
1809	 */
1810	if (fs->fs_fmod != 0 &&
1811	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1812		allerror = error;
1813	if (ffs_fsfail_cleanup(ump, allerror))
1814		allerror = 0;
1815	return (allerror);
1816}
1817
1818int
1819ffs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
1820{
1821	return (ffs_vgetf(mp, ino, flags, vpp, 0));
1822}
1823
1824int
1825ffs_vgetf(struct mount *mp,
1826	ino_t ino,
1827	int flags,
1828	struct vnode **vpp,
1829	int ffs_flags)
1830{
1831	struct fs *fs;
1832	struct inode *ip;
1833	struct ufsmount *ump;
1834	struct buf *bp;
1835	struct vnode *vp;
1836	daddr_t dbn;
1837	int error;
1838
1839	MPASS((ffs_flags & (FFSV_REPLACE | FFSV_REPLACE_DOOMED)) == 0 ||
1840	    (flags & LK_EXCLUSIVE) != 0);
1841
1842	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1843	if (error != 0)
1844		return (error);
1845	if (*vpp != NULL) {
1846		if ((ffs_flags & FFSV_REPLACE) == 0 ||
1847		    ((ffs_flags & FFSV_REPLACE_DOOMED) == 0 ||
1848		    !VN_IS_DOOMED(*vpp)))
1849			return (0);
1850		vgone(*vpp);
1851		vput(*vpp);
1852	}
1853
1854	/*
1855	 * We must promote to an exclusive lock for vnode creation.  This
1856	 * can happen if lookup is passed LOCKSHARED.
1857	 */
1858	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1859		flags &= ~LK_TYPE_MASK;
1860		flags |= LK_EXCLUSIVE;
1861	}
1862
1863	/*
1864	 * We do not lock vnode creation as it is believed to be too
1865	 * expensive for such rare case as simultaneous creation of vnode
1866	 * for same ino by different processes. We just allow them to race
1867	 * and check later to decide who wins. Let the race begin!
1868	 */
1869
1870	ump = VFSTOUFS(mp);
1871	fs = ump->um_fs;
1872	ip = uma_zalloc_smr(uma_inode, M_WAITOK | M_ZERO);
1873
1874	/* Allocate a new vnode/inode. */
1875	error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
1876	    &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
1877	if (error) {
1878		*vpp = NULL;
1879		uma_zfree_smr(uma_inode, ip);
1880		return (error);
1881	}
1882	/*
1883	 * FFS supports recursive locking.
1884	 */
1885	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL);
1886	VN_LOCK_AREC(vp);
1887	vp->v_data = ip;
1888	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1889	ip->i_vnode = vp;
1890	ip->i_ump = ump;
1891	ip->i_number = ino;
1892	ip->i_ea_refs = 0;
1893	ip->i_nextclustercg = -1;
1894	ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
1895	ip->i_mode = 0; /* ensure error cases below throw away vnode */
1896	cluster_init_vn(&ip->i_clusterw);
1897#ifdef DIAGNOSTIC
1898	ufs_init_trackers(ip);
1899#endif
1900#ifdef QUOTA
1901	{
1902		int i;
1903		for (i = 0; i < MAXQUOTAS; i++)
1904			ip->i_dquot[i] = NODQUOT;
1905	}
1906#endif
1907
1908	if (ffs_flags & FFSV_FORCEINSMQ)
1909		vp->v_vflag |= VV_FORCEINSMQ;
1910	error = insmntque(vp, mp);
1911	if (error != 0) {
1912		uma_zfree_smr(uma_inode, ip);
1913		*vpp = NULL;
1914		return (error);
1915	}
1916	vp->v_vflag &= ~VV_FORCEINSMQ;
1917	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1918	if (error != 0)
1919		return (error);
1920	if (*vpp != NULL) {
1921		/*
1922		 * Calls from ffs_valloc() (i.e. FFSV_REPLACE set)
1923		 * operate on empty inode, which must not be found by
1924		 * other threads until fully filled.  Vnode for empty
1925		 * inode must be not re-inserted on the hash by other
1926		 * thread, after removal by us at the beginning.
1927		 */
1928		MPASS((ffs_flags & FFSV_REPLACE) == 0);
1929		return (0);
1930	}
1931	if (I_IS_UFS1(ip))
1932		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1933	else
1934		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1935
1936	if ((ffs_flags & FFSV_NEWINODE) != 0) {
1937		/* New inode, just zero out its contents. */
1938		if (I_IS_UFS1(ip))
1939			memset(ip->i_din1, 0, sizeof(struct ufs1_dinode));
1940		else
1941			memset(ip->i_din2, 0, sizeof(struct ufs2_dinode));
1942	} else {
1943		/* Read the disk contents for the inode, copy into the inode. */
1944		dbn = fsbtodb(fs, ino_to_fsba(fs, ino));
1945		error = ffs_breadz(ump, ump->um_devvp, dbn, dbn,
1946		    (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp);
1947		if (error != 0) {
1948			/*
1949			 * The inode does not contain anything useful, so it
1950			 * would be misleading to leave it on its hash chain.
1951			 * With mode still zero, it will be unlinked and
1952			 * returned to the free list by vput().
1953			 */
1954			vgone(vp);
1955			vput(vp);
1956			*vpp = NULL;
1957			return (error);
1958		}
1959		if ((error = ffs_load_inode(bp, ip, fs, ino)) != 0) {
1960			bqrelse(bp);
1961			vgone(vp);
1962			vput(vp);
1963			*vpp = NULL;
1964			return (error);
1965		}
1966		bqrelse(bp);
1967	}
1968	if (DOINGSOFTDEP(vp) && (!fs->fs_ronly ||
1969	    (ffs_flags & FFSV_FORCEINODEDEP) != 0))
1970		softdep_load_inodeblock(ip);
1971	else
1972		ip->i_effnlink = ip->i_nlink;
1973
1974	/*
1975	 * Initialize the vnode from the inode, check for aliases.
1976	 * Note that the underlying vnode may have changed.
1977	 */
1978	error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2,
1979	    &vp);
1980	if (error) {
1981		vgone(vp);
1982		vput(vp);
1983		*vpp = NULL;
1984		return (error);
1985	}
1986
1987	/*
1988	 * Finish inode initialization.
1989	 */
1990	if (vp->v_type != VFIFO) {
1991		/* FFS supports shared locking for all files except fifos. */
1992		VN_LOCK_ASHARE(vp);
1993	}
1994
1995	/*
1996	 * Set up a generation number for this inode if it does not
1997	 * already have one. This should only happen on old filesystems.
1998	 */
1999	if (ip->i_gen == 0) {
2000		while (ip->i_gen == 0)
2001			ip->i_gen = arc4random();
2002		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
2003			UFS_INODE_SET_FLAG(ip, IN_MODIFIED);
2004			DIP_SET(ip, i_gen, ip->i_gen);
2005		}
2006	}
2007#ifdef MAC
2008	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
2009		/*
2010		 * If this vnode is already allocated, and we're running
2011		 * multi-label, attempt to perform a label association
2012		 * from the extended attributes on the inode.
2013		 */
2014		error = mac_vnode_associate_extattr(mp, vp);
2015		if (error) {
2016			/* ufs_inactive will release ip->i_devvp ref. */
2017			vgone(vp);
2018			vput(vp);
2019			*vpp = NULL;
2020			return (error);
2021		}
2022	}
2023#endif
2024
2025	vn_set_state(vp, VSTATE_CONSTRUCTED);
2026	*vpp = vp;
2027	return (0);
2028}
2029
2030/*
2031 * File handle to vnode
2032 *
2033 * Have to be really careful about stale file handles:
2034 * - check that the inode number is valid
2035 * - for UFS2 check that the inode number is initialized
2036 * - call ffs_vget() to get the locked inode
2037 * - check for an unallocated inode (i_mode == 0)
2038 * - check that the given client host has export rights and return
2039 *   those rights via. exflagsp and credanonp
2040 */
2041static int
2042ffs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
2043{
2044	struct ufid *ufhp;
2045
2046	ufhp = (struct ufid *)fhp;
2047	return (ffs_inotovp(mp, ufhp->ufid_ino, ufhp->ufid_gen, flags,
2048	    vpp, 0));
2049}
2050
2051/*
2052 * Return a vnode from a mounted filesystem for inode with specified
2053 * generation number. Return ESTALE if the inode with given generation
2054 * number no longer exists on that filesystem.
2055 */
2056int
2057ffs_inotovp(struct mount *mp,
2058	ino_t ino,
2059	uint64_t gen,
2060	int lflags,
2061	struct vnode **vpp,
2062	int ffs_flags)
2063{
2064	struct ufsmount *ump;
2065	struct vnode *nvp;
2066	struct inode *ip;
2067	struct fs *fs;
2068	struct cg *cgp;
2069	struct buf *bp;
2070	uint64_t cg;
2071
2072	ump = VFSTOUFS(mp);
2073	fs = ump->um_fs;
2074	*vpp = NULL;
2075
2076	if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
2077		return (ESTALE);
2078
2079	/*
2080	 * Need to check if inode is initialized because UFS2 does lazy
2081	 * initialization and nfs_fhtovp can offer arbitrary inode numbers.
2082	 */
2083	if (fs->fs_magic == FS_UFS2_MAGIC) {
2084		cg = ino_to_cg(fs, ino);
2085		if (ffs_getcg(fs, ump->um_devvp, cg, 0, &bp, &cgp) != 0)
2086			return (ESTALE);
2087		if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) {
2088			brelse(bp);
2089			return (ESTALE);
2090		}
2091		brelse(bp);
2092	}
2093
2094	if (ffs_vgetf(mp, ino, lflags, &nvp, ffs_flags) != 0)
2095		return (ESTALE);
2096
2097	ip = VTOI(nvp);
2098	if (ip->i_mode == 0 || ip->i_gen != gen || ip->i_effnlink <= 0) {
2099		if (ip->i_mode == 0)
2100			vgone(nvp);
2101		vput(nvp);
2102		return (ESTALE);
2103	}
2104
2105	vnode_create_vobject(nvp, DIP(ip, i_size), curthread);
2106	*vpp = nvp;
2107	return (0);
2108}
2109
2110/*
2111 * Initialize the filesystem.
2112 */
2113static int
2114ffs_init(struct vfsconf *vfsp)
2115{
2116
2117	ffs_susp_initialize();
2118	softdep_initialize();
2119	return (ufs_init(vfsp));
2120}
2121
2122/*
2123 * Undo the work of ffs_init().
2124 */
2125static int
2126ffs_uninit(struct vfsconf *vfsp)
2127{
2128	int ret;
2129
2130	ret = ufs_uninit(vfsp);
2131	softdep_uninitialize();
2132	ffs_susp_uninitialize();
2133	taskqueue_drain_all(taskqueue_thread);
2134	return (ret);
2135}
2136
2137/*
2138 * Structure used to pass information from ffs_sbupdate to its
2139 * helper routine ffs_use_bwrite.
2140 */
2141struct devfd {
2142	struct ufsmount	*ump;
2143	struct buf	*sbbp;
2144	int		 waitfor;
2145	int		 suspended;
2146	int		 error;
2147};
2148
2149/*
2150 * Write a superblock and associated information back to disk.
2151 */
2152int
2153ffs_sbupdate(struct ufsmount *ump, int waitfor, int suspended)
2154{
2155	struct fs *fs;
2156	struct buf *sbbp;
2157	struct devfd devfd;
2158
2159	fs = ump->um_fs;
2160	if (fs->fs_ronly == 1 &&
2161	    (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
2162	    (MNT_RDONLY | MNT_UPDATE))
2163		panic("ffs_sbupdate: write read-only filesystem");
2164	/*
2165	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
2166	 */
2167	sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
2168	    (int)fs->fs_sbsize, 0, 0, 0);
2169	/*
2170	 * Initialize info needed for write function.
2171	 */
2172	devfd.ump = ump;
2173	devfd.sbbp = sbbp;
2174	devfd.waitfor = waitfor;
2175	devfd.suspended = suspended;
2176	devfd.error = 0;
2177	return (ffs_sbput(&devfd, fs, fs->fs_sblockloc, ffs_use_bwrite));
2178}
2179
2180/*
2181 * Write function for use by filesystem-layer routines.
2182 */
2183static int
2184ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size)
2185{
2186	struct devfd *devfdp;
2187	struct ufsmount *ump;
2188	struct buf *bp;
2189	struct fs *fs;
2190	int error;
2191
2192	devfdp = devfd;
2193	ump = devfdp->ump;
2194	fs = ump->um_fs;
2195	/*
2196	 * Writing the superblock summary information.
2197	 */
2198	if (loc != fs->fs_sblockloc) {
2199		bp = getblk(ump->um_devvp, btodb(loc), size, 0, 0, 0);
2200		bcopy(buf, bp->b_data, (uint64_t)size);
2201		if (devfdp->suspended)
2202			bp->b_flags |= B_VALIDSUSPWRT;
2203		if (devfdp->waitfor != MNT_WAIT)
2204			bawrite(bp);
2205		else if ((error = bwrite(bp)) != 0)
2206			devfdp->error = error;
2207		return (0);
2208	}
2209	/*
2210	 * Writing the superblock itself. We need to do special checks for it.
2211	 */
2212	bp = devfdp->sbbp;
2213	if (ffs_fsfail_cleanup(ump, devfdp->error))
2214		devfdp->error = 0;
2215	if (devfdp->error != 0) {
2216		brelse(bp);
2217		return (devfdp->error);
2218	}
2219	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
2220	    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
2221		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
2222		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
2223		fs->fs_sblockloc = SBLOCK_UFS1;
2224	}
2225	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
2226	    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
2227		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
2228		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
2229		fs->fs_sblockloc = SBLOCK_UFS2;
2230	}
2231	if (MOUNTEDSOFTDEP(ump->um_mountp))
2232		softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
2233	UFS_LOCK(ump);
2234	bcopy((caddr_t)fs, bp->b_data, (uint64_t)fs->fs_sbsize);
2235	UFS_UNLOCK(ump);
2236	fs = (struct fs *)bp->b_data;
2237	fs->fs_fmod = 0;
2238	ffs_oldfscompat_write(fs, ump);
2239	fs->fs_si = NULL;
2240	/* Recalculate the superblock hash */
2241	fs->fs_ckhash = ffs_calc_sbhash(fs);
2242	if (devfdp->suspended)
2243		bp->b_flags |= B_VALIDSUSPWRT;
2244	if (devfdp->waitfor != MNT_WAIT)
2245		bawrite(bp);
2246	else if ((error = bwrite(bp)) != 0)
2247		devfdp->error = error;
2248	return (devfdp->error);
2249}
2250
2251static int
2252ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
2253	int attrnamespace, const char *attrname)
2254{
2255
2256#ifdef UFS_EXTATTR
2257	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
2258	    attrname));
2259#else
2260	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
2261	    attrname));
2262#endif
2263}
2264
2265static void
2266ffs_ifree(struct ufsmount *ump, struct inode *ip)
2267{
2268
2269	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
2270		uma_zfree(uma_ufs1, ip->i_din1);
2271	else if (ip->i_din2 != NULL)
2272		uma_zfree(uma_ufs2, ip->i_din2);
2273	uma_zfree_smr(uma_inode, ip);
2274}
2275
2276static int dobkgrdwrite = 1;
2277SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
2278    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
2279
2280/*
2281 * Complete a background write started from bwrite.
2282 */
2283static void
2284ffs_backgroundwritedone(struct buf *bp)
2285{
2286	struct bufobj *bufobj;
2287	struct buf *origbp;
2288
2289#ifdef SOFTUPDATES
2290	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) != 0)
2291		softdep_handle_error(bp);
2292#endif
2293
2294	/*
2295	 * Find the original buffer that we are writing.
2296	 */
2297	bufobj = bp->b_bufobj;
2298	BO_LOCK(bufobj);
2299	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
2300		panic("backgroundwritedone: lost buffer");
2301
2302	/*
2303	 * We should mark the cylinder group buffer origbp as
2304	 * dirty, to not lose the failed write.
2305	 */
2306	if ((bp->b_ioflags & BIO_ERROR) != 0)
2307		origbp->b_vflags |= BV_BKGRDERR;
2308	BO_UNLOCK(bufobj);
2309	/*
2310	 * Process dependencies then return any unfinished ones.
2311	 */
2312	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
2313		buf_complete(bp);
2314#ifdef SOFTUPDATES
2315	if (!LIST_EMPTY(&bp->b_dep))
2316		softdep_move_dependencies(bp, origbp);
2317#endif
2318	/*
2319	 * This buffer is marked B_NOCACHE so when it is released
2320	 * by biodone it will be tossed.  Clear B_IOSTARTED in case of error.
2321	 */
2322	bp->b_flags |= B_NOCACHE;
2323	bp->b_flags &= ~(B_CACHE | B_IOSTARTED);
2324	pbrelvp(bp);
2325
2326	/*
2327	 * Prevent brelse() from trying to keep and re-dirtying bp on
2328	 * errors. It causes b_bufobj dereference in
2329	 * bdirty()/reassignbuf(), and b_bufobj was cleared in
2330	 * pbrelvp() above.
2331	 */
2332	if ((bp->b_ioflags & BIO_ERROR) != 0)
2333		bp->b_flags |= B_INVAL;
2334	bufdone(bp);
2335	BO_LOCK(bufobj);
2336	/*
2337	 * Clear the BV_BKGRDINPROG flag in the original buffer
2338	 * and awaken it if it is waiting for the write to complete.
2339	 * If BV_BKGRDINPROG is not set in the original buffer it must
2340	 * have been released and re-instantiated - which is not legal.
2341	 */
2342	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
2343	    ("backgroundwritedone: lost buffer2"));
2344	origbp->b_vflags &= ~BV_BKGRDINPROG;
2345	if (origbp->b_vflags & BV_BKGRDWAIT) {
2346		origbp->b_vflags &= ~BV_BKGRDWAIT;
2347		wakeup(&origbp->b_xflags);
2348	}
2349	BO_UNLOCK(bufobj);
2350}
2351
2352/*
2353 * Write, release buffer on completion.  (Done by iodone
2354 * if async).  Do not bother writing anything if the buffer
2355 * is invalid.
2356 *
2357 * Note that we set B_CACHE here, indicating that buffer is
2358 * fully valid and thus cacheable.  This is true even of NFS
2359 * now so we set it generally.  This could be set either here
2360 * or in biodone() since the I/O is synchronous.  We put it
2361 * here.
2362 */
2363static int
2364ffs_bufwrite(struct buf *bp)
2365{
2366	struct buf *newbp;
2367	struct cg *cgp;
2368
2369	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
2370	if (bp->b_flags & B_INVAL) {
2371		brelse(bp);
2372		return (0);
2373	}
2374
2375	if (!BUF_ISLOCKED(bp))
2376		panic("bufwrite: buffer is not busy???");
2377	/*
2378	 * If a background write is already in progress, delay
2379	 * writing this block if it is asynchronous. Otherwise
2380	 * wait for the background write to complete.
2381	 */
2382	BO_LOCK(bp->b_bufobj);
2383	if (bp->b_vflags & BV_BKGRDINPROG) {
2384		if (bp->b_flags & B_ASYNC) {
2385			BO_UNLOCK(bp->b_bufobj);
2386			bdwrite(bp);
2387			return (0);
2388		}
2389		bp->b_vflags |= BV_BKGRDWAIT;
2390		msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
2391		    "bwrbg", 0);
2392		if (bp->b_vflags & BV_BKGRDINPROG)
2393			panic("bufwrite: still writing");
2394	}
2395	bp->b_vflags &= ~BV_BKGRDERR;
2396	BO_UNLOCK(bp->b_bufobj);
2397
2398	/*
2399	 * If this buffer is marked for background writing and we
2400	 * do not have to wait for it, make a copy and write the
2401	 * copy so as to leave this buffer ready for further use.
2402	 *
2403	 * This optimization eats a lot of memory.  If we have a page
2404	 * or buffer shortfall we can't do it.
2405	 */
2406	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
2407	    (bp->b_flags & B_ASYNC) &&
2408	    !vm_page_count_severe() &&
2409	    !buf_dirty_count_severe()) {
2410		KASSERT(bp->b_iodone == NULL,
2411		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
2412
2413		/* get a new block */
2414		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
2415		if (newbp == NULL)
2416			goto normal_write;
2417
2418		KASSERT(buf_mapped(bp), ("Unmapped cg"));
2419		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
2420		BO_LOCK(bp->b_bufobj);
2421		bp->b_vflags |= BV_BKGRDINPROG;
2422		BO_UNLOCK(bp->b_bufobj);
2423		newbp->b_xflags |=
2424		    (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER;
2425		newbp->b_lblkno = bp->b_lblkno;
2426		newbp->b_blkno = bp->b_blkno;
2427		newbp->b_offset = bp->b_offset;
2428		newbp->b_iodone = ffs_backgroundwritedone;
2429		newbp->b_flags |= B_ASYNC;
2430		newbp->b_flags &= ~B_INVAL;
2431		pbgetvp(bp->b_vp, newbp);
2432
2433#ifdef SOFTUPDATES
2434		/*
2435		 * Move over the dependencies.  If there are rollbacks,
2436		 * leave the parent buffer dirtied as it will need to
2437		 * be written again.
2438		 */
2439		if (LIST_EMPTY(&bp->b_dep) ||
2440		    softdep_move_dependencies(bp, newbp) == 0)
2441			bundirty(bp);
2442#else
2443		bundirty(bp);
2444#endif
2445
2446		/*
2447		 * Initiate write on the copy, release the original.  The
2448		 * BKGRDINPROG flag prevents it from going away until
2449		 * the background write completes. We have to recalculate
2450		 * its check hash in case the buffer gets freed and then
2451		 * reconstituted from the buffer cache during a later read.
2452		 */
2453		if ((bp->b_xflags & BX_CYLGRP) != 0) {
2454			cgp = (struct cg *)bp->b_data;
2455			cgp->cg_ckhash = 0;
2456			cgp->cg_ckhash =
2457			    calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
2458		}
2459		bqrelse(bp);
2460		bp = newbp;
2461	} else
2462		/* Mark the buffer clean */
2463		bundirty(bp);
2464
2465	/* Let the normal bufwrite do the rest for us */
2466normal_write:
2467	/*
2468	 * If we are writing a cylinder group, update its time.
2469	 */
2470	if ((bp->b_xflags & BX_CYLGRP) != 0) {
2471		cgp = (struct cg *)bp->b_data;
2472		cgp->cg_old_time = cgp->cg_time = time_second;
2473	}
2474	return (bufwrite(bp));
2475}
2476
2477static void
2478ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
2479{
2480	struct vnode *vp;
2481	struct buf *tbp;
2482	int error, nocopy;
2483
2484	/*
2485	 * This is the bufobj strategy for the private VCHR vnodes
2486	 * used by FFS to access the underlying storage device.
2487	 * We override the default bufobj strategy and thus bypass
2488	 * VOP_STRATEGY() for these vnodes.
2489	 */
2490	vp = bo2vnode(bo);
2491	KASSERT(bp->b_vp == NULL || bp->b_vp->v_type != VCHR ||
2492	    bp->b_vp->v_rdev == NULL ||
2493	    bp->b_vp->v_rdev->si_mountpt == NULL ||
2494	    VFSTOUFS(bp->b_vp->v_rdev->si_mountpt) == NULL ||
2495	    vp == VFSTOUFS(bp->b_vp->v_rdev->si_mountpt)->um_devvp,
2496	    ("ffs_geom_strategy() with wrong vp"));
2497	if (bp->b_iocmd == BIO_WRITE) {
2498		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
2499		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
2500		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
2501			panic("ffs_geom_strategy: bad I/O");
2502		nocopy = bp->b_flags & B_NOCOPY;
2503		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
2504		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
2505		    vp->v_rdev->si_snapdata != NULL) {
2506			if ((bp->b_flags & B_CLUSTER) != 0) {
2507				runningbufwakeup(bp);
2508				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
2509					      b_cluster.cluster_entry) {
2510					error = ffs_copyonwrite(vp, tbp);
2511					if (error != 0 &&
2512					    error != EOPNOTSUPP) {
2513						bp->b_error = error;
2514						bp->b_ioflags |= BIO_ERROR;
2515						bp->b_flags &= ~B_BARRIER;
2516						bufdone(bp);
2517						return;
2518					}
2519				}
2520				bp->b_runningbufspace = bp->b_bufsize;
2521				atomic_add_long(&runningbufspace,
2522					       bp->b_runningbufspace);
2523			} else {
2524				error = ffs_copyonwrite(vp, bp);
2525				if (error != 0 && error != EOPNOTSUPP) {
2526					bp->b_error = error;
2527					bp->b_ioflags |= BIO_ERROR;
2528					bp->b_flags &= ~B_BARRIER;
2529					bufdone(bp);
2530					return;
2531				}
2532			}
2533		}
2534#ifdef SOFTUPDATES
2535		if ((bp->b_flags & B_CLUSTER) != 0) {
2536			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
2537				      b_cluster.cluster_entry) {
2538				if (!LIST_EMPTY(&tbp->b_dep))
2539					buf_start(tbp);
2540			}
2541		} else {
2542			if (!LIST_EMPTY(&bp->b_dep))
2543				buf_start(bp);
2544		}
2545
2546#endif
2547		/*
2548		 * Check for metadata that needs check-hashes and update them.
2549		 */
2550		switch (bp->b_xflags & BX_FSPRIV) {
2551		case BX_CYLGRP:
2552			((struct cg *)bp->b_data)->cg_ckhash = 0;
2553			((struct cg *)bp->b_data)->cg_ckhash =
2554			    calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
2555			break;
2556
2557		case BX_SUPERBLOCK:
2558		case BX_INODE:
2559		case BX_INDIR:
2560		case BX_DIR:
2561			printf("Check-hash write is unimplemented!!!\n");
2562			break;
2563
2564		case 0:
2565			break;
2566
2567		default:
2568			printf("multiple buffer types 0x%b\n",
2569			    (bp->b_xflags & BX_FSPRIV), PRINT_UFS_BUF_XFLAGS);
2570			break;
2571		}
2572	}
2573	if (bp->b_iocmd != BIO_READ && ffs_enxio_enable)
2574		bp->b_xflags |= BX_CVTENXIO;
2575	g_vfs_strategy(bo, bp);
2576}
2577
2578int
2579ffs_own_mount(const struct mount *mp)
2580{
2581
2582	if (mp->mnt_op == &ufs_vfsops)
2583		return (1);
2584	return (0);
2585}
2586
2587#ifdef	DDB
2588#ifdef SOFTUPDATES
2589
2590/* defined in ffs_softdep.c */
2591extern void db_print_ffs(struct ufsmount *ump);
2592
2593DB_SHOW_COMMAND(ffs, db_show_ffs)
2594{
2595	struct mount *mp;
2596	struct ufsmount *ump;
2597
2598	if (have_addr) {
2599		ump = VFSTOUFS((struct mount *)addr);
2600		db_print_ffs(ump);
2601		return;
2602	}
2603
2604	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2605		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2606			db_print_ffs(VFSTOUFS(mp));
2607	}
2608}
2609
2610#endif	/* SOFTUPDATES */
2611#endif	/* DDB */
2612