1/*	$NetBSD: ffs_vfsops.c,v 1.275.2.2 2012/09/13 22:27:43 riz Exp $	*/
2
3/*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1989, 1991, 1993, 1994
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
61 */
62
63#include <sys/cdefs.h>
64__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.275.2.2 2012/09/13 22:27:43 riz Exp $");
65
66#if defined(_KERNEL_OPT)
67#include "opt_ffs.h"
68#include "opt_quota.h"
69#include "opt_wapbl.h"
70#endif
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/namei.h>
75#include <sys/proc.h>
76#include <sys/kernel.h>
77#include <sys/vnode.h>
78#include <sys/socket.h>
79#include <sys/mount.h>
80#include <sys/buf.h>
81#include <sys/device.h>
82#include <sys/disk.h>
83#include <sys/mbuf.h>
84#include <sys/file.h>
85#include <sys/disklabel.h>
86#include <sys/ioctl.h>
87#include <sys/errno.h>
88#include <sys/kmem.h>
89#include <sys/pool.h>
90#include <sys/lock.h>
91#include <sys/sysctl.h>
92#include <sys/conf.h>
93#include <sys/kauth.h>
94#include <sys/wapbl.h>
95#include <sys/fstrans.h>
96#include <sys/module.h>
97
98#include <miscfs/genfs/genfs.h>
99#include <miscfs/specfs/specdev.h>
100
101#include <ufs/ufs/quota.h>
102#include <ufs/ufs/ufsmount.h>
103#include <ufs/ufs/inode.h>
104#include <ufs/ufs/dir.h>
105#include <ufs/ufs/ufs_extern.h>
106#include <ufs/ufs/ufs_bswap.h>
107#include <ufs/ufs/ufs_wapbl.h>
108
109#include <ufs/ffs/fs.h>
110#include <ufs/ffs/ffs_extern.h>
111
/* Declare the "ffs" kernel module in the VFS module class. */
MODULE(MODULE_CLASS_VFS, ffs, NULL);

/* Forward declaration; the function is referenced by the ffs_vfsops table. */
static int	ffs_vfs_fsync(vnode_t *, int);

/*
 * Log of the sysctl nodes created by ffs_modcmd() at module init; handed
 * to sysctl_teardown() when the module is finalized.
 */
static struct sysctllog *ffs_sysctl_log;

/* how many times ffs_init() was called */
int ffs_initcount = 0;

/* Vnode operation vector descriptions, defined outside this part of the file. */
extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;
extern const struct vnodeopv_desc ffs_specop_opv_desc;
extern const struct vnodeopv_desc ffs_fifoop_opv_desc;

/*
 * NULL-terminated list of the operation vector descriptions above;
 * it is published through the ffs_vfsops table.
 */
const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {
	&ffs_vnodeop_opv_desc,
	&ffs_specop_opv_desc,
	&ffs_fifoop_opv_desc,
	NULL,
};
131
/*
 * VFS operations table for FFS.  ffs_modcmd() passes it to vfs_attach()
 * and vfs_detach().
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of struct vfsops (declared outside this file).  Several
 * slots reuse the generic ufs_* and genfs_* implementations rather than
 * FFS-specific ones.
 */
struct vfsops ffs_vfsops = {
	MOUNT_FFS,
	sizeof (struct ufs_args),
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statvfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
	ffs_reinit,
	ffs_done,
	ffs_mountroot,
	ffs_snapshot,
	ffs_extattrctl,
	ffs_suspendctl,
	genfs_renamelock_enter,
	genfs_renamelock_exit,
	ffs_vfs_fsync,
	ffs_vnodeopv_descs,
	0,
	{ NULL, NULL },
};
159
160static const struct genfs_ops ffs_genfsops = {
161	.gop_size = ffs_gop_size,
162	.gop_alloc = ufs_gop_alloc,
163	.gop_write = genfs_gop_write,
164	.gop_markupdate = ufs_gop_markupdate,
165};
166
167static const struct ufs_ops ffs_ufsops = {
168	.uo_itimes = ffs_itimes,
169	.uo_update = ffs_update,
170	.uo_truncate = ffs_truncate,
171	.uo_valloc = ffs_valloc,
172	.uo_vfree = ffs_vfree,
173	.uo_balloc = ffs_balloc,
174	.uo_unmark_vnode = (void (*)(vnode_t *))nullop,
175};
176
177static int
178ffs_modcmd(modcmd_t cmd, void *arg)
179{
180	int error;
181
182#if 0
183	extern int doasyncfree;
184#endif
185#ifdef UFS_EXTATTR
186	extern int ufs_extattr_autocreate;
187#endif
188	extern int ffs_log_changeopt;
189
190	switch (cmd) {
191	case MODULE_CMD_INIT:
192		error = vfs_attach(&ffs_vfsops);
193		if (error != 0)
194			break;
195
196		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
197			       CTLFLAG_PERMANENT,
198			       CTLTYPE_NODE, "vfs", NULL,
199			       NULL, 0, NULL, 0,
200			       CTL_VFS, CTL_EOL);
201		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
202			       CTLFLAG_PERMANENT,
203			       CTLTYPE_NODE, "ffs",
204			       SYSCTL_DESCR("Berkeley Fast File System"),
205			       NULL, 0, NULL, 0,
206			       CTL_VFS, 1, CTL_EOL);
207		/*
208		 * @@@ should we even bother with these first three?
209		 */
210		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
211			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
212			       CTLTYPE_INT, "doclusterread", NULL,
213			       sysctl_notavail, 0, NULL, 0,
214			       CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
215		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
216			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
217			       CTLTYPE_INT, "doclusterwrite", NULL,
218			       sysctl_notavail, 0, NULL, 0,
219			       CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
220		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
221			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
222			       CTLTYPE_INT, "doreallocblks", NULL,
223			       sysctl_notavail, 0, NULL, 0,
224			       CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
225#if 0
226		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
227			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
228			       CTLTYPE_INT, "doasyncfree",
229			       SYSCTL_DESCR("Release dirty blocks asynchronously"),
230			       NULL, 0, &doasyncfree, 0,
231			       CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
232#endif
233		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
234			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
235			       CTLTYPE_INT, "log_changeopt",
236			       SYSCTL_DESCR("Log changes in optimization strategy"),
237			       NULL, 0, &ffs_log_changeopt, 0,
238			       CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
239#ifdef UFS_EXTATTR
240		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
241			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
242			       CTLTYPE_INT, "extattr_autocreate",
243			       SYSCTL_DESCR("Size of attribute for "
244					    "backing file autocreation"),
245			       NULL, 0, &ufs_extattr_autocreate, 0,
246			       CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL);
247
248#endif /* UFS_EXTATTR */
249
250		break;
251	case MODULE_CMD_FINI:
252		error = vfs_detach(&ffs_vfsops);
253		if (error != 0)
254			break;
255		sysctl_teardown(&ffs_sysctl_log);
256		break;
257	default:
258		error = ENOTTY;
259		break;
260	}
261
262	return (error);
263}
264
/*
 * Pool caches used by FFS; they are set up outside this part of the file.
 * NOTE(review): going by the names, one is for in-core inodes and the other
 * two for the UFS1 and UFS2 on-disk inode formats -- confirm at ffs_init().
 */
pool_cache_t ffs_inode_cache;
pool_cache_t ffs_dinode1_cache;
pool_cache_t ffs_dinode2_cache;

/*
 * Superblock compatibility helpers, defined later in the file.  The read
 * variant is applied to a freshly loaded superblock by ffs_mountfs() and
 * ffs_reload().
 */
static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);
static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
271
272/*
273 * Called by main() when ffs is going to be mounted as root.
274 */
275
276int
277ffs_mountroot(void)
278{
279	struct fs *fs;
280	struct mount *mp;
281	struct lwp *l = curlwp;			/* XXX */
282	struct ufsmount *ump;
283	int error;
284
285	if (device_class(root_device) != DV_DISK)
286		return (ENODEV);
287
288	if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
289		vrele(rootvp);
290		return (error);
291	}
292
293	/*
294	 * We always need to be able to mount the root file system.
295	 */
296	mp->mnt_flag |= MNT_FORCE;
297	if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
298		vfs_unbusy(mp, false, NULL);
299		vfs_destroy(mp);
300		return (error);
301	}
302	mp->mnt_flag &= ~MNT_FORCE;
303	mutex_enter(&mountlist_lock);
304	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
305	mutex_exit(&mountlist_lock);
306	ump = VFSTOUFS(mp);
307	fs = ump->um_fs;
308	memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
309	(void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
310	(void)ffs_statvfs(mp, &mp->mnt_stat);
311	vfs_unbusy(mp, false, NULL);
312	setrootfstime((time_t)fs->fs_time);
313	return (0);
314}
315
316/*
317 * VFS Operations.
318 *
319 * mount system call
320 */
321int
322ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
323{
324	struct lwp *l = curlwp;
325	struct vnode *devvp = NULL;
326	struct ufs_args *args = data;
327	struct ufsmount *ump = NULL;
328	struct fs *fs;
329	int error = 0, flags, update;
330	mode_t accessmode;
331
332	if (args == NULL)
333		return EINVAL;
334	if (*data_len < sizeof *args)
335		return EINVAL;
336
337	if (mp->mnt_flag & MNT_GETARGS) {
338		ump = VFSTOUFS(mp);
339		if (ump == NULL)
340			return EIO;
341		args->fspec = NULL;
342		*data_len = sizeof *args;
343		return 0;
344	}
345
346	update = mp->mnt_flag & MNT_UPDATE;
347
348	/* Check arguments */
349	if (args->fspec != NULL) {
350		/*
351		 * Look up the name and verify that it's sane.
352		 */
353		error = namei_simple_user(args->fspec,
354					NSM_FOLLOW_NOEMULROOT, &devvp);
355		if (error != 0)
356			return (error);
357
358		if (!update) {
359			/*
360			 * Be sure this is a valid block device
361			 */
362			if (devvp->v_type != VBLK)
363				error = ENOTBLK;
364			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
365				error = ENXIO;
366		} else {
367			/*
368			 * Be sure we're still naming the same device
369			 * used for our initial mount
370			 */
371			ump = VFSTOUFS(mp);
372			if (devvp != ump->um_devvp) {
373				if (devvp->v_rdev != ump->um_devvp->v_rdev)
374					error = EINVAL;
375				else {
376					vrele(devvp);
377					devvp = ump->um_devvp;
378					vref(devvp);
379				}
380			}
381		}
382	} else {
383		if (!update) {
384			/* New mounts must have a filename for the device */
385			return (EINVAL);
386		} else {
387			/* Use the extant mount */
388			ump = VFSTOUFS(mp);
389			devvp = ump->um_devvp;
390			vref(devvp);
391		}
392	}
393
394	/*
395	 * If mount by non-root, then verify that user has necessary
396	 * permissions on the device.
397	 *
398	 * Permission to update a mount is checked higher, so here we presume
399	 * updating the mount is okay (for example, as far as securelevel goes)
400	 * which leaves us with the normal check.
401	 */
402	if (error == 0) {
403		accessmode = VREAD;
404		if (update ?
405		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
406		    (mp->mnt_flag & MNT_RDONLY) == 0)
407			accessmode |= VWRITE;
408		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
409		error = genfs_can_mount(devvp, accessmode, l->l_cred);
410		VOP_UNLOCK(devvp);
411	}
412
413	if (error) {
414		vrele(devvp);
415		return (error);
416	}
417
418#ifdef WAPBL
419	/* WAPBL can only be enabled on a r/w mount. */
420	if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
421		mp->mnt_flag &= ~MNT_LOG;
422	}
423#else /* !WAPBL */
424	mp->mnt_flag &= ~MNT_LOG;
425#endif /* !WAPBL */
426
427	if (!update) {
428		int xflags;
429
430		if (mp->mnt_flag & MNT_RDONLY)
431			xflags = FREAD;
432		else
433			xflags = FREAD | FWRITE;
434		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
435		error = VOP_OPEN(devvp, xflags, FSCRED);
436		VOP_UNLOCK(devvp);
437		if (error)
438			goto fail;
439		error = ffs_mountfs(devvp, mp, l);
440		if (error) {
441			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
442			(void)VOP_CLOSE(devvp, xflags, NOCRED);
443			VOP_UNLOCK(devvp);
444			goto fail;
445		}
446
447		ump = VFSTOUFS(mp);
448		fs = ump->um_fs;
449	} else {
450		/*
451		 * Update the mount.
452		 */
453
454		/*
455		 * The initial mount got a reference on this
456		 * device, so drop the one obtained via
457		 * namei(), above.
458		 */
459		vrele(devvp);
460
461		ump = VFSTOUFS(mp);
462		fs = ump->um_fs;
463		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
464			/*
465			 * Changing from r/w to r/o
466			 */
467			flags = WRITECLOSE;
468			if (mp->mnt_flag & MNT_FORCE)
469				flags |= FORCECLOSE;
470			error = ffs_flushfiles(mp, flags, l);
471			if (error == 0)
472				error = UFS_WAPBL_BEGIN(mp);
473			if (error == 0 &&
474			    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
475			    fs->fs_clean & FS_WASCLEAN) {
476				if (mp->mnt_flag & MNT_SOFTDEP)
477					fs->fs_flags &= ~FS_DOSOFTDEP;
478				fs->fs_clean = FS_ISCLEAN;
479				(void) ffs_sbupdate(ump, MNT_WAIT);
480			}
481			if (error == 0)
482				UFS_WAPBL_END(mp);
483			if (error)
484				return (error);
485		}
486
487#ifdef WAPBL
488		if ((mp->mnt_flag & MNT_LOG) == 0) {
489			error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
490			if (error)
491				return error;
492		}
493#endif /* WAPBL */
494
495		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
496			/*
497			 * Finish change from r/w to r/o
498			 */
499			fs->fs_ronly = 1;
500			fs->fs_fmod = 0;
501		}
502
503		if (mp->mnt_flag & MNT_RELOAD) {
504			error = ffs_reload(mp, l->l_cred, l);
505			if (error)
506				return (error);
507		}
508
509		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
510			/*
511			 * Changing from read-only to read/write
512			 */
513#ifndef QUOTA2
514			if (fs->fs_flags & FS_DOQUOTA2) {
515				ump->um_flags |= UFS_QUOTA2;
516				uprintf("%s: options QUOTA2 not enabled%s\n",
517				    mp->mnt_stat.f_mntonname,
518				    (mp->mnt_flag & MNT_FORCE) ? "" :
519				    ", not mounting");
520				return EINVAL;
521			}
522#endif
523			fs->fs_ronly = 0;
524			fs->fs_clean <<= 1;
525			fs->fs_fmod = 1;
526#ifdef WAPBL
527			if (fs->fs_flags & FS_DOWAPBL) {
528				printf("%s: replaying log to disk\n",
529				    mp->mnt_stat.f_mntonname);
530				KDASSERT(mp->mnt_wapbl_replay);
531				error = wapbl_replay_write(mp->mnt_wapbl_replay,
532							   devvp);
533				if (error) {
534					return error;
535				}
536				wapbl_replay_stop(mp->mnt_wapbl_replay);
537				fs->fs_clean = FS_WASCLEAN;
538			}
539#endif /* WAPBL */
540			if (fs->fs_snapinum[0] != 0)
541				ffs_snapshot_mount(mp);
542		}
543
544#ifdef WAPBL
545		error = ffs_wapbl_start(mp);
546		if (error)
547			return error;
548#endif /* WAPBL */
549
550#ifdef QUOTA2
551		if (!fs->fs_ronly) {
552			error = ffs_quota2_mount(mp);
553			if (error) {
554				return error;
555			}
556		}
557#endif
558		if (args->fspec == NULL)
559			return 0;
560	}
561
562	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
563	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
564	if (error == 0)
565		(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
566		    sizeof(fs->fs_fsmnt));
567	fs->fs_flags &= ~FS_DOSOFTDEP;
568	if (fs->fs_fmod != 0) {	/* XXX */
569		int err;
570
571		fs->fs_fmod = 0;
572		if (fs->fs_clean & FS_WASCLEAN)
573			fs->fs_time = time_second;
574		else {
575			printf("%s: file system not clean (fs_clean=%#x); "
576			    "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
577			    fs->fs_clean);
578			printf("%s: lost blocks %" PRId64 " files %d\n",
579			    mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
580			    fs->fs_pendinginodes);
581		}
582		err = UFS_WAPBL_BEGIN(mp);
583		if (err == 0) {
584			(void) ffs_cgupdate(ump, MNT_WAIT);
585			UFS_WAPBL_END(mp);
586		}
587	}
588	if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
589		printf("%s: `-o softdep' is no longer supported, "
590		    "consider `-o log'\n", mp->mnt_stat.f_mntfromname);
591		mp->mnt_flag &= ~MNT_SOFTDEP;
592	}
593
594	return (error);
595
596fail:
597	vrele(devvp);
598	return (error);
599}
600
601/*
602 * Reload all incore data for a filesystem (used after running fsck on
603 * the root filesystem and finding things to fix). The filesystem must
604 * be mounted read-only.
605 *
606 * Things to do to update the mount:
607 *	1) invalidate all cached meta-data.
608 *	2) re-read superblock from disk.
609 *	3) re-read summary information from disk.
610 *	4) invalidate all inactive vnodes.
611 *	5) invalidate all cached file data.
612 *	6) re-read inode data for all active vnodes.
613 */
614int
615ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
616{
617	struct vnode *vp, *mvp, *devvp;
618	struct inode *ip;
619	void *space;
620	struct buf *bp;
621	struct fs *fs, *newfs;
622	struct dkwedge_info dkw;
623	int i, bsize, blks, error;
624	int32_t *lp;
625	struct ufsmount *ump;
626	daddr_t sblockloc;
627
628	if ((mp->mnt_flag & MNT_RDONLY) == 0)
629		return (EINVAL);
630
631	ump = VFSTOUFS(mp);
632	/*
633	 * Step 1: invalidate all cached meta-data.
634	 */
635	devvp = ump->um_devvp;
636	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
637	error = vinvalbuf(devvp, 0, cred, l, 0, 0);
638	VOP_UNLOCK(devvp);
639	if (error)
640		panic("ffs_reload: dirty1");
641	/*
642	 * Step 2: re-read superblock from disk.
643	 */
644	fs = ump->um_fs;
645
646	/* XXX we don't handle possibility that superblock moved. */
647	error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs->fs_sbsize,
648		      NOCRED, 0, &bp);
649	if (error) {
650		brelse(bp, 0);
651		return (error);
652	}
653	newfs = kmem_alloc(fs->fs_sbsize, KM_SLEEP);
654	memcpy(newfs, bp->b_data, fs->fs_sbsize);
655#ifdef FFS_EI
656	if (ump->um_flags & UFS_NEEDSWAP) {
657		ffs_sb_swap((struct fs*)bp->b_data, newfs);
658		fs->fs_flags |= FS_SWAPPED;
659	} else
660#endif
661		fs->fs_flags &= ~FS_SWAPPED;
662	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
663	     newfs->fs_magic != FS_UFS2_MAGIC)||
664	     newfs->fs_bsize > MAXBSIZE ||
665	     newfs->fs_bsize < sizeof(struct fs)) {
666		brelse(bp, 0);
667		kmem_free(newfs, fs->fs_sbsize);
668		return (EIO);		/* XXX needs translation */
669	}
670	/* Store off old fs_sblockloc for fs_oldfscompat_read. */
671	sblockloc = fs->fs_sblockloc;
672	/*
673	 * Copy pointer fields back into superblock before copying in	XXX
674	 * new superblock. These should really be in the ufsmount.	XXX
675	 * Note that important parameters (eg fs_ncg) are unchanged.
676	 */
677	newfs->fs_csp = fs->fs_csp;
678	newfs->fs_maxcluster = fs->fs_maxcluster;
679	newfs->fs_contigdirs = fs->fs_contigdirs;
680	newfs->fs_ronly = fs->fs_ronly;
681	newfs->fs_active = fs->fs_active;
682	memcpy(fs, newfs, (u_int)fs->fs_sbsize);
683	brelse(bp, 0);
684	kmem_free(newfs, fs->fs_sbsize);
685
686	/* Recheck for apple UFS filesystem */
687	ump->um_flags &= ~UFS_ISAPPLEUFS;
688	/* First check to see if this is tagged as an Apple UFS filesystem
689	 * in the disklabel
690	 */
691	if (getdiskinfo(devvp, &dkw) == 0 &&
692	    strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
693		ump->um_flags |= UFS_ISAPPLEUFS;
694#ifdef APPLE_UFS
695	else {
696		/* Manually look for an apple ufs label, and if a valid one
697		 * is found, then treat it like an Apple UFS filesystem anyway
698		 *
699		 * EINVAL is most probably a blocksize or alignment problem,
700		 * it is unlikely that this is an Apple UFS filesystem then.
701		 */
702		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
703			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
704		if (error && error != EINVAL) {
705			brelse(bp, 0);
706			return (error);
707		}
708		if (error == 0) {
709			error = ffs_appleufs_validate(fs->fs_fsmnt,
710				(struct appleufslabel *)bp->b_data, NULL);
711			if (error == 0)
712				ump->um_flags |= UFS_ISAPPLEUFS;
713		}
714		brelse(bp, 0);
715		bp = NULL;
716	}
717#else
718	if (ump->um_flags & UFS_ISAPPLEUFS)
719		return (EIO);
720#endif
721
722	if (UFS_MPISAPPLEUFS(ump)) {
723		/* see comment about NeXT below */
724		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
725		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
726		mp->mnt_iflag |= IMNT_DTYPE;
727	} else {
728		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
729		ump->um_dirblksiz = DIRBLKSIZ;
730		if (ump->um_maxsymlinklen > 0)
731			mp->mnt_iflag |= IMNT_DTYPE;
732		else
733			mp->mnt_iflag &= ~IMNT_DTYPE;
734	}
735	ffs_oldfscompat_read(fs, ump, sblockloc);
736
737	mutex_enter(&ump->um_lock);
738	ump->um_maxfilesize = fs->fs_maxfilesize;
739	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
740		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
741		    mp->mnt_stat.f_mntonname, fs->fs_flags,
742		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
743		if ((mp->mnt_flag & MNT_FORCE) == 0) {
744			mutex_exit(&ump->um_lock);
745			return (EINVAL);
746		}
747	}
748	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
749		fs->fs_pendingblocks = 0;
750		fs->fs_pendinginodes = 0;
751	}
752	mutex_exit(&ump->um_lock);
753
754	ffs_statvfs(mp, &mp->mnt_stat);
755	/*
756	 * Step 3: re-read summary information from disk.
757	 */
758	blks = howmany(fs->fs_cssize, fs->fs_fsize);
759	space = fs->fs_csp;
760	for (i = 0; i < blks; i += fs->fs_frag) {
761		bsize = fs->fs_bsize;
762		if (i + fs->fs_frag > blks)
763			bsize = (blks - i) * fs->fs_fsize;
764		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
765			      NOCRED, 0, &bp);
766		if (error) {
767			brelse(bp, 0);
768			return (error);
769		}
770#ifdef FFS_EI
771		if (UFS_FSNEEDSWAP(fs))
772			ffs_csum_swap((struct csum *)bp->b_data,
773			    (struct csum *)space, bsize);
774		else
775#endif
776			memcpy(space, bp->b_data, (size_t)bsize);
777		space = (char *)space + bsize;
778		brelse(bp, 0);
779	}
780	if (fs->fs_snapinum[0] != 0)
781		ffs_snapshot_mount(mp);
782	/*
783	 * We no longer know anything about clusters per cylinder group.
784	 */
785	if (fs->fs_contigsumsize > 0) {
786		lp = fs->fs_maxcluster;
787		for (i = 0; i < fs->fs_ncg; i++)
788			*lp++ = fs->fs_contigsumsize;
789	}
790
791	/* Allocate a marker vnode. */
792	mvp = vnalloc(mp);
793	/*
794	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
795	 * and vclean() can be called indirectly
796	 */
797	mutex_enter(&mntvnode_lock);
798 loop:
799	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
800		vmark(mvp, vp);
801		if (vp->v_mount != mp || vismarker(vp))
802			continue;
803		/*
804		 * Step 4: invalidate all inactive vnodes.
805		 */
806		if (vrecycle(vp, &mntvnode_lock, l)) {
807			mutex_enter(&mntvnode_lock);
808			(void)vunmark(mvp);
809			goto loop;
810		}
811		/*
812		 * Step 5: invalidate all cached file data.
813		 */
814		mutex_enter(vp->v_interlock);
815		mutex_exit(&mntvnode_lock);
816		if (vget(vp, LK_EXCLUSIVE)) {
817			(void)vunmark(mvp);
818			goto loop;
819		}
820		if (vinvalbuf(vp, 0, cred, l, 0, 0))
821			panic("ffs_reload: dirty2");
822		/*
823		 * Step 6: re-read inode data for all active vnodes.
824		 */
825		ip = VTOI(vp);
826		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
827			      (int)fs->fs_bsize, NOCRED, 0, &bp);
828		if (error) {
829			brelse(bp, 0);
830			vput(vp);
831			(void)vunmark(mvp);
832			break;
833		}
834		ffs_load_inode(bp, ip, fs, ip->i_number);
835		brelse(bp, 0);
836		vput(vp);
837		mutex_enter(&mntvnode_lock);
838	}
839	mutex_exit(&mntvnode_lock);
840	vnfree(mvp);
841	return (error);
842}
843
/*
 * Possible superblock locations ordered from most to least likely.
 *
 * ffs_mountfs() walks the list in order, divides each entry by DEV_BSIZE
 * to form the block number it reads, and gives up with EINVAL when it
 * reaches the terminating -1 entry.
 */
static const int sblock_try[] = SBLOCKSEARCH;
848
849/*
850 * Common code for mount and mountroot
851 */
852int
853ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
854{
855	struct ufsmount *ump;
856	struct buf *bp;
857	struct fs *fs;
858	dev_t dev;
859	struct dkwedge_info dkw;
860	void *space;
861	daddr_t sblockloc, fsblockloc;
862	int blks, fstype;
863	int error, i, bsize, ronly, bset = 0;
864#ifdef FFS_EI
865	int needswap = 0;		/* keep gcc happy */
866#endif
867	int32_t *lp;
868	kauth_cred_t cred;
869	u_int32_t sbsize = 8192;	/* keep gcc happy*/
870	u_int32_t allocsbsize;
871	int32_t fsbsize;
872
873	dev = devvp->v_rdev;
874	cred = l ? l->l_cred : NOCRED;
875
876	/* Flush out any old buffers remaining from a previous use. */
877	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
878	error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
879	VOP_UNLOCK(devvp);
880	if (error)
881		return (error);
882
883	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
884
885	bp = NULL;
886	ump = NULL;
887	fs = NULL;
888	sblockloc = 0;
889	fstype = 0;
890
891	error = fstrans_mount(mp);
892	if (error)
893		return error;
894
895	ump = kmem_zalloc(sizeof(*ump), KM_SLEEP);
896	mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
897	error = ffs_snapshot_init(ump);
898	if (error)
899		goto out;
900	ump->um_ops = &ffs_ufsops;
901
902#ifdef WAPBL
903 sbagain:
904#endif
905	/*
906	 * Try reading the superblock in each of its possible locations.
907	 */
908	for (i = 0; ; i++) {
909		if (bp != NULL) {
910			brelse(bp, BC_NOCACHE);
911			bp = NULL;
912		}
913		if (sblock_try[i] == -1) {
914			error = EINVAL;
915			fs = NULL;
916			goto out;
917		}
918		error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred,
919			      0, &bp);
920		if (error) {
921			fs = NULL;
922			goto out;
923		}
924		fs = (struct fs*)bp->b_data;
925		fsblockloc = sblockloc = sblock_try[i];
926		if (fs->fs_magic == FS_UFS1_MAGIC) {
927			sbsize = fs->fs_sbsize;
928			fstype = UFS1;
929			fsbsize = fs->fs_bsize;
930#ifdef FFS_EI
931			needswap = 0;
932		} else if (fs->fs_magic == FS_UFS1_MAGIC_SWAPPED) {
933			sbsize = bswap32(fs->fs_sbsize);
934			fstype = UFS1;
935			fsbsize = bswap32(fs->fs_bsize);
936			needswap = 1;
937#endif
938		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
939			sbsize = fs->fs_sbsize;
940			fstype = UFS2;
941			fsbsize = fs->fs_bsize;
942#ifdef FFS_EI
943			needswap = 0;
944		} else if (fs->fs_magic == FS_UFS2_MAGIC_SWAPPED) {
945			sbsize = bswap32(fs->fs_sbsize);
946			fstype = UFS2;
947			fsbsize = bswap32(fs->fs_bsize);
948			needswap = 1;
949#endif
950		} else
951			continue;
952
953
954		/* fs->fs_sblockloc isn't defined for old filesystems */
955		if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
956			if (sblockloc == SBLOCK_UFS2)
957				/*
958				 * This is likely to be the first alternate
959				 * in a filesystem with 64k blocks.
960				 * Don't use it.
961				 */
962				continue;
963			fsblockloc = sblockloc;
964		} else {
965			fsblockloc = fs->fs_sblockloc;
966#ifdef FFS_EI
967			if (needswap)
968				fsblockloc = bswap64(fsblockloc);
969#endif
970		}
971
972		/* Check we haven't found an alternate superblock */
973		if (fsblockloc != sblockloc)
974			continue;
975
976		/* Validate size of superblock */
977		if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
978			continue;
979
980		/* Check that we can handle the file system blocksize */
981		if (fsbsize > MAXBSIZE) {
982			printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n",
983			    fsbsize, MAXBSIZE);
984			continue;
985		}
986
987		/* Ok seems to be a good superblock */
988		break;
989	}
990
991	fs = kmem_alloc((u_long)sbsize, KM_SLEEP);
992	memcpy(fs, bp->b_data, sbsize);
993	ump->um_fs = fs;
994
995#ifdef FFS_EI
996	if (needswap) {
997		ffs_sb_swap((struct fs*)bp->b_data, fs);
998		fs->fs_flags |= FS_SWAPPED;
999	} else
1000#endif
1001		fs->fs_flags &= ~FS_SWAPPED;
1002
1003#ifdef WAPBL
1004	if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
1005		error = ffs_wapbl_replay_start(mp, fs, devvp);
1006		if (error && (mp->mnt_flag & MNT_FORCE) == 0)
1007			goto out;
1008		if (!error) {
1009			if (!ronly) {
1010				/* XXX fsmnt may be stale. */
1011				printf("%s: replaying log to disk\n",
1012				    fs->fs_fsmnt);
1013				error = wapbl_replay_write(mp->mnt_wapbl_replay,
1014				    devvp);
1015				if (error)
1016					goto out;
1017				wapbl_replay_stop(mp->mnt_wapbl_replay);
1018				fs->fs_clean = FS_WASCLEAN;
1019			} else {
1020				/* XXX fsmnt may be stale */
1021				printf("%s: replaying log to memory\n",
1022				    fs->fs_fsmnt);
1023			}
1024
1025			/* Force a re-read of the superblock */
1026			brelse(bp, BC_INVAL);
1027			bp = NULL;
1028			kmem_free(fs, sbsize);
1029			fs = NULL;
1030			goto sbagain;
1031		}
1032	}
1033#else /* !WAPBL */
1034	if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
1035		error = EPERM;
1036		goto out;
1037	}
1038#endif /* !WAPBL */
1039
1040	ffs_oldfscompat_read(fs, ump, sblockloc);
1041	ump->um_maxfilesize = fs->fs_maxfilesize;
1042
1043	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
1044		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
1045		    mp->mnt_stat.f_mntonname, fs->fs_flags,
1046		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1047		if ((mp->mnt_flag & MNT_FORCE) == 0) {
1048			error = EINVAL;
1049			goto out;
1050		}
1051	}
1052
1053	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1054		fs->fs_pendingblocks = 0;
1055		fs->fs_pendinginodes = 0;
1056	}
1057
1058	ump->um_fstype = fstype;
1059	if (fs->fs_sbsize < SBLOCKSIZE)
1060		brelse(bp, BC_INVAL);
1061	else
1062		brelse(bp, 0);
1063	bp = NULL;
1064
1065	/* First check to see if this is tagged as an Apple UFS filesystem
1066	 * in the disklabel
1067	 */
1068	if (getdiskinfo(devvp, &dkw) == 0 &&
1069	    strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
1070		ump->um_flags |= UFS_ISAPPLEUFS;
1071#ifdef APPLE_UFS
1072	else {
1073		/* Manually look for an apple ufs label, and if a valid one
1074		 * is found, then treat it like an Apple UFS filesystem anyway
1075		 */
1076		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
1077			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
1078		if (error)
1079			goto out;
1080		error = ffs_appleufs_validate(fs->fs_fsmnt,
1081			(struct appleufslabel *)bp->b_data, NULL);
1082		if (error == 0) {
1083			ump->um_flags |= UFS_ISAPPLEUFS;
1084		}
1085		brelse(bp, 0);
1086		bp = NULL;
1087	}
1088#else
1089	if (ump->um_flags & UFS_ISAPPLEUFS) {
1090		error = EINVAL;
1091		goto out;
1092	}
1093#endif
1094
1095#if 0
1096/*
1097 * XXX This code changes the behaviour of mounting dirty filesystems, to
1098 * XXX require "mount -f ..." to mount them.  This doesn't match what
1099 * XXX mount(8) describes and is disabled for now.
1100 */
1101	/*
1102	 * If the file system is not clean, don't allow it to be mounted
1103	 * unless MNT_FORCE is specified.  (Note: MNT_FORCE is always set
1104	 * for the root file system.)
1105	 */
1106	if (fs->fs_flags & FS_DOWAPBL) {
1107		/*
1108		 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1109		 * bit is set, although there's a window in unmount where it
1110		 * could be FS_ISCLEAN
1111		 */
1112		if ((mp->mnt_flag & MNT_FORCE) == 0 &&
1113		    (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
1114			error = EPERM;
1115			goto out;
1116		}
1117	} else
1118		if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
1119		    (mp->mnt_flag & MNT_FORCE) == 0) {
1120			error = EPERM;
1121			goto out;
1122		}
1123#endif
1124
1125	/*
1126	 * verify that we can access the last block in the fs
1127	 * if we're mounting read/write.
1128	 */
1129
1130	if (!ronly) {
1131		error = bread(devvp, fsbtodb(fs, fs->fs_size - 1), fs->fs_fsize,
1132		    cred, 0, &bp);
1133		if (bp->b_bcount != fs->fs_fsize)
1134			error = EINVAL;
1135		if (error) {
1136			bset = BC_INVAL;
1137			goto out;
1138		}
1139		brelse(bp, BC_INVAL);
1140		bp = NULL;
1141	}
1142
1143	fs->fs_ronly = ronly;
1144	/* Don't bump fs_clean if we're replaying journal */
1145	if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
1146		if (ronly == 0) {
1147			fs->fs_clean <<= 1;
1148			fs->fs_fmod = 1;
1149		}
1150	bsize = fs->fs_cssize;
1151	blks = howmany(bsize, fs->fs_fsize);
1152	if (fs->fs_contigsumsize > 0)
1153		bsize += fs->fs_ncg * sizeof(int32_t);
1154	bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1155	allocsbsize = bsize;
1156	space = kmem_alloc((u_long)allocsbsize, KM_SLEEP);
1157	fs->fs_csp = space;
1158	for (i = 0; i < blks; i += fs->fs_frag) {
1159		bsize = fs->fs_bsize;
1160		if (i + fs->fs_frag > blks)
1161			bsize = (blks - i) * fs->fs_fsize;
1162		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), bsize,
1163			      cred, 0, &bp);
1164		if (error) {
1165			kmem_free(fs->fs_csp, allocsbsize);
1166			goto out;
1167		}
1168#ifdef FFS_EI
1169		if (needswap)
1170			ffs_csum_swap((struct csum *)bp->b_data,
1171				(struct csum *)space, bsize);
1172		else
1173#endif
1174			memcpy(space, bp->b_data, (u_int)bsize);
1175
1176		space = (char *)space + bsize;
1177		brelse(bp, 0);
1178		bp = NULL;
1179	}
1180	if (fs->fs_contigsumsize > 0) {
1181		fs->fs_maxcluster = lp = space;
1182		for (i = 0; i < fs->fs_ncg; i++)
1183			*lp++ = fs->fs_contigsumsize;
1184		space = lp;
1185	}
1186	bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1187	fs->fs_contigdirs = space;
1188	space = (char *)space + bsize;
1189	memset(fs->fs_contigdirs, 0, bsize);
1190		/* Compatibility for old filesystems - XXX */
1191	if (fs->fs_avgfilesize <= 0)
1192		fs->fs_avgfilesize = AVFILESIZ;
1193	if (fs->fs_avgfpdir <= 0)
1194		fs->fs_avgfpdir = AFPDIR;
1195	fs->fs_active = NULL;
1196	mp->mnt_data = ump;
1197	mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
1198	mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
1199	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1200	mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
1201	if (UFS_MPISAPPLEUFS(ump)) {
1202		/* NeXT used to keep short symlinks in the inode even
1203		 * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
1204		 * is probably -1, but we still need to be able to identify
1205		 * short symlinks.
1206		 */
1207		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1208		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1209		mp->mnt_iflag |= IMNT_DTYPE;
1210	} else {
1211		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1212		ump->um_dirblksiz = DIRBLKSIZ;
1213		if (ump->um_maxsymlinklen > 0)
1214			mp->mnt_iflag |= IMNT_DTYPE;
1215		else
1216			mp->mnt_iflag &= ~IMNT_DTYPE;
1217	}
1218	mp->mnt_fs_bshift = fs->fs_bshift;
1219	mp->mnt_dev_bshift = DEV_BSHIFT;	/* XXX */
1220	mp->mnt_flag |= MNT_LOCAL;
1221	mp->mnt_iflag |= IMNT_MPSAFE;
1222#ifdef FFS_EI
1223	if (needswap)
1224		ump->um_flags |= UFS_NEEDSWAP;
1225#endif
1226	ump->um_mountp = mp;
1227	ump->um_dev = dev;
1228	ump->um_devvp = devvp;
1229	ump->um_nindir = fs->fs_nindir;
1230	ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1231	ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT;
1232	ump->um_seqinc = fs->fs_frag;
1233	for (i = 0; i < MAXQUOTAS; i++)
1234		ump->um_quotas[i] = NULLVP;
1235	devvp->v_specmountpoint = mp;
1236	if (ronly == 0 && fs->fs_snapinum[0] != 0)
1237		ffs_snapshot_mount(mp);
1238#ifdef WAPBL
1239	if (!ronly) {
1240		KDASSERT(fs->fs_ronly == 0);
1241		/*
1242		 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1243		 * needs to create a new log file in-filesystem.
1244		 */
1245		ffs_statvfs(mp, &mp->mnt_stat);
1246
1247		error = ffs_wapbl_start(mp);
1248		if (error) {
1249			kmem_free(fs->fs_csp, allocsbsize);
1250			goto out;
1251		}
1252	}
1253#endif /* WAPBL */
1254	if (ronly == 0) {
1255#ifdef QUOTA2
1256		error = ffs_quota2_mount(mp);
1257		if (error) {
1258			kmem_free(fs->fs_csp, allocsbsize);
1259			goto out;
1260		}
1261#else
1262		if (fs->fs_flags & FS_DOQUOTA2) {
1263			ump->um_flags |= UFS_QUOTA2;
1264			uprintf("%s: options QUOTA2 not enabled%s\n",
1265			    mp->mnt_stat.f_mntonname,
1266			    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1267			if ((mp->mnt_flag & MNT_FORCE) == 0) {
1268				error = EINVAL;
1269				kmem_free(fs->fs_csp, allocsbsize);
1270				goto out;
1271			}
1272		}
1273#endif
1274	 }
1275#ifdef UFS_EXTATTR
1276	/*
1277	 * Initialize file-backed extended attributes on UFS1 file
1278	 * systems.
1279	 */
1280	if (ump->um_fstype == UFS1)
1281		ufs_extattr_uepm_init(&ump->um_extattr);
1282#endif /* UFS_EXTATTR */
1283
1284	return (0);
1285out:
1286#ifdef WAPBL
1287	if (mp->mnt_wapbl_replay) {
1288		wapbl_replay_stop(mp->mnt_wapbl_replay);
1289		wapbl_replay_free(mp->mnt_wapbl_replay);
1290		mp->mnt_wapbl_replay = 0;
1291	}
1292#endif
1293
1294	fstrans_unmount(mp);
1295	if (fs)
1296		kmem_free(fs, fs->fs_sbsize);
1297	devvp->v_specmountpoint = NULL;
1298	if (bp)
1299		brelse(bp, bset);
1300	if (ump) {
1301		if (ump->um_oldfscompat)
1302			kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
1303		mutex_destroy(&ump->um_lock);
1304		kmem_free(ump, sizeof(*ump));
1305		mp->mnt_data = NULL;
1306	}
1307	return (error);
1308}
1309
1310/*
1311 * Sanity checks for loading old filesystem superblocks.
1312 * See ffs_oldfscompat_write below for unwound actions.
1313 *
1314 * XXX - Parts get retired eventually.
1315 * Unfortunately new bits get added.
1316 */
1317static void
1318ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1319{
1320	off_t maxfilesize;
1321	int32_t *extrasave;
1322
1323	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1324	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1325		return;
1326
1327	if (!ump->um_oldfscompat)
1328		ump->um_oldfscompat = kmem_alloc(512 + 3*sizeof(int32_t),
1329		    KM_SLEEP);
1330
1331	memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1332	extrasave = ump->um_oldfscompat;
1333	extrasave += 512/sizeof(int32_t);
1334	extrasave[0] = fs->fs_old_npsect;
1335	extrasave[1] = fs->fs_old_interleave;
1336	extrasave[2] = fs->fs_old_trackskew;
1337
1338	/* These fields will be overwritten by their
1339	 * original values in fs_oldfscompat_write, so it is harmless
1340	 * to modify them here.
1341	 */
1342	fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1343	fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1344	fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1345	fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1346
1347	fs->fs_maxbsize = fs->fs_bsize;
1348	fs->fs_time = fs->fs_old_time;
1349	fs->fs_size = fs->fs_old_size;
1350	fs->fs_dsize = fs->fs_old_dsize;
1351	fs->fs_csaddr = fs->fs_old_csaddr;
1352	fs->fs_sblockloc = sblockloc;
1353
1354	fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1355
1356	if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1357		fs->fs_old_nrpos = 8;
1358		fs->fs_old_npsect = fs->fs_old_nsect;
1359		fs->fs_old_interleave = 1;
1360		fs->fs_old_trackskew = 0;
1361	}
1362
1363	if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1364		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1365		fs->fs_qbmask = ~fs->fs_bmask;
1366		fs->fs_qfmask = ~fs->fs_fmask;
1367	}
1368
1369	maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1370	if (fs->fs_maxfilesize > maxfilesize)
1371		fs->fs_maxfilesize = maxfilesize;
1372
1373	/* Compatibility for old filesystems */
1374	if (fs->fs_avgfilesize <= 0)
1375		fs->fs_avgfilesize = AVFILESIZ;
1376	if (fs->fs_avgfpdir <= 0)
1377		fs->fs_avgfpdir = AFPDIR;
1378
1379#if 0
1380	if (bigcgs) {
1381		fs->fs_save_cgsize = fs->fs_cgsize;
1382		fs->fs_cgsize = fs->fs_bsize;
1383	}
1384#endif
1385}
1386
1387/*
1388 * Unwinding superblock updates for old filesystems.
1389 * See ffs_oldfscompat_read above for details.
1390 *
1391 * XXX - Parts get retired eventually.
1392 * Unfortunately new bits get added.
1393 */
1394static void
1395ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1396{
1397	int32_t *extrasave;
1398
1399	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1400	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1401		return;
1402
1403	fs->fs_old_time = fs->fs_time;
1404	fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1405	fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1406	fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1407	fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1408	fs->fs_old_flags = fs->fs_flags;
1409
1410#if 0
1411	if (bigcgs) {
1412		fs->fs_cgsize = fs->fs_save_cgsize;
1413	}
1414#endif
1415
1416	memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1417	extrasave = ump->um_oldfscompat;
1418	extrasave += 512/sizeof(int32_t);
1419	fs->fs_old_npsect = extrasave[0];
1420	fs->fs_old_interleave = extrasave[1];
1421	fs->fs_old_trackskew = extrasave[2];
1422
1423}
1424
1425/*
1426 * unmount vfs operation
1427 */
1428int
1429ffs_unmount(struct mount *mp, int mntflags)
1430{
1431	struct lwp *l = curlwp;
1432	struct ufsmount *ump = VFSTOUFS(mp);
1433	struct fs *fs = ump->um_fs;
1434	int error, flags;
1435	u_int32_t bsize;
1436#ifdef WAPBL
1437	extern int doforce;
1438#endif
1439
1440	flags = 0;
1441	if (mntflags & MNT_FORCE)
1442		flags |= FORCECLOSE;
1443	if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1444		return (error);
1445	error = UFS_WAPBL_BEGIN(mp);
1446	if (error == 0)
1447		if (fs->fs_ronly == 0 &&
1448		    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1449		    fs->fs_clean & FS_WASCLEAN) {
1450			fs->fs_clean = FS_ISCLEAN;
1451			fs->fs_fmod = 0;
1452			(void) ffs_sbupdate(ump, MNT_WAIT);
1453		}
1454	if (error == 0)
1455		UFS_WAPBL_END(mp);
1456#ifdef WAPBL
1457	KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
1458	if (mp->mnt_wapbl_replay) {
1459		KDASSERT(fs->fs_ronly);
1460		wapbl_replay_stop(mp->mnt_wapbl_replay);
1461		wapbl_replay_free(mp->mnt_wapbl_replay);
1462		mp->mnt_wapbl_replay = 0;
1463	}
1464	error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1465	if (error) {
1466		return error;
1467	}
1468#endif /* WAPBL */
1469
1470	if (ump->um_devvp->v_type != VBAD)
1471		ump->um_devvp->v_specmountpoint = NULL;
1472	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1473	(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1474		NOCRED);
1475	vput(ump->um_devvp);
1476
1477	bsize = fs->fs_cssize;
1478	if (fs->fs_contigsumsize > 0)
1479		bsize += fs->fs_ncg * sizeof(int32_t);
1480	bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1481	kmem_free(fs->fs_csp, bsize);
1482
1483	kmem_free(fs, fs->fs_sbsize);
1484	if (ump->um_oldfscompat != NULL)
1485		kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
1486	mutex_destroy(&ump->um_lock);
1487	ffs_snapshot_fini(ump);
1488	kmem_free(ump, sizeof(*ump));
1489	mp->mnt_data = NULL;
1490	mp->mnt_flag &= ~MNT_LOCAL;
1491	fstrans_unmount(mp);
1492	return (0);
1493}
1494
1495/*
1496 * Flush out all the files in a filesystem.
1497 */
1498int
1499ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1500{
1501	extern int doforce;
1502	struct ufsmount *ump;
1503	int error;
1504
1505	if (!doforce)
1506		flags &= ~FORCECLOSE;
1507	ump = VFSTOUFS(mp);
1508#ifdef QUOTA
1509	if ((error = quota1_umount(mp, flags)) != 0)
1510		return (error);
1511#endif
1512#ifdef QUOTA2
1513	if ((error = quota2_umount(mp, flags)) != 0)
1514		return (error);
1515#endif
1516#ifdef UFS_EXTATTR
1517	if (ump->um_fstype == UFS1) {
1518		if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)
1519			ufs_extattr_stop(mp, l);
1520		if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)
1521			ufs_extattr_uepm_destroy(&ump->um_extattr);
1522	}
1523#endif
1524	if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1525		return (error);
1526	ffs_snapshot_unmount(mp);
1527	/*
1528	 * Flush all the files.
1529	 */
1530	error = vflush(mp, NULLVP, flags);
1531	if (error)
1532		return (error);
1533	/*
1534	 * Flush filesystem metadata.
1535	 */
1536	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1537	error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1538	VOP_UNLOCK(ump->um_devvp);
1539	if (flags & FORCECLOSE) /* XXXDBJ */
1540		error = 0;
1541
1542#ifdef WAPBL
1543	if (error)
1544		return error;
1545	if (mp->mnt_wapbl) {
1546		error = wapbl_flush(mp->mnt_wapbl, 1);
1547		if (flags & FORCECLOSE)
1548			error = 0;
1549	}
1550#endif
1551
1552	return (error);
1553}
1554
1555/*
1556 * Get file system statistics.
1557 */
1558int
1559ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1560{
1561	struct ufsmount *ump;
1562	struct fs *fs;
1563
1564	ump = VFSTOUFS(mp);
1565	fs = ump->um_fs;
1566	mutex_enter(&ump->um_lock);
1567	sbp->f_bsize = fs->fs_bsize;
1568	sbp->f_frsize = fs->fs_fsize;
1569	sbp->f_iosize = fs->fs_bsize;
1570	sbp->f_blocks = fs->fs_dsize;
1571	sbp->f_bfree = blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1572	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1573	sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1574	    fs->fs_minfree) / (u_int64_t) 100;
1575	if (sbp->f_bfree > sbp->f_bresvd)
1576		sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1577	else
1578		sbp->f_bavail = 0;
1579	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1580	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1581	sbp->f_favail = sbp->f_ffree;
1582	sbp->f_fresvd = 0;
1583	mutex_exit(&ump->um_lock);
1584	copy_statvfs_info(sbp, mp);
1585
1586	return (0);
1587}
1588
1589/*
1590 * Go through the disk queues to initiate sandbagged IO;
1591 * go through the inodes to write those that have been modified;
1592 * initiate the writing of the super block if it has been modified.
1593 *
1594 * Note: we are always called with the filesystem marked `MPBUSY'.
1595 */
1596int
1597ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1598{
1599	struct vnode *vp, *mvp, *nvp;
1600	struct inode *ip;
1601	struct ufsmount *ump = VFSTOUFS(mp);
1602	struct fs *fs;
1603	int error, allerror = 0;
1604	bool is_suspending;
1605
1606	fs = ump->um_fs;
1607	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1608		printf("fs = %s\n", fs->fs_fsmnt);
1609		panic("update: rofs mod");
1610	}
1611
1612	/* Allocate a marker vnode. */
1613	mvp = vnalloc(mp);
1614
1615	fstrans_start(mp, FSTRANS_SHARED);
1616	is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING);
1617	/*
1618	 * Write back each (modified) inode.
1619	 */
1620	mutex_enter(&mntvnode_lock);
1621loop:
1622	/*
1623	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1624	 * and vclean() can be called indirectly
1625	 */
1626	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1627		nvp = TAILQ_NEXT(vp, v_mntvnodes);
1628		/*
1629		 * If the vnode that we are about to sync is no longer
1630		 * associated with this mount point, start over.
1631		 */
1632		if (vp->v_mount != mp)
1633			goto loop;
1634		/*
1635		 * Don't interfere with concurrent scans of this FS.
1636		 */
1637		if (vismarker(vp))
1638			continue;
1639		mutex_enter(vp->v_interlock);
1640		ip = VTOI(vp);
1641
1642		/*
1643		 * Skip the vnode/inode if inaccessible.
1644		 */
1645		if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
1646		    vp->v_type == VNON) {
1647			mutex_exit(vp->v_interlock);
1648			continue;
1649		}
1650
1651		/*
1652		 * We deliberately update inode times here.  This will
1653		 * prevent a massive queue of updates accumulating, only
1654		 * to be handled by a call to unmount.
1655		 *
1656		 * XXX It would be better to have the syncer trickle these
1657		 * out.  Adjustment needed to allow registering vnodes for
1658		 * sync when the vnode is clean, but the inode dirty.  Or
1659		 * have ufs itself trickle out inode updates.
1660		 *
1661		 * If doing a lazy sync, we don't care about metadata or
1662		 * data updates, because they are handled by each vnode's
1663		 * synclist entry.  In this case we are only interested in
1664		 * writing back modified inodes.
1665		 */
1666		if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
1667		    IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 &&
1668		    (waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) &&
1669		    UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) {
1670			mutex_exit(vp->v_interlock);
1671			continue;
1672		}
1673		if (vp->v_type == VBLK && is_suspending) {
1674			mutex_exit(vp->v_interlock);
1675			continue;
1676		}
1677		vmark(mvp, vp);
1678		mutex_exit(&mntvnode_lock);
1679		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT);
1680		if (error) {
1681			mutex_enter(&mntvnode_lock);
1682			nvp = vunmark(mvp);
1683			if (error == ENOENT) {
1684				goto loop;
1685			}
1686			continue;
1687		}
1688		if (waitfor == MNT_LAZY) {
1689			error = UFS_WAPBL_BEGIN(vp->v_mount);
1690			if (!error) {
1691				error = ffs_update(vp, NULL, NULL,
1692				    UPDATE_CLOSE);
1693				UFS_WAPBL_END(vp->v_mount);
1694			}
1695		} else {
1696			error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1697			    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1698		}
1699		if (error)
1700			allerror = error;
1701		vput(vp);
1702		mutex_enter(&mntvnode_lock);
1703		nvp = vunmark(mvp);
1704	}
1705	mutex_exit(&mntvnode_lock);
1706	/*
1707	 * Force stale file system control information to be flushed.
1708	 */
1709	if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1710	    !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1711		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1712		if ((error = VOP_FSYNC(ump->um_devvp, cred,
1713		    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1714		    0, 0)) != 0)
1715			allerror = error;
1716		VOP_UNLOCK(ump->um_devvp);
1717		if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
1718			mutex_enter(&mntvnode_lock);
1719			goto loop;
1720		}
1721	}
1722#if defined(QUOTA) || defined(QUOTA2)
1723	qsync(mp);
1724#endif
1725	/*
1726	 * Write back modified superblock.
1727	 */
1728	if (fs->fs_fmod != 0) {
1729		fs->fs_fmod = 0;
1730		fs->fs_time = time_second;
1731		error = UFS_WAPBL_BEGIN(mp);
1732		if (error)
1733			allerror = error;
1734		else {
1735			if ((error = ffs_cgupdate(ump, waitfor)))
1736				allerror = error;
1737			UFS_WAPBL_END(mp);
1738		}
1739	}
1740
1741#ifdef WAPBL
1742	if (mp->mnt_wapbl) {
1743		error = wapbl_flush(mp->mnt_wapbl, 0);
1744		if (error)
1745			allerror = error;
1746	}
1747#endif
1748
1749	fstrans_done(mp);
1750	vnfree(mvp);
1751	return (allerror);
1752}
1753
1754/*
1755 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1756 * in from disk.  If it is in core, wait for the lock bit to clear, then
1757 * return the inode locked.  Detection and handling of mount points must be
1758 * done by the calling routine.
1759 */
1760int
1761ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1762{
1763	struct fs *fs;
1764	struct inode *ip;
1765	struct ufsmount *ump;
1766	struct buf *bp;
1767	struct vnode *vp;
1768	dev_t dev;
1769	int error;
1770
1771	ump = VFSTOUFS(mp);
1772	dev = ump->um_dev;
1773
1774 retry:
1775	if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1776		return (0);
1777
1778	/* Allocate a new vnode/inode. */
1779	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, NULL, &vp);
1780	if (error) {
1781		*vpp = NULL;
1782		return (error);
1783	}
1784	ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1785
1786	/*
1787	 * If someone beat us to it, put back the freshly allocated
1788	 * vnode/inode pair and retry.
1789	 */
1790	mutex_enter(&ufs_hashlock);
1791	if (ufs_ihashget(dev, ino, 0) != NULL) {
1792		mutex_exit(&ufs_hashlock);
1793		ungetnewvnode(vp);
1794		pool_cache_put(ffs_inode_cache, ip);
1795		goto retry;
1796	}
1797
1798	vp->v_vflag |= VV_LOCKSWORK;
1799
1800	/*
1801	 * XXX MFS ends up here, too, to allocate an inode.  Should we
1802	 * XXX create another pool for MFS inodes?
1803	 */
1804
1805	memset(ip, 0, sizeof(struct inode));
1806	vp->v_data = ip;
1807	ip->i_vnode = vp;
1808	ip->i_ump = ump;
1809	ip->i_fs = fs = ump->um_fs;
1810	ip->i_dev = dev;
1811	ip->i_number = ino;
1812#if defined(QUOTA) || defined(QUOTA2)
1813	ufsquota_init(ip);
1814#endif
1815
1816	/*
1817	 * Initialize genfs node, we might proceed to destroy it in
1818	 * error branches.
1819	 */
1820	genfs_node_init(vp, &ffs_genfsops);
1821
1822	/*
1823	 * Put it onto its hash chain and lock it so that other requests for
1824	 * this inode will block if they arrive while we are sleeping waiting
1825	 * for old data structures to be purged or for the contents of the
1826	 * disk portion of this inode to be read.
1827	 */
1828
1829	ufs_ihashins(ip);
1830	mutex_exit(&ufs_hashlock);
1831
1832	/* Read in the disk contents for the inode, copy into the inode. */
1833	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1834		      (int)fs->fs_bsize, NOCRED, 0, &bp);
1835	if (error) {
1836
1837		/*
1838		 * The inode does not contain anything useful, so it would
1839		 * be misleading to leave it on its hash chain. With mode
1840		 * still zero, it will be unlinked and returned to the free
1841		 * list by vput().
1842		 */
1843
1844		vput(vp);
1845		brelse(bp, 0);
1846		*vpp = NULL;
1847		return (error);
1848	}
1849	if (ip->i_ump->um_fstype == UFS1)
1850		ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1851		    PR_WAITOK);
1852	else
1853		ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1854		    PR_WAITOK);
1855	ffs_load_inode(bp, ip, fs, ino);
1856	brelse(bp, 0);
1857
1858	/*
1859	 * Initialize the vnode from the inode, check for aliases.
1860	 * Note that the underlying vnode may have changed.
1861	 */
1862
1863	ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1864
1865	/*
1866	 * Finish inode initialization now that aliasing has been resolved.
1867	 */
1868
1869	ip->i_devvp = ump->um_devvp;
1870	vref(ip->i_devvp);
1871
1872	/*
1873	 * Ensure that uid and gid are correct. This is a temporary
1874	 * fix until fsck has been changed to do the update.
1875	 */
1876
1877	if (fs->fs_old_inodefmt < FS_44INODEFMT) {		/* XXX */
1878		ip->i_uid = ip->i_ffs1_ouid;			/* XXX */
1879		ip->i_gid = ip->i_ffs1_ogid;			/* XXX */
1880	}							/* XXX */
1881	uvm_vnp_setsize(vp, ip->i_size);
1882	*vpp = vp;
1883	return (0);
1884}
1885
1886/*
1887 * File handle to vnode
1888 *
1889 * Have to be really careful about stale file handles:
1890 * - check that the inode number is valid
1891 * - call ffs_vget() to get the locked inode
1892 * - check for an unallocated inode (i_mode == 0)
1893 * - check that the given client host has export rights and return
1894 *   those rights via. exflagsp and credanonp
1895 */
1896int
1897ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1898{
1899	struct ufid ufh;
1900	struct fs *fs;
1901
1902	if (fhp->fid_len != sizeof(struct ufid))
1903		return EINVAL;
1904
1905	memcpy(&ufh, fhp, sizeof(ufh));
1906	fs = VFSTOUFS(mp)->um_fs;
1907	if (ufh.ufid_ino < ROOTINO ||
1908	    ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1909		return (ESTALE);
1910	return (ufs_fhtovp(mp, &ufh, vpp));
1911}
1912
1913/*
1914 * Vnode pointer to File handle
1915 */
1916/* ARGSUSED */
1917int
1918ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1919{
1920	struct inode *ip;
1921	struct ufid ufh;
1922
1923	if (*fh_size < sizeof(struct ufid)) {
1924		*fh_size = sizeof(struct ufid);
1925		return E2BIG;
1926	}
1927	ip = VTOI(vp);
1928	*fh_size = sizeof(struct ufid);
1929	memset(&ufh, 0, sizeof(ufh));
1930	ufh.ufid_len = sizeof(struct ufid);
1931	ufh.ufid_ino = ip->i_number;
1932	ufh.ufid_gen = ip->i_gen;
1933	memcpy(fhp, &ufh, sizeof(ufh));
1934	return (0);
1935}
1936
1937void
1938ffs_init(void)
1939{
1940	if (ffs_initcount++ > 0)
1941		return;
1942
1943	ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1944	    "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1945	ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1946	    "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1947	ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1948	    "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1949	ufs_init();
1950}
1951
/*
 * Reinitialisation hook: FFS does nothing of its own here and simply
 * forwards to ufs_reinit().
 */
void
ffs_reinit(void)
{
	ufs_reinit();
}
1958
1959void
1960ffs_done(void)
1961{
1962	if (--ffs_initcount > 0)
1963		return;
1964
1965	ufs_done();
1966	pool_cache_destroy(ffs_dinode2_cache);
1967	pool_cache_destroy(ffs_dinode1_cache);
1968	pool_cache_destroy(ffs_inode_cache);
1969}
1970
1971/*
1972 * Write a superblock and associated information back to disk.
1973 */
1974int
1975ffs_sbupdate(struct ufsmount *mp, int waitfor)
1976{
1977	struct fs *fs = mp->um_fs;
1978	struct buf *bp;
1979	int error = 0;
1980	u_int32_t saveflag;
1981
1982	error = ffs_getblk(mp->um_devvp,
1983	    fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK,
1984	    fs->fs_sbsize, false, &bp);
1985	if (error)
1986		return error;
1987	saveflag = fs->fs_flags & FS_INTERNAL;
1988	fs->fs_flags &= ~FS_INTERNAL;
1989
1990	memcpy(bp->b_data, fs, fs->fs_sbsize);
1991
1992	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1993#ifdef FFS_EI
1994	if (mp->um_flags & UFS_NEEDSWAP)
1995		ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
1996#endif
1997	fs->fs_flags |= saveflag;
1998
1999	if (waitfor == MNT_WAIT)
2000		error = bwrite(bp);
2001	else
2002		bawrite(bp);
2003	return (error);
2004}
2005
2006int
2007ffs_cgupdate(struct ufsmount *mp, int waitfor)
2008{
2009	struct fs *fs = mp->um_fs;
2010	struct buf *bp;
2011	int blks;
2012	void *space;
2013	int i, size, error = 0, allerror = 0;
2014
2015	allerror = ffs_sbupdate(mp, waitfor);
2016	blks = howmany(fs->fs_cssize, fs->fs_fsize);
2017	space = fs->fs_csp;
2018	for (i = 0; i < blks; i += fs->fs_frag) {
2019		size = fs->fs_bsize;
2020		if (i + fs->fs_frag > blks)
2021			size = (blks - i) * fs->fs_fsize;
2022		error = ffs_getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
2023		    FFS_NOBLK, size, false, &bp);
2024		if (error)
2025			break;
2026#ifdef FFS_EI
2027		if (mp->um_flags & UFS_NEEDSWAP)
2028			ffs_csum_swap((struct csum*)space,
2029			    (struct csum*)bp->b_data, size);
2030		else
2031#endif
2032			memcpy(bp->b_data, space, (u_int)size);
2033		space = (char *)space + size;
2034		if (waitfor == MNT_WAIT)
2035			error = bwrite(bp);
2036		else
2037			bawrite(bp);
2038	}
2039	if (!allerror && error)
2040		allerror = error;
2041	return (allerror);
2042}
2043
/*
 * Extended attribute control.  When UFS_EXTATTR is configured and the
 * file system is UFS1 the request goes to ufs_extattrctl(); in every
 * other case it is passed to vfs_stdextattrctl().
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	const struct ufsmount *ump = VFSTOUFS(mp);

	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	if (ump->um_fstype == UFS1) {
		return (ufs_extattrctl(mp, cmd, vp, attrnamespace,
		    attrname));
	}
#endif
	return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname));
}
2058
2059int
2060ffs_suspendctl(struct mount *mp, int cmd)
2061{
2062	int error;
2063	struct lwp *l = curlwp;
2064
2065	switch (cmd) {
2066	case SUSPEND_SUSPEND:
2067		if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2068			return error;
2069		error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2070		if (error == 0)
2071			error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2072#ifdef WAPBL
2073		if (error == 0 && mp->mnt_wapbl)
2074			error = wapbl_flush(mp->mnt_wapbl, 1);
2075#endif
2076		if (error != 0) {
2077			(void) fstrans_setstate(mp, FSTRANS_NORMAL);
2078			return error;
2079		}
2080		return 0;
2081
2082	case SUSPEND_RESUME:
2083		return fstrans_setstate(mp, FSTRANS_NORMAL);
2084
2085	default:
2086		return EINVAL;
2087	}
2088}
2089
2090/*
2091 * Synch vnode for a mounted file system.
2092 */
2093static int
2094ffs_vfs_fsync(vnode_t *vp, int flags)
2095{
2096	int error, i, pflags;
2097#ifdef WAPBL
2098	struct mount *mp;
2099#endif
2100
2101	KASSERT(vp->v_type == VBLK);
2102	KASSERT(vp->v_specmountpoint != NULL);
2103
2104	/*
2105	 * Flush all dirty data associated with the vnode.
2106	 */
2107	pflags = PGO_ALLPAGES | PGO_CLEANIT;
2108	if ((flags & FSYNC_WAIT) != 0)
2109		pflags |= PGO_SYNCIO;
2110	mutex_enter(vp->v_interlock);
2111	error = VOP_PUTPAGES(vp, 0, 0, pflags);
2112	if (error)
2113		return error;
2114
2115#ifdef WAPBL
2116	mp = vp->v_specmountpoint;
2117	if (mp && mp->mnt_wapbl) {
2118		/*
2119		 * Don't bother writing out metadata if the syncer is
2120		 * making the request.  We will let the sync vnode
2121		 * write it out in a single burst through a call to
2122		 * VFS_SYNC().
2123		 */
2124		if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
2125			return 0;
2126
2127		/*
2128		 * Don't flush the log if the vnode being flushed
2129		 * contains no dirty buffers that could be in the log.
2130		 */
2131		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2132			error = wapbl_flush(mp->mnt_wapbl, 0);
2133			if (error)
2134				return error;
2135		}
2136
2137		if ((flags & FSYNC_WAIT) != 0) {
2138			mutex_enter(vp->v_interlock);
2139			while (vp->v_numoutput)
2140				cv_wait(&vp->v_cv, vp->v_interlock);
2141			mutex_exit(vp->v_interlock);
2142		}
2143
2144		return 0;
2145	}
2146#endif /* WAPBL */
2147
2148	error = vflushbuf(vp, flags);
2149	if (error == 0 && (flags & FSYNC_CACHE) != 0) {
2150		i = 1;
2151		(void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
2152		    kauth_cred_get());
2153	}
2154
2155	return error;
2156}
2157