ffs_vfsops.c revision 141526
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 141526 2005-02-08 17:40:01Z phk $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/mac.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <ufs/ufs/extattr.h>
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/ufsmount.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64#include <vm/vm.h>
65#include <vm/uma.h>
66#include <vm/vm_page.h>
67
68#include <geom/geom.h>
69#include <geom/geom_vfs.h>
70
71uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
72
73static int	ffs_sbupdate(struct ufsmount *, int);
74static int	ffs_reload(struct mount *, struct thread *);
75static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
76static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
77		    ufs2_daddr_t);
78static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
79static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
80static vfs_init_t ffs_init;
81static vfs_uninit_t ffs_uninit;
82static vfs_extattrctl_t ffs_extattrctl;
83static vfs_cmount_t ffs_cmount;
84static vfs_mount_t ffs_mount;
85
86static struct vfsops ufs_vfsops = {
87	.vfs_extattrctl =	ffs_extattrctl,
88	.vfs_fhtovp =		ffs_fhtovp,
89	.vfs_init =		ffs_init,
90	.vfs_mount =		ffs_mount,
91	.vfs_cmount =		ffs_cmount,
92	.vfs_quotactl =		ufs_quotactl,
93	.vfs_root =		ufs_root,
94	.vfs_statfs =		ffs_statfs,
95	.vfs_sync =		ffs_sync,
96	.vfs_uninit =		ffs_uninit,
97	.vfs_unmount =		ffs_unmount,
98	.vfs_vget =		ffs_vget,
99	.vfs_vptofh =		ffs_vptofh,
100};
101
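/*
 * Register these operations as the "ufs" filesystem type; FFS supplies
 * the implementation behind the generic UFS name.
 */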
102VFS_SET(ufs_vfsops, ufs, 0);
103
104static b_strategy_t ffs_geom_strategy;
105
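/*
 * Buffer operations installed on the device vnode's bufobj by
 * ffs_mountfs(), so that buffer I/O issued against the device is
 * routed through ffs_geom_strategy() below.
 */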
106static struct buf_ops ffs_ops = {
107	.bop_name =	"FFS",
108	.bop_write =	bufwrite,
109	.bop_strategy =	ffs_geom_strategy,
110	.bop_sync =	bufsync,
111};
112
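/*
 * Mount options recognized by ffs_mount(); vfs_filteropt() rejects a
 * mount request containing any option not in this list.
 */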
113static const char *ffs_opts[] = { "from", "export", NULL };
114
115static int
116ffs_mount(struct mount *mp, struct thread *td)
117{
118	struct vnode *devvp;
119	struct ufsmount *ump = 0;
120	struct fs *fs;
121	int error, flags;
122	mode_t accessmode;
123	struct nameidata ndp;
124	struct export_args export;
125	char *fspec;
126
127	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
128		return (EINVAL);
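	/*
	 * Lazily create the zones backing in-core inodes and dinodes on
	 * the first mount; they are global and shared by every FFS mount
	 * (see ffs_vget() and ffs_ifree()).
	 */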
129	if (uma_inode == NULL) {
130		uma_inode = uma_zcreate("FFS inode",
131		    sizeof(struct inode), NULL, NULL, NULL, NULL,
132		    UMA_ALIGN_PTR, 0);
133		uma_ufs1 = uma_zcreate("FFS1 dinode",
134		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
135		    UMA_ALIGN_PTR, 0);
136		uma_ufs2 = uma_zcreate("FFS2 dinode",
137		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
138		    UMA_ALIGN_PTR, 0);
139	}
140
141	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
142	if (error)
143		return (error);
144
145	/*
146	 * If updating, check whether changing from read-only to
147	 * read/write; if there is no device name, that's all we do.
148	 */
149	if (mp->mnt_flag & MNT_UPDATE) {
150		ump = VFSTOUFS(mp);
151		fs = ump->um_fs;
152		devvp = ump->um_devvp;
153		if (fs->fs_ronly == 0 &&
154		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
155			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
156				return (error);
157			/*
158			 * Flush any dirty data.
159			 */
160			if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
161				vn_finished_write(mp);
162				return (error);
163			}
164			/*
165			 * Check for and optionally get rid of files open
166			 * for writing.
167			 */
168			flags = WRITECLOSE;
169			if (mp->mnt_flag & MNT_FORCE)
170				flags |= FORCECLOSE;
171			if (mp->mnt_flag & MNT_SOFTDEP) {
172				error = softdep_flushfiles(mp, flags, td);
173			} else {
174				error = ffs_flushfiles(mp, flags, td);
175			}
176			if (error) {
177				vn_finished_write(mp);
178				return (error);
179			}
180			if (fs->fs_pendingblocks != 0 ||
181			    fs->fs_pendinginodes != 0) {
182				printf("%s: %s: blocks %jd files %d\n",
183				    fs->fs_fsmnt, "update error",
184				    (intmax_t)fs->fs_pendingblocks,
185				    fs->fs_pendinginodes);
186				fs->fs_pendingblocks = 0;
187				fs->fs_pendinginodes = 0;
188			}
189			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
190				fs->fs_clean = 1;
191			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
192				fs->fs_ronly = 0;
193				fs->fs_clean = 0;
194				vn_finished_write(mp);
195				return (error);
196			}
197			vn_finished_write(mp);
198			DROP_GIANT();
199			g_topology_lock();
200			g_access(ump->um_cp, 0, -1, 0);
201			g_topology_unlock();
202			PICKUP_GIANT();
203			fs->fs_ronly = 1;
204			mp->mnt_flag |= MNT_RDONLY;
205		}
206		if ((mp->mnt_flag & MNT_RELOAD) &&
207		    (error = ffs_reload(mp, td)) != 0)
208			return (error);
209		if (fs->fs_ronly &&
210		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
211			/*
212			 * If upgrade to read-write by non-root, then verify
213			 * that user has necessary permissions on the device.
214			 */
215			if (suser(td)) {
216				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
217				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
218				    td->td_ucred, td)) != 0) {
219					VOP_UNLOCK(devvp, 0, td);
220					return (error);
221				}
222				VOP_UNLOCK(devvp, 0, td);
223			}
224			fs->fs_flags &= ~FS_UNCLEAN;
225			if (fs->fs_clean == 0) {
226				fs->fs_flags |= FS_UNCLEAN;
227				if ((mp->mnt_flag & MNT_FORCE) ||
228				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
229				     (fs->fs_flags & FS_DOSOFTDEP))) {
230					printf("WARNING: %s was not %s\n",
231					   fs->fs_fsmnt, "properly dismounted");
232				} else {
233					printf(
234"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
235					    fs->fs_fsmnt);
236					return (EPERM);
237				}
238			}
239			DROP_GIANT();
240			g_topology_lock();
241			/*
242			 * If we're the root device, we may not have an E count
243			 * yet; get it now.
244			 */
245			if (ump->um_cp->ace == 0)
246				error = g_access(ump->um_cp, 0, 1, 1);
247			else
248				error = g_access(ump->um_cp, 0, 1, 0);
249			g_topology_unlock();
250			PICKUP_GIANT();
251			if (error)
252				return (error);
253			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
254				return (error);
255			fs->fs_ronly = 0;
256			mp->mnt_flag &= ~MNT_RDONLY;
257			fs->fs_clean = 0;
258			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
259				vn_finished_write(mp);
260				return (error);
261			}
262			/* check to see if we need to start softdep */
263			if ((fs->fs_flags & FS_DOSOFTDEP) &&
264			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
265				vn_finished_write(mp);
266				return (error);
267			}
268			if (fs->fs_snapinum[0] != 0)
269				ffs_snapshot_mount(mp);
270			vn_finished_write(mp);
271		}
272		/*
273		 * Soft updates is incompatible with "async",
274		 * so if we are doing soft updates, stop the user
275		 * from setting the async flag in an update.
276		 * Softdep_mount() clears it in an initial mount
277		 * or ro->rw remount.
278		 */
279		if (mp->mnt_flag & MNT_SOFTDEP)
280			mp->mnt_flag &= ~MNT_ASYNC;
281		/*
282		 * Keep MNT_ACLS flag if it is stored in superblock.
283		 */
284		if ((fs->fs_flags & FS_ACLS) != 0)
285			mp->mnt_flag |= MNT_ACLS;
286		/*
287		 * If not updating name, process export requests.
288		 */
289		error = vfs_copyopt(mp->mnt_optnew, "export", &export, sizeof export);
290		if (error == 0 && export.ex_flags != 0)
291			return (vfs_export(mp, &export));
292		/*
293		 * If this is a snapshot request, take the snapshot.
294		 */
295		if (mp->mnt_flag & MNT_SNAPSHOT)
296			return (ffs_snapshot(mp, fspec));
297	}
298
299	/*
300	 * Not an update, or updating the name: look up the name
301	 * and verify that it refers to a sensible disk device.
302	 */
303	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
304	if ((error = namei(&ndp)) != 0)
305		return (error);
306	NDFREE(&ndp, NDF_ONLY_PNBUF);
307	devvp = ndp.ni_vp;
308	if (!vn_isdisk(devvp, &error)) {
309		vrele(devvp);
310		return (error);
311	}
312
313	/*
314	 * If mount by non-root, then verify that user has necessary
315	 * permissions on the device.
316	 */
317	if (suser(td)) {
318		accessmode = VREAD;
319		if ((mp->mnt_flag & MNT_RDONLY) == 0)
320			accessmode |= VWRITE;
321		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
322		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
323			vput(devvp);
324			return (error);
325		}
326		VOP_UNLOCK(devvp, 0, td);
327	}
328
329	if (mp->mnt_flag & MNT_UPDATE) {
330		/*
331		 * Update only
332		 *
333		 * If it is not the same vnode, or at least the same device,
334		 * then the update is not valid.
335		 */
336
337		if (devvp->v_rdev != ump->um_devvp->v_rdev)
338			error = EINVAL;	/* needs translation */
339		vrele(devvp);
340		if (error)
341			return (error);
342	} else {
343		/*
344		 * New mount
345		 *
346		 * We need the name for the mount point (also used for
347		 * "last mounted on") copied in. If an error occurs,
348		 * the mount point is discarded by the upper level code.
349		 * Note that vfs_mount() populates f_mntonname for us.
350		 */
351		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
352			vrele(devvp);
353			return (error);
354		}
355	}
356	vfs_mountedfrom(mp, fspec);
357	return (0);
358}
359
360/*
361 * Compatibility with old mount system call.
362 */
363
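/*
 * Translate the old-style struct ufs_args into "from" and "export"
 * mount options and re-enter the new mount path via kernel_mount().
 */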
364static int
365ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
366{
367	struct ufs_args args;
368	int error;
369
370	if (data == NULL)
371		return (EINVAL);
372	error = copyin(data, &args, sizeof args);
373	if (error)
374		return (error);
375
376	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
377	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
378	error = kernel_mount(ma, flags);
379
380	return (error);
381}
382
383/*
384 * Reload all incore data for a filesystem (used after running fsck on
385 * the root filesystem and finding things to fix). The filesystem must
386 * be mounted read-only.
387 *
388 * Things to do to update the mount:
389 *	1) invalidate all cached meta-data.
390 *	2) re-read superblock from disk.
391 *	3) re-read summary information from disk.
392 *	4) invalidate all inactive vnodes.
393 *	5) invalidate all cached file data.
394 *	6) re-read inode data for all active vnodes.
395 */
396static int
397ffs_reload(struct mount *mp, struct thread *td)
398{
399	struct vnode *vp, *nvp, *devvp;
400	struct inode *ip;
401	void *space;
402	struct buf *bp;
403	struct fs *fs, *newfs;
404	struct ufsmount *ump;
405	ufs2_daddr_t sblockloc;
406	int i, blks, size, error;
407	int32_t *lp;
408
409	if ((mp->mnt_flag & MNT_RDONLY) == 0)
410		return (EINVAL);
411	ump = VFSTOUFS(mp);
412	/*
413	 * Step 1: invalidate all cached meta-data.
414	 */
415	devvp = VFSTOUFS(mp)->um_devvp;
416	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
417	if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
418		panic("ffs_reload: dirty1");
419	VOP_UNLOCK(devvp, 0, td);
420
421	/*
422	 * Step 2: re-read superblock from disk.
423	 */
424	fs = VFSTOUFS(mp)->um_fs;
425	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
426	    NOCRED, &bp)) != 0)
427		return (error);
428	newfs = (struct fs *)bp->b_data;
429	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
430	     newfs->fs_magic != FS_UFS2_MAGIC) ||
431	    newfs->fs_bsize > MAXBSIZE ||
432	    newfs->fs_bsize < sizeof(struct fs)) {
433			brelse(bp);
434			return (EIO);		/* XXX needs translation */
435	}
436	/*
437	 * Copy pointer fields back into superblock before copying in	XXX
438	 * new superblock. These should really be in the ufsmount.	XXX
439	 * Note that important parameters (eg fs_ncg) are unchanged.
440	 */
441	newfs->fs_csp = fs->fs_csp;
442	newfs->fs_maxcluster = fs->fs_maxcluster;
443	newfs->fs_contigdirs = fs->fs_contigdirs;
444	newfs->fs_active = fs->fs_active;
445	/* The file system is still read-only. */
446	newfs->fs_ronly = 1;
447	sblockloc = fs->fs_sblockloc;
448	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
449	brelse(bp);
450	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
451	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
452	UFS_LOCK(ump);
453	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
454		printf("%s: reload pending error: blocks %jd files %d\n",
455		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
456		    fs->fs_pendinginodes);
457		fs->fs_pendingblocks = 0;
458		fs->fs_pendinginodes = 0;
459	}
460	UFS_UNLOCK(ump);
461
462	/*
463	 * Step 3: re-read summary information from disk.
464	 */
465	blks = howmany(fs->fs_cssize, fs->fs_fsize);
466	space = fs->fs_csp;
467	for (i = 0; i < blks; i += fs->fs_frag) {
468		size = fs->fs_bsize;
469		if (i + fs->fs_frag > blks)
470			size = (blks - i) * fs->fs_fsize;
471		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
472		    NOCRED, &bp);
473		if (error)
474			return (error);
475		bcopy(bp->b_data, space, (u_int)size);
476		space = (char *)space + size;
477		brelse(bp);
478	}
479	/*
480	 * We no longer know anything about clusters per cylinder group.
481	 */
482	if (fs->fs_contigsumsize > 0) {
483		lp = fs->fs_maxcluster;
484		for (i = 0; i < fs->fs_ncg; i++)
485			*lp++ = fs->fs_contigsumsize;
486	}
487
488loop:
489	MNT_ILOCK(mp);
490	MNT_VNODE_FOREACH(vp, mp, nvp) {
491		VI_LOCK(vp);
492		if (vp->v_iflag & VI_XLOCK) {
493			VI_UNLOCK(vp);
494			continue;
495		}
496		MNT_IUNLOCK(mp);
497		/*
498		 * Step 4: invalidate all inactive vnodes.
499		 */
500		if (vp->v_usecount == 0) {
501			vgonel(vp, td);
502			goto loop;
503		}
504		/*
505		 * Step 5: invalidate all cached file data.
506		 */
507		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
508			goto loop;
509		}
510		if (vinvalbuf(vp, 0, td, 0, 0))
511			panic("ffs_reload: dirty2");
512		/*
513		 * Step 6: re-read inode data for all active vnodes.
514		 */
515		ip = VTOI(vp);
516		error =
517		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
518		    (int)fs->fs_bsize, NOCRED, &bp);
519		if (error) {
520			VOP_UNLOCK(vp, 0, td);
521			vrele(vp);
522			return (error);
523		}
524		ffs_load_inode(bp, ip, fs, ip->i_number);
525		ip->i_effnlink = ip->i_nlink;
526		brelse(bp);
527		VOP_UNLOCK(vp, 0, td);
528		vrele(vp);
529		MNT_ILOCK(mp);
530	}
531	MNT_IUNLOCK(mp);
532	return (0);
533}
534
535/*
536 * Possible superblock locations ordered from most to least likely.
537 */
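/*
 * The list is terminated by -1, which the probe loop in ffs_mountfs()
 * uses to decide that no usable superblock was found.
 */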
538static int sblock_try[] = SBLOCKSEARCH;
539
540/*
541 * Common code for mount and mountroot
542 */
543static int
544ffs_mountfs(devvp, mp, td)
545	struct vnode *devvp;
546	struct mount *mp;
547	struct thread *td;
548{
549	struct ufsmount *ump;
550	struct buf *bp;
551	struct fs *fs;
552	struct cdev *dev;
553	void *space;
554	ufs2_daddr_t sblockloc;
555	int error, i, blks, size, ronly;
556	int32_t *lp;
557	struct ucred *cred;
558	struct g_consumer *cp;
559
560	dev = devvp->v_rdev;
561	cred = td ? td->td_ucred : NOCRED;
562
563	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
564	DROP_GIANT();
565	g_topology_lock();
566	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
567
568	/*
569	 * If we are a root mount, drop the E flag so fsck can do its magic.
570	 * We will pick it up again when we remount R/W.
571	 */
572	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
573		error = g_access(cp, 0, 0, -1);
574	g_topology_unlock();
575	PICKUP_GIANT();
576	VOP_UNLOCK(devvp, 0, td);
577	if (error)
578		return (error);
579	if (devvp->v_rdev->si_iosize_max != 0)
580		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
581	if (mp->mnt_iosize_max > MAXPHYS)
582		mp->mnt_iosize_max = MAXPHYS;
583
584	devvp->v_bufobj.bo_private = cp;
585	devvp->v_bufobj.bo_ops = &ffs_ops;
586
587	bp = NULL;
588	ump = NULL;
589	fs = NULL;
590	sblockloc = 0;
591	/*
592	 * Try reading the superblock in each of its possible locations.
593	 */
594	for (i = 0; sblock_try[i] != -1; i++) {
595		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
596		    cred, &bp)) != 0)
597			goto out;
598		fs = (struct fs *)bp->b_data;
599		sblockloc = sblock_try[i];
600		if ((fs->fs_magic == FS_UFS1_MAGIC ||
601		     (fs->fs_magic == FS_UFS2_MAGIC &&
602		      (fs->fs_sblockloc == sblockloc ||
603		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
604		    fs->fs_bsize <= MAXBSIZE &&
605		    fs->fs_bsize >= sizeof(struct fs))
606			break;
607		brelse(bp);
608		bp = NULL;
609	}
610	if (sblock_try[i] == -1) {
611		error = EINVAL;		/* XXX needs translation */
612		goto out;
613	}
614	fs->fs_fmod = 0;
615	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
616	fs->fs_flags &= ~FS_UNCLEAN;
617	if (fs->fs_clean == 0) {
618		fs->fs_flags |= FS_UNCLEAN;
619		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
620		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
621		     (fs->fs_flags & FS_DOSOFTDEP))) {
622			printf(
623"WARNING: %s was not properly dismounted\n",
624			    fs->fs_fsmnt);
625		} else {
626			printf(
627"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
628			    fs->fs_fsmnt);
629			error = EPERM;
630			goto out;
631		}
632		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
633		    (mp->mnt_flag & MNT_FORCE)) {
634			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
635			    (intmax_t)fs->fs_pendingblocks,
636			    fs->fs_pendinginodes);
637			fs->fs_pendingblocks = 0;
638			fs->fs_pendinginodes = 0;
639		}
640	}
641	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
642		printf("%s: mount pending error: blocks %jd files %d\n",
643		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
644		    fs->fs_pendinginodes);
645		fs->fs_pendingblocks = 0;
646		fs->fs_pendinginodes = 0;
647	}
648	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
649	ump->um_cp = cp;
650	ump->um_bo = &devvp->v_bufobj;
651	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
652	if (fs->fs_magic == FS_UFS1_MAGIC) {
653		ump->um_fstype = UFS1;
654		ump->um_balloc = ffs_balloc_ufs1;
655	} else {
656		ump->um_fstype = UFS2;
657		ump->um_balloc = ffs_balloc_ufs2;
658	}
659	ump->um_blkatoff = ffs_blkatoff;
660	ump->um_truncate = ffs_truncate;
661	ump->um_update = ffs_update;
662	ump->um_valloc = ffs_valloc;
663	ump->um_vfree = ffs_vfree;
664	ump->um_ifree = ffs_ifree;
665	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
666	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
667	if (fs->fs_sbsize < SBLOCKSIZE)
668		bp->b_flags |= B_INVAL | B_NOCACHE;
669	brelse(bp);
670	bp = NULL;
671	fs = ump->um_fs;
672	ffs_oldfscompat_read(fs, ump, sblockloc);
673	fs->fs_ronly = ronly;
674	size = fs->fs_cssize;
675	blks = howmany(size, fs->fs_fsize);
676	if (fs->fs_contigsumsize > 0)
677		size += fs->fs_ncg * sizeof(int32_t);
678	size += fs->fs_ncg * sizeof(u_int8_t);
679	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
680	fs->fs_csp = space;
681	for (i = 0; i < blks; i += fs->fs_frag) {
682		size = fs->fs_bsize;
683		if (i + fs->fs_frag > blks)
684			size = (blks - i) * fs->fs_fsize;
685		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
686		    cred, &bp)) != 0) {
687			free(fs->fs_csp, M_UFSMNT);
688			goto out;
689		}
690		bcopy(bp->b_data, space, (u_int)size);
691		space = (char *)space + size;
692		brelse(bp);
693		bp = NULL;
694	}
695	if (fs->fs_contigsumsize > 0) {
696		fs->fs_maxcluster = lp = space;
697		for (i = 0; i < fs->fs_ncg; i++)
698			*lp++ = fs->fs_contigsumsize;
699		space = lp;
700	}
701	size = fs->fs_ncg * sizeof(u_int8_t);
702	fs->fs_contigdirs = (u_int8_t *)space;
703	bzero(fs->fs_contigdirs, size);
704	fs->fs_active = NULL;
705	mp->mnt_data = (qaddr_t)ump;
706	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
707	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
708	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
709	    vfs_getvfs(&mp->mnt_stat.f_fsid))
710		vfs_getnewfsid(mp);
711	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
712	mp->mnt_flag |= MNT_LOCAL;
713	if ((fs->fs_flags & FS_MULTILABEL) != 0)
714#ifdef MAC
715		mp->mnt_flag |= MNT_MULTILABEL;
716#else
717		printf(
718"WARNING: %s: multilabel flag on fs but no MAC support\n",
719		    fs->fs_fsmnt);
720#endif
721	if ((fs->fs_flags & FS_ACLS) != 0)
722#ifdef UFS_ACL
723		mp->mnt_flag |= MNT_ACLS;
724#else
725		printf(
726"WARNING: %s: ACLs flag on fs but no ACLs support\n",
727		    fs->fs_fsmnt);
728#endif
729	ump->um_mountp = mp;
730	ump->um_dev = dev;
731	ump->um_devvp = devvp;
732	ump->um_nindir = fs->fs_nindir;
733	ump->um_bptrtodb = fs->fs_fsbtodb;
734	ump->um_seqinc = fs->fs_frag;
735	for (i = 0; i < MAXQUOTAS; i++)
736		ump->um_quotas[i] = NULLVP;
737#ifdef UFS_EXTATTR
738	ufs_extattr_uepm_init(&ump->um_extattr);
739#endif
740	/*
741	 * Set FS local "last mounted on" information (NULL pad)
742	 */
743	vfs_mountedfrom(mp, fs->fs_fsmnt);
744
745	if (mp->mnt_flag & MNT_ROOTFS) {
746		/*
747		 * Root mount; update timestamp in mount structure.
748		 * This will be used by the common root mount code
749		 * to update the system clock.
750		 */
751		mp->mnt_time = fs->fs_time;
752	}
753
754	if (ronly == 0) {
755		if ((fs->fs_flags & FS_DOSOFTDEP) &&
756		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
757			free(fs->fs_csp, M_UFSMNT);
758			goto out;
759		}
760		if (fs->fs_snapinum[0] != 0)
761			ffs_snapshot_mount(mp);
762		fs->fs_fmod = 1;
763		fs->fs_clean = 0;
764		(void) ffs_sbupdate(ump, MNT_WAIT);
765	}
766	/*
767	 * Initialize filesystem stat information in mount struct.
768	 */
769#ifdef UFS_EXTATTR
770#ifdef UFS_EXTATTR_AUTOSTART
771	/*
772	 *
773	 * Auto-starting does the following:
774	 *	- check for /.attribute in the fs, and extattr_start if so
775	 *	- for each file in .attribute, enable that file with
776	 * 	  an attribute of the same name.
777	 * Not clear how to report errors -- probably eat them.
778	 * This would all happen while the filesystem was busy/not
779	 * available, so would effectively be "atomic".
780	 */
781	(void) ufs_extattr_autostart(mp, td);
782#endif /* !UFS_EXTATTR_AUTOSTART */
783#endif /* !UFS_EXTATTR */
784#ifndef QUOTA
785	mp->mnt_kern_flag |= MNTK_MPSAFE;
786#endif
787	return (0);
788out:
789	if (bp)
790		brelse(bp);
791	vinvalbuf(devvp, V_SAVE, td, 0, 0);
792	if (cp != NULL) {
793		DROP_GIANT();
794		g_topology_lock();
795		g_vfs_close(cp, td);
796		g_topology_unlock();
797		PICKUP_GIANT();
798	}
799	if (ump) {
800		mtx_destroy(UFS_MTX(ump));
801		free(ump->um_fs, M_UFSMNT);
802		free(ump, M_UFSMNT);
803		mp->mnt_data = (qaddr_t)0;
804	}
805	return (error);
806}
807
808#include <sys/sysctl.h>
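/*
 * debug.bigcgs is a debugging knob: while it is set,
 * ffs_oldfscompat_read() reports fs_cgsize as a full block and
 * ffs_oldfscompat_write() restores the saved value before the
 * superblock is written back to disk.
 */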
809int bigcgs = 0;
810SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
811
812/*
813 * Sanity checks for loading old filesystem superblocks.
814 * See ffs_oldfscompat_write below for unwound actions.
815 *
816 * XXX - Parts get retired eventually.
817 * Unfortunately new bits get added.
818 */
819static void
820ffs_oldfscompat_read(fs, ump, sblockloc)
821	struct fs *fs;
822	struct ufsmount *ump;
823	ufs2_daddr_t sblockloc;
824{
825	off_t maxfilesize;
826
827	/*
828	 * If not yet done, update fs_flags location and value of fs_sblockloc.
829	 */
830	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
831		fs->fs_flags = fs->fs_old_flags;
832		fs->fs_old_flags |= FS_FLAGS_UPDATED;
833		fs->fs_sblockloc = sblockloc;
834	}
835	/*
836	 * If not yet done, update UFS1 superblock with new wider fields.
837	 */
838	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
839		fs->fs_maxbsize = fs->fs_bsize;
840		fs->fs_time = fs->fs_old_time;
841		fs->fs_size = fs->fs_old_size;
842		fs->fs_dsize = fs->fs_old_dsize;
843		fs->fs_csaddr = fs->fs_old_csaddr;
844		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
845		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
846		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
847		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
848	}
849	if (fs->fs_magic == FS_UFS1_MAGIC &&
850	    fs->fs_old_inodefmt < FS_44INODEFMT) {
851		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
852		fs->fs_qbmask = ~fs->fs_bmask;
853		fs->fs_qfmask = ~fs->fs_fmask;
854	}
855	if (fs->fs_magic == FS_UFS1_MAGIC) {
856		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
857		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
858		if (fs->fs_maxfilesize > maxfilesize)
859			fs->fs_maxfilesize = maxfilesize;
860	}
861	/* Compatibility for old filesystems */
862	if (fs->fs_avgfilesize <= 0)
863		fs->fs_avgfilesize = AVFILESIZ;
864	if (fs->fs_avgfpdir <= 0)
865		fs->fs_avgfpdir = AFPDIR;
866	if (bigcgs) {
867		fs->fs_save_cgsize = fs->fs_cgsize;
868		fs->fs_cgsize = fs->fs_bsize;
869	}
870}
871
872/*
873 * Unwinding superblock updates for old filesystems.
874 * See ffs_oldfscompat_read above for details.
875 *
876 * XXX - Parts get retired eventually.
877 * Unfortunately new bits get added.
878 */
879static void
880ffs_oldfscompat_write(fs, ump)
881	struct fs *fs;
882	struct ufsmount *ump;
883{
884
885	/*
886	 * Copy back UFS2 updated fields that UFS1 inspects.
887	 */
888	if (fs->fs_magic == FS_UFS1_MAGIC) {
889		fs->fs_old_time = fs->fs_time;
890		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
891		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
892		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
893		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
894		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
895	}
896	if (bigcgs) {
897		fs->fs_cgsize = fs->fs_save_cgsize;
898		fs->fs_save_cgsize = 0;
899	}
900}
901
902/*
903 * unmount system call
904 */
905int
906ffs_unmount(mp, mntflags, td)
907	struct mount *mp;
908	int mntflags;
909	struct thread *td;
910{
911	struct ufsmount *ump = VFSTOUFS(mp);
912	struct fs *fs;
913	int error, flags;
914
915	flags = 0;
916	if (mntflags & MNT_FORCE) {
917		flags |= FORCECLOSE;
918	}
919#ifdef UFS_EXTATTR
920	if ((error = ufs_extattr_stop(mp, td))) {
921		if (error != EOPNOTSUPP)
922			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
923			    error);
924	} else {
925		ufs_extattr_uepm_destroy(&ump->um_extattr);
926	}
927#endif
928	if (mp->mnt_flag & MNT_SOFTDEP) {
929		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
930			return (error);
931	} else {
932		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
933			return (error);
934	}
935	fs = ump->um_fs;
936	UFS_LOCK(ump);
937	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
938		printf("%s: unmount pending error: blocks %jd files %d\n",
939		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
940		    fs->fs_pendinginodes);
941		fs->fs_pendingblocks = 0;
942		fs->fs_pendinginodes = 0;
943	}
944	UFS_UNLOCK(ump);
945	if (fs->fs_ronly == 0) {
946		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
947		error = ffs_sbupdate(ump, MNT_WAIT);
948		if (error) {
949			fs->fs_clean = 0;
950			return (error);
951		}
952	}
953	vinvalbuf(ump->um_devvp, V_SAVE, td, 0, 0);
954	DROP_GIANT();
955	g_topology_lock();
956	g_vfs_close(ump->um_cp, td);
957	g_topology_unlock();
958	PICKUP_GIANT();
959	vrele(ump->um_devvp);
960	mtx_destroy(UFS_MTX(ump));
961	free(fs->fs_csp, M_UFSMNT);
962	free(fs, M_UFSMNT);
963	free(ump, M_UFSMNT);
964	mp->mnt_data = (qaddr_t)0;
965	mp->mnt_flag &= ~MNT_LOCAL;
966	return (error);
967}
968
969/*
970 * Flush out all the files in a filesystem.
971 */
972int
973ffs_flushfiles(mp, flags, td)
974	struct mount *mp;
975	int flags;
976	struct thread *td;
977{
978	struct ufsmount *ump;
979	int error;
980
981	ump = VFSTOUFS(mp);
982#ifdef QUOTA
983	if (mp->mnt_flag & MNT_QUOTA) {
984		int i;
985		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
986		if (error)
987			return (error);
988		for (i = 0; i < MAXQUOTAS; i++) {
989			if (ump->um_quotas[i] == NULLVP)
990				continue;
991			quotaoff(td, mp, i);
992		}
993		/*
994		 * Here we fall through to vflush again to ensure
995		 * that we have gotten rid of all the system vnodes.
996		 */
997	}
998#endif
999	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1000	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1001		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1002			return (error);
1003		ffs_snapshot_unmount(mp);
1004		/*
1005		 * Here we fall through to vflush again to ensure
1006		 * that we have gotten rid of all the system vnodes.
1007		 */
1008	}
1009	/*
1010	 * Flush all the files.
1011	 */
1012	if ((error = vflush(mp, 0, flags, td)) != 0)
1013		return (error);
1014	/*
1015	 * Flush filesystem metadata.
1016	 */
1017	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1018	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1019	VOP_UNLOCK(ump->um_devvp, 0, td);
1020	return (error);
1021}
1022
1023/*
1024 * Get filesystem statistics.
1025 */
1026int
1027ffs_statfs(mp, sbp, td)
1028	struct mount *mp;
1029	struct statfs *sbp;
1030	struct thread *td;
1031{
1032	struct ufsmount *ump;
1033	struct fs *fs;
1034
1035	ump = VFSTOUFS(mp);
1036	fs = ump->um_fs;
1037	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1038		panic("ffs_statfs");
1039	sbp->f_version = STATFS_VERSION;
1040	sbp->f_bsize = fs->fs_fsize;
1041	sbp->f_iosize = fs->fs_bsize;
1042	sbp->f_blocks = fs->fs_dsize;
1043	UFS_LOCK(ump);
1044	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1045	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1046	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1047	    dbtofsb(fs, fs->fs_pendingblocks);
1048	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1049	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1050	UFS_UNLOCK(ump);
1051	sbp->f_namemax = NAME_MAX;
1052	return (0);
1053}
1054
1055/*
1056 * Go through the disk queues to initiate sandbagged IO;
1057 * go through the inodes to write those that have been modified;
1058 * initiate the writing of the super block if it has been modified.
1059 *
1060 * Note: we are always called with the filesystem marked `MPBUSY'.
1061 */
1062int
1063ffs_sync(mp, waitfor, td)
1064	struct mount *mp;
1065	int waitfor;
1066	struct thread *td;
1067{
1068	struct vnode *nvp, *vp, *devvp;
1069	struct inode *ip;
1070	struct ufsmount *ump = VFSTOUFS(mp);
1071	struct fs *fs;
1072	int error, count, wait, lockreq, allerror = 0;
1073	struct bufobj *bo;
1074
1075	fs = ump->um_fs;
1076	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1077		printf("fs = %s\n", fs->fs_fsmnt);
1078		panic("ffs_sync: rofs mod");
1079	}
1080	/*
1081	 * Write back each (modified) inode.
1082	 */
1083	wait = 0;
1084	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1085	if (waitfor == MNT_WAIT) {
1086		wait = 1;
1087		lockreq = LK_EXCLUSIVE;
1088	}
1089	lockreq |= LK_INTERLOCK;
1090	MNT_ILOCK(mp);
1091loop:
1092	MNT_VNODE_FOREACH(vp, mp, nvp) {
1093		/*
1094		 * Depend on the mntvnode_slock to keep things stable enough
1095		 * for a quick test.  Since there might be hundreds of
1096		 * thousands of vnodes, we cannot afford even a subroutine
1097		 * call unless there's a good chance that we have work to do.
1098		 */
1099		VI_LOCK(vp);
1100		if (vp->v_iflag & VI_XLOCK) {
1101			VI_UNLOCK(vp);
1102			continue;
1103		}
1104		ip = VTOI(vp);
1105		if (vp->v_type == VNON || ((ip->i_flag &
1106		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1107		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1108			VI_UNLOCK(vp);
1109			continue;
1110		}
1111		MNT_IUNLOCK(mp);
1112		if ((error = vget(vp, lockreq, td)) != 0) {
1113			MNT_ILOCK(mp);
1114			if (error == ENOENT)
1115				goto loop;
1116			continue;
1117		}
1118		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1119			allerror = error;
1120		VOP_UNLOCK(vp, 0, td);
1121		vrele(vp);
1122		MNT_ILOCK(mp);
1123	}
1124	MNT_IUNLOCK(mp);
1125	/*
1126	 * Force stale filesystem control information to be flushed.
1127	 */
1128	if (waitfor == MNT_WAIT) {
1129		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1130			allerror = error;
1131		/* Flushed work items may create new vnodes to clean */
1132		if (allerror == 0 && count) {
1133			MNT_ILOCK(mp);
1134			goto loop;
1135		}
1136	}
1137#ifdef QUOTA
1138	qsync(mp);
1139#endif
1140	devvp = ump->um_devvp;
1141	VI_LOCK(devvp);
1142	bo = &devvp->v_bufobj;
1143	if (waitfor != MNT_LAZY &&
1144	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1145		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1146		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1147			allerror = error;
1148		VOP_UNLOCK(devvp, 0, td);
1149		if (allerror == 0 && waitfor == MNT_WAIT) {
1150			MNT_ILOCK(mp);
1151			goto loop;
1152		}
1153	} else
1154		VI_UNLOCK(devvp);
1155	/*
1156	 * Write back modified superblock.
1157	 */
1158	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1159		allerror = error;
1160	return (allerror);
1161}
1162
1163int
1164ffs_vget(mp, ino, flags, vpp)
1165	struct mount *mp;
1166	ino_t ino;
1167	int flags;
1168	struct vnode **vpp;
1169{
1170	struct thread *td = curthread; 		/* XXX */
1171	struct fs *fs;
1172	struct inode *ip;
1173	struct ufsmount *ump;
1174	struct buf *bp;
1175	struct vnode *vp;
1176	struct cdev *dev;
1177	int error;
1178
1179	ump = VFSTOUFS(mp);
1180	dev = ump->um_dev;
1181
1182	/*
1183	 * We do not lock vnode creation as it is believed to be too
1184	 * expensive for such a rare case as simultaneous creation of a
1185	 * vnode for the same ino by different processes. We just allow them
1186	 * to race and check later to decide who wins. Let the race begin!
1187	 */
1188	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1189		return (error);
1190	if (*vpp != NULL)
1191		return (0);
1192
1193	/*
1194	 * If this MALLOC() is performed after the getnewvnode()
1195	 * it might block, leaving a vnode with a NULL v_data to be
1196	 * found by ffs_sync() if a sync happens to fire right then,
1197	 * which will cause a panic because ffs_sync() blindly
1198	 * dereferences vp->v_data (as well it should).
1199	 */
1200	ip = uma_zalloc(uma_inode, M_WAITOK);
1201
1202	/* Allocate a new vnode/inode. */
1203	error = getnewvnode("ufs", mp, &ffs_vnodeops, &vp);
1204	if (error) {
1205		*vpp = NULL;
1206		uma_zfree(uma_inode, ip);
1207		return (error);
1208	}
1209	bzero((caddr_t)ip, sizeof(struct inode));
1210	/*
1211	 * FFS supports recursive locking.
1212	 */
1213	fs = ump->um_fs;
1214	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1215	vp->v_data = ip;
1216	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1217	ip->i_vnode = vp;
1218	ip->i_ump = ump;
1219	ip->i_fs = fs;
1220	ip->i_dev = dev;
1221	ip->i_number = ino;
1222#ifdef QUOTA
1223	{
1224		int i;
1225		for (i = 0; i < MAXQUOTAS; i++)
1226			ip->i_dquot[i] = NODQUOT;
1227	}
1228#endif
1229	/*
1230	 * Exclusively lock the vnode before adding it to the hash. Note that
1231	 * we must not release or downgrade the lock (despite what the flags
1232	 * argument says) until it is fully initialized.
1233	 */
1234	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1235
1236	/*
1237	 * Atomically (in terms of ufs_hash operations) check the hash for a
1238	 * duplicate of the vnode being created and add it to the hash. If a
1239	 * duplicate vnode was found, it will be vget()ed from the hash for us.
1240	 */
1241	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1242		vput(vp);
1243		*vpp = NULL;
1244		return (error);
1245	}
1246
1247	/* Lost the race: throw away our vnode and return the existing one. */
1248	if (*vpp != NULL) {
1249		vput(vp);
1250		return (0);
1251	}
1252
1253	/* Read in the disk contents for the inode, copy into the inode. */
1254	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1255	    (int)fs->fs_bsize, NOCRED, &bp);
1256	if (error) {
1257		/*
1258		 * The inode does not contain anything useful, so it would
1259		 * be misleading to leave it on its hash chain. With mode
1260		 * still zero, it will be unlinked and returned to the free
1261		 * list by vput().
1262		 */
1263		brelse(bp);
1264		vput(vp);
1265		*vpp = NULL;
1266		return (error);
1267	}
1268	if (ip->i_ump->um_fstype == UFS1)
1269		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1270	else
1271		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1272	ffs_load_inode(bp, ip, fs, ino);
1273	if (DOINGSOFTDEP(vp))
1274		softdep_load_inodeblock(ip);
1275	else
1276		ip->i_effnlink = ip->i_nlink;
1277	bqrelse(bp);
1278
1279	/*
1280	 * Initialize the vnode from the inode, check for aliases.
1281	 * Note that the underlying vnode may have changed.
1282	 */
1283	error = ufs_vinit(mp, &ffs_fifoops, &vp);
1284	if (error) {
1285		vput(vp);
1286		*vpp = NULL;
1287		return (error);
1288	}
1289	/*
1290	 * Finish inode initialization.
1291	 */
1292	VREF(ip->i_devvp);
1293	/*
1294	 * Set up a generation number for this inode if it does not
1295	 * already have one. This should only happen on old filesystems.
1296	 */
1297	if (ip->i_gen == 0) {
1298		ip->i_gen = arc4random() / 2 + 1;
1299		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1300			ip->i_flag |= IN_MODIFIED;
1301			DIP_SET(ip, i_gen, ip->i_gen);
1302		}
1303	}
1304	/*
1305	 * Ensure that uid and gid are correct. This is a temporary
1306	 * fix until fsck has been changed to do the update.
1307	 */
1308	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1309	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1310		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1311		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1312	}						/* XXX */
1313
1314#ifdef MAC
1315	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1316		/*
1317		 * If this vnode is already allocated, and we're running
1318		 * multi-label, attempt to perform a label association
1319		 * from the extended attributes on the inode.
1320		 */
1321		error = mac_associate_vnode_extattr(mp, vp);
1322		if (error) {
1323			/* ufs_inactive will release ip->i_devvp ref. */
1324			vput(vp);
1325			*vpp = NULL;
1326			return (error);
1327		}
1328	}
1329#endif
1330
1331	*vpp = vp;
1332	return (0);
1333}
1334
1335/*
1336 * File handle to vnode
1337 *
1338 * Have to be really careful about stale file handles:
1339 * - check that the inode number is valid
1340 * - call ffs_vget() to get the locked inode
1341 * - check for an unallocated inode (i_mode == 0)
1342 * - check that the given client host has export rights and return
1343 *   those rights via exflagsp and credanonp
1344 */
1345int
1346ffs_fhtovp(mp, fhp, vpp)
1347	struct mount *mp;
1348	struct fid *fhp;
1349	struct vnode **vpp;
1350{
1351	struct ufid *ufhp;
1352	struct fs *fs;
1353
1354	ufhp = (struct ufid *)fhp;
1355	fs = VFSTOUFS(mp)->um_fs;
1356	if (ufhp->ufid_ino < ROOTINO ||
1357	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1358		return (ESTALE);
1359	return (ufs_fhtovp(mp, ufhp, vpp));
1360}
1361
1362/*
1363 * Vnode pointer to File handle
1364 */
1365/* ARGSUSED */
1366int
1367ffs_vptofh(vp, fhp)
1368	struct vnode *vp;
1369	struct fid *fhp;
1370{
1371	struct inode *ip;
1372	struct ufid *ufhp;
1373
1374	ip = VTOI(vp);
1375	ufhp = (struct ufid *)fhp;
1376	ufhp->ufid_len = sizeof(struct ufid);
1377	ufhp->ufid_ino = ip->i_number;
1378	ufhp->ufid_gen = ip->i_gen;
1379	return (0);
1380}
1381
1382/*
1383 * Initialize the filesystem.
1384 */
1385static int
1386ffs_init(vfsp)
1387	struct vfsconf *vfsp;
1388{
1389
1390	softdep_initialize();
1391	return (ufs_init(vfsp));
1392}
1393
1394/*
1395 * Undo the work of ffs_init().
1396 */
1397static int
1398ffs_uninit(vfsp)
1399	struct vfsconf *vfsp;
1400{
1401	int ret;
1402
1403	ret = ufs_uninit(vfsp);
1404	softdep_uninitialize();
1405	return (ret);
1406}
1407
1408/*
1409 * Write a superblock and associated information back to disk.
1410 */
1411static int
1412ffs_sbupdate(mp, waitfor)
1413	struct ufsmount *mp;
1414	int waitfor;
1415{
1416	struct fs *fs = mp->um_fs;
1417	struct buf *sbbp;
1418	struct buf *bp;
1419	int blks;
1420	void *space;
1421	int i, size, error, allerror = 0;
1422
1423	if (fs->fs_ronly == 1 &&
1424	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1425	    (MNT_RDONLY | MNT_UPDATE))
1426		panic("ffs_sbupdate: write read-only filesystem");
1427	/*
1428	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1429	 */
1430	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1431	    0, 0, 0);
1432	/*
1433	 * First write back the summary information.
1434	 */
1435	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1436	space = fs->fs_csp;
1437	for (i = 0; i < blks; i += fs->fs_frag) {
1438		size = fs->fs_bsize;
1439		if (i + fs->fs_frag > blks)
1440			size = (blks - i) * fs->fs_fsize;
1441		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1442		    size, 0, 0, 0);
1443		bcopy(space, bp->b_data, (u_int)size);
1444		space = (char *)space + size;
1445		if (waitfor != MNT_WAIT)
1446			bawrite(bp);
1447		else if ((error = bwrite(bp)) != 0)
1448			allerror = error;
1449	}
1450	/*
1451	 * Now write back the superblock itself. If any errors occurred
1452	 * up to this point, then fail so that the superblock avoids
1453	 * being written out as clean.
1454	 */
1455	if (allerror) {
1456		brelse(sbbp);
1457		return (allerror);
1458	}
1459	bp = sbbp;
1460	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1461	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1462		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1463		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1464		fs->fs_sblockloc = SBLOCK_UFS1;
1465	}
1466	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1467	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1468		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1469		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1470		fs->fs_sblockloc = SBLOCK_UFS2;
1471	}
1472	fs->fs_fmod = 0;
1473	fs->fs_time = time_second;
1474	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1475	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1476	if (waitfor != MNT_WAIT)
1477		bawrite(bp);
1478	else if ((error = bwrite(bp)) != 0)
1479		allerror = error;
1480	return (allerror);
1481}
1482
1483static int
1484ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
1485	int attrnamespace, const char *attrname, struct thread *td)
1486{
1487
1488#ifdef UFS_EXTATTR
1489	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
1490	    attrname, td));
1491#else
1492	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
1493	    attrname, td));
1494#endif
1495}
1496
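/*
 * Release an in-core inode and its dinode back to their UMA zones.
 * Installed as the um_ifree method by ffs_mountfs().
 */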
1497static void
1498ffs_ifree(struct ufsmount *ump, struct inode *ip)
1499{
1500
1501	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1502		uma_zfree(uma_ufs1, ip->i_din1);
1503	else if (ip->i_din2 != NULL)
1504		uma_zfree(uma_ufs2, ip->i_din2);
1505	uma_zfree(uma_inode, ip);
1506}
1507
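/*
 * Buffer strategy for FFS: if the soft updates code claims a write in
 * softdep_disk_prewrite(), the buffer is not passed on; otherwise it
 * is handed to GEOM via g_vfs_strategy().
 */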
1508static void
1509ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1510{
1511
1512#ifdef SOFTUPDATES
1513	if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(bp))
1514		return;
1515#endif
1516	g_vfs_strategy(bo, bp);
1517}
1518