1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 138509 2004-12-07 08:15:41Z phk $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/mac.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/disk.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55
56#include <ufs/ufs/extattr.h>
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/ufsmount.h>
59#include <ufs/ufs/inode.h>
60#include <ufs/ufs/ufs_extern.h>
61
62#include <ufs/ffs/fs.h>
63#include <ufs/ffs/ffs_extern.h>
64
65#include <vm/vm.h>
66#include <vm/uma.h>
67#include <vm/vm_page.h>
68
69#include <geom/geom.h>
70#include <geom/geom_vfs.h>
71
72uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
73
74static int	ffs_sbupdate(struct ufsmount *, int);
75static int	ffs_reload(struct mount *, struct thread *);
76static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
77static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
78		    ufs2_daddr_t);
79static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
80static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
81static vfs_init_t ffs_init;
82static vfs_uninit_t ffs_uninit;
83static vfs_extattrctl_t ffs_extattrctl;
84static vfs_cmount_t ffs_cmount;
85static vfs_mount_t ffs_mount;
86
87static struct vfsops ufs_vfsops = {
88	.vfs_extattrctl =	ffs_extattrctl,
89	.vfs_fhtovp =		ffs_fhtovp,
90	.vfs_init =		ffs_init,
91	.vfs_mount =		ffs_mount,
92	.vfs_cmount =		ffs_cmount,
93	.vfs_quotactl =		ufs_quotactl,
94	.vfs_root =		ufs_root,
95	.vfs_statfs =		ffs_statfs,
96	.vfs_sync =		ffs_sync,
97	.vfs_uninit =		ffs_uninit,
98	.vfs_unmount =		ffs_unmount,
99	.vfs_vget =		ffs_vget,
100	.vfs_vptofh =		ffs_vptofh,
101};
102
103VFS_SET(ufs_vfsops, ufs, 0);
104
105static b_strategy_t ffs_geom_strategy;
106
107static struct buf_ops ffs_ops = {
108	.bop_name =	"FFS",
109	.bop_write =	bufwrite,
110	.bop_strategy =	ffs_geom_strategy,
111};
112
113static const char *ffs_opts[] = { "from", "export", NULL };
114
115static int
116ffs_mount(struct mount *mp, struct thread *td)
117{
118	struct vnode *devvp;
119	struct ufsmount *ump = 0;
120	struct fs *fs;
121	int error, flags;
122	mode_t accessmode;
123	struct nameidata ndp;
124	struct export_args *export;
125	char *fspec;
126	int len;
127
128	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
129		return (EINVAL);
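	/*
	 * First FFS mount: create the UMA zones used to allocate
	 * in-core inodes and their UFS1/UFS2 dinodes.
	 */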
130	if (uma_inode == NULL) {
131		uma_inode = uma_zcreate("FFS inode",
132		    sizeof(struct inode), NULL, NULL, NULL, NULL,
133		    UMA_ALIGN_PTR, 0);
134		uma_ufs1 = uma_zcreate("FFS1 dinode",
135		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
136		    UMA_ALIGN_PTR, 0);
137		uma_ufs2 = uma_zcreate("FFS2 dinode",
138		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
139		    UMA_ALIGN_PTR, 0);
140	}
141
142	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
143	if (error)
144		return (error);
145
146	/*
147	 * If updating, check whether changing from read-only to
148	 * read/write; if there is no device name, that's all we do.
149	 */
150	if (mp->mnt_flag & MNT_UPDATE) {
151		ump = VFSTOUFS(mp);
152		fs = ump->um_fs;
153		devvp = ump->um_devvp;
154		if (fs->fs_ronly == 0 &&
155		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
156			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
157				return (error);
158			/*
159			 * Flush any dirty data.
160			 */
161			if ((error = VFS_SYNC(mp, MNT_WAIT,
162			    td->td_ucred, td)) != 0) {
163				vn_finished_write(mp);
164				return (error);
165			}
166			/*
167			 * Check for and optionally get rid of files open
168			 * for writing.
169			 */
170			flags = WRITECLOSE;
171			if (mp->mnt_flag & MNT_FORCE)
172				flags |= FORCECLOSE;
173			if (mp->mnt_flag & MNT_SOFTDEP) {
174				error = softdep_flushfiles(mp, flags, td);
175			} else {
176				error = ffs_flushfiles(mp, flags, td);
177			}
178			if (error) {
179				vn_finished_write(mp);
180				return (error);
181			}
182			if (fs->fs_pendingblocks != 0 ||
183			    fs->fs_pendinginodes != 0) {
184				printf("%s: %s: blocks %jd files %d\n",
185				    fs->fs_fsmnt, "update error",
186				    (intmax_t)fs->fs_pendingblocks,
187				    fs->fs_pendinginodes);
188				fs->fs_pendingblocks = 0;
189				fs->fs_pendinginodes = 0;
190			}
191			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
192				fs->fs_clean = 1;
193			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
194				fs->fs_ronly = 0;
195				fs->fs_clean = 0;
196				vn_finished_write(mp);
197				return (error);
198			}
199			vn_finished_write(mp);
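			/*
			 * Drop our write count on the GEOM consumer now that
			 * the filesystem is becoming read-only.
			 */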
200			DROP_GIANT();
201			g_topology_lock();
202			g_access(ump->um_cp, 0, -1, 0);
203			g_topology_unlock();
204			PICKUP_GIANT();
205			fs->fs_ronly = 1;
206			mp->mnt_flag |= MNT_RDONLY;
207		}
208		if ((mp->mnt_flag & MNT_RELOAD) &&
209		    (error = ffs_reload(mp, td)) != 0)
210			return (error);
211		if (fs->fs_ronly &&
212		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
213			/*
214			 * If upgrading to read-write by non-root, verify that the
215			 * user has the necessary permissions on the device.
216			 */
217			if (suser(td)) {
218				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
219				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
220				    td->td_ucred, td)) != 0) {
221					VOP_UNLOCK(devvp, 0, td);
222					return (error);
223				}
224				VOP_UNLOCK(devvp, 0, td);
225			}
226			fs->fs_flags &= ~FS_UNCLEAN;
227			if (fs->fs_clean == 0) {
228				fs->fs_flags |= FS_UNCLEAN;
229				if ((mp->mnt_flag & MNT_FORCE) ||
230				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
231				     (fs->fs_flags & FS_DOSOFTDEP))) {
232					printf("WARNING: %s was not %s\n",
233					   fs->fs_fsmnt, "properly dismounted");
234				} else {
235					printf(
236"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
237					    fs->fs_fsmnt);
238					return (EPERM);
239				}
240			}
241			DROP_GIANT();
242			g_topology_lock();
243			/*
244			 * If we're the root device, we may not have an E count
245			 * yet; get it now.
246			 */
247			if (ump->um_cp->ace == 0)
248				error = g_access(ump->um_cp, 0, 1, 1);
249			else
250				error = g_access(ump->um_cp, 0, 1, 0);
251			g_topology_unlock();
252			PICKUP_GIANT();
253			if (error)
254				return (error);
255			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
256				return (error);
257			fs->fs_ronly = 0;
258			mp->mnt_flag &= ~MNT_RDONLY;
259			fs->fs_clean = 0;
260			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
261				vn_finished_write(mp);
262				return (error);
263			}
264			/* check to see if we need to start softdep */
265			if ((fs->fs_flags & FS_DOSOFTDEP) &&
266			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
267				vn_finished_write(mp);
268				return (error);
269			}
270			if (fs->fs_snapinum[0] != 0)
271				ffs_snapshot_mount(mp);
272			vn_finished_write(mp);
273		}
274		/*
275		 * Soft updates is incompatible with "async",
276		 * so if we are doing soft updates, stop the user
277		 * from setting the async flag in an update.
278		 * Softdep_mount() clears it in an initial mount
279		 * or ro->rw remount.
280		 */
281		if (mp->mnt_flag & MNT_SOFTDEP)
282			mp->mnt_flag &= ~MNT_ASYNC;
283		/*
284		 * If not updating name, process export requests.
285		 */
286		if (fspec == NULL) {
287			error = vfs_getopt(mp->mnt_optnew,
288			    "export", (void **)&export, &len);
289			if (error || len != sizeof *export)
290				return (EINVAL);
291			return (vfs_export(mp, export));
292		}
293		/*
294		 * If this is a snapshot request, take the snapshot.
295		 */
296		if (mp->mnt_flag & MNT_SNAPSHOT)
297			return (ffs_snapshot(mp, fspec));
298	}
299
300	/*
301	 * Not an update, or updating the name: look up the name
302	 * and verify that it refers to a sensible disk device.
303	 */
304	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
305	if ((error = namei(&ndp)) != 0)
306		return (error);
307	NDFREE(&ndp, NDF_ONLY_PNBUF);
308	devvp = ndp.ni_vp;
309	if (!vn_isdisk(devvp, &error)) {
310		vrele(devvp);
311		return (error);
312	}
313
314	/*
315	 * If mounting by non-root, verify that the user has the
316	 * necessary permissions on the device.
317	 */
318	if (suser(td)) {
319		accessmode = VREAD;
320		if ((mp->mnt_flag & MNT_RDONLY) == 0)
321			accessmode |= VWRITE;
322		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
323		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
324			vput(devvp);
325			return (error);
326		}
327		VOP_UNLOCK(devvp, 0, td);
328	}
329
330	if (mp->mnt_flag & MNT_UPDATE) {
331		/*
332		 * Update only
333		 *
334		 * If it's not the same vnode, or at least the same device,
335		 * then it's not correct.
336		 */
337
338		if (devvp->v_rdev != ump->um_devvp->v_rdev)
339			error = EINVAL;	/* needs translation */
340		vrele(devvp);
341		if (error)
342			return (error);
343	} else {
344		/*
345		 * New mount
346		 *
347		 * We need the name for the mount point (also used for
348		 * "last mounted on") copied in. If an error occurs,
349		 * the mount point is discarded by the upper level code.
350		 * Note that vfs_mount() populates f_mntonname for us.
351		 */
352		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
353			vrele(devvp);
354			return (error);
355		}
356	}
357	vfs_mountedfrom(mp, fspec);
358	return (0);
359}
360
361/*
362 * Compatibility with old mount system call.
363 */
364
365static int
366ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
367{
368	struct ufs_args args;
369	int error;
370
371	if (data == NULL)
372		return (EINVAL);
373	error = copyin(data, &args, sizeof args);
374	if (error)
375		return (error);
376
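	/*
	 * Translate the old-style ufs_args into nmount options and
	 * hand the request to the generic mount code.
	 */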
377	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
378	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
379	error = kernel_mount(ma, flags);
380
381	return (error);
382}
383
384/*
385 * Reload all incore data for a filesystem (used after running fsck on
386 * the root filesystem and finding things to fix). The filesystem must
387 * be mounted read-only.
388 *
389 * Things to do to update the mount:
390 *	1) invalidate all cached meta-data.
391 *	2) re-read superblock from disk.
392 *	3) re-read summary information from disk.
393 *	4) invalidate all inactive vnodes.
394 *	5) invalidate all cached file data.
395 *	6) re-read inode data for all active vnodes.
396 */
397static int
398ffs_reload(struct mount *mp, struct thread *td)
399{
400	struct vnode *vp, *nvp, *devvp;
401	struct inode *ip;
402	void *space;
403	struct buf *bp;
404	struct fs *fs, *newfs;
405	ufs2_daddr_t sblockloc;
406	int i, blks, size, error;
407	int32_t *lp;
408
409	if ((mp->mnt_flag & MNT_RDONLY) == 0)
410		return (EINVAL);
411	/*
412	 * Step 1: invalidate all cached meta-data.
413	 */
414	devvp = VFSTOUFS(mp)->um_devvp;
415	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
416	if (vinvalbuf(devvp, 0, td->td_ucred, td, 0, 0) != 0)
417		panic("ffs_reload: dirty1");
418	vfs_object_create(devvp, td, td->td_ucred);
419	VOP_UNLOCK(devvp, 0, td);
420
421	/*
422	 * Step 2: re-read superblock from disk.
423	 */
424	fs = VFSTOUFS(mp)->um_fs;
425	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
426	    NOCRED, &bp)) != 0)
427		return (error);
428	newfs = (struct fs *)bp->b_data;
429	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
430	     newfs->fs_magic != FS_UFS2_MAGIC) ||
431	    newfs->fs_bsize > MAXBSIZE ||
432	    newfs->fs_bsize < sizeof(struct fs)) {
433			brelse(bp);
434			return (EIO);		/* XXX needs translation */
435	}
436	/*
437	 * Copy pointer fields back into superblock before copying in	XXX
438	 * new superblock. These should really be in the ufsmount.	XXX
439	 * Note that important parameters (e.g. fs_ncg) are unchanged.
440	 */
441	newfs->fs_csp = fs->fs_csp;
442	newfs->fs_maxcluster = fs->fs_maxcluster;
443	newfs->fs_contigdirs = fs->fs_contigdirs;
444	newfs->fs_active = fs->fs_active;
445	/* The file system is still read-only. */
446	newfs->fs_ronly = 1;
447	sblockloc = fs->fs_sblockloc;
448	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
449	brelse(bp);
450	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
451	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
452	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
453		printf("%s: reload pending error: blocks %jd files %d\n",
454		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
455		    fs->fs_pendinginodes);
456		fs->fs_pendingblocks = 0;
457		fs->fs_pendinginodes = 0;
458	}
459
460	/*
461	 * Step 3: re-read summary information from disk.
462	 */
463	blks = howmany(fs->fs_cssize, fs->fs_fsize);
464	space = fs->fs_csp;
465	for (i = 0; i < blks; i += fs->fs_frag) {
466		size = fs->fs_bsize;
467		if (i + fs->fs_frag > blks)
468			size = (blks - i) * fs->fs_fsize;
469		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
470		    NOCRED, &bp);
471		if (error)
472			return (error);
473		bcopy(bp->b_data, space, (u_int)size);
474		space = (char *)space + size;
475		brelse(bp);
476	}
477	/*
478	 * We no longer know anything about clusters per cylinder group.
479	 */
480	if (fs->fs_contigsumsize > 0) {
481		lp = fs->fs_maxcluster;
482		for (i = 0; i < fs->fs_ncg; i++)
483			*lp++ = fs->fs_contigsumsize;
484	}
485
486loop:
487	MNT_ILOCK(mp);
488	MNT_VNODE_FOREACH(vp, mp, nvp) {
489		VI_LOCK(vp);
490		if (vp->v_iflag & VI_XLOCK) {
491			VI_UNLOCK(vp);
492			continue;
493		}
494		MNT_IUNLOCK(mp);
495		/*
496		 * Step 4: invalidate all inactive vnodes.
497		 */
498		if (vp->v_usecount == 0) {
499			vgonel(vp, td);
500			goto loop;
501		}
502		/*
503		 * Step 5: invalidate all cached file data.
504		 */
505		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
506			goto loop;
507		}
508		if (vinvalbuf(vp, 0, td->td_ucred, td, 0, 0))
509			panic("ffs_reload: dirty2");
510		/*
511		 * Step 6: re-read inode data for all active vnodes.
512		 */
513		ip = VTOI(vp);
514		error =
515		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
516		    (int)fs->fs_bsize, NOCRED, &bp);
517		if (error) {
518			VOP_UNLOCK(vp, 0, td);
519			vrele(vp);
520			return (error);
521		}
522		ffs_load_inode(bp, ip, fs, ip->i_number);
523		ip->i_effnlink = ip->i_nlink;
524		brelse(bp);
525		VOP_UNLOCK(vp, 0, td);
526		vrele(vp);
527		MNT_ILOCK(mp);
528	}
529	MNT_IUNLOCK(mp);
530	return (0);
531}
532
533/*
534 * Possible superblock locations ordered from most to least likely.
535 */
536static int sblock_try[] = SBLOCKSEARCH;
537
538/*
539 * Common code for mount and mountroot
540 */
541static int
542ffs_mountfs(devvp, mp, td)
543	struct vnode *devvp;
544	struct mount *mp;
545	struct thread *td;
546{
547	struct ufsmount *ump;
548	struct buf *bp;
549	struct fs *fs;
550	struct cdev *dev;
551	void *space;
552	ufs2_daddr_t sblockloc;
553	int error, i, blks, size, ronly;
554	int32_t *lp;
555	struct ucred *cred;
556	struct g_consumer *cp;
557
558	dev = devvp->v_rdev;
559	cred = td ? td->td_ucred : NOCRED;
560
561	vfs_object_create(devvp, td, td->td_ucred);
562	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
563	DROP_GIANT();
564	g_topology_lock();
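	/*
	 * Open a GEOM consumer on the device, asking for write access
	 * only if this is not a read-only mount.
	 */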
565	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
566
567	/*
568	 * If we are a root mount, drop the E flag so fsck can do its magic.
569	 * We will pick it up again when we remount R/W.
570	 */
571	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
572		error = g_access(cp, 0, 0, -1);
573	g_topology_unlock();
574	PICKUP_GIANT();
575	VOP_UNLOCK(devvp, 0, td);
576	if (error)
577		return (error);
578	if (devvp->v_rdev->si_iosize_max != 0)
579		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
580	if (mp->mnt_iosize_max > MAXPHYS)
581		mp->mnt_iosize_max = MAXPHYS;
582
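	/*
	 * Attach the GEOM consumer to the device vnode's buffer object
	 * and install FFS's buf_ops so that writes are routed through
	 * ffs_geom_strategy().
	 */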
583	devvp->v_bufobj.bo_private = cp;
584	devvp->v_bufobj.bo_ops = &ffs_ops;
585
586	bp = NULL;
587	ump = NULL;
588	fs = NULL;
589	sblockloc = 0;
590	/*
591	 * Try reading the superblock in each of its possible locations.
592	 */
593	for (i = 0; sblock_try[i] != -1; i++) {
594		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
595		    cred, &bp)) != 0)
596			goto out;
597		fs = (struct fs *)bp->b_data;
598		sblockloc = sblock_try[i];
599		if ((fs->fs_magic == FS_UFS1_MAGIC ||
600		     (fs->fs_magic == FS_UFS2_MAGIC &&
601		      (fs->fs_sblockloc == sblockloc ||
602		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
603		    fs->fs_bsize <= MAXBSIZE &&
604		    fs->fs_bsize >= sizeof(struct fs))
605			break;
606		brelse(bp);
607		bp = NULL;
608	}
609	if (sblock_try[i] == -1) {
610		error = EINVAL;		/* XXX needs translation */
611		goto out;
612	}
613	fs->fs_fmod = 0;
614	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
615	fs->fs_flags &= ~FS_UNCLEAN;
616	if (fs->fs_clean == 0) {
617		fs->fs_flags |= FS_UNCLEAN;
618		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
619		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
620		     (fs->fs_flags & FS_DOSOFTDEP))) {
621			printf(
622"WARNING: %s was not properly dismounted\n",
623			    fs->fs_fsmnt);
624		} else {
625			printf(
626"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
627			    fs->fs_fsmnt);
628			error = EPERM;
629			goto out;
630		}
631		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
632		    (mp->mnt_flag & MNT_FORCE)) {
633			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
634			    (intmax_t)fs->fs_pendingblocks,
635			    fs->fs_pendinginodes);
636			fs->fs_pendingblocks = 0;
637			fs->fs_pendinginodes = 0;
638		}
639	}
640	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
641		printf("%s: mount pending error: blocks %jd files %d\n",
642		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
643		    fs->fs_pendinginodes);
644		fs->fs_pendingblocks = 0;
645		fs->fs_pendinginodes = 0;
646	}
647	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
648	ump->um_cp = cp;
649	ump->um_bo = &devvp->v_bufobj;
650	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
651	if (fs->fs_magic == FS_UFS1_MAGIC) {
652		ump->um_fstype = UFS1;
653		ump->um_balloc = ffs_balloc_ufs1;
654	} else {
655		ump->um_fstype = UFS2;
656		ump->um_balloc = ffs_balloc_ufs2;
657	}
658	ump->um_blkatoff = ffs_blkatoff;
659	ump->um_truncate = ffs_truncate;
660	ump->um_update = ffs_update;
661	ump->um_valloc = ffs_valloc;
662	ump->um_vfree = ffs_vfree;
663	ump->um_ifree = ffs_ifree;
664	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
665	if (fs->fs_sbsize < SBLOCKSIZE)
666		bp->b_flags |= B_INVAL | B_NOCACHE;
667	brelse(bp);
668	bp = NULL;
669	fs = ump->um_fs;
670	ffs_oldfscompat_read(fs, ump, sblockloc);
671	fs->fs_ronly = ronly;
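	/*
	 * Allocate one contiguous buffer to hold the cylinder group
	 * summary information, the optional per-cg cluster summaries,
	 * and the fs_contigdirs array, then read the summaries from disk.
	 */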
672	size = fs->fs_cssize;
673	blks = howmany(size, fs->fs_fsize);
674	if (fs->fs_contigsumsize > 0)
675		size += fs->fs_ncg * sizeof(int32_t);
676	size += fs->fs_ncg * sizeof(u_int8_t);
677	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
678	fs->fs_csp = space;
679	for (i = 0; i < blks; i += fs->fs_frag) {
680		size = fs->fs_bsize;
681		if (i + fs->fs_frag > blks)
682			size = (blks - i) * fs->fs_fsize;
683		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
684		    cred, &bp)) != 0) {
685			free(fs->fs_csp, M_UFSMNT);
686			goto out;
687		}
688		bcopy(bp->b_data, space, (u_int)size);
689		space = (char *)space + size;
690		brelse(bp);
691		bp = NULL;
692	}
693	if (fs->fs_contigsumsize > 0) {
694		fs->fs_maxcluster = lp = space;
695		for (i = 0; i < fs->fs_ncg; i++)
696			*lp++ = fs->fs_contigsumsize;
697		space = lp;
698	}
699	size = fs->fs_ncg * sizeof(u_int8_t);
700	fs->fs_contigdirs = (u_int8_t *)space;
701	bzero(fs->fs_contigdirs, size);
702	fs->fs_active = NULL;
703	mp->mnt_data = (qaddr_t)ump;
704	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
705	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
706	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
707	    vfs_getvfs(&mp->mnt_stat.f_fsid))
708		vfs_getnewfsid(mp);
709	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
710	mp->mnt_flag |= MNT_LOCAL;
711	if ((fs->fs_flags & FS_MULTILABEL) != 0)
712#ifdef MAC
713		mp->mnt_flag |= MNT_MULTILABEL;
714#else
715		printf(
716"WARNING: %s: multilabel flag on fs but no MAC support\n",
717		    fs->fs_fsmnt);
718#endif
719	if ((fs->fs_flags & FS_ACLS) != 0)
720#ifdef UFS_ACL
721		mp->mnt_flag |= MNT_ACLS;
722#else
723		printf(
724"WARNING: %s: ACLs flag on fs but no ACLs support\n",
725		    fs->fs_fsmnt);
726#endif
727	ump->um_mountp = mp;
728	ump->um_dev = dev;
729	ump->um_devvp = devvp;
730	ump->um_nindir = fs->fs_nindir;
731	ump->um_bptrtodb = fs->fs_fsbtodb;
732	ump->um_seqinc = fs->fs_frag;
733	for (i = 0; i < MAXQUOTAS; i++)
734		ump->um_quotas[i] = NULLVP;
735#ifdef UFS_EXTATTR
736	ufs_extattr_uepm_init(&ump->um_extattr);
737#endif
738	/*
739	 * Set FS local "last mounted on" information (NULL pad)
740	 */
741	vfs_mountedfrom(mp, fs->fs_fsmnt);
742
743	if (mp->mnt_flag & MNT_ROOTFS) {
744		/*
745		 * Root mount; update timestamp in mount structure.
746		 * This will be used by the common root mount code
747		 * to update the system clock.
748		 */
749		mp->mnt_time = fs->fs_time;
750	}
751
752	if (ronly == 0) {
753		if ((fs->fs_flags & FS_DOSOFTDEP) &&
754		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
755			free(fs->fs_csp, M_UFSMNT);
756			goto out;
757		}
758		if (fs->fs_snapinum[0] != 0)
759			ffs_snapshot_mount(mp);
760		fs->fs_fmod = 1;
761		fs->fs_clean = 0;
762		(void) ffs_sbupdate(ump, MNT_WAIT);
763	}
764	/*
765	 * Initialize filesystem stat information in mount struct.
766	 */
767#ifdef UFS_EXTATTR
768#ifdef UFS_EXTATTR_AUTOSTART
769	/*
771	 * Auto-starting does the following:
772	 *	- check for /.attribute in the fs, and extattr_start if so
773	 *	- for each file in .attribute, enable that file with
774	 * 	  an attribute of the same name.
775	 * Not clear how to report errors -- probably eat them.
776	 * This would all happen while the filesystem was busy/not
777	 * available, so would effectively be "atomic".
778	 */
779	(void) ufs_extattr_autostart(mp, td);
780#endif /* !UFS_EXTATTR_AUTOSTART */
781#endif /* !UFS_EXTATTR */
782	return (0);
783out:
784	if (bp)
785		brelse(bp);
786	if (cp != NULL) {
787		DROP_GIANT();
788		g_topology_lock();
789		g_wither_geom_close(cp->geom, ENXIO);
790		g_topology_unlock();
791		PICKUP_GIANT();
792	}
793	if (ump) {
794		free(ump->um_fs, M_UFSMNT);
795		free(ump, M_UFSMNT);
796		mp->mnt_data = (qaddr_t)0;
797	}
798	return (error);
799}
800
801#include <sys/sysctl.h>
802int bigcgs = 0;
803SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
804
805/*
806 * Sanity checks for loading old filesystem superblocks.
807 * See ffs_oldfscompat_write below for unwound actions.
808 *
809 * XXX - Parts get retired eventually.
810 * Unfortunately new bits get added.
811 */
812static void
813ffs_oldfscompat_read(fs, ump, sblockloc)
814	struct fs *fs;
815	struct ufsmount *ump;
816	ufs2_daddr_t sblockloc;
817{
818	off_t maxfilesize;
819
820	/*
821	 * If not yet done, update fs_flags location and value of fs_sblockloc.
822	 */
823	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
824		fs->fs_flags = fs->fs_old_flags;
825		fs->fs_old_flags |= FS_FLAGS_UPDATED;
826		fs->fs_sblockloc = sblockloc;
827	}
828	/*
829	 * If not yet done, update UFS1 superblock with new wider fields.
830	 */
831	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
832		fs->fs_maxbsize = fs->fs_bsize;
833		fs->fs_time = fs->fs_old_time;
834		fs->fs_size = fs->fs_old_size;
835		fs->fs_dsize = fs->fs_old_dsize;
836		fs->fs_csaddr = fs->fs_old_csaddr;
837		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
838		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
839		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
840		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
841	}
842	if (fs->fs_magic == FS_UFS1_MAGIC &&
843	    fs->fs_old_inodefmt < FS_44INODEFMT) {
844		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
845		fs->fs_qbmask = ~fs->fs_bmask;
846		fs->fs_qfmask = ~fs->fs_fmask;
847	}
848	if (fs->fs_magic == FS_UFS1_MAGIC) {
849		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
850		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
851		if (fs->fs_maxfilesize > maxfilesize)
852			fs->fs_maxfilesize = maxfilesize;
853	}
854	/* Compatibility for old filesystems */
855	if (fs->fs_avgfilesize <= 0)
856		fs->fs_avgfilesize = AVFILESIZ;
857	if (fs->fs_avgfpdir <= 0)
858		fs->fs_avgfpdir = AFPDIR;
859	if (bigcgs) {
860		fs->fs_save_cgsize = fs->fs_cgsize;
861		fs->fs_cgsize = fs->fs_bsize;
862	}
863}
864
865/*
866 * Unwinding superblock updates for old filesystems.
867 * See ffs_oldfscompat_read above for details.
868 *
869 * XXX - Parts get retired eventually.
870 * Unfortunately new bits get added.
871 */
872static void
873ffs_oldfscompat_write(fs, ump)
874	struct fs *fs;
875	struct ufsmount *ump;
876{
877
878	/*
879	 * Copy back UFS2 updated fields that UFS1 inspects.
880	 */
881	if (fs->fs_magic == FS_UFS1_MAGIC) {
882		fs->fs_old_time = fs->fs_time;
883		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
884		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
885		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
886		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
887		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
888	}
889	if (bigcgs) {
890		fs->fs_cgsize = fs->fs_save_cgsize;
891		fs->fs_save_cgsize = 0;
892	}
893}
894
895/*
896 * unmount system call
897 */
898int
899ffs_unmount(mp, mntflags, td)
900	struct mount *mp;
901	int mntflags;
902	struct thread *td;
903{
904	struct ufsmount *ump = VFSTOUFS(mp);
905	struct fs *fs;
906	int error, flags;
907
908	flags = 0;
909	if (mntflags & MNT_FORCE) {
910		flags |= FORCECLOSE;
911	}
912#ifdef UFS_EXTATTR
913	if ((error = ufs_extattr_stop(mp, td))) {
914		if (error != EOPNOTSUPP)
915			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
916			    error);
917	} else {
918		ufs_extattr_uepm_destroy(&ump->um_extattr);
919	}
920#endif
921	if (mp->mnt_flag & MNT_SOFTDEP) {
922		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
923			return (error);
924	} else {
925		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
926			return (error);
927	}
928	fs = ump->um_fs;
929	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
930		printf("%s: unmount pending error: blocks %jd files %d\n",
931		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
932		    fs->fs_pendinginodes);
933		fs->fs_pendingblocks = 0;
934		fs->fs_pendinginodes = 0;
935	}
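	/*
	 * If mounted read/write, mark the filesystem clean (unless fsck
	 * is still required) and write the superblock back before the
	 * device is closed.
	 */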
936	if (fs->fs_ronly == 0) {
937		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
938		error = ffs_sbupdate(ump, MNT_WAIT);
939		if (error) {
940			fs->fs_clean = 0;
941			return (error);
942		}
943	}
944	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
945	DROP_GIANT();
946	g_topology_lock();
947	g_wither_geom_close(ump->um_cp->geom, ENXIO);
948	g_topology_unlock();
949	PICKUP_GIANT();
950	vrele(ump->um_devvp);
951	free(fs->fs_csp, M_UFSMNT);
952	free(fs, M_UFSMNT);
953	free(ump, M_UFSMNT);
954	mp->mnt_data = (qaddr_t)0;
955	mp->mnt_flag &= ~MNT_LOCAL;
956	return (error);
957}
958
959/*
960 * Flush out all the files in a filesystem.
961 */
962int
963ffs_flushfiles(mp, flags, td)
964	struct mount *mp;
965	int flags;
966	struct thread *td;
967{
968	struct ufsmount *ump;
969	int error;
970
971	ump = VFSTOUFS(mp);
972#ifdef QUOTA
973	if (mp->mnt_flag & MNT_QUOTA) {
974		int i;
975		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
976		if (error)
977			return (error);
978		for (i = 0; i < MAXQUOTAS; i++) {
979			if (ump->um_quotas[i] == NULLVP)
980				continue;
981			quotaoff(td, mp, i);
982		}
983		/*
984		 * Here we fall through to vflush again to ensure
985		 * that we have gotten rid of all the system vnodes.
986		 */
987	}
988#endif
989	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
990	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
991		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
992			return (error);
993		ffs_snapshot_unmount(mp);
994		/*
995		 * Here we fall through to vflush again to ensure
996		 * that we have gotten rid of all the system vnodes.
997		 */
998	}
999	/*
1000	 * Flush all the files.
1001	 */
1002	if ((error = vflush(mp, 0, flags, td)) != 0)
1003		return (error);
1004	/*
1005	 * Flush filesystem metadata.
1006	 */
1007	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1008	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1009	VOP_UNLOCK(ump->um_devvp, 0, td);
1010	return (error);
1011}
1012
1013/*
1014 * Get filesystem statistics.
1015 */
1016int
1017ffs_statfs(mp, sbp, td)
1018	struct mount *mp;
1019	struct statfs *sbp;
1020	struct thread *td;
1021{
1022	struct ufsmount *ump;
1023	struct fs *fs;
1024
1025	ump = VFSTOUFS(mp);
1026	fs = ump->um_fs;
1027	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1028		panic("ffs_statfs");
1029	sbp->f_version = STATFS_VERSION;
1030	sbp->f_bsize = fs->fs_fsize;
1031	sbp->f_iosize = fs->fs_bsize;
1032	sbp->f_blocks = fs->fs_dsize;
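	/*
	 * Blocks and inodes pending release by soft updates are counted
	 * as free, since they will become available shortly.
	 */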
1033	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1034	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1035	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1036	    dbtofsb(fs, fs->fs_pendingblocks);
1037	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1038	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1039	sbp->f_namemax = NAME_MAX;
1040	return (0);
1041}
1042
1043/*
1044 * Go through the disk queues to initiate sandbagged IO;
1045 * go through the inodes to write those that have been modified;
1046 * initiate the writing of the super block if it has been modified.
1047 *
1048 * Note: we are always called with the filesystem marked `MPBUSY'.
1049 */
1050int
1051ffs_sync(mp, waitfor, cred, td)
1052	struct mount *mp;
1053	int waitfor;
1054	struct ucred *cred;
1055	struct thread *td;
1056{
1057	struct vnode *nvp, *vp, *devvp;
1058	struct inode *ip;
1059	struct ufsmount *ump = VFSTOUFS(mp);
1060	struct fs *fs;
1061	int error, count, wait, lockreq, allerror = 0;
1062	struct bufobj *bo;
1063
1064	fs = ump->um_fs;
1065	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1066		printf("fs = %s\n", fs->fs_fsmnt);
1067		panic("ffs_sync: rofs mod");
1068	}
1069	/*
1070	 * Write back each (modified) inode.
1071	 */
1072	wait = 0;
1073	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1074	if (waitfor == MNT_WAIT) {
1075		wait = 1;
1076		lockreq = LK_EXCLUSIVE;
1077	}
1078	lockreq |= LK_INTERLOCK;
1079	MNT_ILOCK(mp);
1080loop:
1081	MNT_VNODE_FOREACH(vp, mp, nvp) {
1082		/*
1083		 * Depend on the mntvnode_slock to keep things stable enough
1084		 * for a quick test.  Since there might be hundreds of
1085		 * thousands of vnodes, we cannot afford even a subroutine
1086		 * call unless there's a good chance that we have work to do.
1087		 */
1088		VI_LOCK(vp);
1089		if (vp->v_iflag & VI_XLOCK) {
1090			VI_UNLOCK(vp);
1091			continue;
1092		}
1093		ip = VTOI(vp);
1094		if (vp->v_type == VNON || ((ip->i_flag &
1095		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1096		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1097			VI_UNLOCK(vp);
1098			continue;
1099		}
1100		MNT_IUNLOCK(mp);
1101		if ((error = vget(vp, lockreq, td)) != 0) {
1102			MNT_ILOCK(mp);
1103			if (error == ENOENT)
1104				goto loop;
1105			continue;
1106		}
1107		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1108			allerror = error;
1109		VOP_UNLOCK(vp, 0, td);
1110		vrele(vp);
1111		MNT_ILOCK(mp);
1112	}
1113	MNT_IUNLOCK(mp);
1114	/*
1115	 * Force stale filesystem control information to be flushed.
1116	 */
1117	if (waitfor == MNT_WAIT) {
1118		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1119			allerror = error;
1120		/* Flushed work items may create new vnodes to clean */
1121		if (allerror == 0 && count) {
1122			MNT_ILOCK(mp);
1123			goto loop;
1124		}
1125	}
1126#ifdef QUOTA
1127	qsync(mp);
1128#endif
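	/*
	 * Flush the device vnode itself to push out filesystem metadata
	 * (cylinder groups, inode blocks, indirect blocks) buffered there.
	 */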
1129	devvp = ump->um_devvp;
1130	VI_LOCK(devvp);
1131	bo = &devvp->v_bufobj;
1132	if (waitfor != MNT_LAZY &&
1133	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1134		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1135		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1136			allerror = error;
1137		VOP_UNLOCK(devvp, 0, td);
1138		if (allerror == 0 && waitfor == MNT_WAIT) {
1139			MNT_ILOCK(mp);
1140			goto loop;
1141		}
1142	} else
1143		VI_UNLOCK(devvp);
1144	/*
1145	 * Write back modified superblock.
1146	 */
1147	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1148		allerror = error;
1149	return (allerror);
1150}
1151
1152int
1153ffs_vget(mp, ino, flags, vpp)
1154	struct mount *mp;
1155	ino_t ino;
1156	int flags;
1157	struct vnode **vpp;
1158{
1159	struct thread *td = curthread; 		/* XXX */
1160	struct fs *fs;
1161	struct inode *ip;
1162	struct ufsmount *ump;
1163	struct buf *bp;
1164	struct vnode *vp;
1165	struct cdev *dev;
1166	int error;
1167
1168	ump = VFSTOUFS(mp);
1169	dev = ump->um_dev;
1170
1171	/*
1172	 * We do not lock vnode creation as it is believed to be too
1173	 * expensive for such a rare case as simultaneous creation of a
1174	 * vnode for the same ino by different processes. We just allow
1175	 * them to race and check later to decide who wins. Let the race begin!
1176	 */
1177	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1178		return (error);
1179	if (*vpp != NULL)
1180		return (0);
1181
1182	/*
1183	 * If this allocation is performed after the getnewvnode()
1184	 * it might block, leaving a vnode with a NULL v_data to be
1185	 * found by ffs_sync() if a sync happens to fire right then,
1186	 * which will cause a panic because ffs_sync() blindly
1187	 * dereferences vp->v_data (as well it should).
1188	 */
1189	ip = uma_zalloc(uma_inode, M_WAITOK);
1190
1191	/* Allocate a new vnode/inode. */
1192	error = getnewvnode("ufs", mp, &ffs_vnodeops, &vp);
1193	if (error) {
1194		*vpp = NULL;
1195		uma_zfree(uma_inode, ip);
1196		return (error);
1197	}
1198	bzero((caddr_t)ip, sizeof(struct inode));
1199	/*
1200	 * FFS supports recursive locking.
1201	 */
1202	fs = ump->um_fs;
1203	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1204	vp->v_data = ip;
1205	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1206	ip->i_vnode = vp;
1207	ip->i_ump = ump;
1208	ip->i_fs = fs;
1209	ip->i_dev = dev;
1210	ip->i_number = ino;
1211#ifdef QUOTA
1212	{
1213		int i;
1214		for (i = 0; i < MAXQUOTAS; i++)
1215			ip->i_dquot[i] = NODQUOT;
1216	}
1217#endif
1218	/*
1219	 * Exclusively lock the vnode before adding it to the hash. Note that
1220	 * we must not release or downgrade the lock (regardless of what the
1221	 * flags argument says) until it is fully initialized.
1222	 */
1223	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1224
1225	/*
1226	 * Atomically (in terms of ufs_hash operations) check the hash for a
1227	 * duplicate of the vnode being created and add it to the hash. If a
1228	 * duplicate vnode was found, it will be vget()ed from the hash for us.
1229	 */
1230	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1231		vput(vp);
1232		*vpp = NULL;
1233		return (error);
1234	}
1235
1236	/* If we lost the race, throw away our vnode and return the existing one */
1237	if (*vpp != NULL) {
1238		vput(vp);
1239		return (0);
1240	}
1241
1242	/* Read in the disk contents for the inode, copy into the inode. */
1243	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1244	    (int)fs->fs_bsize, NOCRED, &bp);
1245	if (error) {
1246		/*
1247		 * The inode does not contain anything useful, so it would
1248		 * be misleading to leave it on its hash chain. With mode
1249		 * still zero, it will be unlinked and returned to the free
1250		 * list by vput().
1251		 */
1252		brelse(bp);
1253		vput(vp);
1254		*vpp = NULL;
1255		return (error);
1256	}
1257	if (ip->i_ump->um_fstype == UFS1)
1258		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1259	else
1260		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1261	ffs_load_inode(bp, ip, fs, ino);
1262	if (DOINGSOFTDEP(vp))
1263		softdep_load_inodeblock(ip);
1264	else
1265		ip->i_effnlink = ip->i_nlink;
1266	bqrelse(bp);
1267
1268	/*
1269	 * Initialize the vnode from the inode, check for aliases.
1270	 * Note that the underlying vnode may have changed.
1271	 */
1272	error = ufs_vinit(mp, &ffs_fifoops, &vp);
1273	if (error) {
1274		vput(vp);
1275		*vpp = NULL;
1276		return (error);
1277	}
1278	/*
1279	 * Finish inode initialization.
1280	 */
1281	VREF(ip->i_devvp);
1282	/*
1283	 * Set up a generation number for this inode if it does not
1284	 * already have one. This should only happen on old filesystems.
1285	 */
1286	if (ip->i_gen == 0) {
1287		ip->i_gen = arc4random() / 2 + 1;
1288		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1289			ip->i_flag |= IN_MODIFIED;
1290			DIP_SET(ip, i_gen, ip->i_gen);
1291		}
1292	}
1293	/*
1294	 * Ensure that uid and gid are correct. This is a temporary
1295	 * fix until fsck has been changed to do the update.
1296	 */
1297	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1298	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1299		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1300		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1301	}						/* XXX */
1302
1303#ifdef MAC
1304	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1305		/*
1306		 * If this vnode is already allocated, and we're running
1307		 * multi-label, attempt to perform a label association
1308		 * from the extended attributes on the inode.
1309		 */
1310		error = mac_associate_vnode_extattr(mp, vp);
1311		if (error) {
1312			/* ufs_inactive will release ip->i_devvp ref. */
1313			vput(vp);
1314			*vpp = NULL;
1315			return (error);
1316		}
1317	}
1318#endif
1319
1320	*vpp = vp;
1321	return (0);
1322}
1323
1324/*
1325 * File handle to vnode
1326 *
1327 * Have to be really careful about stale file handles:
1328 * - check that the inode number is valid
1329 * - call ffs_vget() to get the locked inode
1330 * - check for an unallocated inode (i_mode == 0)
1331 * - check that the given client host has export rights and return
1332 *   those rights via exflagsp and credanonp
1333 */
1334int
1335ffs_fhtovp(mp, fhp, vpp)
1336	struct mount *mp;
1337	struct fid *fhp;
1338	struct vnode **vpp;
1339{
1340	struct ufid *ufhp;
1341	struct fs *fs;
1342
1343	ufhp = (struct ufid *)fhp;
1344	fs = VFSTOUFS(mp)->um_fs;
1345	if (ufhp->ufid_ino < ROOTINO ||
1346	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1347		return (ESTALE);
1348	return (ufs_fhtovp(mp, ufhp, vpp));
1349}
1350
1351/*
1352 * Vnode pointer to File handle
1353 */
1354/* ARGSUSED */
1355int
1356ffs_vptofh(vp, fhp)
1357	struct vnode *vp;
1358	struct fid *fhp;
1359{
1360	struct inode *ip;
1361	struct ufid *ufhp;
1362
1363	ip = VTOI(vp);
1364	ufhp = (struct ufid *)fhp;
1365	ufhp->ufid_len = sizeof(struct ufid);
1366	ufhp->ufid_ino = ip->i_number;
1367	ufhp->ufid_gen = ip->i_gen;
1368	return (0);
1369}
1370
1371/*
1372 * Initialize the filesystem.
1373 */
1374static int
1375ffs_init(vfsp)
1376	struct vfsconf *vfsp;
1377{
1378
1379	softdep_initialize();
1380	return (ufs_init(vfsp));
1381}
1382
1383/*
1384 * Undo the work of ffs_init().
1385 */
1386static int
1387ffs_uninit(vfsp)
1388	struct vfsconf *vfsp;
1389{
1390	int ret;
1391
1392	ret = ufs_uninit(vfsp);
1393	softdep_uninitialize();
1394	return (ret);
1395}
1396
1397/*
1398 * Write a superblock and associated information back to disk.
1399 */
1400static int
1401ffs_sbupdate(mp, waitfor)
1402	struct ufsmount *mp;
1403	int waitfor;
1404{
1405	struct fs *fs = mp->um_fs;
1406	struct buf *bp;
1407	int blks;
1408	void *space;
1409	int i, size, error, allerror = 0;
1410
1411	if (fs->fs_ronly == 1 &&
1412	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1413	    (MNT_RDONLY | MNT_UPDATE))
1414		panic("ffs_sbupdate: write read-only filesystem");
1415	/*
1416	 * First write back the summary information.
1417	 */
1418	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1419	space = fs->fs_csp;
1420	for (i = 0; i < blks; i += fs->fs_frag) {
1421		size = fs->fs_bsize;
1422		if (i + fs->fs_frag > blks)
1423			size = (blks - i) * fs->fs_fsize;
1424		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1425		    size, 0, 0, 0);
1426		bcopy(space, bp->b_data, (u_int)size);
1427		space = (char *)space + size;
1428		if (waitfor != MNT_WAIT)
1429			bawrite(bp);
1430		else if ((error = bwrite(bp)) != 0)
1431			allerror = error;
1432	}
1433	/*
1434	 * Now write back the superblock itself. If any errors occurred
1435	 * up to this point, then fail so that the superblock avoids
1436	 * being written out as clean.
1437	 */
1438	if (allerror)
1439		return (allerror);
1440	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1441	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1442		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1443		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1444		fs->fs_sblockloc = SBLOCK_UFS1;
1445	}
1446	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1447	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1448		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1449		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1450		fs->fs_sblockloc = SBLOCK_UFS2;
1451	}
1452	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1453	    0, 0, 0);
1454	fs->fs_fmod = 0;
1455	fs->fs_time = time_second;
1456	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1457	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1458	if (waitfor != MNT_WAIT)
1459		bawrite(bp);
1460	else if ((error = bwrite(bp)) != 0)
1461		allerror = error;
1462	return (allerror);
1463}
1464
1465static int
1466ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
1467	int attrnamespace, const char *attrname, struct thread *td)
1468{
1469
1470#ifdef UFS_EXTATTR
1471	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
1472	    attrname, td));
1473#else
1474	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
1475	    attrname, td));
1476#endif
1477}
1478
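/*
 * Release an in-core inode and its dinode back to their UMA zones.
 */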
1479static void
1480ffs_ifree(struct ufsmount *ump, struct inode *ip)
1481{
1482
1483	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1484		uma_zfree(uma_ufs1, ip->i_din1);
1485	else if (ip->i_din2 != NULL)
1486		uma_zfree(uma_ufs2, ip->i_din2);
1487	uma_zfree(uma_inode, ip);
1488}
1489
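/*
 * bop_strategy for FFS device buffers: let soft updates examine the
 * buffer first; if it takes responsibility for the write we are done,
 * otherwise pass the buffer to GEOM via g_vfs_strategy().
 */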
1490static void
1491ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1492{
1493
1494#ifdef SOFTUPDATES
1495	if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(bp))
1496		return;
1497#endif
1498	g_vfs_strategy(bo, bp);
1499}
1500