1/*-
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 140708 2005-01-24 10:12:28Z jeff $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/mac.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/disk.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55
56#include <ufs/ufs/extattr.h>
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/ufsmount.h>
59#include <ufs/ufs/inode.h>
60#include <ufs/ufs/ufs_extern.h>
61
62#include <ufs/ffs/fs.h>
63#include <ufs/ffs/ffs_extern.h>
64
65#include <vm/vm.h>
66#include <vm/uma.h>
67#include <vm/vm_page.h>
68
69#include <geom/geom.h>
70#include <geom/geom_vfs.h>
71
72uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
73
74static int	ffs_sbupdate(struct ufsmount *, int);
75static int	ffs_reload(struct mount *, struct thread *);
76static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
77static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
78		    ufs2_daddr_t);
79static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
80static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
81static vfs_init_t ffs_init;
82static vfs_uninit_t ffs_uninit;
83static vfs_extattrctl_t ffs_extattrctl;
84static vfs_cmount_t ffs_cmount;
85static vfs_mount_t ffs_mount;
86
87static struct vfsops ufs_vfsops = {
88	.vfs_extattrctl =	ffs_extattrctl,
89	.vfs_fhtovp =		ffs_fhtovp,
90	.vfs_init =		ffs_init,
91	.vfs_mount =		ffs_mount,
92	.vfs_cmount =		ffs_cmount,
93	.vfs_quotactl =		ufs_quotactl,
94	.vfs_root =		ufs_root,
95	.vfs_statfs =		ffs_statfs,
96	.vfs_sync =		ffs_sync,
97	.vfs_uninit =		ffs_uninit,
98	.vfs_unmount =		ffs_unmount,
99	.vfs_vget =		ffs_vget,
100	.vfs_vptofh =		ffs_vptofh,
101};
102
103VFS_SET(ufs_vfsops, ufs, 0);
104
105static b_strategy_t ffs_geom_strategy;
106
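/*
 * Buffer operations for buffers on the underlying device vnode.  Writes
 * and syncs use the generic buf routines, but the strategy routine is
 * routed through ffs_geom_strategy() so that soft updates can look at
 * writes before they are handed to GEOM (see ffs_geom_strategy() below).
 */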
107static struct buf_ops ffs_ops = {
108	.bop_name =	"FFS",
109	.bop_write =	bufwrite,
110	.bop_strategy =	ffs_geom_strategy,
111	.bop_sync =	bufsync,
112};
113
114static const char *ffs_opts[] = { "from", "export", NULL };
115
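/*
 * VFS mount entry point.  Handles new mounts as well as MNT_UPDATE
 * requests: read-write to read-only downgrades, MNT_RELOAD, read-only
 * to read-write upgrades, export list changes and snapshot creation.
 */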
116static int
117ffs_mount(struct mount *mp, struct thread *td)
118{
119	struct vnode *devvp;
120	struct ufsmount *ump = 0;
121	struct fs *fs;
122	int error, flags;
123	mode_t accessmode;
124	struct nameidata ndp;
125	struct export_args export;
126	char *fspec;
127
128	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
129		return (EINVAL);
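	/*
	 * The inode and dinode zones are created on the first FFS mount
	 * and are shared by all FFS mounts in the system.
	 */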
130	if (uma_inode == NULL) {
131		uma_inode = uma_zcreate("FFS inode",
132		    sizeof(struct inode), NULL, NULL, NULL, NULL,
133		    UMA_ALIGN_PTR, 0);
134		uma_ufs1 = uma_zcreate("FFS1 dinode",
135		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
136		    UMA_ALIGN_PTR, 0);
137		uma_ufs2 = uma_zcreate("FFS2 dinode",
138		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
139		    UMA_ALIGN_PTR, 0);
140	}
141
142	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
143	if (error)
144		return (error);
145
146	/*
147	 * If updating, check whether changing from read-only to
148	 * read/write; if there is no device name, that's all we do.
149	 */
150	if (mp->mnt_flag & MNT_UPDATE) {
151		ump = VFSTOUFS(mp);
152		fs = ump->um_fs;
153		devvp = ump->um_devvp;
154		if (fs->fs_ronly == 0 &&
155		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
156			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
157				return (error);
158			/*
159			 * Flush any dirty data.
160			 */
161			if ((error = VFS_SYNC(mp, MNT_WAIT, td)) != 0) {
162				vn_finished_write(mp);
163				return (error);
164			}
165			/*
166			 * Check for and optionally get rid of files open
167			 * for writing.
168			 */
169			flags = WRITECLOSE;
170			if (mp->mnt_flag & MNT_FORCE)
171				flags |= FORCECLOSE;
172			if (mp->mnt_flag & MNT_SOFTDEP) {
173				error = softdep_flushfiles(mp, flags, td);
174			} else {
175				error = ffs_flushfiles(mp, flags, td);
176			}
177			if (error) {
178				vn_finished_write(mp);
179				return (error);
180			}
181			if (fs->fs_pendingblocks != 0 ||
182			    fs->fs_pendinginodes != 0) {
183				printf("%s: %s: blocks %jd files %d\n",
184				    fs->fs_fsmnt, "update error",
185				    (intmax_t)fs->fs_pendingblocks,
186				    fs->fs_pendinginodes);
187				fs->fs_pendingblocks = 0;
188				fs->fs_pendinginodes = 0;
189			}
190			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
191				fs->fs_clean = 1;
192			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
193				fs->fs_ronly = 0;
194				fs->fs_clean = 0;
195				vn_finished_write(mp);
196				return (error);
197			}
198			vn_finished_write(mp);
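			/*
			 * Give back the write count we hold on the GEOM
			 * consumer now that all dirty data has been flushed
			 * and the superblock has been marked clean.
			 */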
199			DROP_GIANT();
200			g_topology_lock();
201			g_access(ump->um_cp, 0, -1, 0);
202			g_topology_unlock();
203			PICKUP_GIANT();
204			fs->fs_ronly = 1;
205			mp->mnt_flag |= MNT_RDONLY;
206		}
207		if ((mp->mnt_flag & MNT_RELOAD) &&
208		    (error = ffs_reload(mp, td)) != 0)
209			return (error);
210		if (fs->fs_ronly &&
211		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
212			/*
213			 * If upgrading to read-write as non-root, verify that
214			 * the user has the necessary permissions on the device.
215			 */
216			if (suser(td)) {
217				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
218				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
219				    td->td_ucred, td)) != 0) {
220					VOP_UNLOCK(devvp, 0, td);
221					return (error);
222				}
223				VOP_UNLOCK(devvp, 0, td);
224			}
225			fs->fs_flags &= ~FS_UNCLEAN;
226			if (fs->fs_clean == 0) {
227				fs->fs_flags |= FS_UNCLEAN;
228				if ((mp->mnt_flag & MNT_FORCE) ||
229				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
230				     (fs->fs_flags & FS_DOSOFTDEP))) {
231					printf("WARNING: %s was not %s\n",
232					   fs->fs_fsmnt, "properly dismounted");
233				} else {
234					printf(
235"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
236					    fs->fs_fsmnt);
237					return (EPERM);
238				}
239			}
240			DROP_GIANT();
241			g_topology_lock();
242			/*
243		 * If we're the root device, we may not have an E (exclusive)
244		 * count yet; get it now.
245			 */
246			if (ump->um_cp->ace == 0)
247				error = g_access(ump->um_cp, 0, 1, 1);
248			else
249				error = g_access(ump->um_cp, 0, 1, 0);
250			g_topology_unlock();
251			PICKUP_GIANT();
252			if (error)
253				return (error);
254			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
255				return (error);
256			fs->fs_ronly = 0;
257			mp->mnt_flag &= ~MNT_RDONLY;
258			fs->fs_clean = 0;
259			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
260				vn_finished_write(mp);
261				return (error);
262			}
263			/* check to see if we need to start softdep */
264			if ((fs->fs_flags & FS_DOSOFTDEP) &&
265			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
266				vn_finished_write(mp);
267				return (error);
268			}
269			if (fs->fs_snapinum[0] != 0)
270				ffs_snapshot_mount(mp);
271			vn_finished_write(mp);
272		}
273		/*
274		 * Soft updates is incompatible with "async",
275		 * so if we are doing softupdates stop the user
276		 * from setting the async flag in an update.
277		 * Softdep_mount() clears it in an initial mount
278		 * or ro->rw remount.
279		 */
280		if (mp->mnt_flag & MNT_SOFTDEP)
281			mp->mnt_flag &= ~MNT_ASYNC;
282		/*
283		 * Keep MNT_ACLS flag if it is stored in superblock.
284		 */
285		if ((fs->fs_flags & FS_ACLS) != 0)
286			mp->mnt_flag |= MNT_ACLS;
287		/*
288		 * If not updating name, process export requests.
289		 */
290		error = vfs_copyopt(mp->mnt_optnew, "export", &export, sizeof export);
291		if (error == 0 && export.ex_flags != 0)
292			return (vfs_export(mp, &export));
293		/*
294		 * If this is a snapshot request, take the snapshot.
295		 */
296		if (mp->mnt_flag & MNT_SNAPSHOT)
297			return (ffs_snapshot(mp, fspec));
298	}
299
300	/*
301	 * Not an update, or updating the name: look up the name
302	 * and verify that it refers to a sensible disk device.
303	 */
304	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
305	if ((error = namei(&ndp)) != 0)
306		return (error);
307	NDFREE(&ndp, NDF_ONLY_PNBUF);
308	devvp = ndp.ni_vp;
309	if (!vn_isdisk(devvp, &error)) {
310		vrele(devvp);
311		return (error);
312	}
313
314	/*
315	 * If mount by non-root, then verify that user has necessary
316	 * permissions on the device.
317	 */
318	if (suser(td)) {
319		accessmode = VREAD;
320		if ((mp->mnt_flag & MNT_RDONLY) == 0)
321			accessmode |= VWRITE;
322		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
323		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
324			vput(devvp);
325			return (error);
326		}
327		VOP_UNLOCK(devvp, 0, td);
328	}
329
330	if (mp->mnt_flag & MNT_UPDATE) {
331		/*
332		 * Update only
333		 *
334		 * If it's not the same vnode, or at least the same device,
335		 * then it's not correct.
336		 */
337
338		if (devvp->v_rdev != ump->um_devvp->v_rdev)
339			error = EINVAL;	/* needs translation */
340		vrele(devvp);
341		if (error)
342			return (error);
343	} else {
344		/*
345		 * New mount
346		 *
347		 * We need the name for the mount point (also used for
348		 * "last mounted on") copied in. If an error occurs,
349		 * the mount point is discarded by the upper level code.
350		 * Note that vfs_mount() populates f_mntonname for us.
351		 */
352		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
353			vrele(devvp);
354			return (error);
355		}
356	}
357	vfs_mountedfrom(mp, fspec);
358	return (0);
359}
360
361/*
362 * Compatibility with old mount system call.
363 */
364
365static int
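/*
 * Translate the old-style mount(2) argument structure into the option
 * list ("from", "export") used by nmount(2) and hand it to kernel_mount().
 */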
366ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
367{
368	struct ufs_args args;
369	int error;
370
371	if (data == NULL)
372		return (EINVAL);
373	error = copyin(data, &args, sizeof args);
374	if (error)
375		return (error);
376
377	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
378	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
379	error = kernel_mount(ma, flags);
380
381	return (error);
382}
383
384/*
385 * Reload all incore data for a filesystem (used after running fsck on
386 * the root filesystem and finding things to fix). The filesystem must
387 * be mounted read-only.
388 *
389 * Things to do to update the mount:
390 *	1) invalidate all cached meta-data.
391 *	2) re-read superblock from disk.
392 *	3) re-read summary information from disk.
393 *	4) invalidate all inactive vnodes.
394 *	5) invalidate all cached file data.
395 *	6) re-read inode data for all active vnodes.
396 */
397static int
398ffs_reload(struct mount *mp, struct thread *td)
399{
400	struct vnode *vp, *nvp, *devvp;
401	struct inode *ip;
402	void *space;
403	struct buf *bp;
404	struct fs *fs, *newfs;
405	struct ufsmount *ump;
406	ufs2_daddr_t sblockloc;
407	int i, blks, size, error;
408	int32_t *lp;
409
410	if ((mp->mnt_flag & MNT_RDONLY) == 0)
411		return (EINVAL);
412	ump = VFSTOUFS(mp);
413	/*
414	 * Step 1: invalidate all cached meta-data.
415	 */
416	devvp = VFSTOUFS(mp)->um_devvp;
417	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
418	if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
419		panic("ffs_reload: dirty1");
420	VOP_CREATEVOBJECT(devvp, td->td_ucred, td);
421	VOP_UNLOCK(devvp, 0, td);
422
423	/*
424	 * Step 2: re-read superblock from disk.
425	 */
426	fs = VFSTOUFS(mp)->um_fs;
427	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
428	    NOCRED, &bp)) != 0)
429		return (error);
430	newfs = (struct fs *)bp->b_data;
431	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
432	     newfs->fs_magic != FS_UFS2_MAGIC) ||
433	    newfs->fs_bsize > MAXBSIZE ||
434	    newfs->fs_bsize < sizeof(struct fs)) {
435			brelse(bp);
436			return (EIO);		/* XXX needs translation */
437	}
438	/*
439	 * Copy pointer fields back into superblock before copying in	XXX
440	 * new superblock. These should really be in the ufsmount.	XXX
441	 * Note that important parameters (e.g. fs_ncg) are unchanged.
442	 */
443	newfs->fs_csp = fs->fs_csp;
444	newfs->fs_maxcluster = fs->fs_maxcluster;
445	newfs->fs_contigdirs = fs->fs_contigdirs;
446	newfs->fs_active = fs->fs_active;
447	/* The file system is still read-only. */
448	newfs->fs_ronly = 1;
449	sblockloc = fs->fs_sblockloc;
450	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
451	brelse(bp);
452	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
453	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
454	UFS_LOCK(ump);
455	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
456		printf("%s: reload pending error: blocks %jd files %d\n",
457		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
458		    fs->fs_pendinginodes);
459		fs->fs_pendingblocks = 0;
460		fs->fs_pendinginodes = 0;
461	}
462	UFS_UNLOCK(ump);
463
464	/*
465	 * Step 3: re-read summary information from disk.
466	 */
467	blks = howmany(fs->fs_cssize, fs->fs_fsize);
468	space = fs->fs_csp;
469	for (i = 0; i < blks; i += fs->fs_frag) {
470		size = fs->fs_bsize;
471		if (i + fs->fs_frag > blks)
472			size = (blks - i) * fs->fs_fsize;
473		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
474		    NOCRED, &bp);
475		if (error)
476			return (error);
477		bcopy(bp->b_data, space, (u_int)size);
478		space = (char *)space + size;
479		brelse(bp);
480	}
481	/*
482	 * We no longer know anything about clusters per cylinder group.
483	 */
484	if (fs->fs_contigsumsize > 0) {
485		lp = fs->fs_maxcluster;
486		for (i = 0; i < fs->fs_ncg; i++)
487			*lp++ = fs->fs_contigsumsize;
488	}
489
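	/*
	 * Steps 4-6 are done in one pass over the mount's vnode list.  The
	 * scan restarts at `loop' whenever a vnode is recycled or the vget
	 * fails, since the list may have changed while the mount interlock
	 * was dropped.
	 */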
490loop:
491	MNT_ILOCK(mp);
492	MNT_VNODE_FOREACH(vp, mp, nvp) {
493		VI_LOCK(vp);
494		if (vp->v_iflag & VI_XLOCK) {
495			VI_UNLOCK(vp);
496			continue;
497		}
498		MNT_IUNLOCK(mp);
499		/*
500		 * Step 4: invalidate all inactive vnodes.
501		 */
502		if (vp->v_usecount == 0) {
503			vgonel(vp, td);
504			goto loop;
505		}
506		/*
507		 * Step 5: invalidate all cached file data.
508		 */
509		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
510			goto loop;
511		}
512		if (vinvalbuf(vp, 0, td, 0, 0))
513			panic("ffs_reload: dirty2");
514		/*
515		 * Step 6: re-read inode data for all active vnodes.
516		 */
517		ip = VTOI(vp);
518		error =
519		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
520		    (int)fs->fs_bsize, NOCRED, &bp);
521		if (error) {
522			VOP_UNLOCK(vp, 0, td);
523			vrele(vp);
524			return (error);
525		}
526		ffs_load_inode(bp, ip, fs, ip->i_number);
527		ip->i_effnlink = ip->i_nlink;
528		brelse(bp);
529		VOP_UNLOCK(vp, 0, td);
530		vrele(vp);
531		MNT_ILOCK(mp);
532	}
533	MNT_IUNLOCK(mp);
534	return (0);
535}
536
537/*
538 * Possible superblock locations ordered from most to least likely.
539 */
540static int sblock_try[] = SBLOCKSEARCH;
541
542/*
543 * Common code for mount and mountroot
544 */
545static int
546ffs_mountfs(devvp, mp, td)
547	struct vnode *devvp;
548	struct mount *mp;
549	struct thread *td;
550{
551	struct ufsmount *ump;
552	struct buf *bp;
553	struct fs *fs;
554	struct cdev *dev;
555	void *space;
556	ufs2_daddr_t sblockloc;
557	int error, i, blks, size, ronly;
558	int32_t *lp;
559	struct ucred *cred;
560	struct g_consumer *cp;
561
562	dev = devvp->v_rdev;
563	cred = td ? td->td_ucred : NOCRED;
564
565	VOP_CREATEVOBJECT(devvp, td->td_ucred, td);
566	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
567	DROP_GIANT();
568	g_topology_lock();
569	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
570
571	/*
572	 * If we are a root mount, drop the E flag so fsck can do its magic.
573	 * We will pick it up again when we remount R/W.
574	 */
575	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
576		error = g_access(cp, 0, 0, -1);
577	g_topology_unlock();
578	PICKUP_GIANT();
579	VOP_UNLOCK(devvp, 0, td);
580	if (error)
581		return (error);
582	if (devvp->v_rdev->si_iosize_max != 0)
583		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
584	if (mp->mnt_iosize_max > MAXPHYS)
585		mp->mnt_iosize_max = MAXPHYS;
586
587	devvp->v_bufobj.bo_private = cp;
588	devvp->v_bufobj.bo_ops = &ffs_ops;
589
590	bp = NULL;
591	ump = NULL;
592	fs = NULL;
593	sblockloc = 0;
594	/*
595	 * Try reading the superblock in each of its possible locations.
596	 */
597	for (i = 0; sblock_try[i] != -1; i++) {
598		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
599		    cred, &bp)) != 0)
600			goto out;
601		fs = (struct fs *)bp->b_data;
602		sblockloc = sblock_try[i];
603		if ((fs->fs_magic == FS_UFS1_MAGIC ||
604		     (fs->fs_magic == FS_UFS2_MAGIC &&
605		      (fs->fs_sblockloc == sblockloc ||
606		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
607		    fs->fs_bsize <= MAXBSIZE &&
608		    fs->fs_bsize >= sizeof(struct fs))
609			break;
610		brelse(bp);
611		bp = NULL;
612	}
613	if (sblock_try[i] == -1) {
614		error = EINVAL;		/* XXX needs translation */
615		goto out;
616	}
617	fs->fs_fmod = 0;
618	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
619	fs->fs_flags &= ~FS_UNCLEAN;
620	if (fs->fs_clean == 0) {
621		fs->fs_flags |= FS_UNCLEAN;
622		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
623		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
624		     (fs->fs_flags & FS_DOSOFTDEP))) {
625			printf(
626"WARNING: %s was not properly dismounted\n",
627			    fs->fs_fsmnt);
628		} else {
629			printf(
630"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
631			    fs->fs_fsmnt);
632			error = EPERM;
633			goto out;
634		}
635		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
636		    (mp->mnt_flag & MNT_FORCE)) {
637			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
638			    (intmax_t)fs->fs_pendingblocks,
639			    fs->fs_pendinginodes);
640			fs->fs_pendingblocks = 0;
641			fs->fs_pendinginodes = 0;
642		}
643	}
644	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
645		printf("%s: mount pending error: blocks %jd files %d\n",
646		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
647		    fs->fs_pendinginodes);
648		fs->fs_pendingblocks = 0;
649		fs->fs_pendinginodes = 0;
650	}
651	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
652	ump->um_cp = cp;
653	ump->um_bo = &devvp->v_bufobj;
654	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
655	if (fs->fs_magic == FS_UFS1_MAGIC) {
656		ump->um_fstype = UFS1;
657		ump->um_balloc = ffs_balloc_ufs1;
658	} else {
659		ump->um_fstype = UFS2;
660		ump->um_balloc = ffs_balloc_ufs2;
661	}
662	ump->um_blkatoff = ffs_blkatoff;
663	ump->um_truncate = ffs_truncate;
664	ump->um_update = ffs_update;
665	ump->um_valloc = ffs_valloc;
666	ump->um_vfree = ffs_vfree;
667	ump->um_ifree = ffs_ifree;
668	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
669	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
670	if (fs->fs_sbsize < SBLOCKSIZE)
671		bp->b_flags |= B_INVAL | B_NOCACHE;
672	brelse(bp);
673	bp = NULL;
674	fs = ump->um_fs;
675	ffs_oldfscompat_read(fs, ump, sblockloc);
676	fs->fs_ronly = ronly;
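	/*
	 * The cylinder group summary array, the per-cg cluster summary
	 * (when clustering is enabled) and the per-cg contigdirs counters
	 * are all carved out of a single buffer anchored at fs_csp.
	 */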
677	size = fs->fs_cssize;
678	blks = howmany(size, fs->fs_fsize);
679	if (fs->fs_contigsumsize > 0)
680		size += fs->fs_ncg * sizeof(int32_t);
681	size += fs->fs_ncg * sizeof(u_int8_t);
682	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
683	fs->fs_csp = space;
684	for (i = 0; i < blks; i += fs->fs_frag) {
685		size = fs->fs_bsize;
686		if (i + fs->fs_frag > blks)
687			size = (blks - i) * fs->fs_fsize;
688		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
689		    cred, &bp)) != 0) {
690			free(fs->fs_csp, M_UFSMNT);
691			goto out;
692		}
693		bcopy(bp->b_data, space, (u_int)size);
694		space = (char *)space + size;
695		brelse(bp);
696		bp = NULL;
697	}
698	if (fs->fs_contigsumsize > 0) {
699		fs->fs_maxcluster = lp = space;
700		for (i = 0; i < fs->fs_ncg; i++)
701			*lp++ = fs->fs_contigsumsize;
702		space = lp;
703	}
704	size = fs->fs_ncg * sizeof(u_int8_t);
705	fs->fs_contigdirs = (u_int8_t *)space;
706	bzero(fs->fs_contigdirs, size);
707	fs->fs_active = NULL;
708	mp->mnt_data = (qaddr_t)ump;
709	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
710	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
711	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
712	    vfs_getvfs(&mp->mnt_stat.f_fsid))
713		vfs_getnewfsid(mp);
714	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
715	mp->mnt_flag |= MNT_LOCAL;
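	/*
	 * Translate superblock feature flags into mount flags, warning
	 * when the kernel was built without the corresponding option.
	 */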
716	if ((fs->fs_flags & FS_MULTILABEL) != 0)
717#ifdef MAC
718		mp->mnt_flag |= MNT_MULTILABEL;
719#else
720		printf(
721"WARNING: %s: multilabel flag on fs but no MAC support\n",
722		    fs->fs_fsmnt);
723#endif
724	if ((fs->fs_flags & FS_ACLS) != 0)
725#ifdef UFS_ACL
726		mp->mnt_flag |= MNT_ACLS;
727#else
728		printf(
729"WARNING: %s: ACLs flag on fs but no ACLs support\n",
730		    fs->fs_fsmnt);
731#endif
732	ump->um_mountp = mp;
733	ump->um_dev = dev;
734	ump->um_devvp = devvp;
735	ump->um_nindir = fs->fs_nindir;
736	ump->um_bptrtodb = fs->fs_fsbtodb;
737	ump->um_seqinc = fs->fs_frag;
738	for (i = 0; i < MAXQUOTAS; i++)
739		ump->um_quotas[i] = NULLVP;
740#ifdef UFS_EXTATTR
741	ufs_extattr_uepm_init(&ump->um_extattr);
742#endif
743	/*
744	 * Set FS local "last mounted on" information (NULL pad)
745	 */
746	vfs_mountedfrom(mp, fs->fs_fsmnt);
747
748	if (mp->mnt_flag & MNT_ROOTFS) {
749		/*
750		 * Root mount; update timestamp in mount structure.
751		 * this will be used by the common root mount code
752		 * to update the system clock.
753		 */
754		mp->mnt_time = fs->fs_time;
755	}
756
757	if (ronly == 0) {
758		if ((fs->fs_flags & FS_DOSOFTDEP) &&
759		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
760			free(fs->fs_csp, M_UFSMNT);
761			goto out;
762		}
763		if (fs->fs_snapinum[0] != 0)
764			ffs_snapshot_mount(mp);
765		fs->fs_fmod = 1;
766		fs->fs_clean = 0;
767		(void) ffs_sbupdate(ump, MNT_WAIT);
768	}
769	/*
770	 * Initialize filesystem stat information in mount struct.
771	 */
772#ifdef UFS_EXTATTR
773#ifdef UFS_EXTATTR_AUTOSTART
774	/*
775	 *
776	 * Auto-starting does the following:
777	 *	- check for /.attribute in the fs, and extattr_start if so
778	 *	- for each file in .attribute, enable that file with
779	 * 	  an attribute of the same name.
780	 * Not clear how to report errors -- probably eat them.
781	 * This would all happen while the filesystem was busy/not
782	 * available, so would effectively be "atomic".
783	 */
784	(void) ufs_extattr_autostart(mp, td);
785#endif /* UFS_EXTATTR_AUTOSTART */
786#endif /* UFS_EXTATTR */
787#ifndef QUOTA
788	mp->mnt_kern_flag |= MNTK_MPSAFE;
789#endif
790	return (0);
791out:
792	if (bp)
793		brelse(bp);
794	vinvalbuf(devvp, V_SAVE, td, 0, 0);
795	if (cp != NULL) {
796		DROP_GIANT();
797		g_topology_lock();
798		g_wither_geom_close(cp->geom, ENXIO);
799		g_topology_unlock();
800		PICKUP_GIANT();
801	}
802	if (ump) {
803		mtx_destroy(UFS_MTX(ump));
804		free(ump->um_fs, M_UFSMNT);
805		free(ump, M_UFSMNT);
806		mp->mnt_data = (qaddr_t)0;
807	}
808	return (error);
809}
810
811#include <sys/sysctl.h>
812int bigcgs = 0;
813SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
814
815/*
816 * Sanity checks for loading old filesystem superblocks.
817 * See ffs_oldfscompat_write below for unwound actions.
818 *
819 * XXX - Parts get retired eventually.
820 * Unfortunately new bits get added.
821 */
822static void
823ffs_oldfscompat_read(fs, ump, sblockloc)
824	struct fs *fs;
825	struct ufsmount *ump;
826	ufs2_daddr_t sblockloc;
827{
828	off_t maxfilesize;
829
830	/*
831	 * If not yet done, update fs_flags location and value of fs_sblockloc.
832	 */
833	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
834		fs->fs_flags = fs->fs_old_flags;
835		fs->fs_old_flags |= FS_FLAGS_UPDATED;
836		fs->fs_sblockloc = sblockloc;
837	}
838	/*
839	 * If not yet done, update UFS1 superblock with new wider fields.
840	 */
841	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
842		fs->fs_maxbsize = fs->fs_bsize;
843		fs->fs_time = fs->fs_old_time;
844		fs->fs_size = fs->fs_old_size;
845		fs->fs_dsize = fs->fs_old_dsize;
846		fs->fs_csaddr = fs->fs_old_csaddr;
847		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
848		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
849		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
850		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
851	}
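	/*
	 * Old (pre-4.4BSD) inode format: limit the maximum file size to
	 * 2^39 bytes and derive fs_qbmask/fs_qfmask from the block and
	 * fragment masks.
	 */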
852	if (fs->fs_magic == FS_UFS1_MAGIC &&
853	    fs->fs_old_inodefmt < FS_44INODEFMT) {
854		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
855		fs->fs_qbmask = ~fs->fs_bmask;
856		fs->fs_qfmask = ~fs->fs_fmask;
857	}
858	if (fs->fs_magic == FS_UFS1_MAGIC) {
859		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
860		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
861		if (fs->fs_maxfilesize > maxfilesize)
862			fs->fs_maxfilesize = maxfilesize;
863	}
864	/* Compatibility for old filesystems */
865	if (fs->fs_avgfilesize <= 0)
866		fs->fs_avgfilesize = AVFILESIZ;
867	if (fs->fs_avgfpdir <= 0)
868		fs->fs_avgfpdir = AFPDIR;
869	if (bigcgs) {
870		fs->fs_save_cgsize = fs->fs_cgsize;
871		fs->fs_cgsize = fs->fs_bsize;
872	}
873}
874
875/*
876 * Unwinding superblock updates for old filesystems.
877 * See ffs_oldfscompat_read above for details.
878 *
879 * XXX - Parts get retired eventually.
880 * Unfortunately new bits get added.
881 */
882static void
883ffs_oldfscompat_write(fs, ump)
884	struct fs *fs;
885	struct ufsmount *ump;
886{
887
888	/*
889	 * Copy back UFS2 updated fields that UFS1 inspects.
890	 */
891	if (fs->fs_magic == FS_UFS1_MAGIC) {
892		fs->fs_old_time = fs->fs_time;
893		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
894		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
895		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
896		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
897		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
898	}
899	if (bigcgs) {
900		fs->fs_cgsize = fs->fs_save_cgsize;
901		fs->fs_save_cgsize = 0;
902	}
903}
904
905/*
906 * unmount system call
907 */
908int
909ffs_unmount(mp, mntflags, td)
910	struct mount *mp;
911	int mntflags;
912	struct thread *td;
913{
914	struct ufsmount *ump = VFSTOUFS(mp);
915	struct fs *fs;
916	int error, flags;
917
918	flags = 0;
919	if (mntflags & MNT_FORCE) {
920		flags |= FORCECLOSE;
921	}
922#ifdef UFS_EXTATTR
923	if ((error = ufs_extattr_stop(mp, td))) {
924		if (error != EOPNOTSUPP)
925			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
926			    error);
927	} else {
928		ufs_extattr_uepm_destroy(&ump->um_extattr);
929	}
930#endif
931	if (mp->mnt_flag & MNT_SOFTDEP) {
932		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
933			return (error);
934	} else {
935		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
936			return (error);
937	}
938	fs = ump->um_fs;
939	UFS_LOCK(ump);
940	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
941		printf("%s: unmount pending error: blocks %jd files %d\n",
942		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
943		    fs->fs_pendinginodes);
944		fs->fs_pendingblocks = 0;
945		fs->fs_pendinginodes = 0;
946	}
947	UFS_UNLOCK(ump);
948	if (fs->fs_ronly == 0) {
949		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
950		error = ffs_sbupdate(ump, MNT_WAIT);
951		if (error) {
952			fs->fs_clean = 0;
953			return (error);
954		}
955	}
956	vinvalbuf(ump->um_devvp, V_SAVE, td, 0, 0);
957	DROP_GIANT();
958	g_topology_lock();
959	g_wither_geom_close(ump->um_cp->geom, ENXIO);
960	g_topology_unlock();
961	PICKUP_GIANT();
962	vrele(ump->um_devvp);
963	mtx_destroy(UFS_MTX(ump));
964	free(fs->fs_csp, M_UFSMNT);
965	free(fs, M_UFSMNT);
966	free(ump, M_UFSMNT);
967	mp->mnt_data = (qaddr_t)0;
968	mp->mnt_flag &= ~MNT_LOCAL;
969	return (error);
970}
971
972/*
973 * Flush out all the files in a filesystem.
974 */
975int
976ffs_flushfiles(mp, flags, td)
977	struct mount *mp;
978	int flags;
979	struct thread *td;
980{
981	struct ufsmount *ump;
982	int error;
983
984	ump = VFSTOUFS(mp);
985#ifdef QUOTA
986	if (mp->mnt_flag & MNT_QUOTA) {
987		int i;
988		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
989		if (error)
990			return (error);
991		for (i = 0; i < MAXQUOTAS; i++) {
992			if (ump->um_quotas[i] == NULLVP)
993				continue;
994			quotaoff(td, mp, i);
995		}
996		/*
997		 * Here we fall through to vflush again to ensure
998		 * that we have gotten rid of all the system vnodes.
999		 */
1000	}
1001#endif
1002	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1003	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1004		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1005			return (error);
1006		ffs_snapshot_unmount(mp);
1007		/*
1008		 * Here we fall through to vflush again to ensure
1009		 * that we have gotten rid of all the system vnodes.
1010		 */
1011	}
1012	/*
1013	 * Flush all the files.
1014	 */
1015	if ((error = vflush(mp, 0, flags, td)) != 0)
1016		return (error);
1017	/*
1018	 * Flush filesystem metadata.
1019	 */
1020	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1021	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1022	VOP_UNLOCK(ump->um_devvp, 0, td);
1023	return (error);
1024}
1025
1026/*
1027 * Get filesystem statistics.
1028 */
1029int
1030ffs_statfs(mp, sbp, td)
1031	struct mount *mp;
1032	struct statfs *sbp;
1033	struct thread *td;
1034{
1035	struct ufsmount *ump;
1036	struct fs *fs;
1037
1038	ump = VFSTOUFS(mp);
1039	fs = ump->um_fs;
1040	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1041		panic("ffs_statfs");
1042	sbp->f_version = STATFS_VERSION;
1043	sbp->f_bsize = fs->fs_fsize;
1044	sbp->f_iosize = fs->fs_bsize;
1045	sbp->f_blocks = fs->fs_dsize;
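	/*
	 * Free space is reported in fragments: cs_nbfree counts full
	 * blocks and is scaled by fs_frag, cs_nffree counts fragments,
	 * and blocks whose release is still pending are treated as free.
	 */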
1046	UFS_LOCK(ump);
1047	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1048	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1049	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1050	    dbtofsb(fs, fs->fs_pendingblocks);
1051	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1052	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1053	UFS_UNLOCK(ump);
1054	sbp->f_namemax = NAME_MAX;
1055	return (0);
1056}
1057
1058/*
1059 * Go through the disk queues to initiate sandbagged IO;
1060 * go through the inodes to write those that have been modified;
1061 * initiate the writing of the super block if it has been modified.
1062 *
1063 * Note: we are always called with the filesystem marked `MPBUSY'.
1064 */
1065int
1066ffs_sync(mp, waitfor, td)
1067	struct mount *mp;
1068	int waitfor;
1069	struct thread *td;
1070{
1071	struct vnode *nvp, *vp, *devvp;
1072	struct inode *ip;
1073	struct ufsmount *ump = VFSTOUFS(mp);
1074	struct fs *fs;
1075	int error, count, wait, lockreq, allerror = 0;
1076	struct bufobj *bo;
1077
1078	fs = ump->um_fs;
1079	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1080		printf("fs = %s\n", fs->fs_fsmnt);
1081		panic("ffs_sync: rofs mod");
1082	}
1083	/*
1084	 * Write back each (modified) inode.
1085	 */
1086	wait = 0;
1087	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1088	if (waitfor == MNT_WAIT) {
1089		wait = 1;
1090		lockreq = LK_EXCLUSIVE;
1091	}
1092	lockreq |= LK_INTERLOCK;
1093	MNT_ILOCK(mp);
1094loop:
1095	MNT_VNODE_FOREACH(vp, mp, nvp) {
1096		/*
1097		 * Depend on the mount interlock to keep things stable enough
1098		 * for a quick test.  Since there might be hundreds of
1099		 * thousands of vnodes, we cannot afford even a subroutine
1100		 * call unless there's a good chance that we have work to do.
1101		 */
1102		VI_LOCK(vp);
1103		if (vp->v_iflag & VI_XLOCK) {
1104			VI_UNLOCK(vp);
1105			continue;
1106		}
1107		ip = VTOI(vp);
1108		if (vp->v_type == VNON || ((ip->i_flag &
1109		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1110		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1111			VI_UNLOCK(vp);
1112			continue;
1113		}
1114		MNT_IUNLOCK(mp);
1115		if ((error = vget(vp, lockreq, td)) != 0) {
1116			MNT_ILOCK(mp);
1117			if (error == ENOENT)
1118				goto loop;
1119			continue;
1120		}
1121		if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
1122			allerror = error;
1123		VOP_UNLOCK(vp, 0, td);
1124		vrele(vp);
1125		MNT_ILOCK(mp);
1126	}
1127	MNT_IUNLOCK(mp);
1128	/*
1129	 * Force stale filesystem control information to be flushed.
1130	 */
1131	if (waitfor == MNT_WAIT) {
1132		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1133			allerror = error;
1134		/* Flushed work items may create new vnodes to clean */
1135		if (allerror == 0 && count) {
1136			MNT_ILOCK(mp);
1137			goto loop;
1138		}
1139	}
1140#ifdef QUOTA
1141	qsync(mp);
1142#endif
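	/*
	 * Flush the device vnode itself if it has dirty buffers or output
	 * in progress; this covers metadata such as cylinder group and
	 * inode blocks that are buffered on the device vnode.
	 */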
1143	devvp = ump->um_devvp;
1144	VI_LOCK(devvp);
1145	bo = &devvp->v_bufobj;
1146	if (waitfor != MNT_LAZY &&
1147	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1148		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1149		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1150			allerror = error;
1151		VOP_UNLOCK(devvp, 0, td);
1152		if (allerror == 0 && waitfor == MNT_WAIT) {
1153			MNT_ILOCK(mp);
1154			goto loop;
1155		}
1156	} else
1157		VI_UNLOCK(devvp);
1158	/*
1159	 * Write back modified superblock.
1160	 */
1161	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1162		allerror = error;
1163	return (allerror);
1164}
1165
1166int
1167ffs_vget(mp, ino, flags, vpp)
1168	struct mount *mp;
1169	ino_t ino;
1170	int flags;
1171	struct vnode **vpp;
1172{
1173	struct thread *td = curthread; 		/* XXX */
1174	struct fs *fs;
1175	struct inode *ip;
1176	struct ufsmount *ump;
1177	struct buf *bp;
1178	struct vnode *vp;
1179	struct cdev *dev;
1180	int error;
1181
1182	ump = VFSTOUFS(mp);
1183	dev = ump->um_dev;
1184
1185	/*
1186	 * We do not lock vnode creation, as it is believed to be too
1187	 * expensive for such a rare case as the simultaneous creation of
1188	 * a vnode for the same ino by different processes. We just let
1189	 * them race and check later to decide who wins. Let the race begin!
1190	 */
1191	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1192		return (error);
1193	if (*vpp != NULL)
1194		return (0);
1195
1196	/*
1197	 * If this MALLOC() is performed after the getnewvnode()
1198	 * it might block, leaving a vnode with a NULL v_data to be
1199	 * found by ffs_sync() if a sync happens to fire right then,
1200	 * which will cause a panic because ffs_sync() blindly
1201	 * dereferences vp->v_data (as well it should).
1202	 */
1203	ip = uma_zalloc(uma_inode, M_WAITOK);
1204
1205	/* Allocate a new vnode/inode. */
1206	error = getnewvnode("ufs", mp, &ffs_vnodeops, &vp);
1207	if (error) {
1208		*vpp = NULL;
1209		uma_zfree(uma_inode, ip);
1210		return (error);
1211	}
1212	bzero((caddr_t)ip, sizeof(struct inode));
1213	/*
1214	 * FFS supports recursive locking.
1215	 */
1216	fs = ump->um_fs;
1217	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1218	vp->v_data = ip;
1219	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1220	ip->i_vnode = vp;
1221	ip->i_ump = ump;
1222	ip->i_fs = fs;
1223	ip->i_dev = dev;
1224	ip->i_number = ino;
1225#ifdef QUOTA
1226	{
1227		int i;
1228		for (i = 0; i < MAXQUOTAS; i++)
1229			ip->i_dquot[i] = NODQUOT;
1230	}
1231#endif
1232	/*
1233	 * Exclusively lock the vnode before adding it to the hash. Note that
1234	 * we must not release or downgrade the lock (despite what the flags
1235	 * argument says) until it is fully initialized.
1236	 */
1237	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1238
1239	/*
1240	 * Atomically (in terms of ufs_hash operations) check the hash for a
1241	 * duplicate of the vnode being created and add it to the hash. If a
1242	 * duplicate vnode is found, it will be vget()ed from the hash for us.
1243	 */
1244	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1245		vput(vp);
1246		*vpp = NULL;
1247		return (error);
1248	}
1249
1250	/* If we lost the race, throw away our vnode and return the existing one. */
1251	if (*vpp != NULL) {
1252		vput(vp);
1253		return (0);
1254	}
1255
1256	/* Read in the disk contents for the inode, copy into the inode. */
1257	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1258	    (int)fs->fs_bsize, NOCRED, &bp);
1259	if (error) {
1260		/*
1261		 * The inode does not contain anything useful, so it would
1262		 * be misleading to leave it on its hash chain. With mode
1263		 * still zero, it will be unlinked and returned to the free
1264		 * list by vput().
1265		 */
1266		brelse(bp);
1267		vput(vp);
1268		*vpp = NULL;
1269		return (error);
1270	}
1271	if (ip->i_ump->um_fstype == UFS1)
1272		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1273	else
1274		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1275	ffs_load_inode(bp, ip, fs, ino);
1276	if (DOINGSOFTDEP(vp))
1277		softdep_load_inodeblock(ip);
1278	else
1279		ip->i_effnlink = ip->i_nlink;
1280	bqrelse(bp);
1281
1282	/*
1283	 * Initialize the vnode from the inode, check for aliases.
1284	 * Note that the underlying vnode may have changed.
1285	 */
1286	error = ufs_vinit(mp, &ffs_fifoops, &vp);
1287	if (error) {
1288		vput(vp);
1289		*vpp = NULL;
1290		return (error);
1291	}
1292	/*
1293	 * Finish inode initialization.
1294	 */
1295	VREF(ip->i_devvp);
1296	/*
1297	 * Set up a generation number for this inode if it does not
1298	 * already have one. This should only happen on old filesystems.
1299	 */
1300	if (ip->i_gen == 0) {
1301		ip->i_gen = arc4random() / 2 + 1;
1302		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1303			ip->i_flag |= IN_MODIFIED;
1304			DIP_SET(ip, i_gen, ip->i_gen);
1305		}
1306	}
1307	/*
1308	 * Ensure that uid and gid are correct. This is a temporary
1309	 * fix until fsck has been changed to do the update.
1310	 */
1311	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1312	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1313		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1314		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1315	}						/* XXX */
1316
1317#ifdef MAC
1318	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1319		/*
1320		 * If this vnode is already allocated, and we're running
1321		 * multi-label, attempt to perform a label association
1322		 * from the extended attributes on the inode.
1323		 */
1324		error = mac_associate_vnode_extattr(mp, vp);
1325		if (error) {
1326			/* ufs_inactive will release ip->i_devvp ref. */
1327			vput(vp);
1328			*vpp = NULL;
1329			return (error);
1330		}
1331	}
1332#endif
1333
1334	*vpp = vp;
1335	return (0);
1336}
1337
1338/*
1339 * File handle to vnode
1340 *
1341 * Have to be really careful about stale file handles:
1342 * - check that the inode number is valid
1343 * - call ffs_vget() to get the locked inode
1344 * - check for an unallocated inode (i_mode == 0)
1345 * - check that the given client host has export rights and return
1346 *   those rights via. exflagsp and credanonp
1347 *   those rights via exflagsp and credanonp
1348int
1349ffs_fhtovp(mp, fhp, vpp)
1350	struct mount *mp;
1351	struct fid *fhp;
1352	struct vnode **vpp;
1353{
1354	struct ufid *ufhp;
1355	struct fs *fs;
1356
1357	ufhp = (struct ufid *)fhp;
1358	fs = VFSTOUFS(mp)->um_fs;
1359	if (ufhp->ufid_ino < ROOTINO ||
1360	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1361		return (ESTALE);
1362	return (ufs_fhtovp(mp, ufhp, vpp));
1363}
1364
1365/*
1366 * Vnode pointer to File handle
1367 */
1368/* ARGSUSED */
1369int
1370ffs_vptofh(vp, fhp)
1371	struct vnode *vp;
1372	struct fid *fhp;
1373{
1374	struct inode *ip;
1375	struct ufid *ufhp;
1376
1377	ip = VTOI(vp);
1378	ufhp = (struct ufid *)fhp;
1379	ufhp->ufid_len = sizeof(struct ufid);
1380	ufhp->ufid_ino = ip->i_number;
1381	ufhp->ufid_gen = ip->i_gen;
1382	return (0);
1383}
1384
1385/*
1386 * Initialize the filesystem.
1387 */
1388static int
1389ffs_init(vfsp)
1390	struct vfsconf *vfsp;
1391{
1392
1393	softdep_initialize();
1394	return (ufs_init(vfsp));
1395}
1396
1397/*
1398 * Undo the work of ffs_init().
1399 */
1400static int
1401ffs_uninit(vfsp)
1402	struct vfsconf *vfsp;
1403{
1404	int ret;
1405
1406	ret = ufs_uninit(vfsp);
1407	softdep_uninitialize();
1408	return (ret);
1409}
1410
1411/*
1412 * Write a superblock and associated information back to disk.
1413 */
1414static int
1415ffs_sbupdate(mp, waitfor)
1416	struct ufsmount *mp;
1417	int waitfor;
1418{
1419	struct fs *fs = mp->um_fs;
1420	struct buf *sbbp;
1421	struct buf *bp;
1422	int blks;
1423	void *space;
1424	int i, size, error, allerror = 0;
1425
1426	if (fs->fs_ronly == 1 &&
1427	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1428	    (MNT_RDONLY | MNT_UPDATE))
1429		panic("ffs_sbupdate: write read-only filesystem");
1430	/*
1431	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1432	 */
1433	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1434	    0, 0, 0);
1435	/*
1436	 * First write back the summary information.
1437	 */
1438	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1439	space = fs->fs_csp;
1440	for (i = 0; i < blks; i += fs->fs_frag) {
1441		size = fs->fs_bsize;
1442		if (i + fs->fs_frag > blks)
1443			size = (blks - i) * fs->fs_fsize;
1444		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1445		    size, 0, 0, 0);
1446		bcopy(space, bp->b_data, (u_int)size);
1447		space = (char *)space + size;
1448		if (waitfor != MNT_WAIT)
1449			bawrite(bp);
1450		else if ((error = bwrite(bp)) != 0)
1451			allerror = error;
1452	}
1453	/*
1454	 * Now write back the superblock itself. If any errors occurred
1455	 * up to this point, then fail so that the superblock avoids
1456	 * being written out as clean.
1457	 */
1458	if (allerror) {
1459		brelse(sbbp);
1460		return (allerror);
1461	}
1462	bp = sbbp;
1463	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1464	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1465		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1466		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS1);
1467		fs->fs_sblockloc = SBLOCK_UFS1;
1468	}
1469	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1470	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1471		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1472		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS2);
1473		fs->fs_sblockloc = SBLOCK_UFS2;
1474	}
1475	fs->fs_fmod = 0;
1476	fs->fs_time = time_second;
1477	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1478	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1479	if (waitfor != MNT_WAIT)
1480		bawrite(bp);
1481	else if ((error = bwrite(bp)) != 0)
1482		allerror = error;
1483	return (allerror);
1484}
1485
1486static int
1487ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
1488	int attrnamespace, const char *attrname, struct thread *td)
1489{
1490
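	/*
	 * Hand extended attribute control requests to the UFS extattr
	 * code when it is compiled in; otherwise fall back to the
	 * generic VFS implementation.
	 */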
1491#ifdef UFS_EXTATTR
1492	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
1493	    attrname, td));
1494#else
1495	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
1496	    attrname, td));
1497#endif
1498}
1499
1500static void
1501ffs_ifree(struct ufsmount *ump, struct inode *ip)
1502{
1503
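	/*
	 * Return the on-disk dinode (UFS1 or UFS2) and the in-core inode
	 * to their UMA zones.
	 */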
1504	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1505		uma_zfree(uma_ufs1, ip->i_din1);
1506	else if (ip->i_din2 != NULL)
1507		uma_zfree(uma_ufs2, ip->i_din2);
1508	uma_zfree(uma_inode, ip);
1509}
1510
1511static void
1512ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1513{
1514
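	/*
	 * Writes are first offered to the soft updates code; a non-zero
	 * return from softdep_disk_prewrite() means the buffer should not
	 * be issued here (presumably it has been handled or deferred).
	 * Everything else goes straight to GEOM via g_vfs_strategy().
	 */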
1515#ifdef SOFTUPDATES
1516	if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(bp))
1517		return;
1518#endif
1519	g_vfs_strategy(bo, bp);
1520}
1521