ffs_vfsops.c revision 138290
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 138290 2004-12-01 23:16:38Z phk $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/mac.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/disk.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55
56#include <ufs/ufs/extattr.h>
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/ufsmount.h>
59#include <ufs/ufs/inode.h>
60#include <ufs/ufs/ufs_extern.h>
61
62#include <ufs/ffs/fs.h>
63#include <ufs/ffs/ffs_extern.h>
64
65#include <vm/vm.h>
66#include <vm/uma.h>
67#include <vm/vm_page.h>
68
69#include <geom/geom.h>
70#include <geom/geom_vfs.h>
71
72uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
73
74static int	ffs_sbupdate(struct ufsmount *, int);
75static int	ffs_reload(struct mount *, struct thread *);
76static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
77static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
78		    ufs2_daddr_t);
79static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
80static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
81static vfs_init_t ffs_init;
82static vfs_uninit_t ffs_uninit;
83static vfs_extattrctl_t ffs_extattrctl;
84static vfs_omount_t ffs_omount;
85
86static struct vfsops ufs_vfsops = {
87	.vfs_extattrctl =	ffs_extattrctl,
88	.vfs_fhtovp =		ffs_fhtovp,
89	.vfs_init =		ffs_init,
90	.vfs_omount =		ffs_omount,
91	.vfs_quotactl =		ufs_quotactl,
92	.vfs_root =		ufs_root,
93	.vfs_statfs =		ffs_statfs,
94	.vfs_sync =		ffs_sync,
95	.vfs_uninit =		ffs_uninit,
96	.vfs_unmount =		ffs_unmount,
97	.vfs_vget =		ffs_vget,
98	.vfs_vptofh =		ffs_vptofh,
99};
100
101VFS_SET(ufs_vfsops, ufs, 0);
102
103static b_strategy_t ffs_geom_strategy;
104
105static struct buf_ops ffs_ops = {
106	.bop_name =	"FFS",
107	.bop_write =	bufwrite,
108	.bop_strategy =	ffs_geom_strategy,
109};
110
111/*
112 * ffs_omount
113 *
114 * Called when mounting local physical media
115 *
116 * PARAMETERS:
117 *		mountroot
118 *			mp	mount point structure
119 *			path	path to mount point
120 *			data	<unused>
121 *			ndp	<unused>
122 *			p	process (user credentials check [statfs])
123 *
124 *		mount
125 *			mp	mount point structure
126 *			path	path to mount point
127 *			data	pointer to argument struct in user space
128 *			ndp	mount point namei() return (used for
129 *				credentials on reload), reused to look
130 *				up block device.
131 *			p	process (user credentials check)
132 *
133 * RETURNS:	0	Success
134 *		!0	error number (errno.h)
135 *
136 * LOCK STATE:
137 *
138 *		ENTRY
139 *			mount point is locked
140 *		EXIT
141 *			mount point is locked
142 *
143 * NOTES:
144 *		A NULL path can be used for a flag since the mount
145 *		system call will fail with EFAULT in copyinstr in
146 *		namei() if it is a genuine NULL from the user.
147 */
148static int
149ffs_omount(struct mount *mp, char *path, caddr_t data, struct thread *td)
150{
151	size_t size;
152	struct vnode *devvp, *rootvp;
153	struct ufs_args args;
154	struct ufsmount *ump = 0;
155	struct fs *fs;
156	int error, flags;
157	mode_t accessmode;
158	struct nameidata ndp;
159
160	if (uma_inode == NULL) {
161		uma_inode = uma_zcreate("FFS inode",
162		    sizeof(struct inode), NULL, NULL, NULL, NULL,
163		    UMA_ALIGN_PTR, 0);
164		uma_ufs1 = uma_zcreate("FFS1 dinode",
165		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
166		    UMA_ALIGN_PTR, 0);
167		uma_ufs2 = uma_zcreate("FFS2 dinode",
168		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
169		    UMA_ALIGN_PTR, 0);
170	}
171	if ((mp->mnt_flag & MNT_ROOTFS) && mp->mnt_data == NULL) {
172		if ((error = bdevvp(rootdev, &rootvp))) {
173			printf("ffs_mountroot: can't find rootvp\n");
174			return (error);
175		}
176
177		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
178			return (error);
179		return (0);
180	}
181
182	/*
183	 * Get mount options, if any.
184	 */
185	if (data != NULL) {
186		error = copyin(data, (caddr_t)&args, sizeof args);
187		if (error)
188			return (error);
189	} else {
190		memset(&args, 0, sizeof args);
191	}
192
193	/*
194	 * If updating, check whether changing from read-only to
195	 * read/write; if there is no device name, that's all we do.
196	 */
197	if (mp->mnt_flag & MNT_UPDATE) {
198		ump = VFSTOUFS(mp);
199		fs = ump->um_fs;
200		devvp = ump->um_devvp;
201		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
202			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
203				return (error);
204			/*
205			 * Flush any dirty data.
206			 */
207			if ((error = VFS_SYNC(mp, MNT_WAIT,
208			    td->td_ucred, td)) != 0) {
209				vn_finished_write(mp);
210				return (error);
211			}
212			/*
213			 * Check for and optionally get rid of files open
214			 * for writing.
215			 */
216			flags = WRITECLOSE;
217			if (mp->mnt_flag & MNT_FORCE)
218				flags |= FORCECLOSE;
219			if (mp->mnt_flag & MNT_SOFTDEP) {
220				error = softdep_flushfiles(mp, flags, td);
221			} else {
222				error = ffs_flushfiles(mp, flags, td);
223			}
224			if (error) {
225				vn_finished_write(mp);
226				return (error);
227			}
228			if (fs->fs_pendingblocks != 0 ||
229			    fs->fs_pendinginodes != 0) {
230				printf("%s: %s: blocks %jd files %d\n",
231				    fs->fs_fsmnt, "update error",
232				    (intmax_t)fs->fs_pendingblocks,
233				    fs->fs_pendinginodes);
234				fs->fs_pendingblocks = 0;
235				fs->fs_pendinginodes = 0;
236			}
237			fs->fs_ronly = 1;
238			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
239				fs->fs_clean = 1;
240			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
241				fs->fs_ronly = 0;
242				fs->fs_clean = 0;
243				vn_finished_write(mp);
244				return (error);
245			}
246			vn_finished_write(mp);
247			DROP_GIANT();
248			g_topology_lock();
249			g_access(ump->um_cp, 0, -1, 0);
250			g_topology_unlock();
251			PICKUP_GIANT();
252		}
253		if ((mp->mnt_flag & MNT_RELOAD) &&
254		    (error = ffs_reload(mp, td)) != 0)
255			return (error);
256		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
257			/*
258			 * If upgrade to read-write by non-root, then verify
259			 * that user has necessary permissions on the device.
260			 */
261			if (suser(td)) {
262				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
263				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
264				    td->td_ucred, td)) != 0) {
265					VOP_UNLOCK(devvp, 0, td);
266					return (error);
267				}
268				VOP_UNLOCK(devvp, 0, td);
269			}
270			fs->fs_flags &= ~FS_UNCLEAN;
271			if (fs->fs_clean == 0) {
272				fs->fs_flags |= FS_UNCLEAN;
273				if ((mp->mnt_flag & MNT_FORCE) ||
274				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
275				     (fs->fs_flags & FS_DOSOFTDEP))) {
276					printf("WARNING: %s was not %s\n",
277					   fs->fs_fsmnt, "properly dismounted");
278				} else {
279					printf(
280"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
281					    fs->fs_fsmnt);
282					return (EPERM);
283				}
284			}
285			DROP_GIANT();
286			g_topology_lock();
287			/*
288			 * If we're the root device, we may not have an E count
289			 * yet, get it now.
290			 */
291			if (ump->um_cp->ace == 0)
292				error = g_access(ump->um_cp, 0, 1, 1);
293			else
294				error = g_access(ump->um_cp, 0, 1, 0);
295			g_topology_unlock();
296			PICKUP_GIANT();
297			if (error)
298				return (error);
299			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
300				return (error);
301			fs->fs_ronly = 0;
302			fs->fs_clean = 0;
303			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
304				vn_finished_write(mp);
305				return (error);
306			}
307			/* check to see if we need to start softdep */
308			if ((fs->fs_flags & FS_DOSOFTDEP) &&
309			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
310				vn_finished_write(mp);
311				return (error);
312			}
313			if (fs->fs_snapinum[0] != 0)
314				ffs_snapshot_mount(mp);
315			vn_finished_write(mp);
316		}
317		/*
318		 * Soft updates is incompatible with "async",
319		 * so if we are doing softupdates stop the user
320		 * from setting the async flag in an update.
321		 * Softdep_mount() clears it in an initial mount
322		 * or ro->rw remount.
323		 */
324		if (mp->mnt_flag & MNT_SOFTDEP)
325			mp->mnt_flag &= ~MNT_ASYNC;
326		/*
327		 * If not updating name, process export requests.
328		 */
329		if (args.fspec == 0)
330			return (vfs_export(mp, &args.export));
331		/*
332		 * If this is a snapshot request, take the snapshot.
333		 */
334		if (mp->mnt_flag & MNT_SNAPSHOT)
335			return (ffs_snapshot(mp, args.fspec));
336	}
337
338	/*
339	 * Not an update, or updating the name: look up the name
340	 * and verify that it refers to a sensible disk device.
341	 */
342	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
343	if ((error = namei(&ndp)) != 0)
344		return (error);
345	NDFREE(&ndp, NDF_ONLY_PNBUF);
346	devvp = ndp.ni_vp;
347	if (!vn_isdisk(devvp, &error)) {
348		vrele(devvp);
349		return (error);
350	}
351
352	/*
353	 * If mount by non-root, then verify that user has necessary
354	 * permissions on the device.
355	 */
356	if (suser(td)) {
357		accessmode = VREAD;
358		if ((mp->mnt_flag & MNT_RDONLY) == 0)
359			accessmode |= VWRITE;
360		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
361		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
362			vput(devvp);
363			return (error);
364		}
365		VOP_UNLOCK(devvp, 0, td);
366	}
367
368	if (mp->mnt_flag & MNT_UPDATE) {
369		/*
370		 * Update only
371		 *
372		 * If it's not the same vnode, or at least the same device
373		 * then it's not correct.
374		 */
375
376		if (devvp->v_rdev != ump->um_devvp->v_rdev)
377			error = EINVAL;	/* needs translation */
378		vrele(devvp);
379		if (error)
380			return (error);
381	} else {
382		/*
383		 * New mount
384		 *
385		 * We need the name for the mount point (also used for
386		 * "last mounted on") copied in. If an error occurs,
387		 * the mount point is discarded by the upper level code.
388		 * Note that vfs_mount() populates f_mntonname for us.
389		 */
390		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
391			vrele(devvp);
392			return (error);
393		}
394	}
395	/*
396	 * Save "mounted from" device name info for mount point (NULL pad).
397	 */
398	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
399	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
400	return (0);
401}
402
403/*
404 * Reload all incore data for a filesystem (used after running fsck on
405 * the root filesystem and finding things to fix). The filesystem must
406 * be mounted read-only.
407 *
408 * Things to do to update the mount:
409 *	1) invalidate all cached meta-data.
410 *	2) re-read superblock from disk.
411 *	3) re-read summary information from disk.
412 *	4) invalidate all inactive vnodes.
413 *	5) invalidate all cached file data.
414 *	6) re-read inode data for all active vnodes.
415 */
416static int
417ffs_reload(struct mount *mp, struct thread *td)
418{
419	struct vnode *vp, *nvp, *devvp;
420	struct inode *ip;
421	void *space;
422	struct buf *bp;
423	struct fs *fs, *newfs;
424	ufs2_daddr_t sblockloc;
425	int i, blks, size, error;
426	int32_t *lp;
427
428	if ((mp->mnt_flag & MNT_RDONLY) == 0)
429		return (EINVAL);
430	/*
431	 * Step 1: invalidate all cached meta-data.
432	 */
433	devvp = VFSTOUFS(mp)->um_devvp;
434	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
435	if (vinvalbuf(devvp, 0, td->td_ucred, td, 0, 0) != 0)
436		panic("ffs_reload: dirty1");
437	vfs_object_create(devvp, td, td->td_ucred);
438	VOP_UNLOCK(devvp, 0, td);
439
440	/*
441	 * Step 2: re-read superblock from disk.
442	 */
443	fs = VFSTOUFS(mp)->um_fs;
444	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
445	    NOCRED, &bp)) != 0)
446		return (error);
447	newfs = (struct fs *)bp->b_data;
448	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
449	     newfs->fs_magic != FS_UFS2_MAGIC) ||
450	    newfs->fs_bsize > MAXBSIZE ||
451	    newfs->fs_bsize < sizeof(struct fs)) {
452			brelse(bp);
453			return (EIO);		/* XXX needs translation */
454	}
455	/*
456	 * Copy pointer fields back into superblock before copying in	XXX
457	 * new superblock. These should really be in the ufsmount.	XXX
458	 * Note that important parameters (eg fs_ncg) are unchanged.
459	 */
460	newfs->fs_csp = fs->fs_csp;
461	newfs->fs_maxcluster = fs->fs_maxcluster;
462	newfs->fs_contigdirs = fs->fs_contigdirs;
463	newfs->fs_active = fs->fs_active;
464	/* The file system is still read-only. */
465	newfs->fs_ronly = 1;
466	sblockloc = fs->fs_sblockloc;
467	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
468	brelse(bp);
469	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
470	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
471	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
472		printf("%s: reload pending error: blocks %jd files %d\n",
473		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
474		    fs->fs_pendinginodes);
475		fs->fs_pendingblocks = 0;
476		fs->fs_pendinginodes = 0;
477	}
478
479	/*
480	 * Step 3: re-read summary information from disk.
481	 */
482	blks = howmany(fs->fs_cssize, fs->fs_fsize);
483	space = fs->fs_csp;
484	for (i = 0; i < blks; i += fs->fs_frag) {
485		size = fs->fs_bsize;
486		if (i + fs->fs_frag > blks)
487			size = (blks - i) * fs->fs_fsize;
488		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
489		    NOCRED, &bp);
490		if (error)
491			return (error);
492		bcopy(bp->b_data, space, (u_int)size);
493		space = (char *)space + size;
494		brelse(bp);
495	}
496	/*
497	 * We no longer know anything about clusters per cylinder group.
498	 */
499	if (fs->fs_contigsumsize > 0) {
500		lp = fs->fs_maxcluster;
501		for (i = 0; i < fs->fs_ncg; i++)
502			*lp++ = fs->fs_contigsumsize;
503	}
504
505loop:
506	MNT_ILOCK(mp);
507	MNT_VNODE_FOREACH(vp, mp, nvp) {
508		VI_LOCK(vp);
509		if (vp->v_iflag & VI_XLOCK) {
510			VI_UNLOCK(vp);
511			continue;
512		}
513		MNT_IUNLOCK(mp);
514		/*
515		 * Step 4: invalidate all inactive vnodes.
516		 */
517		if (vp->v_usecount == 0) {
518			vgonel(vp, td);
519			goto loop;
520		}
521		/*
522		 * Step 5: invalidate all cached file data.
523		 */
524		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
525			goto loop;
526		}
527		if (vinvalbuf(vp, 0, td->td_ucred, td, 0, 0))
528			panic("ffs_reload: dirty2");
529		/*
530		 * Step 6: re-read inode data for all active vnodes.
531		 */
532		ip = VTOI(vp);
533		error =
534		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
535		    (int)fs->fs_bsize, NOCRED, &bp);
536		if (error) {
537			VOP_UNLOCK(vp, 0, td);
538			vrele(vp);
539			return (error);
540		}
541		ffs_load_inode(bp, ip, fs, ip->i_number);
542		ip->i_effnlink = ip->i_nlink;
543		brelse(bp);
544		VOP_UNLOCK(vp, 0, td);
545		vrele(vp);
546		MNT_ILOCK(mp);
547	}
548	MNT_IUNLOCK(mp);
549	return (0);
550}
551
552/*
553 * Possible superblock locations ordered from most to least likely.
554 */
555static int sblock_try[] = SBLOCKSEARCH;
556
557/*
558 * Common code for mount and mountroot
559 */
560static int
561ffs_mountfs(devvp, mp, td)
562	struct vnode *devvp;
563	struct mount *mp;
564	struct thread *td;
565{
566	struct ufsmount *ump;
567	struct buf *bp;
568	struct fs *fs;
569	struct cdev *dev;
570	void *space;
571	ufs2_daddr_t sblockloc;
572	int error, i, blks, size, ronly;
573	int32_t *lp;
574	struct ucred *cred;
575	size_t strsize;
576	struct g_consumer *cp;
577
578	dev = devvp->v_rdev;
579	cred = td ? td->td_ucred : NOCRED;
580
581	vfs_object_create(devvp, td, td->td_ucred);
582	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
583#if 0
584	/*
585	 * XXX: check filesystem permissions, they may be more strict
586	 * XXX: than what geom enforces.
587	 * XXX: But since we're root, they wouldn't matter, would they ?
588	 */
589	error = VOP_ACCESS(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td);
590	if (error) {
591		VOP_UNLOCK(devvp, 0, td);
592		return (error);
593	}
594#endif
595	DROP_GIANT();
596	g_topology_lock();
597	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
598
599	/*
600	 * If we are a root mount, drop the E flag so fsck can do its magic.
601	 * We will pick it up again when we remounte R/W.
602	 */
603	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
604		error = g_access(cp, 0, 0, -1);
605	g_topology_unlock();
606	PICKUP_GIANT();
607	VOP_UNLOCK(devvp, 0, td);
608	if (error)
609		return (error);
610	if (devvp->v_rdev->si_iosize_max != 0)
611		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
612	if (mp->mnt_iosize_max > MAXPHYS)
613		mp->mnt_iosize_max = MAXPHYS;
614
615	devvp->v_bufobj.bo_private = cp;
616	devvp->v_bufobj.bo_ops = &ffs_ops;
617
618	bp = NULL;
619	ump = NULL;
620	fs = NULL;
621	sblockloc = 0;
622	/*
623	 * Try reading the superblock in each of its possible locations.
624	 */
625	for (i = 0; sblock_try[i] != -1; i++) {
626		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
627		    cred, &bp)) != 0)
628			goto out;
629		fs = (struct fs *)bp->b_data;
630		sblockloc = sblock_try[i];
631		if ((fs->fs_magic == FS_UFS1_MAGIC ||
632		     (fs->fs_magic == FS_UFS2_MAGIC &&
633		      (fs->fs_sblockloc == sblockloc ||
634		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
635		    fs->fs_bsize <= MAXBSIZE &&
636		    fs->fs_bsize >= sizeof(struct fs))
637			break;
638		brelse(bp);
639		bp = NULL;
640	}
641	if (sblock_try[i] == -1) {
642		error = EINVAL;		/* XXX needs translation */
643		goto out;
644	}
645	fs->fs_fmod = 0;
646	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
647	fs->fs_flags &= ~FS_UNCLEAN;
648	if (fs->fs_clean == 0) {
649		fs->fs_flags |= FS_UNCLEAN;
650		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
651		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
652		     (fs->fs_flags & FS_DOSOFTDEP))) {
653			printf(
654"WARNING: %s was not properly dismounted\n",
655			    fs->fs_fsmnt);
656		} else {
657			printf(
658"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
659			    fs->fs_fsmnt);
660			error = EPERM;
661			goto out;
662		}
663		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
664		    (mp->mnt_flag & MNT_FORCE)) {
665			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
666			    (intmax_t)fs->fs_pendingblocks,
667			    fs->fs_pendinginodes);
668			fs->fs_pendingblocks = 0;
669			fs->fs_pendinginodes = 0;
670		}
671	}
672	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
673		printf("%s: mount pending error: blocks %jd files %d\n",
674		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
675		    fs->fs_pendinginodes);
676		fs->fs_pendingblocks = 0;
677		fs->fs_pendinginodes = 0;
678	}
679	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
680	ump->um_cp = cp;
681	ump->um_bo = &devvp->v_bufobj;
682	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
683	if (fs->fs_magic == FS_UFS1_MAGIC) {
684		ump->um_fstype = UFS1;
685		ump->um_balloc = ffs_balloc_ufs1;
686	} else {
687		ump->um_fstype = UFS2;
688		ump->um_balloc = ffs_balloc_ufs2;
689	}
690	ump->um_blkatoff = ffs_blkatoff;
691	ump->um_truncate = ffs_truncate;
692	ump->um_update = ffs_update;
693	ump->um_valloc = ffs_valloc;
694	ump->um_vfree = ffs_vfree;
695	ump->um_ifree = ffs_ifree;
696	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
697	if (fs->fs_sbsize < SBLOCKSIZE)
698		bp->b_flags |= B_INVAL | B_NOCACHE;
699	brelse(bp);
700	bp = NULL;
701	fs = ump->um_fs;
702	ffs_oldfscompat_read(fs, ump, sblockloc);
703	fs->fs_ronly = ronly;
704	size = fs->fs_cssize;
705	blks = howmany(size, fs->fs_fsize);
706	if (fs->fs_contigsumsize > 0)
707		size += fs->fs_ncg * sizeof(int32_t);
708	size += fs->fs_ncg * sizeof(u_int8_t);
709	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
710	fs->fs_csp = space;
711	for (i = 0; i < blks; i += fs->fs_frag) {
712		size = fs->fs_bsize;
713		if (i + fs->fs_frag > blks)
714			size = (blks - i) * fs->fs_fsize;
715		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
716		    cred, &bp)) != 0) {
717			free(fs->fs_csp, M_UFSMNT);
718			goto out;
719		}
720		bcopy(bp->b_data, space, (u_int)size);
721		space = (char *)space + size;
722		brelse(bp);
723		bp = NULL;
724	}
725	if (fs->fs_contigsumsize > 0) {
726		fs->fs_maxcluster = lp = space;
727		for (i = 0; i < fs->fs_ncg; i++)
728			*lp++ = fs->fs_contigsumsize;
729		space = lp;
730	}
731	size = fs->fs_ncg * sizeof(u_int8_t);
732	fs->fs_contigdirs = (u_int8_t *)space;
733	bzero(fs->fs_contigdirs, size);
734	fs->fs_active = NULL;
735	mp->mnt_data = (qaddr_t)ump;
736	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
737	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
738	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
739	    vfs_getvfs(&mp->mnt_stat.f_fsid))
740		vfs_getnewfsid(mp);
741	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
742	mp->mnt_flag |= MNT_LOCAL;
743	if ((fs->fs_flags & FS_MULTILABEL) != 0)
744#ifdef MAC
745		mp->mnt_flag |= MNT_MULTILABEL;
746#else
747		printf(
748"WARNING: %s: multilabel flag on fs but no MAC support\n",
749		    fs->fs_fsmnt);
750#endif
751	if ((fs->fs_flags & FS_ACLS) != 0)
752#ifdef UFS_ACL
753		mp->mnt_flag |= MNT_ACLS;
754#else
755		printf(
756"WARNING: %s: ACLs flag on fs but no ACLs support\n",
757		    fs->fs_fsmnt);
758#endif
759	ump->um_mountp = mp;
760	ump->um_dev = dev;
761	ump->um_devvp = devvp;
762	ump->um_nindir = fs->fs_nindir;
763	ump->um_bptrtodb = fs->fs_fsbtodb;
764	ump->um_seqinc = fs->fs_frag;
765	for (i = 0; i < MAXQUOTAS; i++)
766		ump->um_quotas[i] = NULLVP;
767#ifdef UFS_EXTATTR
768	ufs_extattr_uepm_init(&ump->um_extattr);
769#endif
770	/*
771	 * Set FS local "last mounted on" information (NULL pad)
772	 */
773	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
774			fs->fs_fsmnt,			/* copy area*/
775			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
776			&strsize);			/* real size*/
777	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
778
779	if( mp->mnt_flag & MNT_ROOTFS) {
780		/*
781		 * Root mount; update timestamp in mount structure.
782		 * this will be used by the common root mount code
783		 * to update the system clock.
784		 */
785		mp->mnt_time = fs->fs_time;
786	}
787
788	if (ronly == 0) {
789		if ((fs->fs_flags & FS_DOSOFTDEP) &&
790		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
791			free(fs->fs_csp, M_UFSMNT);
792			goto out;
793		}
794		if (fs->fs_snapinum[0] != 0)
795			ffs_snapshot_mount(mp);
796		fs->fs_fmod = 1;
797		fs->fs_clean = 0;
798		(void) ffs_sbupdate(ump, MNT_WAIT);
799	}
800	/*
801	 * Initialize filesystem stat information in mount struct.
802	 */
803	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
804#ifdef UFS_EXTATTR
805#ifdef UFS_EXTATTR_AUTOSTART
806	/*
807	 *
808	 * Auto-starting does the following:
809	 *	- check for /.attribute in the fs, and extattr_start if so
810	 *	- for each file in .attribute, enable that file with
811	 * 	  an attribute of the same name.
812	 * Not clear how to report errors -- probably eat them.
813	 * This would all happen while the filesystem was busy/not
814	 * available, so would effectively be "atomic".
815	 */
816	(void) ufs_extattr_autostart(mp, td);
817#endif /* !UFS_EXTATTR_AUTOSTART */
818#endif /* !UFS_EXTATTR */
819	return (0);
820out:
821	if (bp)
822		brelse(bp);
823	if (cp != NULL) {
824		DROP_GIANT();
825		g_topology_lock();
826		g_wither_geom_close(cp->geom, ENXIO);
827		g_topology_unlock();
828		PICKUP_GIANT();
829	}
830	if (ump) {
831		free(ump->um_fs, M_UFSMNT);
832		free(ump, M_UFSMNT);
833		mp->mnt_data = (qaddr_t)0;
834	}
835	return (error);
836}
837
838#include <sys/sysctl.h>
839int bigcgs = 0;
840SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
841
842/*
843 * Sanity checks for loading old filesystem superblocks.
844 * See ffs_oldfscompat_write below for unwound actions.
845 *
846 * XXX - Parts get retired eventually.
847 * Unfortunately new bits get added.
848 */
849static void
850ffs_oldfscompat_read(fs, ump, sblockloc)
851	struct fs *fs;
852	struct ufsmount *ump;
853	ufs2_daddr_t sblockloc;
854{
855	off_t maxfilesize;
856
857	/*
858	 * If not yet done, update fs_flags location and value of fs_sblockloc.
859	 */
860	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
861		fs->fs_flags = fs->fs_old_flags;
862		fs->fs_old_flags |= FS_FLAGS_UPDATED;
863		fs->fs_sblockloc = sblockloc;
864	}
865	/*
866	 * If not yet done, update UFS1 superblock with new wider fields.
867	 */
868	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
869		fs->fs_maxbsize = fs->fs_bsize;
870		fs->fs_time = fs->fs_old_time;
871		fs->fs_size = fs->fs_old_size;
872		fs->fs_dsize = fs->fs_old_dsize;
873		fs->fs_csaddr = fs->fs_old_csaddr;
874		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
875		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
876		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
877		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
878	}
879	if (fs->fs_magic == FS_UFS1_MAGIC &&
880	    fs->fs_old_inodefmt < FS_44INODEFMT) {
881		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
882		fs->fs_qbmask = ~fs->fs_bmask;
883		fs->fs_qfmask = ~fs->fs_fmask;
884	}
885	if (fs->fs_magic == FS_UFS1_MAGIC) {
886		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
887		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
888		if (fs->fs_maxfilesize > maxfilesize)
889			fs->fs_maxfilesize = maxfilesize;
890	}
891	/* Compatibility for old filesystems */
892	if (fs->fs_avgfilesize <= 0)
893		fs->fs_avgfilesize = AVFILESIZ;
894	if (fs->fs_avgfpdir <= 0)
895		fs->fs_avgfpdir = AFPDIR;
896	if (bigcgs) {
897		fs->fs_save_cgsize = fs->fs_cgsize;
898		fs->fs_cgsize = fs->fs_bsize;
899	}
900}
901
902/*
903 * Unwinding superblock updates for old filesystems.
904 * See ffs_oldfscompat_read above for details.
905 *
906 * XXX - Parts get retired eventually.
907 * Unfortunately new bits get added.
908 */
909static void
910ffs_oldfscompat_write(fs, ump)
911	struct fs *fs;
912	struct ufsmount *ump;
913{
914
915	/*
916	 * Copy back UFS2 updated fields that UFS1 inspects.
917	 */
918	if (fs->fs_magic == FS_UFS1_MAGIC) {
919		fs->fs_old_time = fs->fs_time;
920		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
921		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
922		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
923		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
924		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
925	}
926	if (bigcgs) {
927		fs->fs_cgsize = fs->fs_save_cgsize;
928		fs->fs_save_cgsize = 0;
929	}
930}
931
932/*
933 * unmount system call
934 */
935int
936ffs_unmount(mp, mntflags, td)
937	struct mount *mp;
938	int mntflags;
939	struct thread *td;
940{
941	struct ufsmount *ump = VFSTOUFS(mp);
942	struct fs *fs;
943	int error, flags;
944
945	flags = 0;
946	if (mntflags & MNT_FORCE) {
947		flags |= FORCECLOSE;
948	}
949#ifdef UFS_EXTATTR
950	if ((error = ufs_extattr_stop(mp, td))) {
951		if (error != EOPNOTSUPP)
952			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
953			    error);
954	} else {
955		ufs_extattr_uepm_destroy(&ump->um_extattr);
956	}
957#endif
958	if (mp->mnt_flag & MNT_SOFTDEP) {
959		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
960			return (error);
961	} else {
962		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
963			return (error);
964	}
965	fs = ump->um_fs;
966	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
967		printf("%s: unmount pending error: blocks %jd files %d\n",
968		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
969		    fs->fs_pendinginodes);
970		fs->fs_pendingblocks = 0;
971		fs->fs_pendinginodes = 0;
972	}
973	if (fs->fs_ronly == 0) {
974		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
975		error = ffs_sbupdate(ump, MNT_WAIT);
976		if (error) {
977			fs->fs_clean = 0;
978			return (error);
979		}
980	}
981	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
982	DROP_GIANT();
983	g_topology_lock();
984	g_wither_geom_close(ump->um_cp->geom, ENXIO);
985	g_topology_unlock();
986	PICKUP_GIANT();
987	vrele(ump->um_devvp);
988	free(fs->fs_csp, M_UFSMNT);
989	free(fs, M_UFSMNT);
990	free(ump, M_UFSMNT);
991	mp->mnt_data = (qaddr_t)0;
992	mp->mnt_flag &= ~MNT_LOCAL;
993	return (error);
994}
995
996/*
997 * Flush out all the files in a filesystem.
998 */
999int
1000ffs_flushfiles(mp, flags, td)
1001	struct mount *mp;
1002	int flags;
1003	struct thread *td;
1004{
1005	struct ufsmount *ump;
1006	int error;
1007
1008	ump = VFSTOUFS(mp);
1009#ifdef QUOTA
1010	if (mp->mnt_flag & MNT_QUOTA) {
1011		int i;
1012		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1013		if (error)
1014			return (error);
1015		for (i = 0; i < MAXQUOTAS; i++) {
1016			if (ump->um_quotas[i] == NULLVP)
1017				continue;
1018			quotaoff(td, mp, i);
1019		}
1020		/*
1021		 * Here we fall through to vflush again to ensure
1022		 * that we have gotten rid of all the system vnodes.
1023		 */
1024	}
1025#endif
1026	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1027	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1028		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1029			return (error);
1030		ffs_snapshot_unmount(mp);
1031		/*
1032		 * Here we fall through to vflush again to ensure
1033		 * that we have gotten rid of all the system vnodes.
1034		 */
1035	}
1036        /*
1037	 * Flush all the files.
1038	 */
1039	if ((error = vflush(mp, 0, flags, td)) != 0)
1040		return (error);
1041	/*
1042	 * Flush filesystem metadata.
1043	 */
1044	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1045	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1046	VOP_UNLOCK(ump->um_devvp, 0, td);
1047	return (error);
1048}
1049
1050/*
1051 * Get filesystem statistics.
1052 */
1053int
1054ffs_statfs(mp, sbp, td)
1055	struct mount *mp;
1056	struct statfs *sbp;
1057	struct thread *td;
1058{
1059	struct ufsmount *ump;
1060	struct fs *fs;
1061
1062	ump = VFSTOUFS(mp);
1063	fs = ump->um_fs;
1064	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1065		panic("ffs_statfs");
1066	sbp->f_version = STATFS_VERSION;
1067	sbp->f_bsize = fs->fs_fsize;
1068	sbp->f_iosize = fs->fs_bsize;
1069	sbp->f_blocks = fs->fs_dsize;
1070	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1071	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1072	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1073	    dbtofsb(fs, fs->fs_pendingblocks);
1074	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1075	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1076	sbp->f_namemax = NAME_MAX;
1077	if (sbp != &mp->mnt_stat) {
1078		sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1079		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1080		sbp->f_syncwrites = mp->mnt_stat.f_syncwrites;
1081		sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites;
1082		sbp->f_syncreads = mp->mnt_stat.f_syncreads;
1083		sbp->f_asyncreads = mp->mnt_stat.f_asyncreads;
1084		sbp->f_owner = mp->mnt_stat.f_owner;
1085		sbp->f_fsid = mp->mnt_stat.f_fsid;
1086		bcopy((caddr_t)mp->mnt_stat.f_fstypename,
1087			(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1088		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1089			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1090		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1091			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1092	}
1093	return (0);
1094}
1095
1096/*
1097 * Go through the disk queues to initiate sandbagged IO;
1098 * go through the inodes to write those that have been modified;
1099 * initiate the writing of the super block if it has been modified.
1100 *
1101 * Note: we are always called with the filesystem marked `MPBUSY'.
1102 */
1103int
1104ffs_sync(mp, waitfor, cred, td)
1105	struct mount *mp;
1106	int waitfor;
1107	struct ucred *cred;
1108	struct thread *td;
1109{
1110	struct vnode *nvp, *vp, *devvp;
1111	struct inode *ip;
1112	struct ufsmount *ump = VFSTOUFS(mp);
1113	struct fs *fs;
1114	int error, count, wait, lockreq, allerror = 0;
1115	struct bufobj *bo;
1116
1117	fs = ump->um_fs;
1118	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1119		printf("fs = %s\n", fs->fs_fsmnt);
1120		panic("ffs_sync: rofs mod");
1121	}
1122	/*
1123	 * Write back each (modified) inode.
1124	 */
1125	wait = 0;
1126	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1127	if (waitfor == MNT_WAIT) {
1128		wait = 1;
1129		lockreq = LK_EXCLUSIVE;
1130	}
1131	lockreq |= LK_INTERLOCK;
1132	MNT_ILOCK(mp);
1133loop:
1134	MNT_VNODE_FOREACH(vp, mp, nvp) {
1135		/*
1136		 * Depend on the mntvnode_slock to keep things stable enough
1137		 * for a quick test.  Since there might be hundreds of
1138		 * thousands of vnodes, we cannot afford even a subroutine
1139		 * call unless there's a good chance that we have work to do.
1140		 */
1141		VI_LOCK(vp);
1142		if (vp->v_iflag & VI_XLOCK) {
1143			VI_UNLOCK(vp);
1144			continue;
1145		}
1146		ip = VTOI(vp);
1147		if (vp->v_type == VNON || ((ip->i_flag &
1148		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1149		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1150			VI_UNLOCK(vp);
1151			continue;
1152		}
1153		MNT_IUNLOCK(mp);
1154		if ((error = vget(vp, lockreq, td)) != 0) {
1155			MNT_ILOCK(mp);
1156			if (error == ENOENT)
1157				goto loop;
1158			continue;
1159		}
1160		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1161			allerror = error;
1162		VOP_UNLOCK(vp, 0, td);
1163		vrele(vp);
1164		MNT_ILOCK(mp);
1165	}
1166	MNT_IUNLOCK(mp);
1167	/*
1168	 * Force stale filesystem control information to be flushed.
1169	 */
1170	if (waitfor == MNT_WAIT) {
1171		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1172			allerror = error;
1173		/* Flushed work items may create new vnodes to clean */
1174		if (allerror == 0 && count) {
1175			MNT_ILOCK(mp);
1176			goto loop;
1177		}
1178	}
1179#ifdef QUOTA
1180	qsync(mp);
1181#endif
1182	devvp = ump->um_devvp;
1183	VI_LOCK(devvp);
1184	bo = &devvp->v_bufobj;
1185	if (waitfor != MNT_LAZY &&
1186	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1187		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1188		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1189			allerror = error;
1190		VOP_UNLOCK(devvp, 0, td);
1191		if (allerror == 0 && waitfor == MNT_WAIT) {
1192			MNT_ILOCK(mp);
1193			goto loop;
1194		}
1195	} else
1196		VI_UNLOCK(devvp);
1197	/*
1198	 * Write back modified superblock.
1199	 */
1200	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1201		allerror = error;
1202	return (allerror);
1203}
1204
1205int
1206ffs_vget(mp, ino, flags, vpp)
1207	struct mount *mp;
1208	ino_t ino;
1209	int flags;
1210	struct vnode **vpp;
1211{
1212	struct thread *td = curthread; 		/* XXX */
1213	struct fs *fs;
1214	struct inode *ip;
1215	struct ufsmount *ump;
1216	struct buf *bp;
1217	struct vnode *vp;
1218	struct cdev *dev;
1219	int error;
1220
1221	ump = VFSTOUFS(mp);
1222	dev = ump->um_dev;
1223
1224	/*
1225	 * We do not lock vnode creation as it is believed to be too
1226	 * expensive for such rare case as simultaneous creation of vnode
1227	 * for same ino by different processes. We just allow them to race
1228	 * and check later to decide who wins. Let the race begin!
1229	 */
1230	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1231		return (error);
1232	if (*vpp != NULL)
1233		return (0);
1234
1235	/*
1236	 * If this MALLOC() is performed after the getnewvnode()
1237	 * it might block, leaving a vnode with a NULL v_data to be
1238	 * found by ffs_sync() if a sync happens to fire right then,
1239	 * which will cause a panic because ffs_sync() blindly
1240	 * dereferences vp->v_data (as well it should).
1241	 */
1242	ip = uma_zalloc(uma_inode, M_WAITOK);
1243
1244	/* Allocate a new vnode/inode. */
1245	error = getnewvnode("ufs", mp, &ffs_vnodeops, &vp);
1246	if (error) {
1247		*vpp = NULL;
1248		uma_zfree(uma_inode, ip);
1249		return (error);
1250	}
1251	bzero((caddr_t)ip, sizeof(struct inode));
1252	/*
1253	 * FFS supports recursive locking.
1254	 */
1255	fs = ump->um_fs;
1256	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1257	vp->v_data = ip;
1258	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1259	ip->i_vnode = vp;
1260	ip->i_ump = ump;
1261	ip->i_fs = fs;
1262	ip->i_dev = dev;
1263	ip->i_number = ino;
1264#ifdef QUOTA
1265	{
1266		int i;
1267		for (i = 0; i < MAXQUOTAS; i++)
1268			ip->i_dquot[i] = NODQUOT;
1269	}
1270#endif
1271	/*
1272	 * Exclusively lock the vnode before adding to hash. Note, that we
1273	 * must not release nor downgrade the lock (despite flags argument
1274	 * says) till it is fully initialized.
1275	 */
1276	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1277
1278	/*
1279	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1280	 * duplicate of vnode being created and add it to the hash. If a
1281	 * duplicate vnode was found, it will be vget()ed from hash for us.
1282	 */
1283	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1284		vput(vp);
1285		*vpp = NULL;
1286		return (error);
1287	}
1288
1289	/* We lost the race, then throw away our vnode and return existing */
1290	if (*vpp != NULL) {
1291		vput(vp);
1292		return (0);
1293	}
1294
1295	/* Read in the disk contents for the inode, copy into the inode. */
1296	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1297	    (int)fs->fs_bsize, NOCRED, &bp);
1298	if (error) {
1299		/*
1300		 * The inode does not contain anything useful, so it would
1301		 * be misleading to leave it on its hash chain. With mode
1302		 * still zero, it will be unlinked and returned to the free
1303		 * list by vput().
1304		 */
1305		brelse(bp);
1306		vput(vp);
1307		*vpp = NULL;
1308		return (error);
1309	}
1310	if (ip->i_ump->um_fstype == UFS1)
1311		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1312	else
1313		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1314	ffs_load_inode(bp, ip, fs, ino);
1315	if (DOINGSOFTDEP(vp))
1316		softdep_load_inodeblock(ip);
1317	else
1318		ip->i_effnlink = ip->i_nlink;
1319	bqrelse(bp);
1320
1321	/*
1322	 * Initialize the vnode from the inode, check for aliases.
1323	 * Note that the underlying vnode may have changed.
1324	 */
1325	error = ufs_vinit(mp, &ffs_fifoops, &vp);
1326	if (error) {
1327		vput(vp);
1328		*vpp = NULL;
1329		return (error);
1330	}
1331	/*
1332	 * Finish inode initialization.
1333	 */
1334	VREF(ip->i_devvp);
1335	/*
1336	 * Set up a generation number for this inode if it does not
1337	 * already have one. This should only happen on old filesystems.
1338	 */
1339	if (ip->i_gen == 0) {
1340		ip->i_gen = arc4random() / 2 + 1;
1341		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1342			ip->i_flag |= IN_MODIFIED;
1343			DIP_SET(ip, i_gen, ip->i_gen);
1344		}
1345	}
1346	/*
1347	 * Ensure that uid and gid are correct. This is a temporary
1348	 * fix until fsck has been changed to do the update.
1349	 */
1350	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1351	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1352		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1353		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1354	}						/* XXX */
1355
1356#ifdef MAC
1357	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1358		/*
1359		 * If this vnode is already allocated, and we're running
1360		 * multi-label, attempt to perform a label association
1361		 * from the extended attributes on the inode.
1362		 */
1363		error = mac_associate_vnode_extattr(mp, vp);
1364		if (error) {
1365			/* ufs_inactive will release ip->i_devvp ref. */
1366			vput(vp);
1367			*vpp = NULL;
1368			return (error);
1369		}
1370	}
1371#endif
1372
1373	*vpp = vp;
1374	return (0);
1375}
1376
1377/*
1378 * File handle to vnode
1379 *
1380 * Have to be really careful about stale file handles:
1381 * - check that the inode number is valid
1382 * - call ffs_vget() to get the locked inode
1383 * - check for an unallocated inode (i_mode == 0)
1384 * - check that the given client host has export rights and return
1385 *   those rights via. exflagsp and credanonp
1386 */
1387int
1388ffs_fhtovp(mp, fhp, vpp)
1389	struct mount *mp;
1390	struct fid *fhp;
1391	struct vnode **vpp;
1392{
1393	struct ufid *ufhp;
1394	struct fs *fs;
1395
1396	ufhp = (struct ufid *)fhp;
1397	fs = VFSTOUFS(mp)->um_fs;
1398	if (ufhp->ufid_ino < ROOTINO ||
1399	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1400		return (ESTALE);
1401	return (ufs_fhtovp(mp, ufhp, vpp));
1402}
1403
1404/*
1405 * Vnode pointer to File handle
1406 */
1407/* ARGSUSED */
1408int
1409ffs_vptofh(vp, fhp)
1410	struct vnode *vp;
1411	struct fid *fhp;
1412{
1413	struct inode *ip;
1414	struct ufid *ufhp;
1415
1416	ip = VTOI(vp);
1417	ufhp = (struct ufid *)fhp;
1418	ufhp->ufid_len = sizeof(struct ufid);
1419	ufhp->ufid_ino = ip->i_number;
1420	ufhp->ufid_gen = ip->i_gen;
1421	return (0);
1422}
1423
/*
 * Initialize the filesystem.
 *
 * Calls softdep_initialize() and then ufs_init(), whose result is
 * returned.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1435
/*
 * Undo the work of ffs_init().
 *
 * The teardown runs in the reverse order of ffs_init(): ufs_uninit()
 * first, then softdep_uninitialize().  The result of ufs_uninit() is
 * returned.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1449
1450/*
1451 * Write a superblock and associated information back to disk.
1452 */
1453static int
1454ffs_sbupdate(mp, waitfor)
1455	struct ufsmount *mp;
1456	int waitfor;
1457{
1458	struct fs *fs = mp->um_fs;
1459	struct buf *bp;
1460	int blks;
1461	void *space;
1462	int i, size, error, allerror = 0;
1463
1464	if (fs->fs_ronly == 1 &&
1465	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1466	    (MNT_RDONLY | MNT_UPDATE))
1467		panic("ffs_sbupdate: write read-only filesystem");
1468	/*
1469	 * First write back the summary information.
1470	 */
1471	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1472	space = fs->fs_csp;
1473	for (i = 0; i < blks; i += fs->fs_frag) {
1474		size = fs->fs_bsize;
1475		if (i + fs->fs_frag > blks)
1476			size = (blks - i) * fs->fs_fsize;
1477		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1478		    size, 0, 0, 0);
1479		bcopy(space, bp->b_data, (u_int)size);
1480		space = (char *)space + size;
1481		if (waitfor != MNT_WAIT)
1482			bawrite(bp);
1483		else if ((error = bwrite(bp)) != 0)
1484			allerror = error;
1485	}
1486	/*
1487	 * Now write back the superblock itself. If any errors occurred
1488	 * up to this point, then fail so that the superblock avoids
1489	 * being written out as clean.
1490	 */
1491	if (allerror)
1492		return (allerror);
1493	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1494	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1495		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1496		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1497		fs->fs_sblockloc = SBLOCK_UFS1;
1498	}
1499	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1500	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1501		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1502		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1503		fs->fs_sblockloc = SBLOCK_UFS2;
1504	}
1505	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1506	    0, 0, 0);
1507	fs->fs_fmod = 0;
1508	fs->fs_time = time_second;
1509	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1510	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1511	if (waitfor != MNT_WAIT)
1512		bawrite(bp);
1513	else if ((error = bwrite(bp)) != 0)
1514		allerror = error;
1515	return (allerror);
1516}
1517
/*
 * Extended attribute control.
 *
 * Hands the request to ufs_extattrctl() when the kernel is built with
 * UFS_EXTATTR, and to vfs_stdextattrctl() otherwise.  The result of
 * whichever routine was called is returned unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1531
1532static void
1533ffs_ifree(struct ufsmount *ump, struct inode *ip)
1534{
1535
1536	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1537		uma_zfree(uma_ufs1, ip->i_din1);
1538	else if (ip->i_din2 != NULL)
1539		uma_zfree(uma_ufs2, ip->i_din2);
1540	uma_zfree(uma_inode, ip);
1541}
1542
/*
 * Strategy routine: hand bp to g_vfs_strategy().
 *
 * With SOFTUPDATES, softdep_disk_prewrite() is called first for write
 * requests; when it returns non-zero the buffer is not passed on.
 */
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{

#ifdef SOFTUPDATES
	if (bp->b_iocmd == BIO_WRITE) {
		if (softdep_disk_prewrite(bp))
			return;
	}
#endif
	g_vfs_strategy(bo, bp);
}
1553