ffs_vfsops.c revision 138517
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 138517 2004-12-07 15:09:30Z phk $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38#include "opt_ffs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/mac.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/disk.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55
56#include <ufs/ufs/extattr.h>
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/ufsmount.h>
59#include <ufs/ufs/inode.h>
60#include <ufs/ufs/ufs_extern.h>
61
62#include <ufs/ffs/fs.h>
63#include <ufs/ffs/ffs_extern.h>
64
65#include <vm/vm.h>
66#include <vm/uma.h>
67#include <vm/vm_page.h>
68
69#include <geom/geom.h>
70#include <geom/geom_vfs.h>
71
72uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
73
74static int	ffs_sbupdate(struct ufsmount *, int);
75static int	ffs_reload(struct mount *, struct thread *);
76static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
77static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
78		    ufs2_daddr_t);
79static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
80static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
81static vfs_init_t ffs_init;
82static vfs_uninit_t ffs_uninit;
83static vfs_extattrctl_t ffs_extattrctl;
84static vfs_cmount_t ffs_cmount;
85static vfs_mount_t ffs_mount;
86
87static struct vfsops ufs_vfsops = {
88	.vfs_extattrctl =	ffs_extattrctl,
89	.vfs_fhtovp =		ffs_fhtovp,
90	.vfs_init =		ffs_init,
91	.vfs_mount =		ffs_mount,
92	.vfs_cmount =		ffs_cmount,
93	.vfs_quotactl =		ufs_quotactl,
94	.vfs_root =		ufs_root,
95	.vfs_statfs =		ffs_statfs,
96	.vfs_sync =		ffs_sync,
97	.vfs_uninit =		ffs_uninit,
98	.vfs_unmount =		ffs_unmount,
99	.vfs_vget =		ffs_vget,
100	.vfs_vptofh =		ffs_vptofh,
101};
102
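/*
 * Register these operations as the "ufs" filesystem type; FFS supplies
 * the VFS entry points for both UFS1 and UFS2 volumes.
 */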
103VFS_SET(ufs_vfsops, ufs, 0);
104
105static b_strategy_t ffs_geom_strategy;
106
107static struct buf_ops ffs_ops = {
108	.bop_name =	"FFS",
109	.bop_write =	bufwrite,
110	.bop_strategy =	ffs_geom_strategy,
111};
112
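/*
 * Filesystem-specific nmount(2) options recognized by ffs_mount(); see
 * the vfs_filteropt() check below.
 */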
113static const char *ffs_opts[] = { "from", "export", NULL };
114
115static int
116ffs_mount(struct mount *mp, struct thread *td)
117{
118	struct vnode *devvp;
119	struct ufsmount *ump = NULL;
120	struct fs *fs;
121	int error, flags;
122	mode_t accessmode;
123	struct nameidata ndp;
124	struct export_args export;
125	char *fspec;
126
127	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
128		return (EINVAL);
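	/*
	 * On the first FFS mount, create the UMA zones used for in-core
	 * inodes and UFS1/UFS2 dinodes; they are shared by all FFS mounts.
	 */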
129	if (uma_inode == NULL) {
130		uma_inode = uma_zcreate("FFS inode",
131		    sizeof(struct inode), NULL, NULL, NULL, NULL,
132		    UMA_ALIGN_PTR, 0);
133		uma_ufs1 = uma_zcreate("FFS1 dinode",
134		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
135		    UMA_ALIGN_PTR, 0);
136		uma_ufs2 = uma_zcreate("FFS2 dinode",
137		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
138		    UMA_ALIGN_PTR, 0);
139	}
140
141	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
142	if (error)
143		return (error);
144
145	/*
146	 * If updating, check whether changing from read-only to
147	 * read/write; if there is no device name, that's all we do.
148	 */
149	if (mp->mnt_flag & MNT_UPDATE) {
150		ump = VFSTOUFS(mp);
151		fs = ump->um_fs;
152		devvp = ump->um_devvp;
153		if (fs->fs_ronly == 0 &&
154		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
155			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
156				return (error);
157			/*
158			 * Flush any dirty data.
159			 */
160			if ((error = VFS_SYNC(mp, MNT_WAIT,
161			    td->td_ucred, td)) != 0) {
162				vn_finished_write(mp);
163				return (error);
164			}
165			/*
166			 * Check for and optionally get rid of files open
167			 * for writing.
168			 */
169			flags = WRITECLOSE;
170			if (mp->mnt_flag & MNT_FORCE)
171				flags |= FORCECLOSE;
172			if (mp->mnt_flag & MNT_SOFTDEP) {
173				error = softdep_flushfiles(mp, flags, td);
174			} else {
175				error = ffs_flushfiles(mp, flags, td);
176			}
177			if (error) {
178				vn_finished_write(mp);
179				return (error);
180			}
181			if (fs->fs_pendingblocks != 0 ||
182			    fs->fs_pendinginodes != 0) {
183				printf("%s: %s: blocks %jd files %d\n",
184				    fs->fs_fsmnt, "update error",
185				    (intmax_t)fs->fs_pendingblocks,
186				    fs->fs_pendinginodes);
187				fs->fs_pendingblocks = 0;
188				fs->fs_pendinginodes = 0;
189			}
190			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
191				fs->fs_clean = 1;
192			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
193				fs->fs_ronly = 0;
194				fs->fs_clean = 0;
195				vn_finished_write(mp);
196				return (error);
197			}
198			vn_finished_write(mp);
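			/*
			 * Drop our write access count on the GEOM consumer
			 * now that the filesystem is read-only.
			 */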
199			DROP_GIANT();
200			g_topology_lock();
201			g_access(ump->um_cp, 0, -1, 0);
202			g_topology_unlock();
203			PICKUP_GIANT();
204			fs->fs_ronly = 1;
205			mp->mnt_flag |= MNT_RDONLY;
206		}
207		if ((mp->mnt_flag & MNT_RELOAD) &&
208		    (error = ffs_reload(mp, td)) != 0)
209			return (error);
210		if (fs->fs_ronly &&
211		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
212			/*
213			 * If upgrading to read-write as non-root, verify that
214			 * the user has the necessary permissions on the device.
215			 */
216			if (suser(td)) {
217				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
218				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
219				    td->td_ucred, td)) != 0) {
220					VOP_UNLOCK(devvp, 0, td);
221					return (error);
222				}
223				VOP_UNLOCK(devvp, 0, td);
224			}
225			fs->fs_flags &= ~FS_UNCLEAN;
226			if (fs->fs_clean == 0) {
227				fs->fs_flags |= FS_UNCLEAN;
228				if ((mp->mnt_flag & MNT_FORCE) ||
229				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
230				     (fs->fs_flags & FS_DOSOFTDEP))) {
231					printf("WARNING: %s was not %s\n",
232					   fs->fs_fsmnt, "properly dismounted");
233				} else {
234					printf(
235"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
236					    fs->fs_fsmnt);
237					return (EPERM);
238				}
239			}
240			DROP_GIANT();
241			g_topology_lock();
242			/*
243			 * If we're the root device, we may not have an E count
244			 * yet; get it now.
245			 */
246			if (ump->um_cp->ace == 0)
247				error = g_access(ump->um_cp, 0, 1, 1);
248			else
249				error = g_access(ump->um_cp, 0, 1, 0);
250			g_topology_unlock();
251			PICKUP_GIANT();
252			if (error)
253				return (error);
254			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
255				return (error);
256			fs->fs_ronly = 0;
257			mp->mnt_flag &= ~MNT_RDONLY;
258			fs->fs_clean = 0;
259			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
260				vn_finished_write(mp);
261				return (error);
262			}
263			/* check to see if we need to start softdep */
264			if ((fs->fs_flags & FS_DOSOFTDEP) &&
265			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
266				vn_finished_write(mp);
267				return (error);
268			}
269			if (fs->fs_snapinum[0] != 0)
270				ffs_snapshot_mount(mp);
271			vn_finished_write(mp);
272		}
273		/*
274		 * Soft updates is incompatible with "async",
275		 * so if we are doing soft updates, stop the user
276		 * from setting the async flag in an update.
277		 * Softdep_mount() clears it in an initial mount
278		 * or ro->rw remount.
279		 */
280		if (mp->mnt_flag & MNT_SOFTDEP)
281			mp->mnt_flag &= ~MNT_ASYNC;
282		/*
283		 * If not updating name, process export requests.
284		 */
285		error = vfs_copyopt(mp->mnt_optnew, "export", &export, sizeof export);
286		if (error == 0 && export.ex_flags != 0)
287			return (vfs_export(mp, &export));
288		/*
289		 * If this is a snapshot request, take the snapshot.
290		 */
291		if (mp->mnt_flag & MNT_SNAPSHOT)
292			return (ffs_snapshot(mp, fspec));
293	}
294
295	/*
296	 * Not an update, or updating the name: look up the name
297	 * and verify that it refers to a sensible disk device.
298	 */
299	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
300	if ((error = namei(&ndp)) != 0)
301		return (error);
302	NDFREE(&ndp, NDF_ONLY_PNBUF);
303	devvp = ndp.ni_vp;
304	if (!vn_isdisk(devvp, &error)) {
305		vrele(devvp);
306		return (error);
307	}
308
309	/*
310	 * If mount by non-root, then verify that user has necessary
311	 * permissions on the device.
312	 */
313	if (suser(td)) {
314		accessmode = VREAD;
315		if ((mp->mnt_flag & MNT_RDONLY) == 0)
316			accessmode |= VWRITE;
317		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
318		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
319			vput(devvp);
320			return (error);
321		}
322		VOP_UNLOCK(devvp, 0, td);
323	}
324
325	if (mp->mnt_flag & MNT_UPDATE) {
326		/*
327		 * Update only
328		 *
329		 * If it's not the same vnode, or at least the same device,
330		 * then it's not correct.
331		 */
332
333		if (devvp->v_rdev != ump->um_devvp->v_rdev)
334			error = EINVAL;	/* needs translation */
335		vrele(devvp);
336		if (error)
337			return (error);
338	} else {
339		/*
340		 * New mount
341		 *
342		 * We need the name for the mount point (also used for
343		 * "last mounted on") copied in. If an error occurs,
344		 * the mount point is discarded by the upper level code.
345		 * Note that vfs_mount() populates f_mntonname for us.
346		 */
347		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
348			vrele(devvp);
349			return (error);
350		}
351	}
352	vfs_mountedfrom(mp, fspec);
353	return (0);
354}
355
356/*
357 * Compatibility with old mount system call.
358 */
359
360static int
361ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
362{
363	struct ufs_args args;
364	int error;
365
366	if (data == NULL)
367		return (EINVAL);
368	error = copyin(data, &args, sizeof args);
369	if (error)
370		return (error);
371
372	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
373	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
374	error = kernel_mount(ma, flags);
375
376	return (error);
377}
378
379/*
380 * Reload all incore data for a filesystem (used after running fsck on
381 * the root filesystem and finding things to fix). The filesystem must
382 * be mounted read-only.
383 *
384 * Things to do to update the mount:
385 *	1) invalidate all cached meta-data.
386 *	2) re-read superblock from disk.
387 *	3) re-read summary information from disk.
388 *	4) invalidate all inactive vnodes.
389 *	5) invalidate all cached file data.
390 *	6) re-read inode data for all active vnodes.
391 */
392static int
393ffs_reload(struct mount *mp, struct thread *td)
394{
395	struct vnode *vp, *nvp, *devvp;
396	struct inode *ip;
397	void *space;
398	struct buf *bp;
399	struct fs *fs, *newfs;
400	ufs2_daddr_t sblockloc;
401	int i, blks, size, error;
402	int32_t *lp;
403
404	if ((mp->mnt_flag & MNT_RDONLY) == 0)
405		return (EINVAL);
406	/*
407	 * Step 1: invalidate all cached meta-data.
408	 */
409	devvp = VFSTOUFS(mp)->um_devvp;
410	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
411	if (vinvalbuf(devvp, 0, td->td_ucred, td, 0, 0) != 0)
412		panic("ffs_reload: dirty1");
413	vfs_object_create(devvp, td, td->td_ucred);
414	VOP_UNLOCK(devvp, 0, td);
415
416	/*
417	 * Step 2: re-read superblock from disk.
418	 */
419	fs = VFSTOUFS(mp)->um_fs;
420	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
421	    NOCRED, &bp)) != 0)
422		return (error);
423	newfs = (struct fs *)bp->b_data;
424	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
425	     newfs->fs_magic != FS_UFS2_MAGIC) ||
426	    newfs->fs_bsize > MAXBSIZE ||
427	    newfs->fs_bsize < sizeof(struct fs)) {
428			brelse(bp);
429			return (EIO);		/* XXX needs translation */
430	}
431	/*
432	 * Copy pointer fields back into superblock before copying in	XXX
433	 * new superblock. These should really be in the ufsmount.	XXX
434	 * Note that important parameters (e.g. fs_ncg) are unchanged.
435	 */
436	newfs->fs_csp = fs->fs_csp;
437	newfs->fs_maxcluster = fs->fs_maxcluster;
438	newfs->fs_contigdirs = fs->fs_contigdirs;
439	newfs->fs_active = fs->fs_active;
440	/* The file system is still read-only. */
441	newfs->fs_ronly = 1;
442	sblockloc = fs->fs_sblockloc;
443	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
444	brelse(bp);
445	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
446	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
447	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
448		printf("%s: reload pending error: blocks %jd files %d\n",
449		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
450		    fs->fs_pendinginodes);
451		fs->fs_pendingblocks = 0;
452		fs->fs_pendinginodes = 0;
453	}
454
455	/*
456	 * Step 3: re-read summary information from disk.
457	 */
458	blks = howmany(fs->fs_cssize, fs->fs_fsize);
459	space = fs->fs_csp;
460	for (i = 0; i < blks; i += fs->fs_frag) {
461		size = fs->fs_bsize;
462		if (i + fs->fs_frag > blks)
463			size = (blks - i) * fs->fs_fsize;
464		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
465		    NOCRED, &bp);
466		if (error)
467			return (error);
468		bcopy(bp->b_data, space, (u_int)size);
469		space = (char *)space + size;
470		brelse(bp);
471	}
472	/*
473	 * We no longer know anything about clusters per cylinder group.
474	 */
475	if (fs->fs_contigsumsize > 0) {
476		lp = fs->fs_maxcluster;
477		for (i = 0; i < fs->fs_ncg; i++)
478			*lp++ = fs->fs_contigsumsize;
479	}
480
481loop:
482	MNT_ILOCK(mp);
483	MNT_VNODE_FOREACH(vp, mp, nvp) {
484		VI_LOCK(vp);
485		if (vp->v_iflag & VI_XLOCK) {
486			VI_UNLOCK(vp);
487			continue;
488		}
489		MNT_IUNLOCK(mp);
490		/*
491		 * Step 4: invalidate all inactive vnodes.
492		 */
493		if (vp->v_usecount == 0) {
494			vgonel(vp, td);
495			goto loop;
496		}
497		/*
498		 * Step 5: invalidate all cached file data.
499		 */
500		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
501			goto loop;
502		}
503		if (vinvalbuf(vp, 0, td->td_ucred, td, 0, 0))
504			panic("ffs_reload: dirty2");
505		/*
506		 * Step 6: re-read inode data for all active vnodes.
507		 */
508		ip = VTOI(vp);
509		error =
510		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
511		    (int)fs->fs_bsize, NOCRED, &bp);
512		if (error) {
513			VOP_UNLOCK(vp, 0, td);
514			vrele(vp);
515			return (error);
516		}
517		ffs_load_inode(bp, ip, fs, ip->i_number);
518		ip->i_effnlink = ip->i_nlink;
519		brelse(bp);
520		VOP_UNLOCK(vp, 0, td);
521		vrele(vp);
522		MNT_ILOCK(mp);
523	}
524	MNT_IUNLOCK(mp);
525	return (0);
526}
527
528/*
529 * Possible superblock locations ordered from most to least likely.
530 */
531static int sblock_try[] = SBLOCKSEARCH;
532
533/*
534 * Common code for mount and mountroot
535 */
536static int
537ffs_mountfs(devvp, mp, td)
538	struct vnode *devvp;
539	struct mount *mp;
540	struct thread *td;
541{
542	struct ufsmount *ump;
543	struct buf *bp;
544	struct fs *fs;
545	struct cdev *dev;
546	void *space;
547	ufs2_daddr_t sblockloc;
548	int error, i, blks, size, ronly;
549	int32_t *lp;
550	struct ucred *cred;
551	struct g_consumer *cp;
552
553	dev = devvp->v_rdev;
554	cred = td ? td->td_ucred : NOCRED;
555
556	vfs_object_create(devvp, td, td->td_ucred);
557	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
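	/*
	 * Open the device through GEOM, asking for write access only if
	 * this is not a read-only mount.
	 */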
558	DROP_GIANT();
559	g_topology_lock();
560	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
561
562	/*
563	 * If we are a root mount, drop the E flag so fsck can do its magic.
564	 * We will pick it up again when we remount R/W.
565	 */
566	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
567		error = g_access(cp, 0, 0, -1);
568	g_topology_unlock();
569	PICKUP_GIANT();
570	VOP_UNLOCK(devvp, 0, td);
571	if (error)
572		return (error);
573	if (devvp->v_rdev->si_iosize_max != 0)
574		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
575	if (mp->mnt_iosize_max > MAXPHYS)
576		mp->mnt_iosize_max = MAXPHYS;
577
578	devvp->v_bufobj.bo_private = cp;
579	devvp->v_bufobj.bo_ops = &ffs_ops;
580
581	bp = NULL;
582	ump = NULL;
583	fs = NULL;
584	sblockloc = 0;
585	/*
586	 * Try reading the superblock in each of its possible locations.
587	 */
588	for (i = 0; sblock_try[i] != -1; i++) {
589		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
590		    cred, &bp)) != 0)
591			goto out;
592		fs = (struct fs *)bp->b_data;
593		sblockloc = sblock_try[i];
594		if ((fs->fs_magic == FS_UFS1_MAGIC ||
595		     (fs->fs_magic == FS_UFS2_MAGIC &&
596		      (fs->fs_sblockloc == sblockloc ||
597		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
598		    fs->fs_bsize <= MAXBSIZE &&
599		    fs->fs_bsize >= sizeof(struct fs))
600			break;
601		brelse(bp);
602		bp = NULL;
603	}
604	if (sblock_try[i] == -1) {
605		error = EINVAL;		/* XXX needs translation */
606		goto out;
607	}
608	fs->fs_fmod = 0;
609	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
610	fs->fs_flags &= ~FS_UNCLEAN;
611	if (fs->fs_clean == 0) {
612		fs->fs_flags |= FS_UNCLEAN;
613		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
614		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
615		     (fs->fs_flags & FS_DOSOFTDEP))) {
616			printf(
617"WARNING: %s was not properly dismounted\n",
618			    fs->fs_fsmnt);
619		} else {
620			printf(
621"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
622			    fs->fs_fsmnt);
623			error = EPERM;
624			goto out;
625		}
626		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
627		    (mp->mnt_flag & MNT_FORCE)) {
628			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
629			    (intmax_t)fs->fs_pendingblocks,
630			    fs->fs_pendinginodes);
631			fs->fs_pendingblocks = 0;
632			fs->fs_pendinginodes = 0;
633		}
634	}
635	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
636		printf("%s: mount pending error: blocks %jd files %d\n",
637		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
638		    fs->fs_pendinginodes);
639		fs->fs_pendingblocks = 0;
640		fs->fs_pendinginodes = 0;
641	}
642	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
643	ump->um_cp = cp;
644	ump->um_bo = &devvp->v_bufobj;
645	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
646	if (fs->fs_magic == FS_UFS1_MAGIC) {
647		ump->um_fstype = UFS1;
648		ump->um_balloc = ffs_balloc_ufs1;
649	} else {
650		ump->um_fstype = UFS2;
651		ump->um_balloc = ffs_balloc_ufs2;
652	}
653	ump->um_blkatoff = ffs_blkatoff;
654	ump->um_truncate = ffs_truncate;
655	ump->um_update = ffs_update;
656	ump->um_valloc = ffs_valloc;
657	ump->um_vfree = ffs_vfree;
658	ump->um_ifree = ffs_ifree;
659	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
660	if (fs->fs_sbsize < SBLOCKSIZE)
661		bp->b_flags |= B_INVAL | B_NOCACHE;
662	brelse(bp);
663	bp = NULL;
664	fs = ump->um_fs;
665	ffs_oldfscompat_read(fs, ump, sblockloc);
666	fs->fs_ronly = ronly;
667	size = fs->fs_cssize;
668	blks = howmany(size, fs->fs_fsize);
669	if (fs->fs_contigsumsize > 0)
670		size += fs->fs_ncg * sizeof(int32_t);
671	size += fs->fs_ncg * sizeof(u_int8_t);
672	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
673	fs->fs_csp = space;
674	for (i = 0; i < blks; i += fs->fs_frag) {
675		size = fs->fs_bsize;
676		if (i + fs->fs_frag > blks)
677			size = (blks - i) * fs->fs_fsize;
678		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
679		    cred, &bp)) != 0) {
680			free(fs->fs_csp, M_UFSMNT);
681			goto out;
682		}
683		bcopy(bp->b_data, space, (u_int)size);
684		space = (char *)space + size;
685		brelse(bp);
686		bp = NULL;
687	}
688	if (fs->fs_contigsumsize > 0) {
689		fs->fs_maxcluster = lp = space;
690		for (i = 0; i < fs->fs_ncg; i++)
691			*lp++ = fs->fs_contigsumsize;
692		space = lp;
693	}
694	size = fs->fs_ncg * sizeof(u_int8_t);
695	fs->fs_contigdirs = (u_int8_t *)space;
696	bzero(fs->fs_contigdirs, size);
697	fs->fs_active = NULL;
698	mp->mnt_data = (qaddr_t)ump;
699	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
700	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
701	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
702	    vfs_getvfs(&mp->mnt_stat.f_fsid))
703		vfs_getnewfsid(mp);
704	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
705	mp->mnt_flag |= MNT_LOCAL;
706	if ((fs->fs_flags & FS_MULTILABEL) != 0)
707#ifdef MAC
708		mp->mnt_flag |= MNT_MULTILABEL;
709#else
710		printf(
711"WARNING: %s: multilabel flag on fs but no MAC support\n",
712		    fs->fs_fsmnt);
713#endif
714	if ((fs->fs_flags & FS_ACLS) != 0)
715#ifdef UFS_ACL
716		mp->mnt_flag |= MNT_ACLS;
717#else
718		printf(
719"WARNING: %s: ACLs flag on fs but no ACLs support\n",
720		    fs->fs_fsmnt);
721#endif
722	ump->um_mountp = mp;
723	ump->um_dev = dev;
724	ump->um_devvp = devvp;
725	ump->um_nindir = fs->fs_nindir;
726	ump->um_bptrtodb = fs->fs_fsbtodb;
727	ump->um_seqinc = fs->fs_frag;
728	for (i = 0; i < MAXQUOTAS; i++)
729		ump->um_quotas[i] = NULLVP;
730#ifdef UFS_EXTATTR
731	ufs_extattr_uepm_init(&ump->um_extattr);
732#endif
733	/*
734	 * Set FS local "last mounted on" information (NULL pad)
735	 */
736	vfs_mountedfrom(mp, fs->fs_fsmnt);
737
738	if (mp->mnt_flag & MNT_ROOTFS) {
739		/*
740		 * Root mount; update timestamp in mount structure.
741		 * This will be used by the common root mount code
742		 * to update the system clock.
743		 */
744		mp->mnt_time = fs->fs_time;
745	}
746
747	if (ronly == 0) {
748		if ((fs->fs_flags & FS_DOSOFTDEP) &&
749		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
750			free(fs->fs_csp, M_UFSMNT);
751			goto out;
752		}
753		if (fs->fs_snapinum[0] != 0)
754			ffs_snapshot_mount(mp);
755		fs->fs_fmod = 1;
756		fs->fs_clean = 0;
757		(void) ffs_sbupdate(ump, MNT_WAIT);
758	}
759	/*
760	 * Initialize filesystem stat information in mount struct.
761	 */
762#ifdef UFS_EXTATTR
763#ifdef UFS_EXTATTR_AUTOSTART
764	/*
765	 *
766	 * Auto-starting does the following:
767	 *	- check for /.attribute in the fs, and extattr_start if so
768	 *	- for each file in .attribute, enable that file with
769	 * 	  an attribute of the same name.
770	 * Not clear how to report errors -- probably eat them.
771	 * This would all happen while the filesystem was busy/not
772	 * available, so would effectively be "atomic".
773	 */
774	(void) ufs_extattr_autostart(mp, td);
775#endif /* !UFS_EXTATTR_AUTOSTART */
776#endif /* !UFS_EXTATTR */
777	return (0);
778out:
779	if (bp)
780		brelse(bp);
781	if (cp != NULL) {
782		DROP_GIANT();
783		g_topology_lock();
784		g_wither_geom_close(cp->geom, ENXIO);
785		g_topology_unlock();
786		PICKUP_GIANT();
787	}
788	if (ump) {
789		free(ump->um_fs, M_UFSMNT);
790		free(ump, M_UFSMNT);
791		mp->mnt_data = (qaddr_t)0;
792	}
793	return (error);
794}
795
796#include <sys/sysctl.h>
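/*
 * debug.bigcgs: debugging knob consulted by ffs_oldfscompat_read() and
 * ffs_oldfscompat_write() to temporarily override fs_cgsize with fs_bsize.
 */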
797int bigcgs = 0;
798SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
799
800/*
801 * Sanity checks for loading old filesystem superblocks.
802 * See ffs_oldfscompat_write below for unwound actions.
803 *
804 * XXX - Parts get retired eventually.
805 * Unfortunately new bits get added.
806 */
807static void
808ffs_oldfscompat_read(fs, ump, sblockloc)
809	struct fs *fs;
810	struct ufsmount *ump;
811	ufs2_daddr_t sblockloc;
812{
813	off_t maxfilesize;
814
815	/*
816	 * If not yet done, update fs_flags location and value of fs_sblockloc.
817	 */
818	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
819		fs->fs_flags = fs->fs_old_flags;
820		fs->fs_old_flags |= FS_FLAGS_UPDATED;
821		fs->fs_sblockloc = sblockloc;
822	}
823	/*
824	 * If not yet done, update UFS1 superblock with new wider fields.
825	 */
826	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
827		fs->fs_maxbsize = fs->fs_bsize;
828		fs->fs_time = fs->fs_old_time;
829		fs->fs_size = fs->fs_old_size;
830		fs->fs_dsize = fs->fs_old_dsize;
831		fs->fs_csaddr = fs->fs_old_csaddr;
832		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
833		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
834		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
835		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
836	}
837	if (fs->fs_magic == FS_UFS1_MAGIC &&
838	    fs->fs_old_inodefmt < FS_44INODEFMT) {
839		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
840		fs->fs_qbmask = ~fs->fs_bmask;
841		fs->fs_qfmask = ~fs->fs_fmask;
842	}
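	/*
	 * Clamp fs_maxfilesize to the largest size UFS1 supports (2^30
	 * blocks), saving the on-disk value so that ffs_oldfscompat_write()
	 * can restore it.
	 */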
843	if (fs->fs_magic == FS_UFS1_MAGIC) {
844		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
845		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
846		if (fs->fs_maxfilesize > maxfilesize)
847			fs->fs_maxfilesize = maxfilesize;
848	}
849	/* Compatibility for old filesystems */
850	if (fs->fs_avgfilesize <= 0)
851		fs->fs_avgfilesize = AVFILESIZ;
852	if (fs->fs_avgfpdir <= 0)
853		fs->fs_avgfpdir = AFPDIR;
854	if (bigcgs) {
855		fs->fs_save_cgsize = fs->fs_cgsize;
856		fs->fs_cgsize = fs->fs_bsize;
857	}
858}
859
860/*
861 * Unwinding superblock updates for old filesystems.
862 * See ffs_oldfscompat_read above for details.
863 *
864 * XXX - Parts get retired eventually.
865 * Unfortunately new bits get added.
866 */
867static void
868ffs_oldfscompat_write(fs, ump)
869	struct fs *fs;
870	struct ufsmount *ump;
871{
872
873	/*
874	 * Copy back UFS2 updated fields that UFS1 inspects.
875	 */
876	if (fs->fs_magic == FS_UFS1_MAGIC) {
877		fs->fs_old_time = fs->fs_time;
878		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
879		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
880		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
881		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
882		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
883	}
884	if (bigcgs) {
885		fs->fs_cgsize = fs->fs_save_cgsize;
886		fs->fs_save_cgsize = 0;
887	}
888}
889
890/*
891 * unmount system call
892 */
893int
894ffs_unmount(mp, mntflags, td)
895	struct mount *mp;
896	int mntflags;
897	struct thread *td;
898{
899	struct ufsmount *ump = VFSTOUFS(mp);
900	struct fs *fs;
901	int error, flags;
902
903	flags = 0;
904	if (mntflags & MNT_FORCE) {
905		flags |= FORCECLOSE;
906	}
907#ifdef UFS_EXTATTR
908	if ((error = ufs_extattr_stop(mp, td))) {
909		if (error != EOPNOTSUPP)
910			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
911			    error);
912	} else {
913		ufs_extattr_uepm_destroy(&ump->um_extattr);
914	}
915#endif
916	if (mp->mnt_flag & MNT_SOFTDEP) {
917		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
918			return (error);
919	} else {
920		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
921			return (error);
922	}
923	fs = ump->um_fs;
924	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
925		printf("%s: unmount pending error: blocks %jd files %d\n",
926		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
927		    fs->fs_pendinginodes);
928		fs->fs_pendingblocks = 0;
929		fs->fs_pendinginodes = 0;
930	}
931	if (fs->fs_ronly == 0) {
932		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
933		error = ffs_sbupdate(ump, MNT_WAIT);
934		if (error) {
935			fs->fs_clean = 0;
936			return (error);
937		}
938	}
939	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
940	DROP_GIANT();
941	g_topology_lock();
942	g_wither_geom_close(ump->um_cp->geom, ENXIO);
943	g_topology_unlock();
944	PICKUP_GIANT();
945	vrele(ump->um_devvp);
946	free(fs->fs_csp, M_UFSMNT);
947	free(fs, M_UFSMNT);
948	free(ump, M_UFSMNT);
949	mp->mnt_data = (qaddr_t)0;
950	mp->mnt_flag &= ~MNT_LOCAL;
951	return (error);
952}
953
954/*
955 * Flush out all the files in a filesystem.
956 */
957int
958ffs_flushfiles(mp, flags, td)
959	struct mount *mp;
960	int flags;
961	struct thread *td;
962{
963	struct ufsmount *ump;
964	int error;
965
966	ump = VFSTOUFS(mp);
967#ifdef QUOTA
968	if (mp->mnt_flag & MNT_QUOTA) {
969		int i;
970		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
971		if (error)
972			return (error);
973		for (i = 0; i < MAXQUOTAS; i++) {
974			if (ump->um_quotas[i] == NULLVP)
975				continue;
976			quotaoff(td, mp, i);
977		}
978		/*
979		 * Here we fall through to vflush again to ensure
980		 * that we have gotten rid of all the system vnodes.
981		 */
982	}
983#endif
984	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
985	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
986		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
987			return (error);
988		ffs_snapshot_unmount(mp);
989		/*
990		 * Here we fall through to vflush again to ensure
991		 * that we have gotten rid of all the system vnodes.
992		 */
993	}
994	/*
995	 * Flush all the files.
996	 */
997	if ((error = vflush(mp, 0, flags, td)) != 0)
998		return (error);
999	/*
1000	 * Flush filesystem metadata.
1001	 */
1002	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1003	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1004	VOP_UNLOCK(ump->um_devvp, 0, td);
1005	return (error);
1006}
1007
1008/*
1009 * Get filesystem statistics.
1010 */
1011int
1012ffs_statfs(mp, sbp, td)
1013	struct mount *mp;
1014	struct statfs *sbp;
1015	struct thread *td;
1016{
1017	struct ufsmount *ump;
1018	struct fs *fs;
1019
1020	ump = VFSTOUFS(mp);
1021	fs = ump->um_fs;
1022	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1023		panic("ffs_statfs");
1024	sbp->f_version = STATFS_VERSION;
1025	sbp->f_bsize = fs->fs_fsize;
1026	sbp->f_iosize = fs->fs_bsize;
1027	sbp->f_blocks = fs->fs_dsize;
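	/*
	 * Blocks and inodes whose release is still pending (e.g. by soft
	 * updates) are reported as free.
	 */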
1028	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1029	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1030	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1031	    dbtofsb(fs, fs->fs_pendingblocks);
1032	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1033	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1034	sbp->f_namemax = NAME_MAX;
1035	return (0);
1036}
1037
1038/*
1039 * Go through the disk queues to initiate sandbagged IO;
1040 * go through the inodes to write those that have been modified;
1041 * initiate the writing of the super block if it has been modified.
1042 *
1043 * Note: we are always called with the filesystem marked `MPBUSY'.
1044 */
1045int
1046ffs_sync(mp, waitfor, cred, td)
1047	struct mount *mp;
1048	int waitfor;
1049	struct ucred *cred;
1050	struct thread *td;
1051{
1052	struct vnode *nvp, *vp, *devvp;
1053	struct inode *ip;
1054	struct ufsmount *ump = VFSTOUFS(mp);
1055	struct fs *fs;
1056	int error, count, wait, lockreq, allerror = 0;
1057	struct bufobj *bo;
1058
1059	fs = ump->um_fs;
1060	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1061		printf("fs = %s\n", fs->fs_fsmnt);
1062		panic("ffs_sync: rofs mod");
1063	}
1064	/*
1065	 * Write back each (modified) inode.
1066	 */
1067	wait = 0;
1068	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1069	if (waitfor == MNT_WAIT) {
1070		wait = 1;
1071		lockreq = LK_EXCLUSIVE;
1072	}
1073	lockreq |= LK_INTERLOCK;
1074	MNT_ILOCK(mp);
1075loop:
1076	MNT_VNODE_FOREACH(vp, mp, nvp) {
1077		/*
1078		 * Depend on the mntvnode_slock to keep things stable enough
1079		 * for a quick test.  Since there might be hundreds of
1080		 * thousands of vnodes, we cannot afford even a subroutine
1081		 * call unless there's a good chance that we have work to do.
1082		 */
1083		VI_LOCK(vp);
1084		if (vp->v_iflag & VI_XLOCK) {
1085			VI_UNLOCK(vp);
1086			continue;
1087		}
1088		ip = VTOI(vp);
1089		if (vp->v_type == VNON || ((ip->i_flag &
1090		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1091		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1092			VI_UNLOCK(vp);
1093			continue;
1094		}
1095		MNT_IUNLOCK(mp);
1096		if ((error = vget(vp, lockreq, td)) != 0) {
1097			MNT_ILOCK(mp);
1098			if (error == ENOENT)
1099				goto loop;
1100			continue;
1101		}
1102		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1103			allerror = error;
1104		VOP_UNLOCK(vp, 0, td);
1105		vrele(vp);
1106		MNT_ILOCK(mp);
1107	}
1108	MNT_IUNLOCK(mp);
1109	/*
1110	 * Force stale filesystem control information to be flushed.
1111	 */
1112	if (waitfor == MNT_WAIT) {
1113		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1114			allerror = error;
1115		/* Flushed work items may create new vnodes to clean */
1116		if (allerror == 0 && count) {
1117			MNT_ILOCK(mp);
1118			goto loop;
1119		}
1120	}
1121#ifdef QUOTA
1122	qsync(mp);
1123#endif
1124	devvp = ump->um_devvp;
1125	VI_LOCK(devvp);
1126	bo = &devvp->v_bufobj;
1127	if (waitfor != MNT_LAZY &&
1128	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1129		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1130		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1131			allerror = error;
1132		VOP_UNLOCK(devvp, 0, td);
1133		if (allerror == 0 && waitfor == MNT_WAIT) {
1134			MNT_ILOCK(mp);
1135			goto loop;
1136		}
1137	} else
1138		VI_UNLOCK(devvp);
1139	/*
1140	 * Write back modified superblock.
1141	 */
1142	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1143		allerror = error;
1144	return (allerror);
1145}
1146
1147int
1148ffs_vget(mp, ino, flags, vpp)
1149	struct mount *mp;
1150	ino_t ino;
1151	int flags;
1152	struct vnode **vpp;
1153{
1154	struct thread *td = curthread; 		/* XXX */
1155	struct fs *fs;
1156	struct inode *ip;
1157	struct ufsmount *ump;
1158	struct buf *bp;
1159	struct vnode *vp;
1160	struct cdev *dev;
1161	int error;
1162
1163	ump = VFSTOUFS(mp);
1164	dev = ump->um_dev;
1165
1166	/*
1167	 * We do not lock vnode creation as it is believed to be too
1168	 * expensive for such a rare case as simultaneous creation of a vnode
1169	 * for the same ino by different processes. We just allow them to race
1170	 * and check later to decide who wins. Let the race begin!
1171	 */
1172	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1173		return (error);
1174	if (*vpp != NULL)
1175		return (0);
1176
1177	/*
1178	 * If this allocation were performed after the getnewvnode()
1179	 * it might block, leaving a vnode with a NULL v_data to be
1180	 * found by ffs_sync() if a sync happens to fire right then,
1181	 * which will cause a panic because ffs_sync() blindly
1182	 * dereferences vp->v_data (as well it should).
1183	 */
1184	ip = uma_zalloc(uma_inode, M_WAITOK);
1185
1186	/* Allocate a new vnode/inode. */
1187	error = getnewvnode("ufs", mp, &ffs_vnodeops, &vp);
1188	if (error) {
1189		*vpp = NULL;
1190		uma_zfree(uma_inode, ip);
1191		return (error);
1192	}
1193	bzero((caddr_t)ip, sizeof(struct inode));
1194	/*
1195	 * FFS supports recursive locking.
1196	 */
1197	fs = ump->um_fs;
1198	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1199	vp->v_data = ip;
1200	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1201	ip->i_vnode = vp;
1202	ip->i_ump = ump;
1203	ip->i_fs = fs;
1204	ip->i_dev = dev;
1205	ip->i_number = ino;
1206#ifdef QUOTA
1207	{
1208		int i;
1209		for (i = 0; i < MAXQUOTAS; i++)
1210			ip->i_dquot[i] = NODQUOT;
1211	}
1212#endif
1213	/*
1214	 * Exclusively lock the vnode before adding it to the hash. Note that
1215	 * we must not release or downgrade the lock (despite what the flags
1216	 * argument says) until it is fully initialized.
1217	 */
1218	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1219
1220	/*
1221	 * Atomically (in terms of ufs_hash operations) check the hash for a
1222	 * duplicate of the vnode being created and add it to the hash. If a
1223	 * duplicate vnode was found, it will be vget()ed from the hash for us.
1224	 */
1225	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1226		vput(vp);
1227		*vpp = NULL;
1228		return (error);
1229	}
1230
1231	/* If we lost the race, throw away our vnode and return the existing one. */
1232	if (*vpp != NULL) {
1233		vput(vp);
1234		return (0);
1235	}
1236
1237	/* Read in the disk contents for the inode, copy into the inode. */
1238	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1239	    (int)fs->fs_bsize, NOCRED, &bp);
1240	if (error) {
1241		/*
1242		 * The inode does not contain anything useful, so it would
1243		 * be misleading to leave it on its hash chain. With mode
1244		 * still zero, it will be unlinked and returned to the free
1245		 * list by vput().
1246		 */
1247		brelse(bp);
1248		vput(vp);
1249		*vpp = NULL;
1250		return (error);
1251	}
1252	if (ip->i_ump->um_fstype == UFS1)
1253		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1254	else
1255		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1256	ffs_load_inode(bp, ip, fs, ino);
1257	if (DOINGSOFTDEP(vp))
1258		softdep_load_inodeblock(ip);
1259	else
1260		ip->i_effnlink = ip->i_nlink;
1261	bqrelse(bp);
1262
1263	/*
1264	 * Initialize the vnode from the inode, check for aliases.
1265	 * Note that the underlying vnode may have changed.
1266	 */
1267	error = ufs_vinit(mp, &ffs_fifoops, &vp);
1268	if (error) {
1269		vput(vp);
1270		*vpp = NULL;
1271		return (error);
1272	}
1273	/*
1274	 * Finish inode initialization.
1275	 */
1276	VREF(ip->i_devvp);
1277	/*
1278	 * Set up a generation number for this inode if it does not
1279	 * already have one. This should only happen on old filesystems.
1280	 */
1281	if (ip->i_gen == 0) {
1282		ip->i_gen = arc4random() / 2 + 1;
1283		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1284			ip->i_flag |= IN_MODIFIED;
1285			DIP_SET(ip, i_gen, ip->i_gen);
1286		}
1287	}
1288	/*
1289	 * Ensure that uid and gid are correct. This is a temporary
1290	 * fix until fsck has been changed to do the update.
1291	 */
1292	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1293	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1294		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1295		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1296	}						/* XXX */
1297
1298#ifdef MAC
1299	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1300		/*
1301		 * If this vnode is already allocated, and we're running
1302		 * multi-label, attempt to perform a label association
1303		 * from the extended attributes on the inode.
1304		 */
1305		error = mac_associate_vnode_extattr(mp, vp);
1306		if (error) {
1307			/* ufs_inactive will release ip->i_devvp ref. */
1308			vput(vp);
1309			*vpp = NULL;
1310			return (error);
1311		}
1312	}
1313#endif
1314
1315	*vpp = vp;
1316	return (0);
1317}
1318
1319/*
1320 * File handle to vnode
1321 *
1322 * Have to be really careful about stale file handles:
1323 * - check that the inode number is valid
1324 * - call ffs_vget() to get the locked inode
1325 * - check for an unallocated inode (i_mode == 0)
1326 * - check that the given client host has export rights and return
1327 *   those rights via exflagsp and credanonp
1328 */
1329int
1330ffs_fhtovp(mp, fhp, vpp)
1331	struct mount *mp;
1332	struct fid *fhp;
1333	struct vnode **vpp;
1334{
1335	struct ufid *ufhp;
1336	struct fs *fs;
1337
1338	ufhp = (struct ufid *)fhp;
1339	fs = VFSTOUFS(mp)->um_fs;
1340	if (ufhp->ufid_ino < ROOTINO ||
1341	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1342		return (ESTALE);
1343	return (ufs_fhtovp(mp, ufhp, vpp));
1344}
1345
1346/*
1347 * Vnode pointer to File handle
1348 */
1349/* ARGSUSED */
1350int
1351ffs_vptofh(vp, fhp)
1352	struct vnode *vp;
1353	struct fid *fhp;
1354{
1355	struct inode *ip;
1356	struct ufid *ufhp;
1357
1358	ip = VTOI(vp);
1359	ufhp = (struct ufid *)fhp;
1360	ufhp->ufid_len = sizeof(struct ufid);
1361	ufhp->ufid_ino = ip->i_number;
1362	ufhp->ufid_gen = ip->i_gen;
1363	return (0);
1364}
1365
1366/*
1367 * Initialize the filesystem.
1368 */
1369static int
1370ffs_init(vfsp)
1371	struct vfsconf *vfsp;
1372{
1373
1374	softdep_initialize();
1375	return (ufs_init(vfsp));
1376}
1377
1378/*
1379 * Undo the work of ffs_init().
1380 */
1381static int
1382ffs_uninit(vfsp)
1383	struct vfsconf *vfsp;
1384{
1385	int ret;
1386
1387	ret = ufs_uninit(vfsp);
1388	softdep_uninitialize();
1389	return (ret);
1390}
1391
1392/*
1393 * Write a superblock and associated information back to disk.
1394 */
1395static int
1396ffs_sbupdate(mp, waitfor)
1397	struct ufsmount *mp;
1398	int waitfor;
1399{
1400	struct fs *fs = mp->um_fs;
1401	struct buf *bp;
1402	int blks;
1403	void *space;
1404	int i, size, error, allerror = 0;
1405
1406	if (fs->fs_ronly == 1 &&
1407	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1408	    (MNT_RDONLY | MNT_UPDATE))
1409		panic("ffs_sbupdate: write read-only filesystem");
1410	/*
1411	 * First write back the summary information.
1412	 */
1413	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1414	space = fs->fs_csp;
1415	for (i = 0; i < blks; i += fs->fs_frag) {
1416		size = fs->fs_bsize;
1417		if (i + fs->fs_frag > blks)
1418			size = (blks - i) * fs->fs_fsize;
1419		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1420		    size, 0, 0, 0);
1421		bcopy(space, bp->b_data, (u_int)size);
1422		space = (char *)space + size;
1423		if (waitfor != MNT_WAIT)
1424			bawrite(bp);
1425		else if ((error = bwrite(bp)) != 0)
1426			allerror = error;
1427	}
1428	/*
1429	 * Now write back the superblock itself. If any errors occurred
1430	 * up to this point, then fail so that the superblock avoids
1431	 * being written out as clean.
1432	 */
1433	if (allerror)
1434		return (allerror);
1435	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1436	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1437		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1438		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS1);
1439		fs->fs_sblockloc = SBLOCK_UFS1;
1440	}
1441	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1442	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1443		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1444		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS2);
1445		fs->fs_sblockloc = SBLOCK_UFS2;
1446	}
1447	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1448	    0, 0, 0);
1449	fs->fs_fmod = 0;
1450	fs->fs_time = time_second;
1451	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1452	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1453	if (waitfor != MNT_WAIT)
1454		bawrite(bp);
1455	else if ((error = bwrite(bp)) != 0)
1456		allerror = error;
1457	return (allerror);
1458}
1459
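/*
 * Extended attribute control: hand the request to the UFS extended
 * attribute code when UFS_EXTATTR is configured, otherwise fall back to
 * the generic VFS implementation.
 */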
1460static int
1461ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
1462	int attrnamespace, const char *attrname, struct thread *td)
1463{
1464
1465#ifdef UFS_EXTATTR
1466	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
1467	    attrname, td));
1468#else
1469	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
1470	    attrname, td));
1471#endif
1472}
1473
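/*
 * Return an in-core inode, and the dinode attached to it, to their
 * UMA zones.
 */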
1474static void
1475ffs_ifree(struct ufsmount *ump, struct inode *ip)
1476{
1477
1478	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1479		uma_zfree(uma_ufs1, ip->i_din1);
1480	else if (ip->i_din2 != NULL)
1481		uma_zfree(uma_ufs2, ip->i_din2);
1482	uma_zfree(uma_inode, ip);
1483}
1484
1485static void
1486ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1487{
1488
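	/*
	 * Give the soft updates code a chance to handle the write first;
	 * if it takes responsibility for the buffer, do not pass it down
	 * to GEOM here.
	 */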
1489#ifdef SOFTUPDATES
1490	if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(bp))
1491		return;
1492#endif
1493	g_vfs_strategy(bo, bp);
1494}
1495