ffs_vfsops.c revision 91406
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 91406 2002-02-27 18:32:23Z jhb $
35 */
36
37#include "opt_quota.h"
38#include "opt_ufs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <sys/disklabel.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <ufs/ufs/extattr.h>
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/ufsmount.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64#include <vm/vm.h>
65#include <vm/vm_page.h>
66
/* Malloc type that ffs_mount() passes to ffs_mountfs() as `malloctype'. */
67static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
68
/* Prototypes for routines referenced in this file. */
69static int	ffs_sbupdate __P((struct ufsmount *, int));
70int	ffs_reload __P((struct mount *,struct ucred *,struct thread *));
71static int	ffs_oldfscompat __P((struct fs *));
72static int	ffs_init __P((struct vfsconf *));
73
/*
 * Filesystem operations vector handed to VFS_SET() below.
 *
 * NOTE(review): the initializer is positional, so the entry order presumably
 * has to match the member order of struct vfsops -- confirm against the
 * struct definition before adding or reordering entries.
 *
 * The extended-attribute control entry is ufs_extattrctl when the kernel is
 * built with UFS_EXTATTR and the generic vfs_stdextattrctl otherwise.
 */
74static struct vfsops ufs_vfsops = {
75	ffs_mount,
76	ufs_start,
77	ffs_unmount,
78	ufs_root,
79	ufs_quotactl,
80	ffs_statfs,
81	ffs_sync,
82	ffs_vget,
83	ffs_fhtovp,
84	vfs_stdcheckexp,
85	ffs_vptofh,
86	ffs_init,
87	vfs_stduninit,
88#ifdef UFS_EXTATTR
89	ufs_extattrctl,
90#else
91	vfs_stdextattrctl,
92#endif
93};
94
/* Register the operations vector above under the filesystem name "ufs". */
95VFS_SET(ufs_vfsops, ufs, 0);
96
97/*
98 * ffs_mount
99 *
100 * Called when mounting local physical media
101 *
102 * PARAMETERS:
103 *		mountroot
104 *			mp	mount point structure
105 *			path	NULL (flag for root mount!!!)
106 *			data	<unused>
107 *			ndp	<unused>
108 *			p	process (user credentials check [statfs])
109 *
110 *		mount
111 *			mp	mount point structure
112 *			path	path to mount point
113 *			data	pointer to argument struct in user space
114 *			ndp	mount point namei() return (used for
115 *				credentials on reload), reused to look
116 *				up block device.
117 *			p	process (user credentials check)
118 *
119 * RETURNS:	0	Success
120 *		!0	error number (errno.h)
121 *
122 * LOCK STATE:
123 *
124 *		ENTRY
125 *			mount point is locked
126 *		EXIT
127 *			mount point is locked
128 *
129 * NOTES:
130 *		A NULL path can be used for a flag since the mount
131 *		system call will fail with EFAULT in copyinstr in
132 *		namei() if it is a genuine NULL from the user.
133 */
/*
 * Implementation note: the body below has three distinct paths.
 *
 *   1. path == NULL: root mount; the device vnode comes from
 *      bdevvp(rootdev) and the work is done by ffs_mountfs().
 *   2. MNT_UPDATE set: the existing mount is adjusted in place
 *      (read-write -> read-only, reload, read-only -> read-write,
 *      export list, snapshot); the device is only looked up again
 *      when args.fspec is non-zero.
 *   3. Otherwise: a new mount; the device named by args.fspec is
 *      looked up and passed to ffs_mountfs().
 *
 * The last parameter is the requesting thread and is named `td' here.
 */
134int
135ffs_mount(mp, path, data, ndp, td)
136        struct mount		*mp;	/* mount struct pointer*/
137        char			*path;	/* path to mount point*/
138        caddr_t			data;	/* arguments to FS specific mount*/
139        struct nameidata	*ndp;	/* mount point credentials*/
140        struct thread		*td;	/* process requesting mount*/
141{
142	size_t		size;
143	struct vnode	*devvp;
144	struct ufs_args args;
145	struct ufsmount *ump = 0;
146	register struct fs *fs;
147	int error, flags;
148	mode_t accessmode;
149
150	/*
151	 * Use NULL path to indicate we are mounting the root file system.
152	 */
153	if (path == NULL) {
154		if ((error = bdevvp(rootdev, &rootvp))) {
155			printf("ffs_mountroot: can't find rootvp\n");
156			return (error);
157		}
158
159		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
160			return (error);
161
		/* Fill in mp->mnt_stat; the result of the call is ignored. */
162		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
163		return (0);
164	}
165
166	/*
167	 * Mounting non-root file system or updating a file system
168	 */
169	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
170		return (error);
171
172	/*
173	 * If updating, check whether changing from read-only to
174	 * read/write; if there is no device name, that's all we do.
175	 */
176	if (mp->mnt_flag & MNT_UPDATE) {
177		ump = VFSTOUFS(mp);
178		fs = ump->um_fs;
179		devvp = ump->um_devvp;
		/*
		 * Read-write -> read-only downgrade: sync, flush files
		 * open for writing, then mark the superblock read-only
		 * and write it out.  Every exit from this block after
		 * vn_start_write() succeeds calls vn_finished_write().
		 */
180		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
181			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
182				return (error);
183			/*
184			 * Flush any dirty data.
185			 */
186			VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
187			/*
188			 * Check for and optionally get rid of files open
189			 * for writing.
190			 */
191			flags = WRITECLOSE;
192			if (mp->mnt_flag & MNT_FORCE)
193				flags |= FORCECLOSE;
194			if (mp->mnt_flag & MNT_SOFTDEP) {
195				error = softdep_flushfiles(mp, flags, td);
196			} else {
197				error = ffs_flushfiles(mp, flags, td);
198			}
199			if (error) {
200				vn_finished_write(mp);
201				return (error);
202			}
			/*
			 * Pending counts should be zero after the flush;
			 * report and clear them if they are not.
			 */
203			if (fs->fs_pendingblocks != 0 ||
204			    fs->fs_pendinginodes != 0) {
205				printf("%s: update error: blocks %d files %d\n",
206				    fs->fs_fsmnt, fs->fs_pendingblocks,
207				    fs->fs_pendinginodes);
208				fs->fs_pendingblocks = 0;
209				fs->fs_pendinginodes = 0;
210			}
211			fs->fs_ronly = 1;
			/* Only mark clean if neither "unclean" flag is set. */
212			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
213				fs->fs_clean = 1;
			/* On write failure, roll the in-core flags back. */
214			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
215				fs->fs_ronly = 0;
216				fs->fs_clean = 0;
217				vn_finished_write(mp);
218				return (error);
219			}
220			vn_finished_write(mp);
221		}
		/* MNT_RELOAD: re-read the on-disk state via ffs_reload(). */
222		if ((mp->mnt_flag & MNT_RELOAD) &&
223		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
224			return (error);
		/* Read-only -> read-write upgrade. */
225		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
226			/*
227			 * If upgrade to read-write by non-root, then verify
228			 * that user has necessary permissions on the device.
229			 */
230			if (suser_td(td)) {
231				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
232				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
233				    td->td_ucred, td)) != 0) {
234					VOP_UNLOCK(devvp, 0, td);
235					return (error);
236				}
237				VOP_UNLOCK(devvp, 0, td);
238			}
			/*
			 * Recompute FS_UNCLEAN from fs_clean.  An unclean
			 * filesystem is only upgraded when MNT_FORCE is
			 * given, or when it uses soft updates and is not
			 * flagged FS_NEEDSFSCK; otherwise refuse with EPERM.
			 */
239			fs->fs_flags &= ~FS_UNCLEAN;
240			if (fs->fs_clean == 0) {
241				fs->fs_flags |= FS_UNCLEAN;
242				if ((mp->mnt_flag & MNT_FORCE) ||
243				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
244				     (fs->fs_flags & FS_DOSOFTDEP))) {
245					printf("WARNING: %s was not %s\n",
246					   fs->fs_fsmnt, "properly dismounted");
247				} else {
248					printf(
249"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
250					    fs->fs_fsmnt);
251					return (EPERM);
252				}
253			}
254			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
255				return (error);
			/*
			 * Mark the filesystem writable and not clean, and
			 * push the superblock to disk.  Note that the
			 * in-core flags are not rolled back if
			 * ffs_sbupdate() fails here.
			 */
256			fs->fs_ronly = 0;
257			fs->fs_clean = 0;
258			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
259				vn_finished_write(mp);
260				return (error);
261			}
262			/* check to see if we need to start softdep */
263			if ((fs->fs_flags & FS_DOSOFTDEP) &&
264			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
265				vn_finished_write(mp);
266				return (error);
267			}
			/* A non-zero first snapshot slot means snapshots exist. */
268			if (fs->fs_snapinum[0] != 0)
269				ffs_snapshot_mount(mp);
270			vn_finished_write(mp);
271		}
272		/*
273		 * Soft updates is incompatible with "async",
274		 * so if we are doing softupdates stop the user
275		 * from setting the async flag in an update.
276		 * Softdep_mount() clears it in an initial mount
277		 * or ro->rw remount.
278		 */
279		if (mp->mnt_flag & MNT_SOFTDEP)
280			mp->mnt_flag &= ~MNT_ASYNC;
281		/*
282		 * If not updating name, process export requests.
283		 */
284		if (args.fspec == 0)
285			return (vfs_export(mp, &args.export));
286		/*
287		 * If this is a snapshot request, take the snapshot.
288		 */
289		if (mp->mnt_flag & MNT_SNAPSHOT)
290			return (ffs_snapshot(mp, args.fspec));
291	}
292
293	/*
294	 * Not an update, or updating the name: look up the name
295	 * and verify that it refers to a sensible block device.
296	 */
297	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
298	if ((error = namei(ndp)) != 0)
299		return (error);
300	NDFREE(ndp, NDF_ONLY_PNBUF);
301	devvp = ndp->ni_vp;
	/* vn_isdisk() stores the reason in `error' when it rejects devvp. */
302	if (!vn_isdisk(devvp, &error)) {
303		vrele(devvp);
304		return (error);
305	}
306
307	/*
308	 * If mount by non-root, then verify that user has necessary
309	 * permissions on the device.
310	 */
311	if (suser_td(td)) {
312		accessmode = VREAD;
313		if ((mp->mnt_flag & MNT_RDONLY) == 0)
314			accessmode |= VWRITE;
315		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
316		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
			/* vput() here both unlocks and releases devvp. */
317			vput(devvp);
318			return (error);
319		}
320		VOP_UNLOCK(devvp, 0, td);
321	}
322
323	if (mp->mnt_flag & MNT_UPDATE) {
324		/*
325		 * Update only
326		 *
327		 * If it's not the same vnode, or at least the same device
328		 * then it's not correct.
329		 */
330
331		if (devvp != ump->um_devvp &&
332		    devvp->v_rdev != ump->um_devvp->v_rdev)
333			error = EINVAL;	/* needs translation */
		/*
		 * The looked-up vnode was only needed for the comparison;
		 * the mount keeps using ump->um_devvp.
		 */
334		vrele(devvp);
335		if (error)
336			return (error);
337	} else {
338		/*
339		 * New mount
340		 *
341		 * We need the name for the mount point (also used for
342		 * "last mounted on") copied in. If an error occurs,
343		 * the mount point is discarded by the upper level code.
344		 * Note that vfs_mount() populates f_mntonname for us.
345		 */
346		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
347			vrele(devvp);
348			return (error);
349		}
350	}
351	/*
352	 * Save "mounted from" device name info for mount point (NULL pad).
353	 */
	/*
	 * NOTE(review): the return value of copyinstr() is not checked;
	 * `size' as reported by it is used to zero-fill the rest of the
	 * buffer.
	 */
354	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
355	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
356	/*
357	 * Initialize filesystem stat information in mount struct.
358	 */
359	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
360	return (0);
361}
362
363/*
364 * Reload all incore data for a filesystem (used after running fsck on
365 * the root filesystem and finding things to fix). The filesystem must
366 * be mounted read-only.
367 *
368 * Things to do to update the mount:
369 *	1) invalidate all cached meta-data.
370 *	2) re-read superblock from disk.
371 *	3) re-read summary information from disk.
372 *	4) invalidate all inactive vnodes.
373 *	5) invalidate all cached file data.
374 *	6) re-read inode data for all active vnodes.
375 */
376int
377ffs_reload(mp, cred, td)
378	register struct mount *mp;
379	struct ucred *cred;
380	struct thread *td;
381{
382	register struct vnode *vp, *nvp, *devvp;
383	struct inode *ip;
384	void *space;
385	struct buf *bp;
386	struct fs *fs, *newfs;
387	struct partinfo dpart;
388	dev_t dev;
389	int i, blks, size, error;
390	int32_t *lp;
391
392	if ((mp->mnt_flag & MNT_RDONLY) == 0)
393		return (EINVAL);
394	/*
395	 * Step 1: invalidate all cached meta-data.
396	 */
397	devvp = VFSTOUFS(mp)->um_devvp;
398	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
399	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
400	VOP_UNLOCK(devvp, 0, td);
401	if (error)
402		panic("ffs_reload: dirty1");
403
404	dev = devvp->v_rdev;
405
406	/*
407	 * Only VMIO the backing device if the backing device is a real
408	 * block device.
409	 */
410	if (vn_isdisk(devvp, NULL)) {
411		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
412		vfs_object_create(devvp, td, td->td_ucred);
413		mtx_lock(&devvp->v_interlock);
414		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
415	}
416
417	/*
418	 * Step 2: re-read superblock from disk.
419	 */
420	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, td) != 0)
421		size = DEV_BSIZE;
422	else
423		size = dpart.disklab->d_secsize;
424	if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0)
425		return (error);
426	newfs = (struct fs *)bp->b_data;
427	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
428		newfs->fs_bsize < sizeof(struct fs)) {
429			brelse(bp);
430			return (EIO);		/* XXX needs translation */
431	}
432	fs = VFSTOUFS(mp)->um_fs;
433	/*
434	 * Copy pointer fields back into superblock before copying in	XXX
435	 * new superblock. These should really be in the ufsmount.	XXX
436	 * Note that important parameters (eg fs_ncg) are unchanged.
437	 */
438	newfs->fs_csp = fs->fs_csp;
439	newfs->fs_maxcluster = fs->fs_maxcluster;
440	newfs->fs_contigdirs = fs->fs_contigdirs;
441	newfs->fs_active = fs->fs_active;
442	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
443	if (fs->fs_sbsize < SBSIZE)
444		bp->b_flags |= B_INVAL | B_NOCACHE;
445	brelse(bp);
446	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
447	ffs_oldfscompat(fs);
448	/* An old fsck may have zeroed these fields, so recheck them. */
449	if (fs->fs_avgfilesize <= 0)		/* XXX */
450		fs->fs_avgfilesize = AVFILESIZ;	/* XXX */
451	if (fs->fs_avgfpdir <= 0)		/* XXX */
452		fs->fs_avgfpdir = AFPDIR;	/* XXX */
453	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
454		printf("%s: reload pending error: blocks %d files %d\n",
455		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
456		fs->fs_pendingblocks = 0;
457		fs->fs_pendinginodes = 0;
458	}
459
460	/*
461	 * Step 3: re-read summary information from disk.
462	 */
463	blks = howmany(fs->fs_cssize, fs->fs_fsize);
464	space = fs->fs_csp;
465	for (i = 0; i < blks; i += fs->fs_frag) {
466		size = fs->fs_bsize;
467		if (i + fs->fs_frag > blks)
468			size = (blks - i) * fs->fs_fsize;
469		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
470		    NOCRED, &bp);
471		if (error)
472			return (error);
473		bcopy(bp->b_data, space, (u_int)size);
474		space = (char *)space + size;
475		brelse(bp);
476	}
477	/*
478	 * We no longer know anything about clusters per cylinder group.
479	 */
480	if (fs->fs_contigsumsize > 0) {
481		lp = fs->fs_maxcluster;
482		for (i = 0; i < fs->fs_ncg; i++)
483			*lp++ = fs->fs_contigsumsize;
484	}
485
486loop:
487	mtx_lock(&mntvnode_mtx);
488	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
489		if (vp->v_mount != mp) {
490			mtx_unlock(&mntvnode_mtx);
491			goto loop;
492		}
493		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
494		mtx_unlock(&mntvnode_mtx);
495		/*
496		 * Step 4: invalidate all inactive vnodes.
497		 */
498		if (vrecycle(vp, NULL, td))
499			goto loop;
500		/*
501		 * Step 5: invalidate all cached file data.
502		 */
503		mtx_lock(&vp->v_interlock);
504		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
505			goto loop;
506		}
507		if (vinvalbuf(vp, 0, cred, td, 0, 0))
508			panic("ffs_reload: dirty2");
509		/*
510		 * Step 6: re-read inode data for all active vnodes.
511		 */
512		ip = VTOI(vp);
513		error =
514		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
515		    (int)fs->fs_bsize, NOCRED, &bp);
516		if (error) {
517			vput(vp);
518			return (error);
519		}
520		ip->i_din = *((struct dinode *)bp->b_data +
521		    ino_to_fsbo(fs, ip->i_number));
522		ip->i_effnlink = ip->i_nlink;
523		brelse(bp);
524		vput(vp);
525		mtx_lock(&mntvnode_mtx);
526	}
527	mtx_unlock(&mntvnode_mtx);
528	return (0);
529}
530
/*
 * Debug knob, exported read-write as a sysctl under the _debug node.
 * When non-zero, ffs_mountfs() saves fs_cgsize in fs_sparecon[0] (unless
 * that slot is already positive) and sets fs_cgsize to fs_bsize;
 * ffs_unmount() copies fs_sparecon[0] back into fs_cgsize and clears it.
 * NOTE(review): both sides test the current value of bigcgs, so changing
 * it between mount and unmount looks unsafe -- confirm.
 */
531#include <sys/sysctl.h>
532int bigcgs = 0;
533SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
534
535/*
536 * Common code for mount and mountroot
537 */
/*
 * Open the device vnode, read and validate the superblock, build the
 * in-core ufsmount/fs structures and attach them to the mount point.
 *
 * PARAMETERS:
 *	devvp		vnode of the device to mount
 *	mp		mount point structure being filled in
 *	td		calling thread; may be NULL, in which case NOCRED
 *			is used for credentials
 *	malloctype	stored in ump->um_malloctype
 *
 * RETURNS:	0 on success, otherwise an errno value.  On failure after
 *		the device has been opened, the `out' label closes the
 *		device and frees whatever was allocated.
 */
538int
539ffs_mountfs(devvp, mp, td, malloctype)
540	register struct vnode *devvp;
541	struct mount *mp;
542	struct thread *td;
543	struct malloc_type *malloctype;
544{
545	register struct ufsmount *ump;
546	struct buf *bp;
547	register struct fs *fs;
548	dev_t dev;
549	struct partinfo dpart;
550	void *space;
551	int error, i, blks, size, ronly;
552	int32_t *lp;
553	struct ucred *cred;
554	u_int64_t maxfilesize;					/* XXX */
555	size_t strsize;
556	int ncount;
557
558	dev = devvp->v_rdev;
559	cred = td ? td->td_ucred : NOCRED;
560	/*
561	 * Disallow multiple mounts of the same device.
562	 * Disallow mounting of a device that is currently in use
563	 * (except for root, which might share swap device for miniroot).
564	 * Flush out any old buffers remaining from a previous use.
565	 */
566	error = vfs_mountedon(devvp);
567	if (error)
568		return (error);
569	ncount = vcount(devvp);
570
571	if (ncount > 1 && devvp != rootvp)
572		return (EBUSY);
573	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
574	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
575	VOP_UNLOCK(devvp, 0, td);
576	if (error)
577		return (error);
578
579	/*
580	 * Only VMIO the backing device if the backing device is a real
581	 * block device.
582	 * Note that it is optional that the backing device be VMIOed.  This
583	 * increases the opportunity for metadata caching.
584	 */
585	if (vn_isdisk(devvp, NULL)) {
586		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
587		vfs_object_create(devvp, td, cred);
588		mtx_lock(&devvp->v_interlock);
589		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
590	}
591
	/* Open the device read-only or read-write to match the mount. */
592	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
593	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
594	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
595	VOP_UNLOCK(devvp, 0, td);
596	if (error)
597		return (error);
	/*
	 * Raise the mount's maximum I/O size to the device's if that is
	 * larger, then cap it at MAXPHYS.
	 */
598	if (devvp->v_rdev->si_iosize_max > mp->mnt_iosize_max)
599		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
600	if (mp->mnt_iosize_max > MAXPHYS)
601		mp->mnt_iosize_max = MAXPHYS;
602
	/*
	 * NOTE(review): the sector size stored in `size' here is not read
	 * before `size' is reassigned from fs_cssize further down; the
	 * superblock read below uses SBLOCK/SBSIZE directly.
	 */
603	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, td) != 0)
604		size = DEV_BSIZE;
605	else
606		size = dpart.disklab->d_secsize;
607
	/*
	 * From here on failures go through `out', which inspects bp and
	 * ump, so both start out NULL.
	 */
608	bp = NULL;
609	ump = NULL;
610	if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0)
611		goto out;
612	fs = (struct fs *)bp->b_data;
	/* Basic superblock sanity: magic number and block size bounds. */
613	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
614	    fs->fs_bsize < sizeof(struct fs)) {
615		error = EINVAL;		/* XXX needs translation */
616		goto out;
617	}
618	fs->fs_fmod = 0;
	/*
	 * Recompute FS_UNCLEAN from fs_clean.  An unclean filesystem may
	 * still be mounted read-only, with MNT_FORCE, or when it uses soft
	 * updates and is not flagged FS_NEEDSFSCK; otherwise the read-write
	 * mount is refused with EPERM.
	 */
619	fs->fs_flags &= ~FS_UNCLEAN;
620	if (fs->fs_clean == 0) {
621		fs->fs_flags |= FS_UNCLEAN;
622		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
623		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
624		     (fs->fs_flags & FS_DOSOFTDEP))) {
625			printf(
626"WARNING: %s was not properly dismounted\n",
627			    fs->fs_fsmnt);
628		} else {
629			printf(
630"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
631			    fs->fs_fsmnt);
632			error = EPERM;
633			goto out;
634		}
635		if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
636			printf("%s: lost blocks %d files %d\n", fs->fs_fsmnt,
637			    fs->fs_pendingblocks, fs->fs_pendinginodes);
638			fs->fs_pendingblocks = 0;
639			fs->fs_pendinginodes = 0;
640		}
641	}
	/*
	 * Pending counts are reported and cleared here for the case the
	 * unclean branch above did not already handle.
	 */
642	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
643		printf("%s: mount pending error: blocks %d files %d\n",
644		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
645		fs->fs_pendingblocks = 0;
646		fs->fs_pendinginodes = 0;
647	}
648	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
649	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
650		error = EROFS;          /* needs translation */
651		goto out;
652	}
	/*
	 * Allocate the ufsmount and a private copy of the superblock,
	 * and install the FFS implementations of the UFS hooks.
	 */
653	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
654	ump->um_malloctype = malloctype;
655	ump->um_i_effnlink_valid = 1;
656	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
657	    M_WAITOK);
658	ump->um_blkatoff = ffs_blkatoff;
659	ump->um_truncate = ffs_truncate;
660	ump->um_update = ffs_update;
661	ump->um_valloc = ffs_valloc;
662	ump->um_vfree = ffs_vfree;
663	ump->um_balloc = ffs_balloc;
664	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
665	if (fs->fs_sbsize < SBSIZE)
666		bp->b_flags |= B_INVAL | B_NOCACHE;
667	brelse(bp);
668	bp = NULL;
	/* From now on `fs' refers to the in-core copy, not the buffer. */
669	fs = ump->um_fs;
670	fs->fs_ronly = ronly;
	/*
	 * One allocation holds, in order: the cylinder group summary
	 * area (fs_cssize bytes), the per-group max-cluster array (only
	 * when fs_contigsumsize > 0) and the per-group contigdirs array.
	 */
671	size = fs->fs_cssize;
672	blks = howmany(size, fs->fs_fsize);
673	if (fs->fs_contigsumsize > 0)
674		size += fs->fs_ncg * sizeof(int32_t);
675	size += fs->fs_ncg * sizeof(u_int8_t);
676	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
677	fs->fs_csp = space;
	/*
	 * Read the summary area a filesystem block at a time; the last
	 * read is shortened to the remaining fragments.
	 */
678	for (i = 0; i < blks; i += fs->fs_frag) {
679		size = fs->fs_bsize;
680		if (i + fs->fs_frag > blks)
681			size = (blks - i) * fs->fs_fsize;
682		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
683		    cred, &bp)) != 0) {
			/* `out' frees ump and um_fs but not fs_csp. */
684			free(fs->fs_csp, M_UFSMNT);
685			goto out;
686		}
687		bcopy(bp->b_data, space, (u_int)size);
688		space = (char *)space + size;
689		brelse(bp);
690		bp = NULL;
691	}
692	if (fs->fs_contigsumsize > 0) {
693		fs->fs_maxcluster = lp = space;
694		for (i = 0; i < fs->fs_ncg; i++)
695			*lp++ = fs->fs_contigsumsize;
696		space = lp;
697	}
698	size = fs->fs_ncg * sizeof(u_int8_t);
699	fs->fs_contigdirs = (u_int8_t *)space;
700	bzero(fs->fs_contigdirs, size);
701	fs->fs_active = NULL;
702	/* Compatibility for old filesystems 	   XXX */
703	if (fs->fs_avgfilesize <= 0)		/* XXX */
704		fs->fs_avgfilesize = AVFILESIZ;	/* XXX */
705	if (fs->fs_avgfpdir <= 0)		/* XXX */
706		fs->fs_avgfpdir = AFPDIR;	/* XXX */
707	mp->mnt_data = (qaddr_t)ump;
	/*
	 * Use the on-disk filesystem id unless either half is zero or
	 * vfs_getvfs() already finds a mount with it; then get a new one.
	 */
708	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
709	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
710	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
711	    vfs_getvfs(&mp->mnt_stat.f_fsid))
712		vfs_getnewfsid(mp);
713	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
714	mp->mnt_flag |= MNT_LOCAL;
715	ump->um_mountp = mp;
716	ump->um_dev = dev;
717	ump->um_devvp = devvp;
718	ump->um_nindir = fs->fs_nindir;
719	ump->um_bptrtodb = fs->fs_fsbtodb;
720	ump->um_seqinc = fs->fs_frag;
721	for (i = 0; i < MAXQUOTAS; i++)
722		ump->um_quotas[i] = NULLVP;
723#ifdef UFS_EXTATTR
724	ufs_extattr_uepm_init(&ump->um_extattr);
725#endif
	/* Mark the device as mounted; cleared again at `out' on failure. */
726	devvp->v_rdev->si_mountpoint = mp;
727	ffs_oldfscompat(fs);
728
729	/*
730	 * Set FS local "last mounted on" information (NULL pad)
731	 */
732	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
733			fs->fs_fsmnt,			/* copy area*/
734			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
735			&strsize);			/* real size*/
736	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
737
738	if( mp->mnt_flag & MNT_ROOTFS) {
739		/*
740		 * Root mount; update timestamp in mount structure.
741		 * this will be used by the common root mount code
742		 * to update the system clock.
743		 */
744		mp->mnt_time = fs->fs_time;
745	}
746
	/*
	 * Remember the on-disk maximum file size, then clamp the in-core
	 * value to 0x40000000 * fs_bsize - 1.
	 */
747	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
748	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
749	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
750		fs->fs_maxfilesize = maxfilesize;		/* XXX */
	/*
	 * Debug knob: stash the real fs_cgsize in fs_sparecon[0] and use
	 * fs_bsize instead; ffs_unmount() undoes this.
	 */
751	if (bigcgs) {
752		if (fs->fs_sparecon[0] <= 0)
753			fs->fs_sparecon[0] = fs->fs_cgsize;
754		fs->fs_cgsize = fs->fs_bsize;
755	}
	/*
	 * Read-write mount: start soft updates and snapshots if the
	 * filesystem uses them, then mark the superblock modified and
	 * not clean and write it out (write errors are ignored).
	 */
756	if (ronly == 0) {
757		if ((fs->fs_flags & FS_DOSOFTDEP) &&
758		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
759			free(fs->fs_csp, M_UFSMNT);
760			goto out;
761		}
762		if (fs->fs_snapinum[0] != 0)
763			ffs_snapshot_mount(mp);
764		fs->fs_fmod = 1;
765		fs->fs_clean = 0;
766		(void) ffs_sbupdate(ump, MNT_WAIT);
767	}
768#ifdef UFS_EXTATTR
769#ifdef UFS_EXTATTR_AUTOSTART
770	/*
771	 *
772	 * Auto-starting does the following:
773	 *	- check for /.attribute in the fs, and extattr_start if so
774	 *	- for each file in .attribute, enable that file with
775	 * 	  an attribute of the same name.
776	 * Not clear how to report errors -- probably eat them.
777	 * This would all happen while the file system was busy/not
778	 * available, so would effectively be "atomic".
779	 */
780	(void) ufs_extattr_autostart(mp, td);
781#endif /* !UFS_EXTATTR_AUTOSTART */
782#endif /* !UFS_EXTATTR */
783	return (0);
/*
 * Error exit: detach the device from the mount, release any buffer still
 * held, close the device and free the ufsmount and superblock copy.
 * Callers that had already allocated fs_csp free it before jumping here.
 */
784out:
785	devvp->v_rdev->si_mountpoint = NULL;
786	if (bp)
787		brelse(bp);
788	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
789	if (ump) {
790		free(ump->um_fs, M_UFSMNT);
791		free(ump, M_UFSMNT);
792		mp->mnt_data = (qaddr_t)0;
793	}
794	return (error);
795}
796
797/*
798 * Sanity checks for old file systems.
799 *
800 * XXX - goes away some day.
801 */
802static int
803ffs_oldfscompat(fs)
804	struct fs *fs;
805{
806
807	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
808	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
809	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
810		fs->fs_nrpos = 8;				/* XXX */
811	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
812#if 0
813		int i;						/* XXX */
814		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
815								/* XXX */
816		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
817		for (i = 0; i < NIADDR; i++) {			/* XXX */
818			sizepb *= NINDIR(fs);			/* XXX */
819			fs->fs_maxfilesize += sizepb;		/* XXX */
820		}						/* XXX */
821#endif
822		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
823		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
824		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
825	}							/* XXX */
826	return (0);
827}
828
829/*
830 * unmount system call
831 */
/*
 * Tear down an FFS mount: stop extended attributes (if configured), flush
 * all files, write the superblock back for read-write mounts, close the
 * device and free the in-core mount structures.
 *
 * PARAMETERS:
 *	mp		mount point being unmounted
 *	mntflags	MNT_FORCE requests a forced close of open files
 *	td		calling thread
 *
 * RETURNS:	0 on success; otherwise the error from flushing files,
 *		from ffs_sbupdate(), or from VOP_CLOSE() on the device.
 */
832int
833ffs_unmount(mp, mntflags, td)
834	struct mount *mp;
835	int mntflags;
836	struct thread *td;
837{
838	register struct ufsmount *ump = VFSTOUFS(mp);
839	register struct fs *fs;
840	int error, flags;
841
842	flags = 0;
843	if (mntflags & MNT_FORCE) {
844		flags |= FORCECLOSE;
845	}
846#ifdef UFS_EXTATTR
	/*
	 * A failure to stop extended attributes is only logged (and not
	 * even that for EOPNOTSUPP); the unmount carries on regardless.
	 */
847	if ((error = ufs_extattr_stop(mp, td))) {
848		if (error != EOPNOTSUPP)
849			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
850			    error);
851	} else {
852		ufs_extattr_uepm_destroy(&ump->um_extattr);
853	}
854#endif
	/* Soft-updates mounts have their own flush routine. */
855	if (mp->mnt_flag & MNT_SOFTDEP) {
856		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
857			return (error);
858	} else {
859		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
860			return (error);
861	}
862	fs = ump->um_fs;
	/* Undo the bigcgs substitution made in ffs_mountfs(). */
863	if (bigcgs) {
864		fs->fs_cgsize = fs->fs_sparecon[0];
865		fs->fs_sparecon[0] = 0;
866	}
867	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
868		printf("%s: unmount pending error: blocks %d files %d\n",
869		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
870		fs->fs_pendingblocks = 0;
871		fs->fs_pendinginodes = 0;
872	}
	/*
	 * Read-write mount: mark the superblock clean unless one of the
	 * "unclean" flags is set, and write it out.  If the write fails
	 * the clean flag is reset and the unmount is abandoned.
	 */
873	if (fs->fs_ronly == 0) {
874		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
875		error = ffs_sbupdate(ump, MNT_WAIT);
876		if (error) {
877			fs->fs_clean = 0;
878			return (error);
879		}
880	}
	/* Detach the device from this mount. */
881	ump->um_devvp->v_rdev->si_mountpoint = NULL;
882
	/*
	 * Flush and invalidate device buffers (result ignored), then
	 * close the device with the mode it was opened with.
	 */
883	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
884	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
885		NOCRED, td);
886
887	vrele(ump->um_devvp);
888
	/*
	 * Free the summary area, the superblock copy and the ufsmount.
	 * The VOP_CLOSE() result is returned, but the teardown is
	 * completed either way.
	 */
889	free(fs->fs_csp, M_UFSMNT);
890	free(fs, M_UFSMNT);
891	free(ump, M_UFSMNT);
892	mp->mnt_data = (qaddr_t)0;
893	mp->mnt_flag &= ~MNT_LOCAL;
894	return (error);
895}
896
897/*
898 * Flush out all the files in a filesystem.
899 */
/*
 * Flush the vnodes of a mount and then sync the device vnode.
 *
 * PARAMETERS:
 *	mp	mount point whose files are flushed
 *	flags	passed through to vflush() (callers in this file pass
 *		combinations of WRITECLOSE and FORCECLOSE)
 *	td	calling thread
 *
 * RETURNS:	0 on success, otherwise the first error from vflush() or
 *		the error from VOP_FSYNC() on the device vnode.
 */
900int
901ffs_flushfiles(mp, flags, td)
902	register struct mount *mp;
903	int flags;
904	struct thread *td;
905{
906	register struct ufsmount *ump;
907	int error;
908
909	ump = VFSTOUFS(mp);
910#ifdef QUOTA
	/*
	 * With quotas on: flush everything except system vnodes first,
	 * then turn off every quota type that has a quota vnode.
	 */
911	if (mp->mnt_flag & MNT_QUOTA) {
912		int i;
913		error = vflush(mp, 0, SKIPSYSTEM|flags);
914		if (error)
915			return (error);
916		for (i = 0; i < MAXQUOTAS; i++) {
917			if (ump->um_quotas[i] == NULLVP)
918				continue;
919			quotaoff(td, mp, i);
920		}
921		/*
922		 * Here we fall through to vflush again to ensure
923		 * that we have gotten rid of all the system vnodes.
924		 */
925	}
926#endif
	/*
	 * VCOPYONWRITE on the device vnode: flush non-system vnodes and
	 * then call ffs_snapshot_unmount().
	 * NOTE(review): the flag presumably means snapshots are active on
	 * this device -- confirm.
	 */
927	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
928		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
929			return (error);
930		ffs_snapshot_unmount(mp);
931		/*
932		 * Here we fall through to vflush again to ensure
933		 * that we have gotten rid of all the system vnodes.
934		 */
935	}
936        /*
937	 * Flush all the files.
938	 */
939	if ((error = vflush(mp, 0, flags)) != 0)
940		return (error);
941	/*
942	 * Flush filesystem metadata.
943	 */
944	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
945	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
946	VOP_UNLOCK(ump->um_devvp, 0, td);
947	return (error);
948}
949
950/*
951 * Get file system statistics.
952 */
953int
954ffs_statfs(mp, sbp, td)
955	struct mount *mp;
956	register struct statfs *sbp;
957	struct thread *td;
958{
959	register struct ufsmount *ump;
960	register struct fs *fs;
961
962	ump = VFSTOUFS(mp);
963	fs = ump->um_fs;
964	if (fs->fs_magic != FS_MAGIC)
965		panic("ffs_statfs");
966	sbp->f_bsize = fs->fs_fsize;
967	sbp->f_iosize = fs->fs_bsize;
968	sbp->f_blocks = fs->fs_dsize;
969	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
970	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
971	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
972	    dbtofsb(fs, fs->fs_pendingblocks);
973	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
974	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
975	if (sbp != &mp->mnt_stat) {
976		sbp->f_type = mp->mnt_vfc->vfc_typenum;
977		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
978			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
979		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
980			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
981	}
982	return (0);
983}
984
985/*
986 * Go through the disk queues to initiate sandbagged IO;
987 * go through the inodes to write those that have been modified;
988 * initiate the writing of the super block if it has been modified.
989 *
990 * Note: we are always called with the filesystem marked `MPBUSY'.
991 */
/*
 * PARAMETERS:
 *	mp	mount point to sync
 *	waitfor	MNT_WAIT makes the vnode locks blocking and the inode
 *		updates synchronous; MNT_LAZY skips the device vnode fsync
 *	cred	credentials passed to VOP_FSYNC()
 *	td	calling thread
 *
 * RETURNS:	0 if every step succeeded, otherwise the most recent error
 *		recorded from VOP_FSYNC(), softdep_flushworklist() or
 *		ffs_sbupdate().  Errors do not stop the sync.
 */
992int
993ffs_sync(mp, waitfor, cred, td)
994	struct mount *mp;
995	int waitfor;
996	struct ucred *cred;
997	struct thread *td;
998{
999	struct vnode *nvp, *vp, *devvp;
1000	struct inode *ip;
1001	struct ufsmount *ump = VFSTOUFS(mp);
1002	struct fs *fs;
1003	int error, count, wait, lockreq, allerror = 0;
1004
1005	fs = ump->um_fs;
	/* A modified superblock on a read-only filesystem is fatal. */
1006	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1007		printf("fs = %s\n", fs->fs_fsmnt);
1008		panic("ffs_sync: rofs mod");
1009	}
1010	/*
1011	 * Write back each (modified) inode.
1012	 */
	/*
	 * Unless the caller asked for MNT_WAIT, vnode locks are requested
	 * with LK_NOWAIT and inode updates are asynchronous.
	 */
1013	wait = 0;
1014	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1015	if (waitfor == MNT_WAIT) {
1016		wait = 1;
1017		lockreq = LK_EXCLUSIVE;
1018	}
	/* Every `goto loop' below is made with mntvnode_mtx held. */
1019	mtx_lock(&mntvnode_mtx);
1020loop:
1021	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1022		/*
1023		 * If the vnode that we are about to sync is no longer
1024		 * associated with this mount point, start over.
1025		 */
1026		if (vp->v_mount != mp)
1027			goto loop;
1028
1029		/*
1030		 * Depend on the mntvnode_slock to keep things stable enough
1031		 * for a quick test.  Since there might be hundreds of
1032		 * thousands of vnodes, we cannot afford even a subroutine
1033		 * call unless there's a good chance that we have work to do.
1034		 */
1035		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1036		ip = VTOI(vp);
		/*
		 * Skip vnodes of type VNON and vnodes with no inode flags
		 * pending and no dirty buffers.
		 */
1037		if (vp->v_type == VNON || ((ip->i_flag &
1038		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1039		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1040			continue;
1041		}
		/*
		 * Everything except character devices is locked and
		 * fsync'ed; VCHR vnodes only get their inode updated.
		 * The list mutex is dropped around either operation.
		 */
1042		if (vp->v_type != VCHR) {
1043			mtx_unlock(&mntvnode_mtx);
1044			if ((error = vget(vp, lockreq, td)) != 0) {
1045				mtx_lock(&mntvnode_mtx);
				/*
				 * ENOENT restarts the scan; any other
				 * vget() failure just skips this vnode.
				 */
1046				if (error == ENOENT)
1047					goto loop;
1048			} else {
1049				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1050					allerror = error;
1051				VOP_UNLOCK(vp, 0, td);
1052				vrele(vp);
1053				mtx_lock(&mntvnode_mtx);
1054			}
1055		} else {
1056			mtx_unlock(&mntvnode_mtx);
1057			UFS_UPDATE(vp, wait);
1058			mtx_lock(&mntvnode_mtx);
1059		}
		/*
		 * The list may have changed while the mutex was dropped;
		 * if our successor is no longer the one we saved, rescan.
		 */
1060		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1061			goto loop;
1062	}
1063	mtx_unlock(&mntvnode_mtx);
1064	/*
1065	 * Force stale file system control information to be flushed.
1066	 */
1067	if (waitfor == MNT_WAIT) {
1068		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1069			allerror = error;
1070		/* Flushed work items may create new vnodes to clean */
1071		if (count) {
1072			mtx_lock(&mntvnode_mtx);
1073			goto loop;
1074		}
1075	}
1076#ifdef QUOTA
1077	qsync(mp);
1078#endif
	/*
	 * Sync the device vnode if it has writes in progress or dirty
	 * buffers (never for MNT_LAZY).  After a synchronous device sync
	 * the whole vnode scan is repeated.
	 */
1079	devvp = ump->um_devvp;
1080	mtx_lock(&devvp->v_interlock);
1081	if (waitfor != MNT_LAZY &&
1082	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1083		mtx_unlock(&devvp->v_interlock);
1084		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
1085		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1086			allerror = error;
1087		VOP_UNLOCK(devvp, 0, td);
1088		if (waitfor == MNT_WAIT) {
1089			mtx_lock(&mntvnode_mtx);
1090			goto loop;
1091		}
1092	} else
1093		mtx_unlock(&devvp->v_interlock);
1094	/*
1095	 * Write back modified superblock.
1096	 */
1097	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1098		allerror = error;
1099	return (allerror);
1100}
1101
1102/*
1103 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1104 * in from disk.  If it is in core, wait for the lock bit to clear, then
1105 * return the inode locked.  Detection and handling of mount points must be
1106 * done by the calling routine.
1107 */
1108static int ffs_inode_hash_lock;
/*
 * ffs_inode_hash_lock is a variable used to manage mutual exclusion
 * of vnode allocation and insertion into the hash, especially to
 * avoid holding more than one vnode for the same inode in the
 * hash table. ffs_inode_hash_lock must hence be tested-and-set
 * or cleared atomically, which is accomplished by holding
 * ffs_inode_hash_mtx.
 *
 * As vnode allocation may block during MALLOC() and zone
 * allocation, we should also do msleep() to give away the CPU
 * if anyone else is allocating a vnode. lockmgr is not suitable
 * here because someone else may insert into the hash table the
 * vnode we are trying to allocate during our sleep, in which
 * case the hash table needs to be examined once again after
 * waking up.
 */
1124static struct mtx ffs_inode_hash_mtx;
1125
1126int
1127ffs_vget(mp, ino, vpp)
1128	struct mount *mp;
1129	ino_t ino;
1130	struct vnode **vpp;
1131{
1132	struct fs *fs;
1133	struct inode *ip;
1134	struct ufsmount *ump;
1135	struct buf *bp;
1136	struct vnode *vp;
1137	dev_t dev;
1138	int error, want_wakeup;
1139
1140	ump = VFSTOUFS(mp);
1141	dev = ump->um_dev;
1142restart:
1143	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
1144		return (0);
1145	}
1146
1147	/*
1148	 * Lock out the creation of new entries in the FFS hash table in
1149	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
1150	 * may occur!
1151	 */
1152	mtx_lock(&ffs_inode_hash_mtx);
1153	if (ffs_inode_hash_lock) {
1154		while (ffs_inode_hash_lock) {
1155			ffs_inode_hash_lock = -1;
1156			msleep(&ffs_inode_hash_lock, &ffs_inode_hash_mtx, PVM, "ffsvgt", 0);
1157		}
1158		mtx_unlock(&ffs_inode_hash_mtx);
1159		goto restart;
1160	}
1161	ffs_inode_hash_lock = 1;
1162	mtx_unlock(&ffs_inode_hash_mtx);
1163
1164	/*
1165	 * If this MALLOC() is performed after the getnewvnode()
1166	 * it might block, leaving a vnode with a NULL v_data to be
1167	 * found by ffs_sync() if a sync happens to fire right then,
1168	 * which will cause a panic because ffs_sync() blindly
1169	 * dereferences vp->v_data (as well it should).
1170	 */
1171	MALLOC(ip, struct inode *, sizeof(struct inode),
1172	    ump->um_malloctype, M_WAITOK);
1173
1174	/* Allocate a new vnode/inode. */
1175	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
1176	if (error) {
1177		/*
1178		 * Do not wake up processes while holding the mutex,
1179		 * otherwise the processes waken up immediately hit
1180		 * themselves into the mutex.
1181		 */
1182		mtx_lock(&ffs_inode_hash_mtx);
1183		want_wakeup = ffs_inode_hash_lock < 0;
1184		ffs_inode_hash_lock = 0;
1185		mtx_unlock(&ffs_inode_hash_mtx);
1186		if (want_wakeup)
1187			wakeup(&ffs_inode_hash_lock);
1188		*vpp = NULL;
1189		FREE(ip, ump->um_malloctype);
1190		return (error);
1191	}
1192	bzero((caddr_t)ip, sizeof(struct inode));
1193	/*
1194	 * FFS supports lock sharing in the stack of vnodes
1195	 */
1196	vp->v_vnlock = &vp->v_lock;
1197	lockinit(vp->v_vnlock, PINOD, "inode", VLKTIMEOUT, LK_CANRECURSE);
1198	vp->v_data = ip;
1199	ip->i_vnode = vp;
1200	ip->i_fs = fs = ump->um_fs;
1201	ip->i_dev = dev;
1202	ip->i_number = ino;
1203#ifdef QUOTA
1204	{
1205		int i;
1206		for (i = 0; i < MAXQUOTAS; i++)
1207			ip->i_dquot[i] = NODQUOT;
1208	}
1209#endif
1210	/*
1211	 * Put it onto its hash chain and lock it so that other requests for
1212	 * this inode will block if they arrive while we are sleeping waiting
1213	 * for old data structures to be purged or for the contents of the
1214	 * disk portion of this inode to be read.
1215	 */
1216	ufs_ihashins(ip);
1217
1218	/*
1219	 * Do not wake up processes while holding the mutex,
1220	 * otherwise the processes waken up immediately hit
1221	 * themselves into the mutex.
1222	 */
1223	mtx_lock(&ffs_inode_hash_mtx);
1224	want_wakeup = ffs_inode_hash_lock < 0;
1225	ffs_inode_hash_lock = 0;
1226	mtx_unlock(&ffs_inode_hash_mtx);
1227	if (want_wakeup)
1228		wakeup(&ffs_inode_hash_lock);
1229
1230	/* Read in the disk contents for the inode, copy into the inode. */
1231	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1232	    (int)fs->fs_bsize, NOCRED, &bp);
1233	if (error) {
1234		/*
1235		 * The inode does not contain anything useful, so it would
1236		 * be misleading to leave it on its hash chain. With mode
1237		 * still zero, it will be unlinked and returned to the free
1238		 * list by vput().
1239		 */
1240		brelse(bp);
1241		vput(vp);
1242		*vpp = NULL;
1243		return (error);
1244	}
1245	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
1246	if (DOINGSOFTDEP(vp))
1247		softdep_load_inodeblock(ip);
1248	else
1249		ip->i_effnlink = ip->i_nlink;
1250	bqrelse(bp);
1251
1252	/*
1253	 * Initialize the vnode from the inode, check for aliases.
1254	 * Note that the underlying vnode may have changed.
1255	 */
1256	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1257	if (error) {
1258		vput(vp);
1259		*vpp = NULL;
1260		return (error);
1261	}
1262	/*
1263	 * Finish inode initialization now that aliasing has been resolved.
1264	 */
1265	ip->i_devvp = ump->um_devvp;
1266	VREF(ip->i_devvp);
1267	/*
1268	 * Set up a generation number for this inode if it does not
1269	 * already have one. This should only happen on old filesystems.
1270	 */
1271	if (ip->i_gen == 0) {
1272		ip->i_gen = random() / 2 + 1;
1273		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1274			ip->i_flag |= IN_MODIFIED;
1275	}
1276	/*
1277	 * Ensure that uid and gid are correct. This is a temporary
1278	 * fix until fsck has been changed to do the update.
1279	 */
1280	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1281		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1282		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1283	}						/* XXX */
1284
1285	*vpp = vp;
1286	return (0);
1287}
1288
1289/*
1290 * File handle to vnode
1291 *
1292 * Have to be really careful about stale file handles:
1293 * - check that the inode number is valid
1294 * - call ffs_vget() to get the locked inode
1295 * - check for an unallocated inode (i_mode == 0)
1296 * - check that the given client host has export rights and return
1297 *   those rights via. exflagsp and credanonp
1298 */
1299int
1300ffs_fhtovp(mp, fhp, vpp)
1301	register struct mount *mp;
1302	struct fid *fhp;
1303	struct vnode **vpp;
1304{
1305	register struct ufid *ufhp;
1306	struct fs *fs;
1307
1308	ufhp = (struct ufid *)fhp;
1309	fs = VFSTOUFS(mp)->um_fs;
1310	if (ufhp->ufid_ino < ROOTINO ||
1311	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1312		return (ESTALE);
1313	return (ufs_fhtovp(mp, ufhp, vpp));
1314}
1315
1316/*
1317 * Vnode pointer to File handle
1318 */
1319/* ARGSUSED */
1320int
1321ffs_vptofh(vp, fhp)
1322	struct vnode *vp;
1323	struct fid *fhp;
1324{
1325	register struct inode *ip;
1326	register struct ufid *ufhp;
1327
1328	ip = VTOI(vp);
1329	ufhp = (struct ufid *)fhp;
1330	ufhp->ufid_len = sizeof(struct ufid);
1331	ufhp->ufid_ino = ip->i_number;
1332	ufhp->ufid_gen = ip->i_gen;
1333	return (0);
1334}
1335
1336/*
1337 * Initialize the filesystem; just use ufs_init.
1338 */
1339static int
1340ffs_init(vfsp)
1341	struct vfsconf *vfsp;
1342{
1343
1344	softdep_initialize();
1345	mtx_init(&ffs_inode_hash_mtx, "ifsvgt", MTX_DEF);
1346	return (ufs_init(vfsp));
1347}
1348
1349/*
1350 * Write a superblock and associated information back to disk.
1351 */
1352static int
1353ffs_sbupdate(mp, waitfor)
1354	struct ufsmount *mp;
1355	int waitfor;
1356{
1357	register struct fs *dfs, *fs = mp->um_fs;
1358	register struct buf *bp;
1359	int blks;
1360	void *space;
1361	int i, size, error, allerror = 0;
1362
1363	/*
1364	 * First write back the summary information.
1365	 */
1366	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1367	space = fs->fs_csp;
1368	for (i = 0; i < blks; i += fs->fs_frag) {
1369		size = fs->fs_bsize;
1370		if (i + fs->fs_frag > blks)
1371			size = (blks - i) * fs->fs_fsize;
1372		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1373		    size, 0, 0);
1374		bcopy(space, bp->b_data, (u_int)size);
1375		space = (char *)space + size;
1376		if (waitfor != MNT_WAIT)
1377			bawrite(bp);
1378		else if ((error = bwrite(bp)) != 0)
1379			allerror = error;
1380	}
1381	/*
1382	 * Now write back the superblock itself. If any errors occurred
1383	 * up to this point, then fail so that the superblock avoids
1384	 * being written out as clean.
1385	 */
1386	if (allerror)
1387		return (allerror);
1388	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1389	fs->fs_fmod = 0;
1390	fs->fs_time = time_second;
1391	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1392	/* Restore compatibility to old file systems.		   XXX */
1393	dfs = (struct fs *)bp->b_data;				/* XXX */
1394	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1395		dfs->fs_nrpos = -1;				/* XXX */
1396	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1397		int32_t *lp, tmp;				/* XXX */
1398								/* XXX */
1399		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1400		tmp = lp[4];					/* XXX */
1401		for (i = 4; i > 0; i--)				/* XXX */
1402			lp[i] = lp[i-1];			/* XXX */
1403		lp[0] = tmp;					/* XXX */
1404	}							/* XXX */
1405	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1406	if (waitfor != MNT_WAIT)
1407		bawrite(bp);
1408	else if ((error = bwrite(bp)) != 0)
1409		allerror = error;
1410	return (allerror);
1411}
1412