ffs_vfsops.c revision 92462
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 92462 2002-03-17 01:25:47Z mckusick $
35 */
36
37#include "opt_quota.h"
38#include "opt_ufs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/proc.h>
44#include <sys/kernel.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <sys/disklabel.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <ufs/ufs/extattr.h>
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/ufsmount.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64#include <vm/vm.h>
65#include <vm/vm_page.h>
66
67static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
68
69static int	ffs_sbupdate __P((struct ufsmount *, int));
70int	ffs_reload __P((struct mount *,struct ucred *,struct thread *));
71static int	ffs_oldfscompat __P((struct fs *));
72static int	ffs_init __P((struct vfsconf *));
73
74static struct vfsops ufs_vfsops = {
75	ffs_mount,
76	ufs_start,
77	ffs_unmount,
78	ufs_root,
79	ufs_quotactl,
80	ffs_statfs,
81	ffs_sync,
82	ffs_vget,
83	ffs_fhtovp,
84	vfs_stdcheckexp,
85	ffs_vptofh,
86	ffs_init,
87	vfs_stduninit,
88#ifdef UFS_EXTATTR
89	ufs_extattrctl,
90#else
91	vfs_stdextattrctl,
92#endif
93};
94
95VFS_SET(ufs_vfsops, ufs, 0);
96
97/*
98 * ffs_mount
99 *
100 * Called when mounting local physical media
101 *
102 * PARAMETERS:
103 *		mountroot
104 *			mp	mount point structure
105 *			path	NULL (flag for root mount!!!)
106 *			data	<unused>
107 *			ndp	<unused>
108 *			p	process (user credentials check [statfs])
109 *
110 *		mount
111 *			mp	mount point structure
112 *			path	path to mount point
113 *			data	pointer to argument struct in user space
114 *			ndp	mount point namei() return (used for
115 *				credentials on reload), reused to look
116 *				up block device.
117 *			p	process (user credentials check)
118 *
119 * RETURNS:	0	Success
120 *		!0	error number (errno.h)
121 *
122 * LOCK STATE:
123 *
124 *		ENTRY
125 *			mount point is locked
126 *		EXIT
127 *			mount point is locked
128 *
129 * NOTES:
130 *		A NULL path can be used for a flag since the mount
131 *		system call will fail with EFAULT in copyinstr in
132 *		namei() if it is a genuine NULL from the user.
133 */
134int
135ffs_mount(mp, path, data, ndp, td)
136        struct mount		*mp;	/* mount struct pointer*/
137        char			*path;	/* path to mount point*/
138        caddr_t			data;	/* arguments to FS specific mount*/
139        struct nameidata	*ndp;	/* mount point credentials*/
140        struct thread		*td;	/* process requesting mount*/
141{
142	size_t		size;
143	struct vnode	*devvp;
144	struct ufs_args args;
145	struct ufsmount *ump = 0;
146	register struct fs *fs;
147	int error, flags;
148	mode_t accessmode;
149
150	/*
151	 * Use NULL path to indicate we are mounting the root file system.
152	 */
153	if (path == NULL) {
154		if ((error = bdevvp(rootdev, &rootvp))) {
155			printf("ffs_mountroot: can't find rootvp\n");
156			return (error);
157		}
158
159		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
160			return (error);
161		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
162		return (0);
163	}
164
165	/*
166	 * Mounting non-root file system or updating a file system
167	 */
168	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
169		return (error);
170
171	/*
172	 * If updating, check whether changing from read-only to
173	 * read/write; if there is no device name, that's all we do.
174	 */
175	if (mp->mnt_flag & MNT_UPDATE) {
176		ump = VFSTOUFS(mp);
177		fs = ump->um_fs;
178		devvp = ump->um_devvp;
179		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
180			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
181				return (error);
182			/*
183			 * Flush any dirty data.
184			 */
185			VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
186			/*
187			 * Check for and optionally get rid of files open
188			 * for writing.
189			 */
190			flags = WRITECLOSE;
191			if (mp->mnt_flag & MNT_FORCE)
192				flags |= FORCECLOSE;
193			if (mp->mnt_flag & MNT_SOFTDEP) {
194				error = softdep_flushfiles(mp, flags, td);
195			} else {
196				error = ffs_flushfiles(mp, flags, td);
197			}
198			if (error) {
199				vn_finished_write(mp);
200				return (error);
201			}
202			if (fs->fs_pendingblocks != 0 ||
203			    fs->fs_pendinginodes != 0) {
204				printf("%s: update error: blocks %d files %d\n",
205				    fs->fs_fsmnt, fs->fs_pendingblocks,
206				    fs->fs_pendinginodes);
207				fs->fs_pendingblocks = 0;
208				fs->fs_pendinginodes = 0;
209			}
210			fs->fs_ronly = 1;
211			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
212				fs->fs_clean = 1;
213			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
214				fs->fs_ronly = 0;
215				fs->fs_clean = 0;
216				vn_finished_write(mp);
217				return (error);
218			}
219			vn_finished_write(mp);
220		}
221		if ((mp->mnt_flag & MNT_RELOAD) &&
222		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
223			return (error);
224		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
225			/*
226			 * If upgrade to read-write by non-root, then verify
227			 * that user has necessary permissions on the device.
228			 */
229			if (suser_td(td)) {
230				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
231				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
232				    td->td_ucred, td)) != 0) {
233					VOP_UNLOCK(devvp, 0, td);
234					return (error);
235				}
236				VOP_UNLOCK(devvp, 0, td);
237			}
238			fs->fs_flags &= ~FS_UNCLEAN;
239			if (fs->fs_clean == 0) {
240				fs->fs_flags |= FS_UNCLEAN;
241				if ((mp->mnt_flag & MNT_FORCE) ||
242				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
243				     (fs->fs_flags & FS_DOSOFTDEP))) {
244					printf("WARNING: %s was not %s\n",
245					   fs->fs_fsmnt, "properly dismounted");
246				} else {
247					printf(
248"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
249					    fs->fs_fsmnt);
250					return (EPERM);
251				}
252			}
253			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
254				return (error);
255			fs->fs_ronly = 0;
256			fs->fs_clean = 0;
257			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
258				vn_finished_write(mp);
259				return (error);
260			}
261			/* check to see if we need to start softdep */
262			if ((fs->fs_flags & FS_DOSOFTDEP) &&
263			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
264				vn_finished_write(mp);
265				return (error);
266			}
267			if (fs->fs_snapinum[0] != 0)
268				ffs_snapshot_mount(mp);
269			vn_finished_write(mp);
270		}
271		/*
272		 * Soft updates is incompatible with "async",
273		 * so if we are doing softupdates stop the user
274		 * from setting the async flag in an update.
275		 * Softdep_mount() clears it in an initial mount
276		 * or ro->rw remount.
277		 */
278		if (mp->mnt_flag & MNT_SOFTDEP)
279			mp->mnt_flag &= ~MNT_ASYNC;
280		/*
281		 * If not updating name, process export requests.
282		 */
283		if (args.fspec == 0)
284			return (vfs_export(mp, &args.export));
285		/*
286		 * If this is a snapshot request, take the snapshot.
287		 */
288		if (mp->mnt_flag & MNT_SNAPSHOT)
289			return (ffs_snapshot(mp, args.fspec));
290	}
291
292	/*
293	 * Not an update, or updating the name: look up the name
294	 * and verify that it refers to a sensible block device.
295	 */
296	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
297	if ((error = namei(ndp)) != 0)
298		return (error);
299	NDFREE(ndp, NDF_ONLY_PNBUF);
300	devvp = ndp->ni_vp;
301	if (!vn_isdisk(devvp, &error)) {
302		vrele(devvp);
303		return (error);
304	}
305
306	/*
307	 * If mount by non-root, then verify that user has necessary
308	 * permissions on the device.
309	 */
310	if (suser_td(td)) {
311		accessmode = VREAD;
312		if ((mp->mnt_flag & MNT_RDONLY) == 0)
313			accessmode |= VWRITE;
314		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
315		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
316			vput(devvp);
317			return (error);
318		}
319		VOP_UNLOCK(devvp, 0, td);
320	}
321
322	if (mp->mnt_flag & MNT_UPDATE) {
323		/*
324		 * Update only
325		 *
326		 * If it's not the same vnode, or at least the same device
327		 * then it's not correct.
328		 */
329
330		if (devvp != ump->um_devvp &&
331		    devvp->v_rdev != ump->um_devvp->v_rdev)
332			error = EINVAL;	/* needs translation */
333		vrele(devvp);
334		if (error)
335			return (error);
336	} else {
337		/*
338		 * New mount
339		 *
340		 * We need the name for the mount point (also used for
341		 * "last mounted on") copied in. If an error occurs,
342		 * the mount point is discarded by the upper level code.
343		 * Note that vfs_mount() populates f_mntonname for us.
344		 */
345		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
346			vrele(devvp);
347			return (error);
348		}
349	}
350	/*
351	 * Save "mounted from" device name info for mount point (NULL pad).
352	 */
353	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
354	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
355	/*
356	 * Initialize filesystem stat information in mount struct.
357	 */
358	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
359	return (0);
360}
361
362/*
363 * Reload all incore data for a filesystem (used after running fsck on
364 * the root filesystem and finding things to fix). The filesystem must
365 * be mounted read-only.
366 *
367 * Things to do to update the mount:
368 *	1) invalidate all cached meta-data.
369 *	2) re-read superblock from disk.
370 *	3) re-read summary information from disk.
371 *	4) invalidate all inactive vnodes.
372 *	5) invalidate all cached file data.
373 *	6) re-read inode data for all active vnodes.
374 */
375int
376ffs_reload(mp, cred, td)
377	register struct mount *mp;
378	struct ucred *cred;
379	struct thread *td;
380{
381	register struct vnode *vp, *nvp, *devvp;
382	struct inode *ip;
383	void *space;
384	struct buf *bp;
385	struct fs *fs, *newfs;
386	struct partinfo dpart;
387	dev_t dev;
388	int i, blks, size, error;
389	int32_t *lp;
390
391	if ((mp->mnt_flag & MNT_RDONLY) == 0)
392		return (EINVAL);
393	/*
394	 * Step 1: invalidate all cached meta-data.
395	 */
396	devvp = VFSTOUFS(mp)->um_devvp;
397	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
398	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
399	VOP_UNLOCK(devvp, 0, td);
400	if (error)
401		panic("ffs_reload: dirty1");
402
403	dev = devvp->v_rdev;
404
405	/*
406	 * Only VMIO the backing device if the backing device is a real
407	 * block device.
408	 */
409	if (vn_isdisk(devvp, NULL)) {
410		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
411		vfs_object_create(devvp, td, td->td_ucred);
412		mtx_lock(&devvp->v_interlock);
413		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
414	}
415
416	/*
417	 * Step 2: re-read superblock from disk.
418	 */
419	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, td) != 0)
420		size = DEV_BSIZE;
421	else
422		size = dpart.disklab->d_secsize;
423	if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0)
424		return (error);
425	newfs = (struct fs *)bp->b_data;
426	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
427		newfs->fs_bsize < sizeof(struct fs)) {
428			brelse(bp);
429			return (EIO);		/* XXX needs translation */
430	}
431	fs = VFSTOUFS(mp)->um_fs;
432	/*
433	 * Copy pointer fields back into superblock before copying in	XXX
434	 * new superblock. These should really be in the ufsmount.	XXX
435	 * Note that important parameters (eg fs_ncg) are unchanged.
436	 */
437	newfs->fs_csp = fs->fs_csp;
438	newfs->fs_maxcluster = fs->fs_maxcluster;
439	newfs->fs_contigdirs = fs->fs_contigdirs;
440	newfs->fs_active = fs->fs_active;
441	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
442	if (fs->fs_sbsize < SBSIZE)
443		bp->b_flags |= B_INVAL | B_NOCACHE;
444	brelse(bp);
445	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
446	ffs_oldfscompat(fs);
447	/* An old fsck may have zeroed these fields, so recheck them. */
448	if (fs->fs_avgfilesize <= 0)		/* XXX */
449		fs->fs_avgfilesize = AVFILESIZ;	/* XXX */
450	if (fs->fs_avgfpdir <= 0)		/* XXX */
451		fs->fs_avgfpdir = AFPDIR;	/* XXX */
452	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
453		printf("%s: reload pending error: blocks %d files %d\n",
454		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
455		fs->fs_pendingblocks = 0;
456		fs->fs_pendinginodes = 0;
457	}
458
459	/*
460	 * Step 3: re-read summary information from disk.
461	 */
462	blks = howmany(fs->fs_cssize, fs->fs_fsize);
463	space = fs->fs_csp;
464	for (i = 0; i < blks; i += fs->fs_frag) {
465		size = fs->fs_bsize;
466		if (i + fs->fs_frag > blks)
467			size = (blks - i) * fs->fs_fsize;
468		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
469		    NOCRED, &bp);
470		if (error)
471			return (error);
472		bcopy(bp->b_data, space, (u_int)size);
473		space = (char *)space + size;
474		brelse(bp);
475	}
476	/*
477	 * We no longer know anything about clusters per cylinder group.
478	 */
479	if (fs->fs_contigsumsize > 0) {
480		lp = fs->fs_maxcluster;
481		for (i = 0; i < fs->fs_ncg; i++)
482			*lp++ = fs->fs_contigsumsize;
483	}
484
485loop:
486	mtx_lock(&mntvnode_mtx);
487	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
488		if (vp->v_mount != mp) {
489			mtx_unlock(&mntvnode_mtx);
490			goto loop;
491		}
492		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
493		mtx_unlock(&mntvnode_mtx);
494		/*
495		 * Step 4: invalidate all inactive vnodes.
496		 */
497		if (vrecycle(vp, NULL, td))
498			goto loop;
499		/*
500		 * Step 5: invalidate all cached file data.
501		 */
502		mtx_lock(&vp->v_interlock);
503		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
504			goto loop;
505		}
506		if (vinvalbuf(vp, 0, cred, td, 0, 0))
507			panic("ffs_reload: dirty2");
508		/*
509		 * Step 6: re-read inode data for all active vnodes.
510		 */
511		ip = VTOI(vp);
512		error =
513		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
514		    (int)fs->fs_bsize, NOCRED, &bp);
515		if (error) {
516			vput(vp);
517			return (error);
518		}
519		ip->i_din = *((struct dinode *)bp->b_data +
520		    ino_to_fsbo(fs, ip->i_number));
521		ip->i_effnlink = ip->i_nlink;
522		brelse(bp);
523		vput(vp);
524		mtx_lock(&mntvnode_mtx);
525	}
526	mtx_unlock(&mntvnode_mtx);
527	return (0);
528}
529
530#include <sys/sysctl.h>
531int bigcgs = 0;
532SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
533
534/*
535 * Common code for mount and mountroot
536 */
537int
538ffs_mountfs(devvp, mp, td, malloctype)
539	register struct vnode *devvp;
540	struct mount *mp;
541	struct thread *td;
542	struct malloc_type *malloctype;
543{
544	register struct ufsmount *ump;
545	struct buf *bp;
546	register struct fs *fs;
547	dev_t dev;
548	struct partinfo dpart;
549	void *space;
550	int error, i, blks, size, ronly;
551	int32_t *lp;
552	struct ucred *cred;
553	u_int64_t maxfilesize;					/* XXX */
554	size_t strsize;
555	int ncount;
556
557	dev = devvp->v_rdev;
558	cred = td ? td->td_ucred : NOCRED;
559	/*
560	 * Disallow multiple mounts of the same device.
561	 * Disallow mounting of a device that is currently in use
562	 * (except for root, which might share swap device for miniroot).
563	 * Flush out any old buffers remaining from a previous use.
564	 */
565	error = vfs_mountedon(devvp);
566	if (error)
567		return (error);
568	ncount = vcount(devvp);
569
570	if (ncount > 1 && devvp != rootvp)
571		return (EBUSY);
572	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
573	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
574	VOP_UNLOCK(devvp, 0, td);
575	if (error)
576		return (error);
577
578	/*
579	 * Only VMIO the backing device if the backing device is a real
580	 * block device.
581	 * Note that it is optional that the backing device be VMIOed.  This
582	 * increases the opportunity for metadata caching.
583	 */
584	if (vn_isdisk(devvp, NULL)) {
585		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
586		vfs_object_create(devvp, td, cred);
587		mtx_lock(&devvp->v_interlock);
588		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
589	}
590
591	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
592	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
593	/*
594	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
595	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
596	 * XXX: start to avoid getting trashed later on.
597	 */
598#ifdef notyet
599	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
600#else
601	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
602#endif
603	VOP_UNLOCK(devvp, 0, td);
604	if (error)
605		return (error);
606	if (devvp->v_rdev->si_iosize_max > mp->mnt_iosize_max)
607		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
608	if (mp->mnt_iosize_max > MAXPHYS)
609		mp->mnt_iosize_max = MAXPHYS;
610
611	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, td) != 0)
612		size = DEV_BSIZE;
613	else
614		size = dpart.disklab->d_secsize;
615
616	bp = NULL;
617	ump = NULL;
618	if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0)
619		goto out;
620	fs = (struct fs *)bp->b_data;
621	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
622	    fs->fs_bsize < sizeof(struct fs)) {
623		error = EINVAL;		/* XXX needs translation */
624		goto out;
625	}
626	fs->fs_fmod = 0;
627	fs->fs_flags &= ~FS_UNCLEAN;
628	if (fs->fs_clean == 0) {
629		fs->fs_flags |= FS_UNCLEAN;
630		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
631		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
632		     (fs->fs_flags & FS_DOSOFTDEP))) {
633			printf(
634"WARNING: %s was not properly dismounted\n",
635			    fs->fs_fsmnt);
636		} else {
637			printf(
638"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
639			    fs->fs_fsmnt);
640			error = EPERM;
641			goto out;
642		}
643		if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
644			printf("%s: lost blocks %d files %d\n", fs->fs_fsmnt,
645			    fs->fs_pendingblocks, fs->fs_pendinginodes);
646			fs->fs_pendingblocks = 0;
647			fs->fs_pendinginodes = 0;
648		}
649	}
650	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
651		printf("%s: mount pending error: blocks %d files %d\n",
652		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
653		fs->fs_pendingblocks = 0;
654		fs->fs_pendinginodes = 0;
655	}
656	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
657	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
658		error = EROFS;          /* needs translation */
659		goto out;
660	}
661	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
662	ump->um_malloctype = malloctype;
663	ump->um_i_effnlink_valid = 1;
664	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
665	    M_WAITOK);
666	ump->um_blkatoff = ffs_blkatoff;
667	ump->um_truncate = ffs_truncate;
668	ump->um_update = ffs_update;
669	ump->um_valloc = ffs_valloc;
670	ump->um_vfree = ffs_vfree;
671	ump->um_balloc = ffs_balloc;
672	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
673	if (fs->fs_sbsize < SBSIZE)
674		bp->b_flags |= B_INVAL | B_NOCACHE;
675	brelse(bp);
676	bp = NULL;
677	fs = ump->um_fs;
678	fs->fs_ronly = ronly;
679	size = fs->fs_cssize;
680	blks = howmany(size, fs->fs_fsize);
681	if (fs->fs_contigsumsize > 0)
682		size += fs->fs_ncg * sizeof(int32_t);
683	size += fs->fs_ncg * sizeof(u_int8_t);
684	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
685	fs->fs_csp = space;
686	for (i = 0; i < blks; i += fs->fs_frag) {
687		size = fs->fs_bsize;
688		if (i + fs->fs_frag > blks)
689			size = (blks - i) * fs->fs_fsize;
690		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
691		    cred, &bp)) != 0) {
692			free(fs->fs_csp, M_UFSMNT);
693			goto out;
694		}
695		bcopy(bp->b_data, space, (u_int)size);
696		space = (char *)space + size;
697		brelse(bp);
698		bp = NULL;
699	}
700	if (fs->fs_contigsumsize > 0) {
701		fs->fs_maxcluster = lp = space;
702		for (i = 0; i < fs->fs_ncg; i++)
703			*lp++ = fs->fs_contigsumsize;
704		space = lp;
705	}
706	size = fs->fs_ncg * sizeof(u_int8_t);
707	fs->fs_contigdirs = (u_int8_t *)space;
708	bzero(fs->fs_contigdirs, size);
709	fs->fs_active = NULL;
710	/* Compatibility for old filesystems 	   XXX */
711	if (fs->fs_avgfilesize <= 0)		/* XXX */
712		fs->fs_avgfilesize = AVFILESIZ;	/* XXX */
713	if (fs->fs_avgfpdir <= 0)		/* XXX */
714		fs->fs_avgfpdir = AFPDIR;	/* XXX */
715	mp->mnt_data = (qaddr_t)ump;
716	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
717	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
718	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
719	    vfs_getvfs(&mp->mnt_stat.f_fsid))
720		vfs_getnewfsid(mp);
721	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
722	mp->mnt_flag |= MNT_LOCAL;
723	ump->um_mountp = mp;
724	ump->um_dev = dev;
725	ump->um_devvp = devvp;
726	ump->um_nindir = fs->fs_nindir;
727	ump->um_bptrtodb = fs->fs_fsbtodb;
728	ump->um_seqinc = fs->fs_frag;
729	for (i = 0; i < MAXQUOTAS; i++)
730		ump->um_quotas[i] = NULLVP;
731#ifdef UFS_EXTATTR
732	ufs_extattr_uepm_init(&ump->um_extattr);
733#endif
734	devvp->v_rdev->si_mountpoint = mp;
735	ffs_oldfscompat(fs);
736
737	/*
738	 * Set FS local "last mounted on" information (NULL pad)
739	 */
740	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
741			fs->fs_fsmnt,			/* copy area*/
742			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
743			&strsize);			/* real size*/
744	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
745
746	if( mp->mnt_flag & MNT_ROOTFS) {
747		/*
748		 * Root mount; update timestamp in mount structure.
749		 * this will be used by the common root mount code
750		 * to update the system clock.
751		 */
752		mp->mnt_time = fs->fs_time;
753	}
754
755	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
756	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
757	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
758		fs->fs_maxfilesize = maxfilesize;		/* XXX */
759	if (bigcgs) {
760		if (fs->fs_sparecon[0] <= 0)
761			fs->fs_sparecon[0] = fs->fs_cgsize;
762		fs->fs_cgsize = fs->fs_bsize;
763	}
764	if (ronly == 0) {
765		if ((fs->fs_flags & FS_DOSOFTDEP) &&
766		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
767			free(fs->fs_csp, M_UFSMNT);
768			goto out;
769		}
770		if (fs->fs_snapinum[0] != 0)
771			ffs_snapshot_mount(mp);
772		fs->fs_fmod = 1;
773		fs->fs_clean = 0;
774		(void) ffs_sbupdate(ump, MNT_WAIT);
775	}
776#ifdef UFS_EXTATTR
777#ifdef UFS_EXTATTR_AUTOSTART
778	/*
779	 *
780	 * Auto-starting does the following:
781	 *	- check for /.attribute in the fs, and extattr_start if so
782	 *	- for each file in .attribute, enable that file with
783	 * 	  an attribute of the same name.
784	 * Not clear how to report errors -- probably eat them.
785	 * This would all happen while the file system was busy/not
786	 * available, so would effectively be "atomic".
787	 */
788	(void) ufs_extattr_autostart(mp, td);
789#endif /* !UFS_EXTATTR_AUTOSTART */
790#endif /* !UFS_EXTATTR */
791	return (0);
792out:
793	devvp->v_rdev->si_mountpoint = NULL;
794	if (bp)
795		brelse(bp);
796	/* XXX: see comment above VOP_OPEN */
797#ifdef notyet
798	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
799#else
800	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
801#endif
802	if (ump) {
803		free(ump->um_fs, M_UFSMNT);
804		free(ump, M_UFSMNT);
805		mp->mnt_data = (qaddr_t)0;
806	}
807	return (error);
808}
809
810/*
811 * Sanity checks for old file systems.
812 *
813 * XXX - goes away some day.
814 */
815static int
816ffs_oldfscompat(fs)
817	struct fs *fs;
818{
819
820	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
821	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
822	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
823		fs->fs_nrpos = 8;				/* XXX */
824	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
825#if 0
826		int i;						/* XXX */
827		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
828								/* XXX */
829		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
830		for (i = 0; i < NIADDR; i++) {			/* XXX */
831			sizepb *= NINDIR(fs);			/* XXX */
832			fs->fs_maxfilesize += sizepb;		/* XXX */
833		}						/* XXX */
834#endif
835		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
836		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
837		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
838	}							/* XXX */
839	return (0);
840}
841
842/*
843 * unmount system call
844 */
845int
846ffs_unmount(mp, mntflags, td)
847	struct mount *mp;
848	int mntflags;
849	struct thread *td;
850{
851	register struct ufsmount *ump = VFSTOUFS(mp);
852	register struct fs *fs;
853	int error, flags;
854
855	flags = 0;
856	if (mntflags & MNT_FORCE) {
857		flags |= FORCECLOSE;
858	}
859#ifdef UFS_EXTATTR
860	if ((error = ufs_extattr_stop(mp, td))) {
861		if (error != EOPNOTSUPP)
862			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
863			    error);
864	} else {
865		ufs_extattr_uepm_destroy(&ump->um_extattr);
866	}
867#endif
868	if (mp->mnt_flag & MNT_SOFTDEP) {
869		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
870			return (error);
871	} else {
872		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
873			return (error);
874	}
875	fs = ump->um_fs;
876	if (bigcgs) {
877		fs->fs_cgsize = fs->fs_sparecon[0];
878		fs->fs_sparecon[0] = 0;
879	}
880	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
881		printf("%s: unmount pending error: blocks %d files %d\n",
882		    fs->fs_fsmnt, fs->fs_pendingblocks, fs->fs_pendinginodes);
883		fs->fs_pendingblocks = 0;
884		fs->fs_pendinginodes = 0;
885	}
886	if (fs->fs_ronly == 0) {
887		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
888		error = ffs_sbupdate(ump, MNT_WAIT);
889		if (error) {
890			fs->fs_clean = 0;
891			return (error);
892		}
893	}
894	ump->um_devvp->v_rdev->si_mountpoint = NULL;
895
896	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
897	/* XXX: see comment above VOP_OPEN */
898#ifdef notyet
899	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
900		NOCRED, td);
901#else
902	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
903#endif
904
905	vrele(ump->um_devvp);
906
907	free(fs->fs_csp, M_UFSMNT);
908	free(fs, M_UFSMNT);
909	free(ump, M_UFSMNT);
910	mp->mnt_data = (qaddr_t)0;
911	mp->mnt_flag &= ~MNT_LOCAL;
912	return (error);
913}
914
915/*
916 * Flush out all the files in a filesystem.
917 */
918int
919ffs_flushfiles(mp, flags, td)
920	register struct mount *mp;
921	int flags;
922	struct thread *td;
923{
924	register struct ufsmount *ump;
925	int error;
926
927	ump = VFSTOUFS(mp);
928#ifdef QUOTA
929	if (mp->mnt_flag & MNT_QUOTA) {
930		int i;
931		error = vflush(mp, 0, SKIPSYSTEM|flags);
932		if (error)
933			return (error);
934		for (i = 0; i < MAXQUOTAS; i++) {
935			if (ump->um_quotas[i] == NULLVP)
936				continue;
937			quotaoff(td, mp, i);
938		}
939		/*
940		 * Here we fall through to vflush again to ensure
941		 * that we have gotten rid of all the system vnodes.
942		 */
943	}
944#endif
945	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
946		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
947			return (error);
948		ffs_snapshot_unmount(mp);
949		/*
950		 * Here we fall through to vflush again to ensure
951		 * that we have gotten rid of all the system vnodes.
952		 */
953	}
954        /*
955	 * Flush all the files.
956	 */
957	if ((error = vflush(mp, 0, flags)) != 0)
958		return (error);
959	/*
960	 * Flush filesystem metadata.
961	 */
962	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
963	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
964	VOP_UNLOCK(ump->um_devvp, 0, td);
965	return (error);
966}
967
968/*
969 * Get file system statistics.
970 */
971int
972ffs_statfs(mp, sbp, td)
973	struct mount *mp;
974	register struct statfs *sbp;
975	struct thread *td;
976{
977	register struct ufsmount *ump;
978	register struct fs *fs;
979
980	ump = VFSTOUFS(mp);
981	fs = ump->um_fs;
982	if (fs->fs_magic != FS_MAGIC)
983		panic("ffs_statfs");
984	sbp->f_bsize = fs->fs_fsize;
985	sbp->f_iosize = fs->fs_bsize;
986	sbp->f_blocks = fs->fs_dsize;
987	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
988	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
989	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
990	    dbtofsb(fs, fs->fs_pendingblocks);
991	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
992	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
993	if (sbp != &mp->mnt_stat) {
994		sbp->f_type = mp->mnt_vfc->vfc_typenum;
995		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
996			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
997		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
998			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
999	}
1000	return (0);
1001}
1002
1003/*
1004 * Go through the disk queues to initiate sandbagged IO;
1005 * go through the inodes to write those that have been modified;
1006 * initiate the writing of the super block if it has been modified.
1007 *
1008 * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * Arguments:
 *	mp	mount point whose vnode list (mnt_nvnodelist) is scanned.
 *	waitfor	MNT_WAIT: vnode locks are taken without LK_NOWAIT,
 *		UFS_UPDATE() is called with wait == 1, the soft updates
 *		worklist is flushed, and the vnode list is rescanned
 *		after the device vnode has been flushed.
 *		MNT_LAZY: the device vnode is not flushed.
 *		The value is also handed to VOP_FSYNC() and ffs_sbupdate().
 *	cred	credentials handed to VOP_FSYNC().
 *	td	calling thread, handed to vget(), vn_lock() and the VOPs.
 *
 * Returns 0 if nothing failed, otherwise the most recently recorded
 * error from VOP_FSYNC(), softdep_flushworklist() or ffs_sbupdate()
 * (a later failure overwrites an earlier one).
 * Panics if the superblock is marked modified on a read-only filesystem.
1009 */
1010int
1011ffs_sync(mp, waitfor, cred, td)
1012	struct mount *mp;
1013	int waitfor;
1014	struct ucred *cred;
1015	struct thread *td;
1016{
1017	struct vnode *nvp, *vp, *devvp;
1018	struct inode *ip;
1019	struct ufsmount *ump = VFSTOUFS(mp);
1020	struct fs *fs;
1021	int error, count, wait, lockreq, allerror = 0;
1022
1023	fs = ump->um_fs;
1024	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1025		printf("fs = %s\n", fs->fs_fsmnt);
1026		panic("ffs_sync: rofs mod");
1027	}
1028	/*
1029	 * Write back each (modified) inode.
	 *
	 * Unless the caller asked for MNT_WAIT, vnode locks are requested
	 * with LK_NOWAIT and UFS_UPDATE() is called with wait == 0.
1030	 */
1031	wait = 0;
1032	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1033	if (waitfor == MNT_WAIT) {
1034		wait = 1;
1035		lockreq = LK_EXCLUSIVE;
1036	}
1037	mtx_lock(&mntvnode_mtx);
	/*
	 * Every `goto loop' in this function is taken with mntvnode_mtx
	 * held; the scan then starts again from the head of the list.
	 */
1038loop:
1039	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1040		/*
1041		 * If the vnode that we are about to sync is no longer
1042		 * associated with this mount point, start over.
1043		 */
1044		if (vp->v_mount != mp)
1045			goto loop;
1046
1047		/*
1048		 * Depend on the mntvnode_mtx to keep things stable enough
1049		 * for a quick test.  Since there might be hundreds of
1050		 * thousands of vnodes, we cannot afford even a subroutine
1051		 * call unless there's a good chance that we have work to do.
		 *
		 * The successor is remembered in nvp before the mutex is
		 * dropped so that a change to the list can be detected
		 * once the mutex has been retaken (see the end of the loop).
1052		 */
1053		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1054		ip = VTOI(vp);
		/*
		 * Skip vnodes of type VNON, and vnodes that have none of
		 * the inode flags below set and no dirty buffers.
		 */
1055		if (vp->v_type == VNON || ((ip->i_flag &
1056		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1057		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1058			continue;
1059		}
1060		if (vp->v_type != VCHR) {
1061			mtx_unlock(&mntvnode_mtx);
1062			if ((error = vget(vp, lockreq, td)) != 0) {
				/*
				 * ENOENT restarts the scan.  Any other
				 * vget() failure just skips this vnode and
				 * is not recorded in allerror.
				 */
1063				mtx_lock(&mntvnode_mtx);
1064				if (error == ENOENT)
1065					goto loop;
1066			} else {
1067				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1068					allerror = error;
1069				VOP_UNLOCK(vp, 0, td);
1070				vrele(vp);
1071				mtx_lock(&mntvnode_mtx);
1072			}
1073		} else {
			/*
			 * Character device vnodes only get their inode
			 * updated; vget() and VOP_FSYNC() are not used.
			 */
1074			mtx_unlock(&mntvnode_mtx);
1075			UFS_UPDATE(vp, wait);
1076			mtx_lock(&mntvnode_mtx);
1077		}
		/*
		 * The mutex was dropped above.  If vp's successor is no
		 * longer the one saved in nvp, the list changed under us,
		 * so start over.
		 */
1078		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1079			goto loop;
1080	}
1081	mtx_unlock(&mntvnode_mtx);
1082	/*
1083	 * Force stale file system control information to be flushed.
1084	 */
1085	if (waitfor == MNT_WAIT) {
1086		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1087			allerror = error;
1088		/* Flushed work items may create new vnodes to clean */
1089		if (count) {
1090			mtx_lock(&mntvnode_mtx);
1091			goto loop;
1092		}
1093	}
1094#ifdef QUOTA
1095	qsync(mp);
1096#endif
	/*
	 * Flush the device vnode if it has writes in progress or dirty
	 * buffers, unless this is a lazy sync.  The test is made under
	 * the vnode interlock, which is released on both branches.
	 */
1097	devvp = ump->um_devvp;
1098	mtx_lock(&devvp->v_interlock);
1099	if (waitfor != MNT_LAZY &&
1100	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1101		mtx_unlock(&devvp->v_interlock);
1102		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
1103		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1104			allerror = error;
1105		VOP_UNLOCK(devvp, 0, td);
		/* A synchronous sync rescans the vnode list after the flush. */
1106		if (waitfor == MNT_WAIT) {
1107			mtx_lock(&mntvnode_mtx);
1108			goto loop;
1109		}
1110	} else
1111		mtx_unlock(&devvp->v_interlock);
1112	/*
1113	 * Write back modified superblock.
1114	 */
1115	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1116		allerror = error;
1117	return (allerror);
1118}
1119
1120/*
1121 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1122 * in from disk.  If it is in core, wait for the lock bit to clear, then
1123 * return the inode locked.  Detection and handling of mount points must be
1124 * done by the calling routine.
 *
 * (This paragraph describes ffs_vget(), which is defined below the two
 * variables that follow.)
1125 */
1126static int ffs_inode_hash_lock;
1127/*
1128 * ffs_inode_hash_lock is a variable to manage mutual exclusion
1129 * of vnode allocation and insertion into the hash, especially to
1130 * avoid holding more than one vnode for the same inode in the
1131 * hash table. ffs_inode_hash_lock must hence be tested-and-set
1132 * or cleared atomically, accomplished by ffs_inode_hash_mtx.
1133 *
 * Values as used by ffs_vget(): 0 means nobody is allocating, 1 means
 * an allocation is in progress, and -1 means an allocation is in
 * progress and at least one thread is sleeping on the variable and
 * must be woken when it is cleared.
 *
1134 * As vnode allocation may block during MALLOC() and zone
1135 * allocation, we should also do msleep() to give away the CPU
1136 * if anyone else is allocating a vnode. lockmgr is not suitable
1137 * here because someone else may insert to the hash table the
1138 * vnode we are trying to allocate during our sleep, in which
1139 * case the hash table needs to be examined once again after
1140 * waking up.
1141 */
1142static struct mtx ffs_inode_hash_mtx;
1143
/*
 * ffs_vget: return the vnode for inode number `ino' on mount `mp'.
 *
 *	mp	mount point; its ufsmount supplies the device, the
 *		device vnode and the superblock.
 *	ino	inode number to look up.
 *	flags	passed through unchanged to ufs_ihashget().
 *	vpp	receives the vnode on success; it is set to NULL on
 *		every failure that occurs after the hash lookup.
 *
 * Returns 0 on success, otherwise the error from ufs_ihashget(),
 * getnewvnode(), bread() or ufs_vinit().
 */
1144int
1145ffs_vget(mp, ino, flags, vpp)
1146	struct mount *mp;
1147	ino_t ino;
1148	int flags;
1149	struct vnode **vpp;
1150{
1151	struct fs *fs;
1152	struct inode *ip;
1153	struct ufsmount *ump;
1154	struct buf *bp;
1155	struct vnode *vp;
1156	dev_t dev;
1157	int error, want_wakeup;
1158
1159	ump = VFSTOUFS(mp);
1160	dev = ump->um_dev;
1161restart:
	/*
	 * Try the in-core hash first.  A hit, or an error from the
	 * lookup, ends the call here.
	 */
1162	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1163		return (error);
1164	if (*vpp != NULL)
1165		return (0);
1166
1167	/*
1168	 * Lock out the creation of new entries in the FFS hash table in
1169	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
1170	 * may occur!
	 *
	 * If another thread already holds the hash lock, mark it with -1
	 * so that the holder knows a wakeup() is wanted, sleep until the
	 * lock is cleared, and then redo the hash lookup from `restart'
	 * because the inode may have been inserted in the meantime.
1171	 */
1172	mtx_lock(&ffs_inode_hash_mtx);
1173	if (ffs_inode_hash_lock) {
1174		while (ffs_inode_hash_lock) {
1175			ffs_inode_hash_lock = -1;
1176			msleep(&ffs_inode_hash_lock, &ffs_inode_hash_mtx, PVM, "ffsvgt", 0);
1177		}
1178		mtx_unlock(&ffs_inode_hash_mtx);
1179		goto restart;
1180	}
1181	ffs_inode_hash_lock = 1;
1182	mtx_unlock(&ffs_inode_hash_mtx);
1183
1184	/*
1185	 * If this MALLOC() is performed after the getnewvnode()
1186	 * it might block, leaving a vnode with a NULL v_data to be
1187	 * found by ffs_sync() if a sync happens to fire right then,
1188	 * which will cause a panic because ffs_sync() blindly
1189	 * dereferences vp->v_data (as well it should).
1190	 */
1191	MALLOC(ip, struct inode *, sizeof(struct inode),
1192	    ump->um_malloctype, M_WAITOK);
1193
1194	/* Allocate a new vnode/inode. */
1195	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
1196	if (error) {
1197		/*
1198		 * Do not wake up processes while holding the mutex,
1199		 * otherwise the awakened processes would immediately
1200		 * block on that same mutex.
1201		 */
1202		mtx_lock(&ffs_inode_hash_mtx);
1203		want_wakeup = ffs_inode_hash_lock < 0;
1204		ffs_inode_hash_lock = 0;
1205		mtx_unlock(&ffs_inode_hash_mtx);
1206		if (want_wakeup)
1207			wakeup(&ffs_inode_hash_lock);
1208		*vpp = NULL;
1209		FREE(ip, ump->um_malloctype);
1210		return (error);
1211	}
	/* The vnode exists now; start from an all-zero in-core inode. */
1212	bzero((caddr_t)ip, sizeof(struct inode));
1213	/*
1214	 * FFS supports lock sharing in the stack of vnodes
1215	 */
1216	vp->v_vnlock = &vp->v_lock;
1217	lockinit(vp->v_vnlock, PINOD, "inode", VLKTIMEOUT, LK_CANRECURSE);
	/* Link the vnode and the inode to each other and identify the inode. */
1218	vp->v_data = ip;
1219	ip->i_vnode = vp;
1220	ip->i_fs = fs = ump->um_fs;
1221	ip->i_dev = dev;
1222	ip->i_number = ino;
1223#ifdef QUOTA
1224	{
1225		int i;
1226		for (i = 0; i < MAXQUOTAS; i++)
1227			ip->i_dquot[i] = NODQUOT;
1228	}
1229#endif
1230	/*
1231	 * Put it onto its hash chain and lock it so that other requests for
1232	 * this inode will block if they arrive while we are sleeping waiting
1233	 * for old data structures to be purged or for the contents of the
1234	 * disk portion of this inode to be read.
1235	 */
1236	ufs_ihashins(ip);
1237
1238	/*
	 * The inode is in the hash table, so the hash lock can be released.
	 *
1239	 * Do not wake up processes while holding the mutex,
1240	 * otherwise the awakened processes would immediately
1241	 * block on that same mutex.
1242	 */
1243	mtx_lock(&ffs_inode_hash_mtx);
1244	want_wakeup = ffs_inode_hash_lock < 0;
1245	ffs_inode_hash_lock = 0;
1246	mtx_unlock(&ffs_inode_hash_mtx);
1247	if (want_wakeup)
1248		wakeup(&ffs_inode_hash_lock);
1249
1250	/* Read in the disk contents for the inode, copy into the inode. */
1251	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1252	    (int)fs->fs_bsize, NOCRED, &bp);
1253	if (error) {
1254		/*
1255		 * The inode does not contain anything useful, so it would
1256		 * be misleading to leave it on its hash chain. With mode
1257		 * still zero, it will be unlinked and returned to the free
1258		 * list by vput().
1259		 */
1260		brelse(bp);
1261		vput(vp);
1262		*vpp = NULL;
1263		return (error);
1264	}
	/* Copy this inode's dinode out of the block that was just read. */
1265	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
	/*
	 * With soft updates active on this vnode the inode is handed to
	 * softdep_load_inodeblock(); otherwise the effective link count
	 * simply starts out equal to the on-disk link count.
	 */
1266	if (DOINGSOFTDEP(vp))
1267		softdep_load_inodeblock(ip);
1268	else
1269		ip->i_effnlink = ip->i_nlink;
1270	bqrelse(bp);
1271
1272	/*
1273	 * Initialize the vnode from the inode, check for aliases.
1274	 * Note that the underlying vnode may have changed.
1275	 */
1276	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1277	if (error) {
1278		vput(vp);
1279		*vpp = NULL;
1280		return (error);
1281	}
1282	/*
1283	 * Finish inode initialization now that aliasing has been resolved.
1284	 */
1285	ip->i_devvp = ump->um_devvp;
1286	VREF(ip->i_devvp);
1287	/*
1288	 * Set up a generation number for this inode if it does not
1289	 * already have one. This should only happen on old filesystems.
	 * The inode is flagged IN_MODIFIED only when the mount is not
	 * read-only.
1290	 */
1291	if (ip->i_gen == 0) {
1292		ip->i_gen = random() / 2 + 1;
1293		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1294			ip->i_flag |= IN_MODIFIED;
1295	}
1296	/*
1297	 * Ensure that uid and gid are correct. This is a temporary
1298	 * fix until fsck has been changed to do the update.
1299	 */
1300	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1301		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1302		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1303	}						/* XXX */
1304
1305	*vpp = vp;
1306	return (0);
1307}
1308
1309/*
1310 * File handle to vnode
1311 *
1312 * Have to be really careful about stale file handles:
1313 * - check that the inode number is valid
1314 * - call ffs_vget() to get the locked inode
1315 * - check for an unallocated inode (i_mode == 0)
1316 * - check that the given client host has export rights and return
1317 *   those rights via. exflagsp and credanonp
1318 */
1319int
1320ffs_fhtovp(mp, fhp, vpp)
1321	register struct mount *mp;
1322	struct fid *fhp;
1323	struct vnode **vpp;
1324{
1325	register struct ufid *ufhp;
1326	struct fs *fs;
1327
1328	ufhp = (struct ufid *)fhp;
1329	fs = VFSTOUFS(mp)->um_fs;
1330	if (ufhp->ufid_ino < ROOTINO ||
1331	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1332		return (ESTALE);
1333	return (ufs_fhtovp(mp, ufhp, vpp));
1334}
1335
1336/*
1337 * Vnode pointer to File handle
1338 */
1339/* ARGSUSED */
1340int
1341ffs_vptofh(vp, fhp)
1342	struct vnode *vp;
1343	struct fid *fhp;
1344{
1345	register struct inode *ip;
1346	register struct ufid *ufhp;
1347
1348	ip = VTOI(vp);
1349	ufhp = (struct ufid *)fhp;
1350	ufhp->ufid_len = sizeof(struct ufid);
1351	ufhp->ufid_ino = ip->i_number;
1352	ufhp->ufid_gen = ip->i_gen;
1353	return (0);
1354}
1355
1356/*
1357 * Initialize the filesystem; just use ufs_init.
1358 */
1359static int
1360ffs_init(vfsp)
1361	struct vfsconf *vfsp;
1362{
1363
1364	softdep_initialize();
1365	mtx_init(&ffs_inode_hash_mtx, "ifsvgt", MTX_DEF);
1366	return (ufs_init(vfsp));
1367}
1368
1369/*
1370 * Write a superblock and associated information back to disk.
1371 */
1372static int
1373ffs_sbupdate(mp, waitfor)
1374	struct ufsmount *mp;
1375	int waitfor;
1376{
1377	register struct fs *dfs, *fs = mp->um_fs;
1378	register struct buf *bp;
1379	int blks;
1380	void *space;
1381	int i, size, error, allerror = 0;
1382
1383	/*
1384	 * First write back the summary information.
1385	 */
1386	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1387	space = fs->fs_csp;
1388	for (i = 0; i < blks; i += fs->fs_frag) {
1389		size = fs->fs_bsize;
1390		if (i + fs->fs_frag > blks)
1391			size = (blks - i) * fs->fs_fsize;
1392		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1393		    size, 0, 0);
1394		bcopy(space, bp->b_data, (u_int)size);
1395		space = (char *)space + size;
1396		if (waitfor != MNT_WAIT)
1397			bawrite(bp);
1398		else if ((error = bwrite(bp)) != 0)
1399			allerror = error;
1400	}
1401	/*
1402	 * Now write back the superblock itself. If any errors occurred
1403	 * up to this point, then fail so that the superblock avoids
1404	 * being written out as clean.
1405	 */
1406	if (allerror)
1407		return (allerror);
1408	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1409	fs->fs_fmod = 0;
1410	fs->fs_time = time_second;
1411	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1412	/* Restore compatibility to old file systems.		   XXX */
1413	dfs = (struct fs *)bp->b_data;				/* XXX */
1414	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1415		dfs->fs_nrpos = -1;				/* XXX */
1416	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1417		int32_t *lp, tmp;				/* XXX */
1418								/* XXX */
1419		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1420		tmp = lp[4];					/* XXX */
1421		for (i = 4; i > 0; i--)				/* XXX */
1422			lp[i] = lp[i-1];			/* XXX */
1423		lp[0] = tmp;					/* XXX */
1424	}							/* XXX */
1425	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1426	if (waitfor != MNT_WAIT)
1427		bawrite(bp);
1428	else if ((error = bwrite(bp)) != 0)
1429		allerror = error;
1430	return (allerror);
1431}
1432