ffs_vfsops.c revision 98687
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 98687 2002-06-23 18:17:27Z mux $
35 */
36
37#include "opt_quota.h"
38#include "opt_ufs.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/stdint.h>
43#include <sys/namei.h>
44#include <sys/proc.h>
45#include <sys/kernel.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/disk.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55
56#include <ufs/ufs/extattr.h>
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/ufsmount.h>
59#include <ufs/ufs/inode.h>
60#include <ufs/ufs/ufs_extern.h>
61
62#include <ufs/ffs/fs.h>
63#include <ufs/ffs/ffs_extern.h>
64
65#include <vm/vm.h>
66#include <vm/vm_page.h>
67
68static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
69
70static int	ffs_sbupdate(struct ufsmount *, int);
71       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
72static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
73		    ufs2_daddr_t);
74static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
75static int	ffs_init(struct vfsconf *);
76
/*
 * Filesystem operations vector registered for "ufs" by VFS_SET() below.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of struct vfsops.  Entries prefixed ffs_ are defined by FFS,
 * ufs_ entries are shared UFS code and vfs_std* entries are the generic
 * VFS defaults.  The last slot is the UFS extended attribute control
 * routine when the kernel is built with UFS_EXTATTR and the generic
 * default otherwise.
 */
static struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	vfs_stdcheckexp,
	ffs_vptofh,
	ffs_init,
	vfs_stduninit,
#ifdef UFS_EXTATTR
	ufs_extattrctl,
#else
	vfs_stdextattrctl,
#endif
};
97
98VFS_SET(ufs_vfsops, ufs, 0);
99
100/*
101 * ffs_mount
102 *
103 * Called when mounting local physical media
104 *
105 * PARAMETERS:
106 *		mountroot
107 *			mp	mount point structure
108 *			path	NULL (flag for root mount!!!)
109 *			data	<unused>
110 *			ndp	<unused>
111 *			p	process (user credentials check [statfs])
112 *
113 *		mount
114 *			mp	mount point structure
115 *			path	path to mount point
116 *			data	pointer to argument struct in user space
117 *			ndp	mount point namei() return (used for
118 *				credentials on reload), reused to look
119 *				up block device.
120 *			p	process (user credentials check)
121 *
122 * RETURNS:	0	Success
123 *		!0	error number (errno.h)
124 *
125 * LOCK STATE:
126 *
127 *		ENTRY
128 *			mount point is locked
129 *		EXIT
130 *			mount point is locked
131 *
132 * NOTES:
133 *		A NULL path can be used for a flag since the mount
134 *		system call will fail with EFAULT in copyinstr in
135 *		namei() if it is a genuine NULL from the user.
136 */
137int
138ffs_mount(mp, path, data, ndp, td)
139        struct mount		*mp;	/* mount struct pointer*/
140        char			*path;	/* path to mount point*/
141        caddr_t			data;	/* arguments to FS specific mount*/
142        struct nameidata	*ndp;	/* mount point credentials*/
143        struct thread		*td;	/* process requesting mount*/
144{
145	size_t size;
146	struct vnode *devvp;
147	struct ufs_args args;
148	struct ufsmount *ump = 0;
149	struct fs *fs;
150	int error, flags;
151	mode_t accessmode;
152
153	/*
154	 * Use NULL path to indicate we are mounting the root filesystem.
155	 */
156	if (path == NULL) {
157		if ((error = bdevvp(rootdev, &rootvp))) {
158			printf("ffs_mountroot: can't find rootvp\n");
159			return (error);
160		}
161
162		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
163			return (error);
164		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
165		return (0);
166	}
167
168	/*
169	 * Mounting non-root filesystem or updating a filesystem
170	 */
171	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
172		return (error);
173
174	/*
175	 * If updating, check whether changing from read-only to
176	 * read/write; if there is no device name, that's all we do.
177	 */
178	if (mp->mnt_flag & MNT_UPDATE) {
179		ump = VFSTOUFS(mp);
180		fs = ump->um_fs;
181		devvp = ump->um_devvp;
182		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
183			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
184				return (error);
185			/*
186			 * Flush any dirty data.
187			 */
188			VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
189			/*
190			 * Check for and optionally get rid of files open
191			 * for writing.
192			 */
193			flags = WRITECLOSE;
194			if (mp->mnt_flag & MNT_FORCE)
195				flags |= FORCECLOSE;
196			if (mp->mnt_flag & MNT_SOFTDEP) {
197				error = softdep_flushfiles(mp, flags, td);
198			} else {
199				error = ffs_flushfiles(mp, flags, td);
200			}
201			if (error) {
202				vn_finished_write(mp);
203				return (error);
204			}
205			if (fs->fs_pendingblocks != 0 ||
206			    fs->fs_pendinginodes != 0) {
207				printf("%s: %s: blocks %jd files %d\n",
208				    fs->fs_fsmnt, "update error",
209				    (intmax_t)fs->fs_pendingblocks,
210				    fs->fs_pendinginodes);
211				fs->fs_pendingblocks = 0;
212				fs->fs_pendinginodes = 0;
213			}
214			fs->fs_ronly = 1;
215			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
216				fs->fs_clean = 1;
217			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
218				fs->fs_ronly = 0;
219				fs->fs_clean = 0;
220				vn_finished_write(mp);
221				return (error);
222			}
223			vn_finished_write(mp);
224		}
225		if ((mp->mnt_flag & MNT_RELOAD) &&
226		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
227			return (error);
228		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
229			/*
230			 * If upgrade to read-write by non-root, then verify
231			 * that user has necessary permissions on the device.
232			 */
233			if (suser(td)) {
234				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
235				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
236				    td->td_ucred, td)) != 0) {
237					VOP_UNLOCK(devvp, 0, td);
238					return (error);
239				}
240				VOP_UNLOCK(devvp, 0, td);
241			}
242			fs->fs_flags &= ~FS_UNCLEAN;
243			if (fs->fs_clean == 0) {
244				fs->fs_flags |= FS_UNCLEAN;
245				if ((mp->mnt_flag & MNT_FORCE) ||
246				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
247				     (fs->fs_flags & FS_DOSOFTDEP))) {
248					printf("WARNING: %s was not %s\n",
249					   fs->fs_fsmnt, "properly dismounted");
250				} else {
251					printf(
252"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
253					    fs->fs_fsmnt);
254					return (EPERM);
255				}
256			}
257			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
258				return (error);
259			fs->fs_ronly = 0;
260			fs->fs_clean = 0;
261			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
262				vn_finished_write(mp);
263				return (error);
264			}
265			/* check to see if we need to start softdep */
266			if ((fs->fs_flags & FS_DOSOFTDEP) &&
267			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
268				vn_finished_write(mp);
269				return (error);
270			}
271			if (fs->fs_snapinum[0] != 0)
272				ffs_snapshot_mount(mp);
273			vn_finished_write(mp);
274		}
275		/*
276		 * Soft updates is incompatible with "async",
277		 * so if we are doing softupdates stop the user
278		 * from setting the async flag in an update.
279		 * Softdep_mount() clears it in an initial mount
280		 * or ro->rw remount.
281		 */
282		if (mp->mnt_flag & MNT_SOFTDEP)
283			mp->mnt_flag &= ~MNT_ASYNC;
284		/*
285		 * If not updating name, process export requests.
286		 */
287		if (args.fspec == 0)
288			return (vfs_export(mp, &args.export));
289		/*
290		 * If this is a snapshot request, take the snapshot.
291		 */
292		if (mp->mnt_flag & MNT_SNAPSHOT)
293			return (ffs_snapshot(mp, args.fspec));
294	}
295
296	/*
297	 * Not an update, or updating the name: look up the name
298	 * and verify that it refers to a sensible block device.
299	 */
300	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
301	if ((error = namei(ndp)) != 0)
302		return (error);
303	NDFREE(ndp, NDF_ONLY_PNBUF);
304	devvp = ndp->ni_vp;
305	if (!vn_isdisk(devvp, &error)) {
306		vrele(devvp);
307		return (error);
308	}
309
310	/*
311	 * If mount by non-root, then verify that user has necessary
312	 * permissions on the device.
313	 */
314	if (suser(td)) {
315		accessmode = VREAD;
316		if ((mp->mnt_flag & MNT_RDONLY) == 0)
317			accessmode |= VWRITE;
318		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
319		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
320			vput(devvp);
321			return (error);
322		}
323		VOP_UNLOCK(devvp, 0, td);
324	}
325
326	if (mp->mnt_flag & MNT_UPDATE) {
327		/*
328		 * Update only
329		 *
330		 * If it's not the same vnode, or at least the same device
331		 * then it's not correct.
332		 */
333
334		if (devvp != ump->um_devvp &&
335		    devvp->v_rdev != ump->um_devvp->v_rdev)
336			error = EINVAL;	/* needs translation */
337		vrele(devvp);
338		if (error)
339			return (error);
340	} else {
341		/*
342		 * New mount
343		 *
344		 * We need the name for the mount point (also used for
345		 * "last mounted on") copied in. If an error occurs,
346		 * the mount point is discarded by the upper level code.
347		 * Note that vfs_mount() populates f_mntonname for us.
348		 */
349		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
350			vrele(devvp);
351			return (error);
352		}
353	}
354	/*
355	 * Save "mounted from" device name info for mount point (NULL pad).
356	 */
357	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
358	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
359	/*
360	 * Initialize filesystem stat information in mount struct.
361	 */
362	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
363	return (0);
364}
365
366/*
367 * Reload all incore data for a filesystem (used after running fsck on
368 * the root filesystem and finding things to fix). The filesystem must
369 * be mounted read-only.
370 *
371 * Things to do to update the mount:
372 *	1) invalidate all cached meta-data.
373 *	2) re-read superblock from disk.
374 *	3) re-read summary information from disk.
375 *	4) invalidate all inactive vnodes.
376 *	5) invalidate all cached file data.
377 *	6) re-read inode data for all active vnodes.
378 */
379int
380ffs_reload(mp, cred, td)
381	struct mount *mp;
382	struct ucred *cred;
383	struct thread *td;
384{
385	struct vnode *vp, *nvp, *devvp;
386	struct inode *ip;
387	void *space;
388	struct buf *bp;
389	struct fs *fs, *newfs;
390	dev_t dev;
391	ufs2_daddr_t sblockloc;
392	int i, blks, size, error;
393	int32_t *lp;
394
395	if ((mp->mnt_flag & MNT_RDONLY) == 0)
396		return (EINVAL);
397	/*
398	 * Step 1: invalidate all cached meta-data.
399	 */
400	devvp = VFSTOUFS(mp)->um_devvp;
401	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
402	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
403	VOP_UNLOCK(devvp, 0, td);
404	if (error)
405		panic("ffs_reload: dirty1");
406
407	dev = devvp->v_rdev;
408
409	/*
410	 * Only VMIO the backing device if the backing device is a real
411	 * block device.
412	 */
413	if (vn_isdisk(devvp, NULL)) {
414		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
415		vfs_object_create(devvp, td, td->td_ucred);
416		mtx_lock(&devvp->v_interlock);
417		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
418	}
419
420	/*
421	 * Step 2: re-read superblock from disk.
422	 */
423	fs = VFSTOUFS(mp)->um_fs;
424	if ((error = bread(devvp, fsbtodb(fs, fs->fs_sblockloc), fs->fs_sbsize,
425	    NOCRED, &bp)) != 0)
426		return (error);
427	newfs = (struct fs *)bp->b_data;
428	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
429	     newfs->fs_magic != FS_UFS2_MAGIC) ||
430	    newfs->fs_bsize > MAXBSIZE ||
431	    newfs->fs_bsize < sizeof(struct fs)) {
432			brelse(bp);
433			return (EIO);		/* XXX needs translation */
434	}
435	/*
436	 * Copy pointer fields back into superblock before copying in	XXX
437	 * new superblock. These should really be in the ufsmount.	XXX
438	 * Note that important parameters (eg fs_ncg) are unchanged.
439	 */
440	newfs->fs_csp = fs->fs_csp;
441	newfs->fs_maxcluster = fs->fs_maxcluster;
442	newfs->fs_contigdirs = fs->fs_contigdirs;
443	newfs->fs_active = fs->fs_active;
444	sblockloc = fs->fs_sblockloc;
445	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
446	brelse(bp);
447	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
448	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
449	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
450		printf("%s: reload pending error: blocks %jd files %d\n",
451		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
452		    fs->fs_pendinginodes);
453		fs->fs_pendingblocks = 0;
454		fs->fs_pendinginodes = 0;
455	}
456
457	/*
458	 * Step 3: re-read summary information from disk.
459	 */
460	blks = howmany(fs->fs_cssize, fs->fs_fsize);
461	space = fs->fs_csp;
462	for (i = 0; i < blks; i += fs->fs_frag) {
463		size = fs->fs_bsize;
464		if (i + fs->fs_frag > blks)
465			size = (blks - i) * fs->fs_fsize;
466		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
467		    NOCRED, &bp);
468		if (error)
469			return (error);
470		bcopy(bp->b_data, space, (u_int)size);
471		space = (char *)space + size;
472		brelse(bp);
473	}
474	/*
475	 * We no longer know anything about clusters per cylinder group.
476	 */
477	if (fs->fs_contigsumsize > 0) {
478		lp = fs->fs_maxcluster;
479		for (i = 0; i < fs->fs_ncg; i++)
480			*lp++ = fs->fs_contigsumsize;
481	}
482
483loop:
484	mtx_lock(&mntvnode_mtx);
485	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
486		if (vp->v_mount != mp) {
487			mtx_unlock(&mntvnode_mtx);
488			goto loop;
489		}
490		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
491		mtx_unlock(&mntvnode_mtx);
492		/*
493		 * Step 4: invalidate all inactive vnodes.
494		 */
495		if (vrecycle(vp, NULL, td))
496			goto loop;
497		/*
498		 * Step 5: invalidate all cached file data.
499		 */
500		mtx_lock(&vp->v_interlock);
501		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
502			goto loop;
503		}
504		if (vinvalbuf(vp, 0, cred, td, 0, 0))
505			panic("ffs_reload: dirty2");
506		/*
507		 * Step 6: re-read inode data for all active vnodes.
508		 */
509		ip = VTOI(vp);
510		error =
511		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
512		    (int)fs->fs_bsize, NOCRED, &bp);
513		if (error) {
514			vput(vp);
515			return (error);
516		}
517		ffs_load_inode(bp, ip, NULL, fs, ip->i_number);
518		ip->i_effnlink = ip->i_nlink;
519		brelse(bp);
520		vput(vp);
521		mtx_lock(&mntvnode_mtx);
522	}
523	mtx_unlock(&mntvnode_mtx);
524	return (0);
525}
526
/*
 * Possible superblock locations ordered from most to least likely.
 *
 * The entries are byte offsets on the device: ffs_mountfs() divides each
 * one by the sector size to get the block number it reads.  The list is
 * terminated by -1, which is how ffs_mountfs() knows every location has
 * been tried.
 */
static int sblock_try[] = SBLOCKSEARCH;
531
532/*
533 * Common code for mount and mountroot
534 */
535int
536ffs_mountfs(devvp, mp, td, malloctype)
537	struct vnode *devvp;
538	struct mount *mp;
539	struct thread *td;
540	struct malloc_type *malloctype;
541{
542	struct ufsmount *ump;
543	struct buf *bp;
544	struct fs *fs;
545	dev_t dev;
546	void *space;
547	ufs2_daddr_t sblockloc;
548	int error, i, blks, size, ronly;
549	int32_t *lp;
550	struct ucred *cred;
551	size_t strsize;
552	int ncount;
553	u_int sectorsize;
554
555	dev = devvp->v_rdev;
556	cred = td ? td->td_ucred : NOCRED;
557	/*
558	 * Disallow multiple mounts of the same device.
559	 * Disallow mounting of a device that is currently in use
560	 * (except for root, which might share swap device for miniroot).
561	 * Flush out any old buffers remaining from a previous use.
562	 */
563	error = vfs_mountedon(devvp);
564	if (error)
565		return (error);
566	ncount = vcount(devvp);
567
568	if (ncount > 1 && devvp != rootvp)
569		return (EBUSY);
570	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
571	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
572	VOP_UNLOCK(devvp, 0, td);
573	if (error)
574		return (error);
575
576	/*
577	 * Only VMIO the backing device if the backing device is a real
578	 * block device.
579	 * Note that it is optional that the backing device be VMIOed.  This
580	 * increases the opportunity for metadata caching.
581	 */
582	if (vn_isdisk(devvp, NULL)) {
583		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
584		vfs_object_create(devvp, td, cred);
585		mtx_lock(&devvp->v_interlock);
586		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
587	}
588
589	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
590	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
591	/*
592	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
593	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
594	 * XXX: start to avoid getting trashed later on.
595	 */
596#ifdef notyet
597	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
598#else
599	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
600#endif
601	VOP_UNLOCK(devvp, 0, td);
602	if (error)
603		return (error);
604	if (devvp->v_rdev->si_iosize_max != 0)
605		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
606	if (mp->mnt_iosize_max > MAXPHYS)
607		mp->mnt_iosize_max = MAXPHYS;
608
609	if (VOP_IOCTL(devvp, DIOCGSECTORSIZE, (caddr_t)&sectorsize,
610	    FREAD, cred, td) != 0)
611		size = DEV_BSIZE;
612	else
613		size = sectorsize;
614
615	bp = NULL;
616	ump = NULL;
617	fs = NULL;
618	sblockloc = 0;
619	/*
620	 * Try reading the superblock in each of its possible locations.
621	 */
622	for (i = 0; sblock_try[i] != -1; i++) {
623		if ((error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE,
624		    cred, &bp)) != 0)
625			goto out;
626		fs = (struct fs *)bp->b_data;
627		sblockloc = numfrags(fs, sblock_try[i]);
628		if ((fs->fs_magic == FS_UFS1_MAGIC ||
629		     (fs->fs_magic == FS_UFS2_MAGIC &&
630		      fs->fs_sblockloc == sblockloc)) &&
631		    fs->fs_bsize <= MAXBSIZE &&
632		    fs->fs_bsize >= sizeof(struct fs))
633			break;
634		brelse(bp);
635		bp = NULL;
636	}
637	if (sblock_try[i] == -1) {
638		error = EINVAL;		/* XXX needs translation */
639		goto out;
640	}
641	fs->fs_fmod = 0;
642	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
643	fs->fs_flags &= ~FS_UNCLEAN;
644	if (fs->fs_clean == 0) {
645		fs->fs_flags |= FS_UNCLEAN;
646		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
647		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
648		     (fs->fs_flags & FS_DOSOFTDEP))) {
649			printf(
650"WARNING: %s was not properly dismounted\n",
651			    fs->fs_fsmnt);
652		} else {
653			printf(
654"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
655			    fs->fs_fsmnt);
656			error = EPERM;
657			goto out;
658		}
659		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
660		    (mp->mnt_flag & MNT_FORCE)) {
661			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
662			    (intmax_t)fs->fs_pendingblocks,
663			    fs->fs_pendinginodes);
664			fs->fs_pendingblocks = 0;
665			fs->fs_pendinginodes = 0;
666		}
667	}
668	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
669		printf("%s: mount pending error: blocks %jd files %d\n",
670		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
671		    fs->fs_pendinginodes);
672		fs->fs_pendingblocks = 0;
673		fs->fs_pendinginodes = 0;
674	}
675	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
676	ump->um_malloctype = malloctype;
677	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
678	    M_WAITOK);
679	if (fs->fs_magic == FS_UFS1_MAGIC) {
680		ump->um_fstype = UFS1;
681		ump->um_balloc = ffs_balloc_ufs1;
682	} else {
683		ump->um_fstype = UFS2;
684		ump->um_balloc = ffs_balloc_ufs2;
685	}
686	ump->um_blkatoff = ffs_blkatoff;
687	ump->um_truncate = ffs_truncate;
688	ump->um_update = ffs_update;
689	ump->um_valloc = ffs_valloc;
690	ump->um_vfree = ffs_vfree;
691	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
692	if (fs->fs_sbsize < SBLOCKSIZE)
693		bp->b_flags |= B_INVAL | B_NOCACHE;
694	brelse(bp);
695	bp = NULL;
696	fs = ump->um_fs;
697	ffs_oldfscompat_read(fs, ump, sblockloc);
698	fs->fs_ronly = ronly;
699	size = fs->fs_cssize;
700	blks = howmany(size, fs->fs_fsize);
701	if (fs->fs_contigsumsize > 0)
702		size += fs->fs_ncg * sizeof(int32_t);
703	size += fs->fs_ncg * sizeof(u_int8_t);
704	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
705	fs->fs_csp = space;
706	for (i = 0; i < blks; i += fs->fs_frag) {
707		size = fs->fs_bsize;
708		if (i + fs->fs_frag > blks)
709			size = (blks - i) * fs->fs_fsize;
710		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
711		    cred, &bp)) != 0) {
712			free(fs->fs_csp, M_UFSMNT);
713			goto out;
714		}
715		bcopy(bp->b_data, space, (u_int)size);
716		space = (char *)space + size;
717		brelse(bp);
718		bp = NULL;
719	}
720	if (fs->fs_contigsumsize > 0) {
721		fs->fs_maxcluster = lp = space;
722		for (i = 0; i < fs->fs_ncg; i++)
723			*lp++ = fs->fs_contigsumsize;
724		space = lp;
725	}
726	size = fs->fs_ncg * sizeof(u_int8_t);
727	fs->fs_contigdirs = (u_int8_t *)space;
728	bzero(fs->fs_contigdirs, size);
729	fs->fs_active = NULL;
730	mp->mnt_data = (qaddr_t)ump;
731	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
732	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
733	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
734	    vfs_getvfs(&mp->mnt_stat.f_fsid))
735		vfs_getnewfsid(mp);
736	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
737	mp->mnt_flag |= MNT_LOCAL;
738	ump->um_mountp = mp;
739	ump->um_dev = dev;
740	ump->um_devvp = devvp;
741	ump->um_nindir = fs->fs_nindir;
742	ump->um_bptrtodb = fs->fs_fsbtodb;
743	ump->um_seqinc = fs->fs_frag;
744	for (i = 0; i < MAXQUOTAS; i++)
745		ump->um_quotas[i] = NULLVP;
746#ifdef UFS_EXTATTR
747	ufs_extattr_uepm_init(&ump->um_extattr);
748#endif
749	devvp->v_rdev->si_mountpoint = mp;
750
751	/*
752	 * Set FS local "last mounted on" information (NULL pad)
753	 */
754	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
755			fs->fs_fsmnt,			/* copy area*/
756			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
757			&strsize);			/* real size*/
758	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
759
760	if( mp->mnt_flag & MNT_ROOTFS) {
761		/*
762		 * Root mount; update timestamp in mount structure.
763		 * this will be used by the common root mount code
764		 * to update the system clock.
765		 */
766		mp->mnt_time = fs->fs_time;
767	}
768
769	if (ronly == 0) {
770		if ((fs->fs_flags & FS_DOSOFTDEP) &&
771		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
772			free(fs->fs_csp, M_UFSMNT);
773			goto out;
774		}
775		if (fs->fs_snapinum[0] != 0)
776			ffs_snapshot_mount(mp);
777		fs->fs_fmod = 1;
778		fs->fs_clean = 0;
779		(void) ffs_sbupdate(ump, MNT_WAIT);
780	}
781#ifdef UFS_EXTATTR
782#ifdef UFS_EXTATTR_AUTOSTART
783	/*
784	 *
785	 * Auto-starting does the following:
786	 *	- check for /.attribute in the fs, and extattr_start if so
787	 *	- for each file in .attribute, enable that file with
788	 * 	  an attribute of the same name.
789	 * Not clear how to report errors -- probably eat them.
790	 * This would all happen while the filesystem was busy/not
791	 * available, so would effectively be "atomic".
792	 */
793	(void) ufs_extattr_autostart(mp, td);
794#endif /* !UFS_EXTATTR_AUTOSTART */
795#endif /* !UFS_EXTATTR */
796	return (0);
797out:
798	devvp->v_rdev->si_mountpoint = NULL;
799	if (bp)
800		brelse(bp);
801	/* XXX: see comment above VOP_OPEN */
802#ifdef notyet
803	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
804#else
805	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
806#endif
807	if (ump) {
808		free(ump->um_fs, M_UFSMNT);
809		free(ump, M_UFSMNT);
810		mp->mnt_data = (qaddr_t)0;
811	}
812	return (error);
813}
814
#include <sys/sysctl.h>
/*
 * Debugging knob, exported read/write as an integer under the _debug
 * sysctl node.  While it is nonzero, ffs_oldfscompat_read() saves
 * fs_cgsize in fs_save_cgsize and replaces it with fs_bsize, and
 * ffs_oldfscompat_write() puts the saved value back.
 */
int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
818
819/*
820 * Sanity checks for loading old filesystem superblocks.
821 * See ffs_oldfscompat_write below for unwound actions.
822 *
823 * XXX - Parts get retired eventually.
824 * Unfortunately new bits get added.
825 */
826static void
827ffs_oldfscompat_read(fs, ump, sblockloc)
828	struct fs *fs;
829	struct ufsmount *ump;
830	ufs2_daddr_t sblockloc;
831{
832	off_t maxfilesize;
833
834	/*
835	 * If not yet done, update UFS1 superblock with new wider fields.
836	 */
837	if (fs->fs_magic == FS_UFS1_MAGIC &&
838	    fs->fs_sblockloc != sblockloc) {
839		fs->fs_maxbsize = fs->fs_bsize;
840		fs->fs_sblockloc = sblockloc;
841		fs->fs_time = fs->fs_old_time;
842		fs->fs_size = fs->fs_old_size;
843		fs->fs_dsize = fs->fs_old_dsize;
844		fs->fs_csaddr = fs->fs_old_csaddr;
845		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
846		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
847		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
848		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
849	}
850	if (fs->fs_magic == FS_UFS1_MAGIC &&
851	    fs->fs_old_inodefmt < FS_44INODEFMT) {
852		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
853		fs->fs_qbmask = ~fs->fs_bmask;
854		fs->fs_qfmask = ~fs->fs_fmask;
855	}
856	ump->um_savedmaxfilesize = fs->fs_maxfilesize;
857	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
858	if (fs->fs_maxfilesize > maxfilesize)
859		fs->fs_maxfilesize = maxfilesize;
860	/* Compatibility for old filesystems */
861	if (fs->fs_avgfilesize <= 0)
862		fs->fs_avgfilesize = AVFILESIZ;
863	if (fs->fs_avgfpdir <= 0)
864		fs->fs_avgfpdir = AFPDIR;
865	if (bigcgs) {
866		fs->fs_save_cgsize = fs->fs_cgsize;
867		fs->fs_cgsize = fs->fs_bsize;
868	}
869}
870
871/*
872 * Unwinding superblock updates for old filesystems.
873 * See ffs_oldfscompat_read above for details.
874 *
875 * XXX - Parts get retired eventually.
876 * Unfortunately new bits get added.
877 */
878static void
879ffs_oldfscompat_write(fs, ump)
880	struct fs *fs;
881	struct ufsmount *ump;
882{
883
884	/*
885	 * Copy back UFS2 updated fields that UFS1 inspects.
886	 */
887	if (fs->fs_magic == FS_UFS1_MAGIC) {
888		fs->fs_old_time = fs->fs_time;
889		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
890		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
891		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
892		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
893	}
894	fs->fs_maxfilesize = ump->um_savedmaxfilesize;
895	if (bigcgs) {
896		fs->fs_cgsize = fs->fs_save_cgsize;
897		fs->fs_save_cgsize = 0;
898	}
899}
900
901/*
902 * unmount system call
903 */
904int
905ffs_unmount(mp, mntflags, td)
906	struct mount *mp;
907	int mntflags;
908	struct thread *td;
909{
910	struct ufsmount *ump = VFSTOUFS(mp);
911	struct fs *fs;
912	int error, flags;
913
914	flags = 0;
915	if (mntflags & MNT_FORCE) {
916		flags |= FORCECLOSE;
917	}
918#ifdef UFS_EXTATTR
919	if ((error = ufs_extattr_stop(mp, td))) {
920		if (error != EOPNOTSUPP)
921			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
922			    error);
923	} else {
924		ufs_extattr_uepm_destroy(&ump->um_extattr);
925	}
926#endif
927	if (mp->mnt_flag & MNT_SOFTDEP) {
928		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
929			return (error);
930	} else {
931		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
932			return (error);
933	}
934	fs = ump->um_fs;
935	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
936		printf("%s: unmount pending error: blocks %jd files %d\n",
937		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
938		    fs->fs_pendinginodes);
939		fs->fs_pendingblocks = 0;
940		fs->fs_pendinginodes = 0;
941	}
942	if (fs->fs_ronly == 0) {
943		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
944		error = ffs_sbupdate(ump, MNT_WAIT);
945		if (error) {
946			fs->fs_clean = 0;
947			return (error);
948		}
949	}
950	ump->um_devvp->v_rdev->si_mountpoint = NULL;
951
952	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
953	/* XXX: see comment above VOP_OPEN */
954#ifdef notyet
955	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
956		NOCRED, td);
957#else
958	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
959#endif
960
961	vrele(ump->um_devvp);
962
963	free(fs->fs_csp, M_UFSMNT);
964	free(fs, M_UFSMNT);
965	free(ump, M_UFSMNT);
966	mp->mnt_data = (qaddr_t)0;
967	mp->mnt_flag &= ~MNT_LOCAL;
968	return (error);
969}
970
971/*
972 * Flush out all the files in a filesystem.
973 */
974int
975ffs_flushfiles(mp, flags, td)
976	struct mount *mp;
977	int flags;
978	struct thread *td;
979{
980	struct ufsmount *ump;
981	int error;
982
983	ump = VFSTOUFS(mp);
984#ifdef QUOTA
985	if (mp->mnt_flag & MNT_QUOTA) {
986		int i;
987		error = vflush(mp, 0, SKIPSYSTEM|flags);
988		if (error)
989			return (error);
990		for (i = 0; i < MAXQUOTAS; i++) {
991			if (ump->um_quotas[i] == NULLVP)
992				continue;
993			quotaoff(td, mp, i);
994		}
995		/*
996		 * Here we fall through to vflush again to ensure
997		 * that we have gotten rid of all the system vnodes.
998		 */
999	}
1000#endif
1001	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
1002		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1003			return (error);
1004		ffs_snapshot_unmount(mp);
1005		/*
1006		 * Here we fall through to vflush again to ensure
1007		 * that we have gotten rid of all the system vnodes.
1008		 */
1009	}
1010        /*
1011	 * Flush all the files.
1012	 */
1013	if ((error = vflush(mp, 0, flags)) != 0)
1014		return (error);
1015	/*
1016	 * Flush filesystem metadata.
1017	 */
1018	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1019	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1020	VOP_UNLOCK(ump->um_devvp, 0, td);
1021	return (error);
1022}
1023
1024/*
1025 * Get filesystem statistics.
1026 */
1027int
1028ffs_statfs(mp, sbp, td)
1029	struct mount *mp;
1030	struct statfs *sbp;
1031	struct thread *td;
1032{
1033	struct ufsmount *ump;
1034	struct fs *fs;
1035
1036	ump = VFSTOUFS(mp);
1037	fs = ump->um_fs;
1038	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1039		panic("ffs_statfs");
1040	sbp->f_bsize = fs->fs_fsize;
1041	sbp->f_iosize = fs->fs_bsize;
1042	sbp->f_blocks = fs->fs_dsize;
1043	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1044	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1045	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1046	    dbtofsb(fs, fs->fs_pendingblocks);
1047	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1048	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1049	if (sbp != &mp->mnt_stat) {
1050		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1051		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1052			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1053		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1054			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1055	}
1056	return (0);
1057}
1058
1059/*
1060 * Go through the disk queues to initiate sandbagged IO;
1061 * go through the inodes to write those that have been modified;
1062 * initiate the writing of the super block if it has been modified.
1063 *
1064 * Note: we are always called with the filesystem marked `MPBUSY'.
1065 */
1066int
1067ffs_sync(mp, waitfor, cred, td)
1068	struct mount *mp;
1069	int waitfor;
1070	struct ucred *cred;
1071	struct thread *td;
1072{
1073	struct vnode *nvp, *vp, *devvp;
1074	struct inode *ip;
1075	struct ufsmount *ump = VFSTOUFS(mp);
1076	struct fs *fs;
1077	int error, count, wait, lockreq, allerror = 0;
1078
1079	fs = ump->um_fs;
1080	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1081		printf("fs = %s\n", fs->fs_fsmnt);
1082		panic("ffs_sync: rofs mod");
1083	}
1084	/*
1085	 * Write back each (modified) inode.
1086	 */
1087	wait = 0;
1088	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1089	if (waitfor == MNT_WAIT) {
1090		wait = 1;
1091		lockreq = LK_EXCLUSIVE;
1092	}
1093	mtx_lock(&mntvnode_mtx);
1094loop:
1095	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1096		/*
1097		 * If the vnode that we are about to sync is no longer
1098		 * associated with this mount point, start over.
1099		 */
1100		if (vp->v_mount != mp)
1101			goto loop;
1102
1103		/*
1104		 * Depend on the mntvnode_slock to keep things stable enough
1105		 * for a quick test.  Since there might be hundreds of
1106		 * thousands of vnodes, we cannot afford even a subroutine
1107		 * call unless there's a good chance that we have work to do.
1108		 */
1109		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1110		ip = VTOI(vp);
1111		if (vp->v_type == VNON || ((ip->i_flag &
1112		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1113		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1114			continue;
1115		}
1116		if (vp->v_type != VCHR) {
1117			mtx_unlock(&mntvnode_mtx);
1118			if ((error = vget(vp, lockreq, td)) != 0) {
1119				mtx_lock(&mntvnode_mtx);
1120				if (error == ENOENT)
1121					goto loop;
1122			} else {
1123				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1124					allerror = error;
1125				VOP_UNLOCK(vp, 0, td);
1126				vrele(vp);
1127				mtx_lock(&mntvnode_mtx);
1128			}
1129		} else {
1130			mtx_unlock(&mntvnode_mtx);
1131			UFS_UPDATE(vp, wait);
1132			mtx_lock(&mntvnode_mtx);
1133		}
1134		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1135			goto loop;
1136	}
1137	mtx_unlock(&mntvnode_mtx);
1138	/*
1139	 * Force stale filesystem control information to be flushed.
1140	 */
1141	if (waitfor == MNT_WAIT) {
1142		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1143			allerror = error;
1144		/* Flushed work items may create new vnodes to clean */
1145		if (count) {
1146			mtx_lock(&mntvnode_mtx);
1147			goto loop;
1148		}
1149	}
1150#ifdef QUOTA
1151	qsync(mp);
1152#endif
1153	devvp = ump->um_devvp;
1154	mtx_lock(&devvp->v_interlock);
1155	if (waitfor != MNT_LAZY &&
1156	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1157		mtx_unlock(&devvp->v_interlock);
1158		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
1159		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1160			allerror = error;
1161		VOP_UNLOCK(devvp, 0, td);
1162		if (waitfor == MNT_WAIT) {
1163			mtx_lock(&mntvnode_mtx);
1164			goto loop;
1165		}
1166	} else
1167		mtx_unlock(&devvp->v_interlock);
1168	/*
1169	 * Write back modified superblock.
1170	 */
1171	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1172		allerror = error;
1173	return (allerror);
1174}
1175
1176int
1177ffs_vget(mp, ino, flags, vpp)
1178	struct mount *mp;
1179	ino_t ino;
1180	int flags;
1181	struct vnode **vpp;
1182{
1183	struct thread *td = curthread; 		/* XXX */
1184	struct fs *fs;
1185	struct inode *ip;
1186	struct ufsmount *ump;
1187	struct buf *bp;
1188	struct vnode *vp;
1189	dev_t dev;
1190	int error;
1191
1192	ump = VFSTOUFS(mp);
1193	dev = ump->um_dev;
1194
1195	/*
1196	 * We do not lock vnode creation as it is believed to be too
1197	 * expensive for such rare case as simultaneous creation of vnode
1198	 * for same ino by different processes. We just allow them to race
1199	 * and check later to decide who wins. Let the race begin!
1200	 */
1201	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1202		return (error);
1203	if (*vpp != NULL)
1204		return (0);
1205
1206	/*
1207	 * If this MALLOC() is performed after the getnewvnode()
1208	 * it might block, leaving a vnode with a NULL v_data to be
1209	 * found by ffs_sync() if a sync happens to fire right then,
1210	 * which will cause a panic because ffs_sync() blindly
1211	 * dereferences vp->v_data (as well it should).
1212	 */
1213	MALLOC(ip, struct inode *, sizeof(struct inode),
1214	    ump->um_malloctype, M_WAITOK);
1215
1216	/* Allocate a new vnode/inode. */
1217	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
1218	if (error) {
1219		*vpp = NULL;
1220		FREE(ip, ump->um_malloctype);
1221		return (error);
1222	}
1223	bzero((caddr_t)ip, sizeof(struct inode));
1224	/*
1225	 * FFS supports lock sharing in the stack of vnodes
1226	 */
1227	vp->v_vnlock = &vp->v_lock;
1228	lockinit(vp->v_vnlock, PINOD, "inode", VLKTIMEOUT, LK_CANRECURSE);
1229	vp->v_data = ip;
1230	ip->i_vnode = vp;
1231	ip->i_ump = ump;
1232	ip->i_fs = fs = ump->um_fs;
1233	ip->i_dev = dev;
1234	ip->i_number = ino;
1235#ifdef QUOTA
1236	{
1237		int i;
1238		for (i = 0; i < MAXQUOTAS; i++)
1239			ip->i_dquot[i] = NODQUOT;
1240	}
1241#endif
1242	/*
1243	 * Exclusively lock the vnode before adding to hash. Note, that we
1244	 * must not release nor downgrade the lock (despite flags argument
1245	 * says) till it is fully initialized.
1246	 */
1247	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1248
1249	/*
1250	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1251	 * duplicate of vnode being created and add it to the hash. If a
1252	 * duplicate vnode was found, it will be vget()ed from hash for us.
1253	 */
1254	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1255		vput(vp);
1256		*vpp = NULL;
1257		return (error);
1258	}
1259
1260	/* We lost the race, then throw away our vnode and return existing */
1261	if (*vpp != NULL) {
1262		vput(vp);
1263		return (0);
1264	}
1265
1266	/* Read in the disk contents for the inode, copy into the inode. */
1267	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1268	    (int)fs->fs_bsize, NOCRED, &bp);
1269	if (error) {
1270		/*
1271		 * The inode does not contain anything useful, so it would
1272		 * be misleading to leave it on its hash chain. With mode
1273		 * still zero, it will be unlinked and returned to the free
1274		 * list by vput().
1275		 */
1276		brelse(bp);
1277		vput(vp);
1278		*vpp = NULL;
1279		return (error);
1280	}
1281	ffs_load_inode(bp, ip, ump->um_malloctype, fs, ino);
1282	if (DOINGSOFTDEP(vp))
1283		softdep_load_inodeblock(ip);
1284	else
1285		ip->i_effnlink = ip->i_nlink;
1286	bqrelse(bp);
1287
1288	/*
1289	 * Initialize the vnode from the inode, check for aliases.
1290	 * Note that the underlying vnode may have changed.
1291	 */
1292	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1293	if (error) {
1294		vput(vp);
1295		*vpp = NULL;
1296		return (error);
1297	}
1298	/*
1299	 * Finish inode initialization now that aliasing has been resolved.
1300	 */
1301	ip->i_devvp = ump->um_devvp;
1302	VREF(ip->i_devvp);
1303	/*
1304	 * Set up a generation number for this inode if it does not
1305	 * already have one. This should only happen on old filesystems.
1306	 */
1307	if (ip->i_gen == 0) {
1308		ip->i_gen = random() / 2 + 1;
1309		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1310			ip->i_flag |= IN_MODIFIED;
1311			DIP(ip, i_gen) = ip->i_gen;
1312		}
1313	}
1314	/*
1315	 * Ensure that uid and gid are correct. This is a temporary
1316	 * fix until fsck has been changed to do the update.
1317	 */
1318	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1319	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1320		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1321		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1322	}						/* XXX */
1323
1324	*vpp = vp;
1325	return (0);
1326}
1327
1328/*
1329 * File handle to vnode
1330 *
1331 * Have to be really careful about stale file handles:
1332 * - check that the inode number is valid
1333 * - call ffs_vget() to get the locked inode
1334 * - check for an unallocated inode (i_mode == 0)
1335 * - check that the given client host has export rights and return
1336 *   those rights via. exflagsp and credanonp
1337 */
1338int
1339ffs_fhtovp(mp, fhp, vpp)
1340	struct mount *mp;
1341	struct fid *fhp;
1342	struct vnode **vpp;
1343{
1344	struct ufid *ufhp;
1345	struct fs *fs;
1346
1347	ufhp = (struct ufid *)fhp;
1348	fs = VFSTOUFS(mp)->um_fs;
1349	if (ufhp->ufid_ino < ROOTINO ||
1350	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1351		return (ESTALE);
1352	return (ufs_fhtovp(mp, ufhp, vpp));
1353}
1354
1355/*
1356 * Vnode pointer to File handle
1357 */
1358/* ARGSUSED */
1359int
1360ffs_vptofh(vp, fhp)
1361	struct vnode *vp;
1362	struct fid *fhp;
1363{
1364	struct inode *ip;
1365	struct ufid *ufhp;
1366
1367	ip = VTOI(vp);
1368	ufhp = (struct ufid *)fhp;
1369	ufhp->ufid_len = sizeof(struct ufid);
1370	ufhp->ufid_ino = ip->i_number;
1371	ufhp->ufid_gen = ip->i_gen;
1372	return (0);
1373}
1374
/*
 * Initialize the filesystem: call softdep_initialize(), then let
 * ufs_init() do the rest and return its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1386
1387/*
1388 * Write a superblock and associated information back to disk.
1389 */
1390static int
1391ffs_sbupdate(mp, waitfor)
1392	struct ufsmount *mp;
1393	int waitfor;
1394{
1395	struct fs *fs = mp->um_fs;
1396	struct buf *bp;
1397	int blks;
1398	void *space;
1399	int i, size, error, allerror = 0;
1400
1401	/*
1402	 * First write back the summary information.
1403	 */
1404	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1405	space = fs->fs_csp;
1406	for (i = 0; i < blks; i += fs->fs_frag) {
1407		size = fs->fs_bsize;
1408		if (i + fs->fs_frag > blks)
1409			size = (blks - i) * fs->fs_fsize;
1410		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1411		    size, 0, 0);
1412		bcopy(space, bp->b_data, (u_int)size);
1413		space = (char *)space + size;
1414		if (waitfor != MNT_WAIT)
1415			bawrite(bp);
1416		else if ((error = bwrite(bp)) != 0)
1417			allerror = error;
1418	}
1419	/*
1420	 * Now write back the superblock itself. If any errors occurred
1421	 * up to this point, then fail so that the superblock avoids
1422	 * being written out as clean.
1423	 */
1424	if (allerror)
1425		return (allerror);
1426	bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_sblockloc),
1427	    (int)fs->fs_sbsize, 0, 0);
1428	fs->fs_fmod = 0;
1429	fs->fs_time = time_second;
1430	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1431	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1432	if (waitfor != MNT_WAIT)
1433		bawrite(bp);
1434	else if ((error = bwrite(bp)) != 0)
1435		allerror = error;
1436	return (allerror);
1437}
1438