ffs_vfsops.c revision 125796
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 125796 2004-02-14 04:41:13Z bde $");
38
39#include "opt_mac.h"
40#include "opt_quota.h"
41#include "opt_ufs.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/kernel.h>
48#include <sys/mac.h>
49#include <sys/vnode.h>
50#include <sys/mount.h>
51#include <sys/bio.h>
52#include <sys/buf.h>
53#include <sys/conf.h>
54#include <sys/fcntl.h>
55#include <sys/disk.h>
56#include <sys/malloc.h>
57#include <sys/mutex.h>
58
59#include <ufs/ufs/extattr.h>
60#include <ufs/ufs/quota.h>
61#include <ufs/ufs/ufsmount.h>
62#include <ufs/ufs/inode.h>
63#include <ufs/ufs/ufs_extern.h>
64
65#include <ufs/ffs/fs.h>
66#include <ufs/ffs/ffs_extern.h>
67
68#include <vm/vm.h>
69#include <vm/uma.h>
70#include <vm/vm_page.h>
71
72uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
73
74static int	ffs_sbupdate(struct ufsmount *, int);
75       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
76static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
77static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
78		    ufs2_daddr_t);
79static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
80static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
81static vfs_init_t ffs_init;
82static vfs_uninit_t ffs_uninit;
83static vfs_extattrctl_t ffs_extattrctl;
84
85static struct vfsops ufs_vfsops = {
86	.vfs_extattrctl =	ffs_extattrctl,
87	.vfs_fhtovp =		ffs_fhtovp,
88	.vfs_init =		ffs_init,
89	.vfs_mount =		ffs_mount,
90	.vfs_quotactl =		ufs_quotactl,
91	.vfs_root =		ufs_root,
92	.vfs_start =		ufs_start,
93	.vfs_statfs =		ffs_statfs,
94	.vfs_sync =		ffs_sync,
95	.vfs_uninit =		ffs_uninit,
96	.vfs_unmount =		ffs_unmount,
97	.vfs_vget =		ffs_vget,
98	.vfs_vptofh =		ffs_vptofh,
99};
100
101VFS_SET(ufs_vfsops, ufs, 0);
102
103/*
104 * ffs_mount
105 *
106 * Called when mounting local physical media
107 *
108 * PARAMETERS:
109 *		mountroot
110 *			mp	mount point structure
111 *			path	NULL (flag for root mount!!!)
112 *			data	<unused>
113 *			ndp	<unused>
114 *			p	process (user credentials check [statfs])
115 *
116 *		mount
117 *			mp	mount point structure
118 *			path	path to mount point
119 *			data	pointer to argument struct in user space
120 *			ndp	mount point namei() return (used for
121 *				credentials on reload), reused to look
122 *				up block device.
123 *			p	process (user credentials check)
124 *
125 * RETURNS:	0	Success
126 *		!0	error number (errno.h)
127 *
128 * LOCK STATE:
129 *
130 *		ENTRY
131 *			mount point is locked
132 *		EXIT
133 *			mount point is locked
134 *
135 * NOTES:
136 *		A NULL path can be used for a flag since the mount
137 *		system call will fail with EFAULT in copyinstr in
138 *		namei() if it is a genuine NULL from the user.
139 */
140int
141ffs_mount(mp, path, data, ndp, td)
142        struct mount		*mp;	/* mount struct pointer*/
143        char			*path;	/* path to mount point*/
144        caddr_t			data;	/* arguments to FS specific mount*/
145        struct nameidata	*ndp;	/* mount point credentials*/
146        struct thread		*td;	/* process requesting mount*/
147{
148	size_t size;
149	struct vnode *devvp;
150	struct ufs_args args;
151	struct ufsmount *ump = 0;
152	struct fs *fs;
153	int error, flags;
154	mode_t accessmode;
155
156	if (uma_inode == NULL) {
157		uma_inode = uma_zcreate("FFS inode",
158		    sizeof(struct inode), NULL, NULL, NULL, NULL,
159		    UMA_ALIGN_PTR, 0);
160		uma_ufs1 = uma_zcreate("FFS1 dinode",
161		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
162		    UMA_ALIGN_PTR, 0);
163		uma_ufs2 = uma_zcreate("FFS2 dinode",
164		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
165		    UMA_ALIGN_PTR, 0);
166	}
167	/*
168	 * Use NULL path to indicate we are mounting the root filesystem.
169	 */
170	if (path == NULL) {
171		if ((error = bdevvp(rootdev, &rootvp))) {
172			printf("ffs_mountroot: can't find rootvp\n");
173			return (error);
174		}
175
176		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
177			return (error);
178		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
179		return (0);
180	}
181
182	/*
183	 * Mounting non-root filesystem or updating a filesystem
184	 */
185	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
186		return (error);
187
188	/*
189	 * If updating, check whether changing from read-only to
190	 * read/write; if there is no device name, that's all we do.
191	 */
192	if (mp->mnt_flag & MNT_UPDATE) {
193		ump = VFSTOUFS(mp);
194		fs = ump->um_fs;
195		devvp = ump->um_devvp;
196		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
197			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
198				return (error);
199			/*
200			 * Flush any dirty data.
201			 */
202			if ((error = VFS_SYNC(mp, MNT_WAIT,
203			    td->td_ucred, td)) != 0) {
204				vn_finished_write(mp);
205				return (error);
206			}
207			/*
208			 * Check for and optionally get rid of files open
209			 * for writing.
210			 */
211			flags = WRITECLOSE;
212			if (mp->mnt_flag & MNT_FORCE)
213				flags |= FORCECLOSE;
214			if (mp->mnt_flag & MNT_SOFTDEP) {
215				error = softdep_flushfiles(mp, flags, td);
216			} else {
217				error = ffs_flushfiles(mp, flags, td);
218			}
219			if (error) {
220				vn_finished_write(mp);
221				return (error);
222			}
223			if (fs->fs_pendingblocks != 0 ||
224			    fs->fs_pendinginodes != 0) {
225				printf("%s: %s: blocks %jd files %d\n",
226				    fs->fs_fsmnt, "update error",
227				    (intmax_t)fs->fs_pendingblocks,
228				    fs->fs_pendinginodes);
229				fs->fs_pendingblocks = 0;
230				fs->fs_pendinginodes = 0;
231			}
232			fs->fs_ronly = 1;
233			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
234				fs->fs_clean = 1;
235			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
236				fs->fs_ronly = 0;
237				fs->fs_clean = 0;
238				vn_finished_write(mp);
239				return (error);
240			}
241			vn_finished_write(mp);
242		}
243		if ((mp->mnt_flag & MNT_RELOAD) &&
244		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
245			return (error);
246		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
247			/*
248			 * If upgrade to read-write by non-root, then verify
249			 * that user has necessary permissions on the device.
250			 */
251			if (suser(td)) {
252				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
253				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
254				    td->td_ucred, td)) != 0) {
255					VOP_UNLOCK(devvp, 0, td);
256					return (error);
257				}
258				VOP_UNLOCK(devvp, 0, td);
259			}
260			fs->fs_flags &= ~FS_UNCLEAN;
261			if (fs->fs_clean == 0) {
262				fs->fs_flags |= FS_UNCLEAN;
263				if ((mp->mnt_flag & MNT_FORCE) ||
264				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
265				     (fs->fs_flags & FS_DOSOFTDEP))) {
266					printf("WARNING: %s was not %s\n",
267					   fs->fs_fsmnt, "properly dismounted");
268				} else {
269					printf(
270"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
271					    fs->fs_fsmnt);
272					return (EPERM);
273				}
274			}
275			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
276				return (error);
277			fs->fs_ronly = 0;
278			fs->fs_clean = 0;
279			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
280				vn_finished_write(mp);
281				return (error);
282			}
283			/* check to see if we need to start softdep */
284			if ((fs->fs_flags & FS_DOSOFTDEP) &&
285			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
286				vn_finished_write(mp);
287				return (error);
288			}
289			if (fs->fs_snapinum[0] != 0)
290				ffs_snapshot_mount(mp);
291			vn_finished_write(mp);
292		}
293		/*
294		 * Soft updates is incompatible with "async",
295		 * so if we are doing softupdates stop the user
296		 * from setting the async flag in an update.
297		 * Softdep_mount() clears it in an initial mount
298		 * or ro->rw remount.
299		 */
300		if (mp->mnt_flag & MNT_SOFTDEP)
301			mp->mnt_flag &= ~MNT_ASYNC;
302		/*
303		 * If not updating name, process export requests.
304		 */
305		if (args.fspec == 0)
306			return (vfs_export(mp, &args.export));
307		/*
308		 * If this is a snapshot request, take the snapshot.
309		 */
310		if (mp->mnt_flag & MNT_SNAPSHOT)
311			return (ffs_snapshot(mp, args.fspec));
312	}
313
314	/*
315	 * Not an update, or updating the name: look up the name
316	 * and verify that it refers to a sensible disk device.
317	 */
318	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
319	if ((error = namei(ndp)) != 0)
320		return (error);
321	NDFREE(ndp, NDF_ONLY_PNBUF);
322	devvp = ndp->ni_vp;
323	if (!vn_isdisk(devvp, &error)) {
324		vrele(devvp);
325		return (error);
326	}
327
328	/*
329	 * If mount by non-root, then verify that user has necessary
330	 * permissions on the device.
331	 */
332	if (suser(td)) {
333		accessmode = VREAD;
334		if ((mp->mnt_flag & MNT_RDONLY) == 0)
335			accessmode |= VWRITE;
336		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
337		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
338			vput(devvp);
339			return (error);
340		}
341		VOP_UNLOCK(devvp, 0, td);
342	}
343
344	if (mp->mnt_flag & MNT_UPDATE) {
345		/*
346		 * Update only
347		 *
348		 * If it's not the same vnode, or at least the same device
349		 * then it's not correct.
350		 */
351
352		if (devvp != ump->um_devvp &&
353		    devvp->v_rdev != ump->um_devvp->v_rdev)
354			error = EINVAL;	/* needs translation */
355		vrele(devvp);
356		if (error)
357			return (error);
358	} else {
359		/*
360		 * New mount
361		 *
362		 * We need the name for the mount point (also used for
363		 * "last mounted on") copied in. If an error occurs,
364		 * the mount point is discarded by the upper level code.
365		 * Note that vfs_mount() populates f_mntonname for us.
366		 */
367		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
368			vrele(devvp);
369			return (error);
370		}
371	}
372	/*
373	 * Save "mounted from" device name info for mount point (NULL pad).
374	 */
375	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
376	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
377	/*
378	 * Initialize filesystem stat information in mount struct.
379	 */
380	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
381	return (0);
382}
383
384/*
385 * Reload all incore data for a filesystem (used after running fsck on
386 * the root filesystem and finding things to fix). The filesystem must
387 * be mounted read-only.
388 *
389 * Things to do to update the mount:
390 *	1) invalidate all cached meta-data.
391 *	2) re-read superblock from disk.
392 *	3) re-read summary information from disk.
393 *	4) invalidate all inactive vnodes.
394 *	5) invalidate all cached file data.
395 *	6) re-read inode data for all active vnodes.
396 */
397int
398ffs_reload(mp, cred, td)
399	struct mount *mp;
400	struct ucred *cred;
401	struct thread *td;
402{
403	struct vnode *vp, *nvp, *devvp;
404	struct inode *ip;
405	void *space;
406	struct buf *bp;
407	struct fs *fs, *newfs;
408	ufs2_daddr_t sblockloc;
409	int i, blks, size, error;
410	int32_t *lp;
411
412	if ((mp->mnt_flag & MNT_RDONLY) == 0)
413		return (EINVAL);
414	/*
415	 * Step 1: invalidate all cached meta-data.
416	 */
417	devvp = VFSTOUFS(mp)->um_devvp;
418	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
419	if (vinvalbuf(devvp, 0, cred, td, 0, 0) != 0)
420		panic("ffs_reload: dirty1");
421	/*
422	 * Only VMIO the backing device if the backing device is a real
423	 * disk device.  See ffs_mountfs() for more details.
424	 */
425	if (vn_isdisk(devvp, NULL))
426		vfs_object_create(devvp, td, td->td_ucred);
427	VOP_UNLOCK(devvp, 0, td);
428
429	/*
430	 * Step 2: re-read superblock from disk.
431	 */
432	fs = VFSTOUFS(mp)->um_fs;
433	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
434	    NOCRED, &bp)) != 0)
435		return (error);
436	newfs = (struct fs *)bp->b_data;
437	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
438	     newfs->fs_magic != FS_UFS2_MAGIC) ||
439	    newfs->fs_bsize > MAXBSIZE ||
440	    newfs->fs_bsize < sizeof(struct fs)) {
441			brelse(bp);
442			return (EIO);		/* XXX needs translation */
443	}
444	/*
445	 * Copy pointer fields back into superblock before copying in	XXX
446	 * new superblock. These should really be in the ufsmount.	XXX
447	 * Note that important parameters (eg fs_ncg) are unchanged.
448	 */
449	newfs->fs_csp = fs->fs_csp;
450	newfs->fs_maxcluster = fs->fs_maxcluster;
451	newfs->fs_contigdirs = fs->fs_contigdirs;
452	newfs->fs_active = fs->fs_active;
453	/* The file system is still read-only. */
454	newfs->fs_ronly = 1;
455	sblockloc = fs->fs_sblockloc;
456	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
457	brelse(bp);
458	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
459	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
460	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
461		printf("%s: reload pending error: blocks %jd files %d\n",
462		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
463		    fs->fs_pendinginodes);
464		fs->fs_pendingblocks = 0;
465		fs->fs_pendinginodes = 0;
466	}
467
468	/*
469	 * Step 3: re-read summary information from disk.
470	 */
471	blks = howmany(fs->fs_cssize, fs->fs_fsize);
472	space = fs->fs_csp;
473	for (i = 0; i < blks; i += fs->fs_frag) {
474		size = fs->fs_bsize;
475		if (i + fs->fs_frag > blks)
476			size = (blks - i) * fs->fs_fsize;
477		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
478		    NOCRED, &bp);
479		if (error)
480			return (error);
481		bcopy(bp->b_data, space, (u_int)size);
482		space = (char *)space + size;
483		brelse(bp);
484	}
485	/*
486	 * We no longer know anything about clusters per cylinder group.
487	 */
488	if (fs->fs_contigsumsize > 0) {
489		lp = fs->fs_maxcluster;
490		for (i = 0; i < fs->fs_ncg; i++)
491			*lp++ = fs->fs_contigsumsize;
492	}
493
494loop:
495	MNT_ILOCK(mp);
496	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
497		if (vp->v_mount != mp) {
498			MNT_IUNLOCK(mp);
499			goto loop;
500		}
501		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
502		VI_LOCK(vp);
503		if (vp->v_iflag & VI_XLOCK) {
504			VI_UNLOCK(vp);
505			continue;
506		}
507		MNT_IUNLOCK(mp);
508		/*
509		 * Step 4: invalidate all inactive vnodes.
510		 */
511		if (vp->v_usecount == 0) {
512			vgonel(vp, td);
513			goto loop;
514		}
515		/*
516		 * Step 5: invalidate all cached file data.
517		 */
518		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
519			goto loop;
520		}
521		if (vinvalbuf(vp, 0, cred, td, 0, 0))
522			panic("ffs_reload: dirty2");
523		/*
524		 * Step 6: re-read inode data for all active vnodes.
525		 */
526		ip = VTOI(vp);
527		error =
528		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
529		    (int)fs->fs_bsize, NOCRED, &bp);
530		if (error) {
531			VOP_UNLOCK(vp, 0, td);
532			vrele(vp);
533			return (error);
534		}
535		ffs_load_inode(bp, ip, fs, ip->i_number);
536		ip->i_effnlink = ip->i_nlink;
537		brelse(bp);
538		VOP_UNLOCK(vp, 0, td);
539		vrele(vp);
540		MNT_ILOCK(mp);
541	}
542	MNT_IUNLOCK(mp);
543	return (0);
544}
545
546/*
547 * Possible superblock locations ordered from most to least likely.
548 */
549static int sblock_try[] = SBLOCKSEARCH;
550
551/*
552 * Common code for mount and mountroot
553 */
554static int
555ffs_mountfs(devvp, mp, td)
556	struct vnode *devvp;
557	struct mount *mp;
558	struct thread *td;
559{
560	struct ufsmount *ump;
561	struct buf *bp;
562	struct fs *fs;
563	dev_t dev;
564	void *space;
565	ufs2_daddr_t sblockloc;
566	int error, i, blks, size, ronly;
567	int32_t *lp;
568	struct ucred *cred;
569	size_t strsize;
570
571	dev = devvp->v_rdev;
572	cred = td ? td->td_ucred : NOCRED;
573	/*
574	 * Disallow multiple mounts of the same device.
575	 * Disallow mounting of a device that is currently in use
576	 * (except for root, which might share swap device for miniroot).
577	 * Flush out any old buffers remaining from a previous use.
578	 */
579	error = vfs_mountedon(devvp);
580	if (error)
581		return (error);
582	if (vcount(devvp) > 1 && devvp != rootvp)
583		return (EBUSY);
584	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
585	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
586	if (error) {
587		VOP_UNLOCK(devvp, 0, td);
588		return (error);
589	}
590
591	/*
592	 * Only VMIO the backing device if the backing device is a real
593	 * disk device.
594	 * Note that it is optional that the backing device be VMIOed.  This
595	 * increases the opportunity for metadata caching.
596	 */
597	if (vn_isdisk(devvp, NULL))
598		vfs_object_create(devvp, td, cred);
599
600	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
601	/*
602	 * XXX: open the device with read and write access even if only
603	 * read access is needed now.  Write access is needed if the
604	 * filesystem is ever mounted read/write, and we don't change the
605	 * access mode for remounts.
606	 */
607#ifdef notyet
608	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1);
609#else
610	error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1);
611#endif
612	VOP_UNLOCK(devvp, 0, td);
613	if (error)
614		return (error);
615	if (devvp->v_rdev->si_iosize_max != 0)
616		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
617	if (mp->mnt_iosize_max > MAXPHYS)
618		mp->mnt_iosize_max = MAXPHYS;
619
620	bp = NULL;
621	ump = NULL;
622	fs = NULL;
623	sblockloc = 0;
624	/*
625	 * Try reading the superblock in each of its possible locations.
626	 */
627	for (i = 0; sblock_try[i] != -1; i++) {
628		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
629		    cred, &bp)) != 0)
630			goto out;
631		fs = (struct fs *)bp->b_data;
632		sblockloc = sblock_try[i];
633		if ((fs->fs_magic == FS_UFS1_MAGIC ||
634		     (fs->fs_magic == FS_UFS2_MAGIC &&
635		      (fs->fs_sblockloc == sblockloc ||
636		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
637		    fs->fs_bsize <= MAXBSIZE &&
638		    fs->fs_bsize >= sizeof(struct fs))
639			break;
640		brelse(bp);
641		bp = NULL;
642	}
643	if (sblock_try[i] == -1) {
644		error = EINVAL;		/* XXX needs translation */
645		goto out;
646	}
647	fs->fs_fmod = 0;
648	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
649	fs->fs_flags &= ~FS_UNCLEAN;
650	if (fs->fs_clean == 0) {
651		fs->fs_flags |= FS_UNCLEAN;
652		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
653		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
654		     (fs->fs_flags & FS_DOSOFTDEP))) {
655			printf(
656"WARNING: %s was not properly dismounted\n",
657			    fs->fs_fsmnt);
658		} else {
659			printf(
660"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
661			    fs->fs_fsmnt);
662			error = EPERM;
663			goto out;
664		}
665		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
666		    (mp->mnt_flag & MNT_FORCE)) {
667			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
668			    (intmax_t)fs->fs_pendingblocks,
669			    fs->fs_pendinginodes);
670			fs->fs_pendingblocks = 0;
671			fs->fs_pendinginodes = 0;
672		}
673	}
674	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
675		printf("%s: mount pending error: blocks %jd files %d\n",
676		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
677		    fs->fs_pendinginodes);
678		fs->fs_pendingblocks = 0;
679		fs->fs_pendinginodes = 0;
680	}
681	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
682	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
683	    M_WAITOK);
684	if (fs->fs_magic == FS_UFS1_MAGIC) {
685		ump->um_fstype = UFS1;
686		ump->um_balloc = ffs_balloc_ufs1;
687	} else {
688		ump->um_fstype = UFS2;
689		ump->um_balloc = ffs_balloc_ufs2;
690	}
691	ump->um_blkatoff = ffs_blkatoff;
692	ump->um_truncate = ffs_truncate;
693	ump->um_update = ffs_update;
694	ump->um_valloc = ffs_valloc;
695	ump->um_vfree = ffs_vfree;
696	ump->um_ifree = ffs_ifree;
697	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
698	if (fs->fs_sbsize < SBLOCKSIZE)
699		bp->b_flags |= B_INVAL | B_NOCACHE;
700	brelse(bp);
701	bp = NULL;
702	fs = ump->um_fs;
703	ffs_oldfscompat_read(fs, ump, sblockloc);
704	fs->fs_ronly = ronly;
705	size = fs->fs_cssize;
706	blks = howmany(size, fs->fs_fsize);
707	if (fs->fs_contigsumsize > 0)
708		size += fs->fs_ncg * sizeof(int32_t);
709	size += fs->fs_ncg * sizeof(u_int8_t);
710	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
711	fs->fs_csp = space;
712	for (i = 0; i < blks; i += fs->fs_frag) {
713		size = fs->fs_bsize;
714		if (i + fs->fs_frag > blks)
715			size = (blks - i) * fs->fs_fsize;
716		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
717		    cred, &bp)) != 0) {
718			free(fs->fs_csp, M_UFSMNT);
719			goto out;
720		}
721		bcopy(bp->b_data, space, (u_int)size);
722		space = (char *)space + size;
723		brelse(bp);
724		bp = NULL;
725	}
726	if (fs->fs_contigsumsize > 0) {
727		fs->fs_maxcluster = lp = space;
728		for (i = 0; i < fs->fs_ncg; i++)
729			*lp++ = fs->fs_contigsumsize;
730		space = lp;
731	}
732	size = fs->fs_ncg * sizeof(u_int8_t);
733	fs->fs_contigdirs = (u_int8_t *)space;
734	bzero(fs->fs_contigdirs, size);
735	fs->fs_active = NULL;
736	mp->mnt_data = (qaddr_t)ump;
737	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
738	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
739	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
740	    vfs_getvfs(&mp->mnt_stat.f_fsid))
741		vfs_getnewfsid(mp);
742	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
743	mp->mnt_flag |= MNT_LOCAL;
744	if ((fs->fs_flags & FS_MULTILABEL) != 0)
745#ifdef MAC
746		mp->mnt_flag |= MNT_MULTILABEL;
747#else
748		printf(
749"WARNING: %s: multilabel flag on fs but no MAC support\n",
750		    fs->fs_fsmnt);
751#endif
752	if ((fs->fs_flags & FS_ACLS) != 0)
753#ifdef UFS_ACL
754		mp->mnt_flag |= MNT_ACLS;
755#else
756		printf(
757"WARNING: %s: ACLs flag on fs but no ACLs support\n",
758		    fs->fs_fsmnt);
759#endif
760	ump->um_mountp = mp;
761	ump->um_dev = dev;
762	ump->um_devvp = devvp;
763	ump->um_nindir = fs->fs_nindir;
764	ump->um_bptrtodb = fs->fs_fsbtodb;
765	ump->um_seqinc = fs->fs_frag;
766	for (i = 0; i < MAXQUOTAS; i++)
767		ump->um_quotas[i] = NULLVP;
768#ifdef UFS_EXTATTR
769	ufs_extattr_uepm_init(&ump->um_extattr);
770#endif
771	devvp->v_rdev->si_mountpoint = mp;
772
773	/*
774	 * Set FS local "last mounted on" information (NULL pad)
775	 */
776	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
777			fs->fs_fsmnt,			/* copy area*/
778			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
779			&strsize);			/* real size*/
780	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
781
782	if( mp->mnt_flag & MNT_ROOTFS) {
783		/*
784		 * Root mount; update timestamp in mount structure.
785		 * this will be used by the common root mount code
786		 * to update the system clock.
787		 */
788		mp->mnt_time = fs->fs_time;
789	}
790
791	if (ronly == 0) {
792		if ((fs->fs_flags & FS_DOSOFTDEP) &&
793		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
794			free(fs->fs_csp, M_UFSMNT);
795			goto out;
796		}
797		if (fs->fs_snapinum[0] != 0)
798			ffs_snapshot_mount(mp);
799		fs->fs_fmod = 1;
800		fs->fs_clean = 0;
801		(void) ffs_sbupdate(ump, MNT_WAIT);
802	}
803#ifdef UFS_EXTATTR
804#ifdef UFS_EXTATTR_AUTOSTART
805	/*
806	 *
807	 * Auto-starting does the following:
808	 *	- check for /.attribute in the fs, and extattr_start if so
809	 *	- for each file in .attribute, enable that file with
810	 * 	  an attribute of the same name.
811	 * Not clear how to report errors -- probably eat them.
812	 * This would all happen while the filesystem was busy/not
813	 * available, so would effectively be "atomic".
814	 */
815	(void) ufs_extattr_autostart(mp, td);
816#endif /* !UFS_EXTATTR_AUTOSTART */
817#endif /* !UFS_EXTATTR */
818	return (0);
819out:
820	devvp->v_rdev->si_mountpoint = NULL;
821	if (bp)
822		brelse(bp);
823	/* XXX: see comment above VOP_OPEN. */
824#ifdef notyet
825	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, cred, td);
826#else
827	(void)VOP_CLOSE(devvp, FREAD | FWRITE, cred, td);
828#endif
829	if (ump) {
830		free(ump->um_fs, M_UFSMNT);
831		free(ump, M_UFSMNT);
832		mp->mnt_data = (qaddr_t)0;
833	}
834	return (error);
835}
836
837#include <sys/sysctl.h>
838int bigcgs = 0;
839SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
840
841/*
842 * Sanity checks for loading old filesystem superblocks.
843 * See ffs_oldfscompat_write below for unwound actions.
844 *
845 * XXX - Parts get retired eventually.
846 * Unfortunately new bits get added.
847 */
848static void
849ffs_oldfscompat_read(fs, ump, sblockloc)
850	struct fs *fs;
851	struct ufsmount *ump;
852	ufs2_daddr_t sblockloc;
853{
854	off_t maxfilesize;
855
856	/*
857	 * If not yet done, update fs_flags location and value of fs_sblockloc.
858	 */
859	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
860		fs->fs_flags = fs->fs_old_flags;
861		fs->fs_old_flags |= FS_FLAGS_UPDATED;
862		fs->fs_sblockloc = sblockloc;
863	}
864	/*
865	 * If not yet done, update UFS1 superblock with new wider fields.
866	 */
867	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
868		fs->fs_maxbsize = fs->fs_bsize;
869		fs->fs_time = fs->fs_old_time;
870		fs->fs_size = fs->fs_old_size;
871		fs->fs_dsize = fs->fs_old_dsize;
872		fs->fs_csaddr = fs->fs_old_csaddr;
873		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
874		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
875		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
876		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
877	}
878	if (fs->fs_magic == FS_UFS1_MAGIC &&
879	    fs->fs_old_inodefmt < FS_44INODEFMT) {
880		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
881		fs->fs_qbmask = ~fs->fs_bmask;
882		fs->fs_qfmask = ~fs->fs_fmask;
883	}
884	if (fs->fs_magic == FS_UFS1_MAGIC) {
885		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
886		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
887		if (fs->fs_maxfilesize > maxfilesize)
888			fs->fs_maxfilesize = maxfilesize;
889	}
890	/* Compatibility for old filesystems */
891	if (fs->fs_avgfilesize <= 0)
892		fs->fs_avgfilesize = AVFILESIZ;
893	if (fs->fs_avgfpdir <= 0)
894		fs->fs_avgfpdir = AFPDIR;
895	if (bigcgs) {
896		fs->fs_save_cgsize = fs->fs_cgsize;
897		fs->fs_cgsize = fs->fs_bsize;
898	}
899}
900
901/*
902 * Unwinding superblock updates for old filesystems.
903 * See ffs_oldfscompat_read above for details.
904 *
905 * XXX - Parts get retired eventually.
906 * Unfortunately new bits get added.
907 */
908static void
909ffs_oldfscompat_write(fs, ump)
910	struct fs *fs;
911	struct ufsmount *ump;
912{
913
914	/*
915	 * Copy back UFS2 updated fields that UFS1 inspects.
916	 */
917	if (fs->fs_magic == FS_UFS1_MAGIC) {
918		fs->fs_old_time = fs->fs_time;
919		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
920		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
921		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
922		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
923		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
924	}
925	if (bigcgs) {
926		fs->fs_cgsize = fs->fs_save_cgsize;
927		fs->fs_save_cgsize = 0;
928	}
929}
930
931/*
932 * unmount system call
933 */
934int
935ffs_unmount(mp, mntflags, td)
936	struct mount *mp;
937	int mntflags;
938	struct thread *td;
939{
940	struct ufsmount *ump = VFSTOUFS(mp);
941	struct fs *fs;
942	int error, flags;
943
944	flags = 0;
945	if (mntflags & MNT_FORCE) {
946		flags |= FORCECLOSE;
947	}
948#ifdef UFS_EXTATTR
949	if ((error = ufs_extattr_stop(mp, td))) {
950		if (error != EOPNOTSUPP)
951			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
952			    error);
953	} else {
954		ufs_extattr_uepm_destroy(&ump->um_extattr);
955	}
956#endif
957	if (mp->mnt_flag & MNT_SOFTDEP) {
958		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
959			return (error);
960	} else {
961		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
962			return (error);
963	}
964	fs = ump->um_fs;
965	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
966		printf("%s: unmount pending error: blocks %jd files %d\n",
967		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
968		    fs->fs_pendinginodes);
969		fs->fs_pendingblocks = 0;
970		fs->fs_pendinginodes = 0;
971	}
972	if (fs->fs_ronly == 0) {
973		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
974		error = ffs_sbupdate(ump, MNT_WAIT);
975		if (error) {
976			fs->fs_clean = 0;
977			return (error);
978		}
979	}
980	ump->um_devvp->v_rdev->si_mountpoint = NULL;
981
982	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
983	/* XXX: see comment above VOP_OPEN. */
984#ifdef notyet
985	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
986	    NOCRED, td);
987#else
988	error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td);
989#endif
990	vrele(ump->um_devvp);
991	free(fs->fs_csp, M_UFSMNT);
992	free(fs, M_UFSMNT);
993	free(ump, M_UFSMNT);
994	mp->mnt_data = (qaddr_t)0;
995	mp->mnt_flag &= ~MNT_LOCAL;
996	return (error);
997}
998
999/*
1000 * Flush out all the files in a filesystem.
1001 */
1002int
1003ffs_flushfiles(mp, flags, td)
1004	struct mount *mp;
1005	int flags;
1006	struct thread *td;
1007{
1008	struct ufsmount *ump;
1009	int error;
1010
1011	ump = VFSTOUFS(mp);
1012#ifdef QUOTA
1013	if (mp->mnt_flag & MNT_QUOTA) {
1014		int i;
1015		error = vflush(mp, 0, SKIPSYSTEM|flags);
1016		if (error)
1017			return (error);
1018		for (i = 0; i < MAXQUOTAS; i++) {
1019			if (ump->um_quotas[i] == NULLVP)
1020				continue;
1021			quotaoff(td, mp, i);
1022		}
1023		/*
1024		 * Here we fall through to vflush again to ensure
1025		 * that we have gotten rid of all the system vnodes.
1026		 */
1027	}
1028#endif
1029	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1030	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1031		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1032			return (error);
1033		ffs_snapshot_unmount(mp);
1034		/*
1035		 * Here we fall through to vflush again to ensure
1036		 * that we have gotten rid of all the system vnodes.
1037		 */
1038	}
1039        /*
1040	 * Flush all the files.
1041	 */
1042	if ((error = vflush(mp, 0, flags)) != 0)
1043		return (error);
1044	/*
1045	 * Flush filesystem metadata.
1046	 */
1047	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1048	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1049	VOP_UNLOCK(ump->um_devvp, 0, td);
1050	return (error);
1051}
1052
1053/*
1054 * Get filesystem statistics.
1055 */
1056int
1057ffs_statfs(mp, sbp, td)
1058	struct mount *mp;
1059	struct statfs *sbp;
1060	struct thread *td;
1061{
1062	struct ufsmount *ump;
1063	struct fs *fs;
1064
1065	ump = VFSTOUFS(mp);
1066	fs = ump->um_fs;
1067	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1068		panic("ffs_statfs");
1069	sbp->f_version = STATFS_VERSION;
1070	sbp->f_bsize = fs->fs_fsize;
1071	sbp->f_iosize = fs->fs_bsize;
1072	sbp->f_blocks = fs->fs_dsize;
1073	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1074	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1075	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1076	    dbtofsb(fs, fs->fs_pendingblocks);
1077	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1078	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1079	sbp->f_namemax = NAME_MAX;
1080	if (sbp != &mp->mnt_stat) {
1081		sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1082		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1083		sbp->f_syncwrites = mp->mnt_stat.f_syncwrites;
1084		sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites;
1085		sbp->f_syncreads = mp->mnt_stat.f_syncreads;
1086		sbp->f_asyncreads = mp->mnt_stat.f_asyncreads;
1087		sbp->f_owner = mp->mnt_stat.f_owner;
1088		sbp->f_fsid = mp->mnt_stat.f_fsid;
1089		bcopy((caddr_t)mp->mnt_stat.f_fstypename,
1090			(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1091		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1092			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1093		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1094			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1095	}
1096	return (0);
1097}
1098
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * Arguments:
 *	mp	 mount point to sync
 *	waitfor	 MNT_WAIT makes the vnode locks blocking and enables the
 *		 soft-updates worklist flush and the rescans below;
 *		 MNT_LAZY skips the device vnode fsync; the value is also
 *		 handed to VOP_FSYNC() and ffs_sbupdate()
 *	cred	 credentials handed to VOP_FSYNC()
 *	td	 calling thread
 *
 * Returns 0, or the most recently recorded error: every failure
 * overwrites allerror, so an earlier error can be replaced by a later one.
 *
 * Panics if the superblock is flagged modified (fs_fmod) on a
 * filesystem that is marked read-only (fs_ronly).
 */
int
ffs_sync(mp, waitfor, cred, td)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct thread *td;
{
	struct vnode *nvp, *vp, *devvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;

	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 *
	 * Unless the caller asked for MNT_WAIT, vnode locks are requested
	 * with LK_NOWAIT.  LK_INTERLOCK is always added because vget()
	 * below is called with the vnode interlock still held.
	 *
	 * NOTE(review): `wait' is assigned here but not read anywhere
	 * else in this function.
	 */
	wait = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	lockreq |= LK_INTERLOCK;
	MNT_ILOCK(mp);
loop:
	/* Every entry to `loop' is made with the mount interlock held. */
	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/*
		 * Depend on the mntvnode_slock to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 * (The lock actually held at this point is the mount
		 * interlock taken with MNT_ILOCK().)
		 */
		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
		VI_LOCK(vp);
		/* Vnodes flagged VI_XLOCK are passed over. */
		if (vp->v_iflag & VI_XLOCK) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		/*
		 * Nothing to do for VNON vnodes, or for inodes with none of
		 * the access/change/modified/update flags set and no dirty
		 * buffers.
		 */
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
			VI_UNLOCK(vp);
			continue;
		}
		/*
		 * Drop the mount interlock before vget(); the vnode
		 * interlock is still held and is passed in via LK_INTERLOCK.
		 */
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			/* ENOENT restarts the scan; other errors skip vp. */
			if (error == ENOENT)
				goto loop;
			continue;
		}
		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(vp, 0, td);
		vrele(vp);
		MNT_ILOCK(mp);
		/*
		 * The list was walked without the mount interlock for a
		 * while; if vp's successor is no longer the one saved in
		 * nvp, start over.
		 */
		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
			goto loop;
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		/* (`count' is only examined when no error has been recorded.) */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	/*
	 * Sync the device vnode unless this is a lazy sync, but only when
	 * it has writes in progress or dirty buffers.  The vnode interlock
	 * taken here is either handed to vn_lock() via LK_INTERLOCK or
	 * released explicitly in the else branch.
	 */
	devvp = ump->um_devvp;
	VI_LOCK(devvp);
	if (waitfor != MNT_LAZY &&
	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0, td);
		/*
		 * For a waiting sync with no error so far, rescan the
		 * vnode list after the device vnode has been synced.
		 */
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else
		VI_UNLOCK(devvp);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
		allerror = error;
	return (allerror);
}
1215
/*
 * Look up the vnode for inode number `ino' on mount `mp', creating and
 * initializing a new vnode/inode pair from the on-disk inode when the
 * inode hash does not already hold one.
 *
 * Arguments:
 *	mp	mount point the inode belongs to
 *	ino	inode number
 *	flags	passed through to ufs_ihashget() and ufs_ihashins()
 *	vpp	receives the resulting vnode
 *
 * Returns 0 with *vpp set to the vnode on success.  On every failure
 * after the initial hash lookup, *vpp is set to NULL and the error is
 * returned; when the initial ufs_ihashget() itself fails, its error is
 * returned and *vpp is left as that routine left it.
 */
int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	struct thread *td = curthread; 		/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */
	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
		return (error);
	/* The hash lookup produced a vnode: nothing more to do. */
	if (*vpp != NULL)
		return (0);

	/*
	 * If this allocation were performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK);

	/* Allocate a new vnode/inode. */
	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * FFS supports recursive locking.
	 */
	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
	/* Cross-link vnode and inode and record where the inode lives. */
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif
	/*
	 * Exclusively lock the vnode before adding to hash. Note, that we
	 * must not release nor downgrade the lock (despite flags argument
	 * says) till it is fully initialized.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);

	/*
	 * Atomically (in terms of ufs_hash operations) check the hash for
	 * duplicate of vnode being created and add it to the hash. If a
	 * duplicate vnode was found, it will be vget()ed from hash for us.
	 */
	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/* We lost the race, then throw away our vnode and return existing */
	if (*vpp != NULL) {
		vput(vp);
		return (0);
	}

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* The dinode copy comes from the zone matching the filesystem type. */
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	/*
	 * With soft updates the inode block is handed to
	 * softdep_load_inodeblock(); otherwise the effective link count
	 * simply starts out equal to the link count.
	 */
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization.
	 */
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 * On a read-only mount the new number is kept in the in-core
	 * inode only: the dinode copy is not updated and the inode is
	 * not marked modified.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP(ip, i_gen) = ip->i_gen;
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_associate_vnode_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	/* Success: hand the initialized vnode back to the caller. */
	*vpp = vp;
	return (0);
}
1385
1386/*
1387 * File handle to vnode
1388 *
1389 * Have to be really careful about stale file handles:
1390 * - check that the inode number is valid
1391 * - call ffs_vget() to get the locked inode
1392 * - check for an unallocated inode (i_mode == 0)
1393 * - check that the given client host has export rights and return
1394 *   those rights via. exflagsp and credanonp
1395 */
1396int
1397ffs_fhtovp(mp, fhp, vpp)
1398	struct mount *mp;
1399	struct fid *fhp;
1400	struct vnode **vpp;
1401{
1402	struct ufid *ufhp;
1403	struct fs *fs;
1404
1405	ufhp = (struct ufid *)fhp;
1406	fs = VFSTOUFS(mp)->um_fs;
1407	if (ufhp->ufid_ino < ROOTINO ||
1408	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1409		return (ESTALE);
1410	return (ufs_fhtovp(mp, ufhp, vpp));
1411}
1412
1413/*
1414 * Vnode pointer to File handle
1415 */
1416/* ARGSUSED */
1417int
1418ffs_vptofh(vp, fhp)
1419	struct vnode *vp;
1420	struct fid *fhp;
1421{
1422	struct inode *ip;
1423	struct ufid *ufhp;
1424
1425	ip = VTOI(vp);
1426	ufhp = (struct ufid *)fhp;
1427	ufhp->ufid_len = sizeof(struct ufid);
1428	ufhp->ufid_ino = ip->i_number;
1429	ufhp->ufid_gen = ip->i_gen;
1430	return (0);
1431}
1432
/*
 * Initialize the filesystem.
 *
 * Calls softdep_initialize() and then ufs_init(); the value returned
 * by ufs_init() is the result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1444
/*
 * Undo the work of ffs_init().
 *
 * Teardown runs in the opposite order of ffs_init(): ufs_uninit()
 * first, softdep_uninitialize() second.  The value returned by
 * ufs_uninit() is the result.
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1458
1459/*
1460 * Write a superblock and associated information back to disk.
1461 */
1462static int
1463ffs_sbupdate(mp, waitfor)
1464	struct ufsmount *mp;
1465	int waitfor;
1466{
1467	struct fs *fs = mp->um_fs;
1468	struct buf *bp;
1469	int blks;
1470	void *space;
1471	int i, size, error, allerror = 0;
1472
1473	if (fs->fs_ronly == 1 &&
1474	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1475	    (MNT_RDONLY | MNT_UPDATE))
1476		panic("ffs_sbupdate: write read-only filesystem");
1477	/*
1478	 * First write back the summary information.
1479	 */
1480	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1481	space = fs->fs_csp;
1482	for (i = 0; i < blks; i += fs->fs_frag) {
1483		size = fs->fs_bsize;
1484		if (i + fs->fs_frag > blks)
1485			size = (blks - i) * fs->fs_fsize;
1486		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1487		    size, 0, 0, 0);
1488		bcopy(space, bp->b_data, (u_int)size);
1489		space = (char *)space + size;
1490		if (waitfor != MNT_WAIT)
1491			bawrite(bp);
1492		else if ((error = bwrite(bp)) != 0)
1493			allerror = error;
1494	}
1495	/*
1496	 * Now write back the superblock itself. If any errors occurred
1497	 * up to this point, then fail so that the superblock avoids
1498	 * being written out as clean.
1499	 */
1500	if (allerror)
1501		return (allerror);
1502	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1503	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1504		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1505		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1506		fs->fs_sblockloc = SBLOCK_UFS1;
1507	}
1508	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1509	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1510		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1511		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1512		fs->fs_sblockloc = SBLOCK_UFS2;
1513	}
1514	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1515	    0, 0, 0);
1516	fs->fs_fmod = 0;
1517	fs->fs_time = time_second;
1518	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1519	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1520	if (waitfor != MNT_WAIT)
1521		bawrite(bp);
1522	else if ((error = bwrite(bp)) != 0)
1523		allerror = error;
1524	return (allerror);
1525}
1526
/*
 * Extended attribute control entry point.
 *
 * Forwards all arguments to ufs_extattrctl() on kernels built with
 * UFS_EXTATTR and to vfs_stdextattrctl() otherwise, returning
 * whatever the chosen routine returns.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1540
1541static void
1542ffs_ifree(struct ufsmount *ump, struct inode *ip)
1543{
1544
1545	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1546		uma_zfree(uma_ufs1, ip->i_din1);
1547	else if (ip->i_din2 != NULL)
1548		uma_zfree(uma_ufs2, ip->i_din2);
1549	uma_zfree(uma_inode, ip);
1550}
1551