ffs_vfsops.c revision 120825
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 120825 2003-10-05 22:56:33Z jeff $");
38
39#include "opt_mac.h"
40#include "opt_quota.h"
41#include "opt_ufs.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/kernel.h>
48#include <sys/mac.h>
49#include <sys/vnode.h>
50#include <sys/mount.h>
51#include <sys/bio.h>
52#include <sys/buf.h>
53#include <sys/conf.h>
54#include <sys/fcntl.h>
55#include <sys/disk.h>
56#include <sys/malloc.h>
57#include <sys/mutex.h>
58
59#include <ufs/ufs/extattr.h>
60#include <ufs/ufs/quota.h>
61#include <ufs/ufs/ufsmount.h>
62#include <ufs/ufs/inode.h>
63#include <ufs/ufs/ufs_extern.h>
64
65#include <ufs/ffs/fs.h>
66#include <ufs/ffs/ffs_extern.h>
67
68#include <vm/vm.h>
69#include <vm/uma.h>
70#include <vm/vm_page.h>
71
72uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
73
74static int	ffs_sbupdate(struct ufsmount *, int);
75       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
76static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
77static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
78		    ufs2_daddr_t);
79static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
80static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
81static vfs_init_t ffs_init;
82static vfs_uninit_t ffs_uninit;
83static vfs_extattrctl_t ffs_extattrctl;
84
85static struct vfsops ufs_vfsops = {
86	.vfs_extattrctl =	ffs_extattrctl,
87	.vfs_fhtovp =		ffs_fhtovp,
88	.vfs_init =		ffs_init,
89	.vfs_mount =		ffs_mount,
90	.vfs_quotactl =		ufs_quotactl,
91	.vfs_root =		ufs_root,
92	.vfs_start =		ufs_start,
93	.vfs_statfs =		ffs_statfs,
94	.vfs_sync =		ffs_sync,
95	.vfs_uninit =		ffs_uninit,
96	.vfs_unmount =		ffs_unmount,
97	.vfs_vget =		ffs_vget,
98	.vfs_vptofh =		ffs_vptofh,
99};
100
101VFS_SET(ufs_vfsops, ufs, 0);
102
103/*
104 * ffs_mount
105 *
106 * Called when mounting local physical media
107 *
108 * PARAMETERS:
109 *		mountroot
110 *			mp	mount point structure
111 *			path	NULL (flag for root mount!!!)
112 *			data	<unused>
113 *			ndp	<unused>
114 *			p	process (user credentials check [statfs])
115 *
116 *		mount
117 *			mp	mount point structure
118 *			path	path to mount point
119 *			data	pointer to argument struct in user space
120 *			ndp	mount point namei() return (used for
121 *				credentials on reload), reused to look
122 *				up block device.
123 *			p	process (user credentials check)
124 *
125 * RETURNS:	0	Success
126 *		!0	error number (errno.h)
127 *
128 * LOCK STATE:
129 *
130 *		ENTRY
131 *			mount point is locked
132 *		EXIT
133 *			mount point is locked
134 *
135 * NOTES:
136 *		A NULL path can be used for a flag since the mount
137 *		system call will fail with EFAULT in copyinstr in
138 *		namei() if it is a genuine NULL from the user.
139 */
140int
141ffs_mount(mp, path, data, ndp, td)
142        struct mount		*mp;	/* mount struct pointer*/
143        char			*path;	/* path to mount point*/
144        caddr_t			data;	/* arguments to FS specific mount*/
145        struct nameidata	*ndp;	/* mount point credentials*/
146        struct thread		*td;	/* process requesting mount*/
147{
148	size_t size;
149	struct vnode *devvp;
150	struct ufs_args args;
151	struct ufsmount *ump = 0;
152	struct fs *fs;
153	int error, flags;
154	mode_t accessmode;
155
156	if (uma_inode == NULL) {
157		uma_inode = uma_zcreate("FFS inode",
158		    sizeof(struct inode), NULL, NULL, NULL, NULL,
159		    UMA_ALIGN_PTR, 0);
160		uma_ufs1 = uma_zcreate("FFS1 dinode",
161		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
162		    UMA_ALIGN_PTR, 0);
163		uma_ufs2 = uma_zcreate("FFS2 dinode",
164		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
165		    UMA_ALIGN_PTR, 0);
166	}
167	/*
168	 * Use NULL path to indicate we are mounting the root filesystem.
169	 */
170	if (path == NULL) {
171		if ((error = bdevvp(rootdev, &rootvp))) {
172			printf("ffs_mountroot: can't find rootvp\n");
173			return (error);
174		}
175
176		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
177			return (error);
178		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
179		return (0);
180	}
181
182	/*
183	 * Mounting non-root filesystem or updating a filesystem
184	 */
185	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
186		return (error);
187
188	/*
189	 * If updating, check whether changing from read-only to
190	 * read/write; if there is no device name, that's all we do.
191	 */
192	if (mp->mnt_flag & MNT_UPDATE) {
193		ump = VFSTOUFS(mp);
194		fs = ump->um_fs;
195		devvp = ump->um_devvp;
196		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
197			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
198				return (error);
199			/*
200			 * Flush any dirty data.
201			 */
202			if ((error = VFS_SYNC(mp, MNT_WAIT,
203			    td->td_ucred, td)) != 0) {
204				vn_finished_write(mp);
205				return (error);
206			}
207			/*
208			 * Check for and optionally get rid of files open
209			 * for writing.
210			 */
211			flags = WRITECLOSE;
212			if (mp->mnt_flag & MNT_FORCE)
213				flags |= FORCECLOSE;
214			if (mp->mnt_flag & MNT_SOFTDEP) {
215				error = softdep_flushfiles(mp, flags, td);
216			} else {
217				error = ffs_flushfiles(mp, flags, td);
218			}
219			if (error) {
220				vn_finished_write(mp);
221				return (error);
222			}
223			if (fs->fs_pendingblocks != 0 ||
224			    fs->fs_pendinginodes != 0) {
225				printf("%s: %s: blocks %jd files %d\n",
226				    fs->fs_fsmnt, "update error",
227				    (intmax_t)fs->fs_pendingblocks,
228				    fs->fs_pendinginodes);
229				fs->fs_pendingblocks = 0;
230				fs->fs_pendinginodes = 0;
231			}
232			fs->fs_ronly = 1;
233			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
234				fs->fs_clean = 1;
235			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
236				fs->fs_ronly = 0;
237				fs->fs_clean = 0;
238				vn_finished_write(mp);
239				return (error);
240			}
241			vn_finished_write(mp);
242		}
243		if ((mp->mnt_flag & MNT_RELOAD) &&
244		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
245			return (error);
246		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
247			/*
248			 * If upgrade to read-write by non-root, then verify
249			 * that user has necessary permissions on the device.
250			 */
251			if (suser(td)) {
252				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
253				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
254				    td->td_ucred, td)) != 0) {
255					VOP_UNLOCK(devvp, 0, td);
256					return (error);
257				}
258				VOP_UNLOCK(devvp, 0, td);
259			}
260			fs->fs_flags &= ~FS_UNCLEAN;
261			if (fs->fs_clean == 0) {
262				fs->fs_flags |= FS_UNCLEAN;
263				if ((mp->mnt_flag & MNT_FORCE) ||
264				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
265				     (fs->fs_flags & FS_DOSOFTDEP))) {
266					printf("WARNING: %s was not %s\n",
267					   fs->fs_fsmnt, "properly dismounted");
268				} else {
269					printf(
270"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
271					    fs->fs_fsmnt);
272					return (EPERM);
273				}
274			}
275			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
276				return (error);
277			fs->fs_ronly = 0;
278			fs->fs_clean = 0;
279			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
280				vn_finished_write(mp);
281				return (error);
282			}
283			/* check to see if we need to start softdep */
284			if ((fs->fs_flags & FS_DOSOFTDEP) &&
285			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
286				vn_finished_write(mp);
287				return (error);
288			}
289			if (fs->fs_snapinum[0] != 0)
290				ffs_snapshot_mount(mp);
291			vn_finished_write(mp);
292		}
293		/*
294		 * Soft updates is incompatible with "async",
295		 * so if we are doing softupdates stop the user
296		 * from setting the async flag in an update.
297		 * Softdep_mount() clears it in an initial mount
298		 * or ro->rw remount.
299		 */
300		if (mp->mnt_flag & MNT_SOFTDEP)
301			mp->mnt_flag &= ~MNT_ASYNC;
302		/*
303		 * If not updating name, process export requests.
304		 */
305		if (args.fspec == 0)
306			return (vfs_export(mp, &args.export));
307		/*
308		 * If this is a snapshot request, take the snapshot.
309		 */
310		if (mp->mnt_flag & MNT_SNAPSHOT)
311			return (ffs_snapshot(mp, args.fspec));
312	}
313
314	/*
315	 * Not an update, or updating the name: look up the name
316	 * and verify that it refers to a sensible block device.
317	 */
318	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
319	if ((error = namei(ndp)) != 0)
320		return (error);
321	NDFREE(ndp, NDF_ONLY_PNBUF);
322	devvp = ndp->ni_vp;
323	if (!vn_isdisk(devvp, &error)) {
324		vrele(devvp);
325		return (error);
326	}
327
328	/*
329	 * If mount by non-root, then verify that user has necessary
330	 * permissions on the device.
331	 */
332	if (suser(td)) {
333		accessmode = VREAD;
334		if ((mp->mnt_flag & MNT_RDONLY) == 0)
335			accessmode |= VWRITE;
336		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
337		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
338			vput(devvp);
339			return (error);
340		}
341		VOP_UNLOCK(devvp, 0, td);
342	}
343
344	if (mp->mnt_flag & MNT_UPDATE) {
345		/*
346		 * Update only
347		 *
348		 * If it's not the same vnode, or at least the same device
349		 * then it's not correct.
350		 */
351
352		if (devvp != ump->um_devvp &&
353		    devvp->v_rdev != ump->um_devvp->v_rdev)
354			error = EINVAL;	/* needs translation */
355		vrele(devvp);
356		if (error)
357			return (error);
358	} else {
359		/*
360		 * New mount
361		 *
362		 * We need the name for the mount point (also used for
363		 * "last mounted on") copied in. If an error occurs,
364		 * the mount point is discarded by the upper level code.
365		 * Note that vfs_mount() populates f_mntonname for us.
366		 */
367		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
368			vrele(devvp);
369			return (error);
370		}
371	}
372	/*
373	 * Save "mounted from" device name info for mount point (NULL pad).
374	 */
375	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
376	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
377	/*
378	 * Initialize filesystem stat information in mount struct.
379	 */
380	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
381	return (0);
382}
383
384/*
385 * Reload all incore data for a filesystem (used after running fsck on
386 * the root filesystem and finding things to fix). The filesystem must
387 * be mounted read-only.
388 *
389 * Things to do to update the mount:
390 *	1) invalidate all cached meta-data.
391 *	2) re-read superblock from disk.
392 *	3) re-read summary information from disk.
393 *	4) invalidate all inactive vnodes.
394 *	5) invalidate all cached file data.
395 *	6) re-read inode data for all active vnodes.
396 */
397int
398ffs_reload(mp, cred, td)
399	struct mount *mp;
400	struct ucred *cred;
401	struct thread *td;
402{
403	struct vnode *vp, *nvp, *devvp;
404	struct inode *ip;
405	void *space;
406	struct buf *bp;
407	struct fs *fs, *newfs;
408	ufs2_daddr_t sblockloc;
409	int i, blks, size, error;
410	int32_t *lp;
411
412	if ((mp->mnt_flag & MNT_RDONLY) == 0)
413		return (EINVAL);
414	/*
415	 * Step 1: invalidate all cached meta-data.
416	 */
417	devvp = VFSTOUFS(mp)->um_devvp;
418	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
419	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
420	VOP_UNLOCK(devvp, 0, td);
421	if (error)
422		panic("ffs_reload: dirty1");
423
424	/*
425	 * Only VMIO the backing device if the backing device is a real
426	 * block device.
427	 */
428	if (vn_isdisk(devvp, NULL)) {
429		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
430		vfs_object_create(devvp, td, td->td_ucred);
431		VOP_UNLOCK(devvp, 0, td);
432	}
433
434	/*
435	 * Step 2: re-read superblock from disk.
436	 */
437	fs = VFSTOUFS(mp)->um_fs;
438	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
439	    NOCRED, &bp)) != 0)
440		return (error);
441	newfs = (struct fs *)bp->b_data;
442	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
443	     newfs->fs_magic != FS_UFS2_MAGIC) ||
444	    newfs->fs_bsize > MAXBSIZE ||
445	    newfs->fs_bsize < sizeof(struct fs)) {
446			brelse(bp);
447			return (EIO);		/* XXX needs translation */
448	}
449	/*
450	 * Copy pointer fields back into superblock before copying in	XXX
451	 * new superblock. These should really be in the ufsmount.	XXX
452	 * Note that important parameters (eg fs_ncg) are unchanged.
453	 */
454	newfs->fs_csp = fs->fs_csp;
455	newfs->fs_maxcluster = fs->fs_maxcluster;
456	newfs->fs_contigdirs = fs->fs_contigdirs;
457	newfs->fs_active = fs->fs_active;
458	sblockloc = fs->fs_sblockloc;
459	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
460	brelse(bp);
461	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
462	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
463	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
464		printf("%s: reload pending error: blocks %jd files %d\n",
465		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
466		    fs->fs_pendinginodes);
467		fs->fs_pendingblocks = 0;
468		fs->fs_pendinginodes = 0;
469	}
470
471	/*
472	 * Step 3: re-read summary information from disk.
473	 */
474	blks = howmany(fs->fs_cssize, fs->fs_fsize);
475	space = fs->fs_csp;
476	for (i = 0; i < blks; i += fs->fs_frag) {
477		size = fs->fs_bsize;
478		if (i + fs->fs_frag > blks)
479			size = (blks - i) * fs->fs_fsize;
480		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
481		    NOCRED, &bp);
482		if (error)
483			return (error);
484		bcopy(bp->b_data, space, (u_int)size);
485		space = (char *)space + size;
486		brelse(bp);
487	}
488	/*
489	 * We no longer know anything about clusters per cylinder group.
490	 */
491	if (fs->fs_contigsumsize > 0) {
492		lp = fs->fs_maxcluster;
493		for (i = 0; i < fs->fs_ncg; i++)
494			*lp++ = fs->fs_contigsumsize;
495	}
496
497loop:
498	mtx_lock(&mntvnode_mtx);
499	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
500		if (vp->v_mount != mp) {
501			mtx_unlock(&mntvnode_mtx);
502			goto loop;
503		}
504		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
505		VI_LOCK(vp);
506		if (vp->v_iflag & VI_XLOCK) {
507			VI_UNLOCK(vp);
508			continue;
509		}
510		mtx_unlock(&mntvnode_mtx);
511		/*
512		 * Step 4: invalidate all inactive vnodes.
513		 */
514		if (vp->v_usecount == 0) {
515			vgonel(vp, td);
516			goto loop;
517		}
518		/*
519		 * Step 5: invalidate all cached file data.
520		 */
521		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
522			goto loop;
523		}
524		if (vinvalbuf(vp, 0, cred, td, 0, 0))
525			panic("ffs_reload: dirty2");
526		/*
527		 * Step 6: re-read inode data for all active vnodes.
528		 */
529		ip = VTOI(vp);
530		error =
531		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
532		    (int)fs->fs_bsize, NOCRED, &bp);
533		if (error) {
534			vput(vp);
535			return (error);
536		}
537		ffs_load_inode(bp, ip, fs, ip->i_number);
538		ip->i_effnlink = ip->i_nlink;
539		brelse(bp);
540		vput(vp);
541		mtx_lock(&mntvnode_mtx);
542	}
543	mtx_unlock(&mntvnode_mtx);
544	return (0);
545}
546
547/*
548 * Possible superblock locations ordered from most to least likely.
549 */
550static int sblock_try[] = SBLOCKSEARCH;
551
552/*
553 * Common code for mount and mountroot
554 */
555static int
556ffs_mountfs(devvp, mp, td)
557	struct vnode *devvp;
558	struct mount *mp;
559	struct thread *td;
560{
561	struct ufsmount *ump;
562	struct buf *bp;
563	struct fs *fs;
564	dev_t dev;
565	void *space;
566	ufs2_daddr_t sblockloc;
567	int error, i, blks, size, ronly;
568	int32_t *lp;
569	struct ucred *cred;
570	size_t strsize;
571	int ncount;
572
573	dev = devvp->v_rdev;
574	cred = td ? td->td_ucred : NOCRED;
575	/*
576	 * Disallow multiple mounts of the same device.
577	 * Disallow mounting of a device that is currently in use
578	 * (except for root, which might share swap device for miniroot).
579	 * Flush out any old buffers remaining from a previous use.
580	 */
581	error = vfs_mountedon(devvp);
582	if (error)
583		return (error);
584	ncount = vcount(devvp);
585
586	if (ncount > 1 && devvp != rootvp)
587		return (EBUSY);
588	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
589	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
590	VOP_UNLOCK(devvp, 0, td);
591	if (error)
592		return (error);
593
594	/*
595	 * Only VMIO the backing device if the backing device is a real
596	 * block device.
597	 * Note that it is optional that the backing device be VMIOed.  This
598	 * increases the opportunity for metadata caching.
599	 */
600	if (vn_isdisk(devvp, NULL)) {
601		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
602		vfs_object_create(devvp, td, cred);
603		VOP_UNLOCK(devvp, 0, td);
604	}
605
606	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
607	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
608	/*
609	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
610	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
611	 * XXX: start to avoid getting trashed later on.
612	 */
613#ifdef notyet
614	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td, -1);
615#else
616	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td, -1);
617#endif
618	VOP_UNLOCK(devvp, 0, td);
619	if (error)
620		return (error);
621	if (devvp->v_rdev->si_iosize_max != 0)
622		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
623	if (mp->mnt_iosize_max > MAXPHYS)
624		mp->mnt_iosize_max = MAXPHYS;
625
626	bp = NULL;
627	ump = NULL;
628	fs = NULL;
629	sblockloc = 0;
630	/*
631	 * Try reading the superblock in each of its possible locations.
632	 */
633	for (i = 0; sblock_try[i] != -1; i++) {
634		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
635		    cred, &bp)) != 0)
636			goto out;
637		fs = (struct fs *)bp->b_data;
638		sblockloc = sblock_try[i];
639		if ((fs->fs_magic == FS_UFS1_MAGIC ||
640		     (fs->fs_magic == FS_UFS2_MAGIC &&
641		      (fs->fs_sblockloc == sblockloc ||
642		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
643		    fs->fs_bsize <= MAXBSIZE &&
644		    fs->fs_bsize >= sizeof(struct fs))
645			break;
646		brelse(bp);
647		bp = NULL;
648	}
649	if (sblock_try[i] == -1) {
650		error = EINVAL;		/* XXX needs translation */
651		goto out;
652	}
653	fs->fs_fmod = 0;
654	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
655	fs->fs_flags &= ~FS_UNCLEAN;
656	if (fs->fs_clean == 0) {
657		fs->fs_flags |= FS_UNCLEAN;
658		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
659		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
660		     (fs->fs_flags & FS_DOSOFTDEP))) {
661			printf(
662"WARNING: %s was not properly dismounted\n",
663			    fs->fs_fsmnt);
664		} else {
665			printf(
666"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
667			    fs->fs_fsmnt);
668			error = EPERM;
669			goto out;
670		}
671		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
672		    (mp->mnt_flag & MNT_FORCE)) {
673			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
674			    (intmax_t)fs->fs_pendingblocks,
675			    fs->fs_pendinginodes);
676			fs->fs_pendingblocks = 0;
677			fs->fs_pendinginodes = 0;
678		}
679	}
680	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
681		printf("%s: mount pending error: blocks %jd files %d\n",
682		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
683		    fs->fs_pendinginodes);
684		fs->fs_pendingblocks = 0;
685		fs->fs_pendinginodes = 0;
686	}
687	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
688	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
689	    M_WAITOK);
690	if (fs->fs_magic == FS_UFS1_MAGIC) {
691		ump->um_fstype = UFS1;
692		ump->um_balloc = ffs_balloc_ufs1;
693	} else {
694		ump->um_fstype = UFS2;
695		ump->um_balloc = ffs_balloc_ufs2;
696	}
697	ump->um_blkatoff = ffs_blkatoff;
698	ump->um_truncate = ffs_truncate;
699	ump->um_update = ffs_update;
700	ump->um_valloc = ffs_valloc;
701	ump->um_vfree = ffs_vfree;
702	ump->um_ifree = ffs_ifree;
703	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
704	if (fs->fs_sbsize < SBLOCKSIZE)
705		bp->b_flags |= B_INVAL | B_NOCACHE;
706	brelse(bp);
707	bp = NULL;
708	fs = ump->um_fs;
709	ffs_oldfscompat_read(fs, ump, sblockloc);
710	fs->fs_ronly = ronly;
711	size = fs->fs_cssize;
712	blks = howmany(size, fs->fs_fsize);
713	if (fs->fs_contigsumsize > 0)
714		size += fs->fs_ncg * sizeof(int32_t);
715	size += fs->fs_ncg * sizeof(u_int8_t);
716	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
717	fs->fs_csp = space;
718	for (i = 0; i < blks; i += fs->fs_frag) {
719		size = fs->fs_bsize;
720		if (i + fs->fs_frag > blks)
721			size = (blks - i) * fs->fs_fsize;
722		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
723		    cred, &bp)) != 0) {
724			free(fs->fs_csp, M_UFSMNT);
725			goto out;
726		}
727		bcopy(bp->b_data, space, (u_int)size);
728		space = (char *)space + size;
729		brelse(bp);
730		bp = NULL;
731	}
732	if (fs->fs_contigsumsize > 0) {
733		fs->fs_maxcluster = lp = space;
734		for (i = 0; i < fs->fs_ncg; i++)
735			*lp++ = fs->fs_contigsumsize;
736		space = lp;
737	}
738	size = fs->fs_ncg * sizeof(u_int8_t);
739	fs->fs_contigdirs = (u_int8_t *)space;
740	bzero(fs->fs_contigdirs, size);
741	fs->fs_active = NULL;
742	mp->mnt_data = (qaddr_t)ump;
743	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
744	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
745	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
746	    vfs_getvfs(&mp->mnt_stat.f_fsid))
747		vfs_getnewfsid(mp);
748	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
749	mp->mnt_flag |= MNT_LOCAL;
750	if ((fs->fs_flags & FS_MULTILABEL) != 0)
751#ifdef MAC
752		mp->mnt_flag |= MNT_MULTILABEL;
753#else
754		printf(
755"WARNING: %s: multilabel flag on fs but no MAC support\n",
756		    fs->fs_fsmnt);
757#endif
758	if ((fs->fs_flags & FS_ACLS) != 0)
759#ifdef UFS_ACL
760		mp->mnt_flag |= MNT_ACLS;
761#else
762		printf(
763"WARNING: %s: ACLs flag on fs but no ACLs support\n",
764		    fs->fs_fsmnt);
765#endif
766	ump->um_mountp = mp;
767	ump->um_dev = dev;
768	ump->um_devvp = devvp;
769	ump->um_nindir = fs->fs_nindir;
770	ump->um_bptrtodb = fs->fs_fsbtodb;
771	ump->um_seqinc = fs->fs_frag;
772	for (i = 0; i < MAXQUOTAS; i++)
773		ump->um_quotas[i] = NULLVP;
774#ifdef UFS_EXTATTR
775	ufs_extattr_uepm_init(&ump->um_extattr);
776#endif
777	devvp->v_rdev->si_mountpoint = mp;
778
779	/*
780	 * Set FS local "last mounted on" information (NULL pad)
781	 */
782	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
783			fs->fs_fsmnt,			/* copy area*/
784			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
785			&strsize);			/* real size*/
786	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
787
788	if( mp->mnt_flag & MNT_ROOTFS) {
789		/*
790		 * Root mount; update timestamp in mount structure.
791		 * this will be used by the common root mount code
792		 * to update the system clock.
793		 */
794		mp->mnt_time = fs->fs_time;
795	}
796
797	if (ronly == 0) {
798		if ((fs->fs_flags & FS_DOSOFTDEP) &&
799		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
800			free(fs->fs_csp, M_UFSMNT);
801			goto out;
802		}
803		if (fs->fs_snapinum[0] != 0)
804			ffs_snapshot_mount(mp);
805		fs->fs_fmod = 1;
806		fs->fs_clean = 0;
807		(void) ffs_sbupdate(ump, MNT_WAIT);
808	}
809#ifdef UFS_EXTATTR
810#ifdef UFS_EXTATTR_AUTOSTART
811	/*
812	 *
813	 * Auto-starting does the following:
814	 *	- check for /.attribute in the fs, and extattr_start if so
815	 *	- for each file in .attribute, enable that file with
816	 * 	  an attribute of the same name.
817	 * Not clear how to report errors -- probably eat them.
818	 * This would all happen while the filesystem was busy/not
819	 * available, so would effectively be "atomic".
820	 */
821	(void) ufs_extattr_autostart(mp, td);
822#endif /* !UFS_EXTATTR_AUTOSTART */
823#endif /* !UFS_EXTATTR */
824	return (0);
825out:
826	devvp->v_rdev->si_mountpoint = NULL;
827	if (bp)
828		brelse(bp);
829	/* XXX: see comment above VOP_OPEN */
830#ifdef notyet
831	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
832#else
833	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
834#endif
835	if (ump) {
836		free(ump->um_fs, M_UFSMNT);
837		free(ump, M_UFSMNT);
838		mp->mnt_data = (qaddr_t)0;
839	}
840	return (error);
841}
842
843#include <sys/sysctl.h>
844int bigcgs = 0;
845SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
846
847/*
848 * Sanity checks for loading old filesystem superblocks.
849 * See ffs_oldfscompat_write below for unwound actions.
850 *
851 * XXX - Parts get retired eventually.
852 * Unfortunately new bits get added.
853 */
854static void
855ffs_oldfscompat_read(fs, ump, sblockloc)
856	struct fs *fs;
857	struct ufsmount *ump;
858	ufs2_daddr_t sblockloc;
859{
860	off_t maxfilesize;
861
862	/*
863	 * If not yet done, update fs_flags location and value of fs_sblockloc.
864	 */
865	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
866		fs->fs_flags = fs->fs_old_flags;
867		fs->fs_old_flags |= FS_FLAGS_UPDATED;
868		fs->fs_sblockloc = sblockloc;
869	}
870	/*
871	 * If not yet done, update UFS1 superblock with new wider fields.
872	 */
873	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
874		fs->fs_maxbsize = fs->fs_bsize;
875		fs->fs_time = fs->fs_old_time;
876		fs->fs_size = fs->fs_old_size;
877		fs->fs_dsize = fs->fs_old_dsize;
878		fs->fs_csaddr = fs->fs_old_csaddr;
879		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
880		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
881		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
882		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
883	}
884	if (fs->fs_magic == FS_UFS1_MAGIC &&
885	    fs->fs_old_inodefmt < FS_44INODEFMT) {
886		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
887		fs->fs_qbmask = ~fs->fs_bmask;
888		fs->fs_qfmask = ~fs->fs_fmask;
889	}
890	if (fs->fs_magic == FS_UFS1_MAGIC) {
891		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
892		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
893		if (fs->fs_maxfilesize > maxfilesize)
894			fs->fs_maxfilesize = maxfilesize;
895	}
896	/* Compatibility for old filesystems */
897	if (fs->fs_avgfilesize <= 0)
898		fs->fs_avgfilesize = AVFILESIZ;
899	if (fs->fs_avgfpdir <= 0)
900		fs->fs_avgfpdir = AFPDIR;
901	if (bigcgs) {
902		fs->fs_save_cgsize = fs->fs_cgsize;
903		fs->fs_cgsize = fs->fs_bsize;
904	}
905}
906
907/*
908 * Unwinding superblock updates for old filesystems.
909 * See ffs_oldfscompat_read above for details.
910 *
911 * XXX - Parts get retired eventually.
912 * Unfortunately new bits get added.
913 */
914static void
915ffs_oldfscompat_write(fs, ump)
916	struct fs *fs;
917	struct ufsmount *ump;
918{
919
920	/*
921	 * Copy back UFS2 updated fields that UFS1 inspects.
922	 */
923	if (fs->fs_magic == FS_UFS1_MAGIC) {
924		fs->fs_old_time = fs->fs_time;
925		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
926		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
927		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
928		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
929		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
930	}
931	if (bigcgs) {
932		fs->fs_cgsize = fs->fs_save_cgsize;
933		fs->fs_save_cgsize = 0;
934	}
935}
936
937/*
938 * unmount system call
939 */
940int
941ffs_unmount(mp, mntflags, td)
942	struct mount *mp;
943	int mntflags;
944	struct thread *td;
945{
946	struct ufsmount *ump = VFSTOUFS(mp);
947	struct fs *fs;
948	int error, flags;
949
950	flags = 0;
951	if (mntflags & MNT_FORCE) {
952		flags |= FORCECLOSE;
953	}
954#ifdef UFS_EXTATTR
955	if ((error = ufs_extattr_stop(mp, td))) {
956		if (error != EOPNOTSUPP)
957			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
958			    error);
959	} else {
960		ufs_extattr_uepm_destroy(&ump->um_extattr);
961	}
962#endif
963	if (mp->mnt_flag & MNT_SOFTDEP) {
964		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
965			return (error);
966	} else {
967		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
968			return (error);
969	}
970	fs = ump->um_fs;
971	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
972		printf("%s: unmount pending error: blocks %jd files %d\n",
973		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
974		    fs->fs_pendinginodes);
975		fs->fs_pendingblocks = 0;
976		fs->fs_pendinginodes = 0;
977	}
978	if (fs->fs_ronly == 0) {
979		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
980		error = ffs_sbupdate(ump, MNT_WAIT);
981		if (error) {
982			fs->fs_clean = 0;
983			return (error);
984		}
985	}
986	ump->um_devvp->v_rdev->si_mountpoint = NULL;
987
988	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
989	/* XXX: see comment above VOP_OPEN */
990#ifdef notyet
991	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
992		NOCRED, td);
993#else
994	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
995#endif
996
997	vrele(ump->um_devvp);
998
999	free(fs->fs_csp, M_UFSMNT);
1000	free(fs, M_UFSMNT);
1001	free(ump, M_UFSMNT);
1002	mp->mnt_data = (qaddr_t)0;
1003	mp->mnt_flag &= ~MNT_LOCAL;
1004	return (error);
1005}
1006
1007/*
1008 * Flush out all the files in a filesystem.
1009 */
1010int
1011ffs_flushfiles(mp, flags, td)
1012	struct mount *mp;
1013	int flags;
1014	struct thread *td;
1015{
1016	struct ufsmount *ump;
1017	int error;
1018
1019	ump = VFSTOUFS(mp);
1020#ifdef QUOTA
1021	if (mp->mnt_flag & MNT_QUOTA) {
1022		int i;
1023		error = vflush(mp, 0, SKIPSYSTEM|flags);
1024		if (error)
1025			return (error);
1026		for (i = 0; i < MAXQUOTAS; i++) {
1027			if (ump->um_quotas[i] == NULLVP)
1028				continue;
1029			quotaoff(td, mp, i);
1030		}
1031		/*
1032		 * Here we fall through to vflush again to ensure
1033		 * that we have gotten rid of all the system vnodes.
1034		 */
1035	}
1036#endif
1037	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1038	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1039		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1040			return (error);
1041		ffs_snapshot_unmount(mp);
1042		/*
1043		 * Here we fall through to vflush again to ensure
1044		 * that we have gotten rid of all the system vnodes.
1045		 */
1046	}
1047        /*
1048	 * Flush all the files.
1049	 */
1050	if ((error = vflush(mp, 0, flags)) != 0)
1051		return (error);
1052	/*
1053	 * Flush filesystem metadata.
1054	 */
1055	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1056	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1057	VOP_UNLOCK(ump->um_devvp, 0, td);
1058	return (error);
1059}
1060
1061/*
1062 * Get filesystem statistics.
1063 */
1064int
1065ffs_statfs(mp, sbp, td)
1066	struct mount *mp;
1067	struct statfs *sbp;
1068	struct thread *td;
1069{
1070	struct ufsmount *ump;
1071	struct fs *fs;
1072
1073	ump = VFSTOUFS(mp);
1074	fs = ump->um_fs;
1075	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1076		panic("ffs_statfs");
1077	sbp->f_bsize = fs->fs_fsize;
1078	sbp->f_iosize = fs->fs_bsize;
1079	sbp->f_blocks = fs->fs_dsize;
1080	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1081	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1082	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1083	    dbtofsb(fs, fs->fs_pendingblocks);
1084	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1085	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1086	if (sbp != &mp->mnt_stat) {
1087		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1088		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1089			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1090		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1091			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1092	}
1093	return (0);
1094}
1095
1096/*
1097 * Go through the disk queues to initiate sandbagged IO;
1098 * go through the inodes to write those that have been modified;
1099 * initiate the writing of the super block if it has been modified.
1100 *
1101 * Note: we are always called with the filesystem marked `MPBUSY'.
1102 */
1103int
1104ffs_sync(mp, waitfor, cred, td)
1105	struct mount *mp;
1106	int waitfor;
1107	struct ucred *cred;
1108	struct thread *td;
1109{
1110	struct vnode *nvp, *vp, *devvp;
1111	struct inode *ip;
1112	struct ufsmount *ump = VFSTOUFS(mp);
1113	struct fs *fs;
1114	int error, count, wait, lockreq, allerror = 0;
1115	int restart;
1116
1117	fs = ump->um_fs;
1118	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1119		printf("fs = %s\n", fs->fs_fsmnt);
1120		panic("ffs_sync: rofs mod");
1121	}
1122	/*
1123	 * Write back each (modified) inode.
1124	 */
1125	wait = 0;
1126	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1127	if (waitfor == MNT_WAIT) {
1128		wait = 1;
1129		lockreq = LK_EXCLUSIVE;
1130	}
1131	lockreq |= LK_INTERLOCK;
1132	mtx_lock(&mntvnode_mtx);
1133loop:
1134	restart = 0;
1135	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1136		/*
1137		 * If the vnode that we are about to sync is no longer
1138		 * associated with this mount point, start over.
1139		 */
1140		if (vp->v_mount != mp)
1141			goto loop;
1142
1143		/*
1144		 * Depend on the mntvnode_slock to keep things stable enough
1145		 * for a quick test.  Since there might be hundreds of
1146		 * thousands of vnodes, we cannot afford even a subroutine
1147		 * call unless there's a good chance that we have work to do.
1148		 */
1149		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1150		VI_LOCK(vp);
1151		if (vp->v_iflag & VI_XLOCK) {
1152			VI_UNLOCK(vp);
1153			continue;
1154		}
1155		ip = VTOI(vp);
1156		if (vp->v_type == VNON || ((ip->i_flag &
1157		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1158		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1159			VI_UNLOCK(vp);
1160			continue;
1161		}
1162		mtx_unlock(&mntvnode_mtx);
1163		if ((error = vget(vp, lockreq, td)) != 0) {
1164			mtx_lock(&mntvnode_mtx);
1165			if (error == ENOENT)
1166				goto loop;
1167			continue;
1168		}
1169		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1170			allerror = error;
1171		VOP_UNLOCK(vp, 0, td);
1172		mtx_lock(&mntvnode_mtx);
1173		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1174			restart = 1;
1175		vrele(vp);
1176		if (restart)
1177			goto loop;
1178	}
1179	mtx_unlock(&mntvnode_mtx);
1180	/*
1181	 * Force stale filesystem control information to be flushed.
1182	 */
1183	if (waitfor == MNT_WAIT) {
1184		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1185			allerror = error;
1186		/* Flushed work items may create new vnodes to clean */
1187		if (allerror == 0 && count) {
1188			mtx_lock(&mntvnode_mtx);
1189			goto loop;
1190		}
1191	}
1192#ifdef QUOTA
1193	qsync(mp);
1194#endif
1195	devvp = ump->um_devvp;
1196	VI_LOCK(devvp);
1197	if (waitfor != MNT_LAZY &&
1198	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1199		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1200		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1201			allerror = error;
1202		VOP_UNLOCK(devvp, 0, td);
1203		if (allerror == 0 && waitfor == MNT_WAIT) {
1204			mtx_lock(&mntvnode_mtx);
1205			goto loop;
1206		}
1207	} else
1208		VI_UNLOCK(devvp);
1209	/*
1210	 * Write back modified superblock.
1211	 */
1212	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1213		allerror = error;
1214	return (allerror);
1215}
1216
1217int
1218ffs_vget(mp, ino, flags, vpp)
1219	struct mount *mp;
1220	ino_t ino;
1221	int flags;
1222	struct vnode **vpp;
1223{
1224	struct thread *td = curthread; 		/* XXX */
1225	struct fs *fs;
1226	struct inode *ip;
1227	struct ufsmount *ump;
1228	struct buf *bp;
1229	struct vnode *vp;
1230	dev_t dev;
1231	int error;
1232
1233	ump = VFSTOUFS(mp);
1234	dev = ump->um_dev;
1235
1236	/*
1237	 * We do not lock vnode creation as it is believed to be too
1238	 * expensive for such rare case as simultaneous creation of vnode
1239	 * for same ino by different processes. We just allow them to race
1240	 * and check later to decide who wins. Let the race begin!
1241	 */
1242	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1243		return (error);
1244	if (*vpp != NULL)
1245		return (0);
1246
1247	/*
1248	 * If this MALLOC() is performed after the getnewvnode()
1249	 * it might block, leaving a vnode with a NULL v_data to be
1250	 * found by ffs_sync() if a sync happens to fire right then,
1251	 * which will cause a panic because ffs_sync() blindly
1252	 * dereferences vp->v_data (as well it should).
1253	 */
1254	ip = uma_zalloc(uma_inode, M_WAITOK);
1255
1256	/* Allocate a new vnode/inode. */
1257	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1258	if (error) {
1259		*vpp = NULL;
1260		uma_zfree(uma_inode, ip);
1261		return (error);
1262	}
1263	bzero((caddr_t)ip, sizeof(struct inode));
1264	/*
1265	 * FFS supports recursive locking.
1266	 */
1267	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1268	vp->v_data = ip;
1269	ip->i_vnode = vp;
1270	ip->i_ump = ump;
1271	ip->i_fs = fs = ump->um_fs;
1272	ip->i_dev = dev;
1273	ip->i_number = ino;
1274#ifdef QUOTA
1275	{
1276		int i;
1277		for (i = 0; i < MAXQUOTAS; i++)
1278			ip->i_dquot[i] = NODQUOT;
1279	}
1280#endif
1281	/*
1282	 * Exclusively lock the vnode before adding to hash. Note, that we
1283	 * must not release nor downgrade the lock (despite flags argument
1284	 * says) till it is fully initialized.
1285	 */
1286	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1287
1288	/*
1289	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1290	 * duplicate of vnode being created and add it to the hash. If a
1291	 * duplicate vnode was found, it will be vget()ed from hash for us.
1292	 */
1293	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1294		vput(vp);
1295		*vpp = NULL;
1296		return (error);
1297	}
1298
1299	/* We lost the race, then throw away our vnode and return existing */
1300	if (*vpp != NULL) {
1301		vput(vp);
1302		return (0);
1303	}
1304
1305	/* Read in the disk contents for the inode, copy into the inode. */
1306	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1307	    (int)fs->fs_bsize, NOCRED, &bp);
1308	if (error) {
1309		/*
1310		 * The inode does not contain anything useful, so it would
1311		 * be misleading to leave it on its hash chain. With mode
1312		 * still zero, it will be unlinked and returned to the free
1313		 * list by vput().
1314		 */
1315		brelse(bp);
1316		vput(vp);
1317		*vpp = NULL;
1318		return (error);
1319	}
1320	if (ip->i_ump->um_fstype == UFS1)
1321		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1322	else
1323		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1324	ffs_load_inode(bp, ip, fs, ino);
1325	if (DOINGSOFTDEP(vp))
1326		softdep_load_inodeblock(ip);
1327	else
1328		ip->i_effnlink = ip->i_nlink;
1329	bqrelse(bp);
1330
1331	/*
1332	 * Initialize the vnode from the inode, check for aliases.
1333	 * Note that the underlying vnode may have changed.
1334	 */
1335	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1336	if (error) {
1337		vput(vp);
1338		*vpp = NULL;
1339		return (error);
1340	}
1341	/*
1342	 * Finish inode initialization.
1343	 */
1344	VREF(ip->i_devvp);
1345	/*
1346	 * Set up a generation number for this inode if it does not
1347	 * already have one. This should only happen on old filesystems.
1348	 */
1349	if (ip->i_gen == 0) {
1350		ip->i_gen = arc4random() / 2 + 1;
1351		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1352			ip->i_flag |= IN_MODIFIED;
1353			DIP(ip, i_gen) = ip->i_gen;
1354		}
1355	}
1356	/*
1357	 * Ensure that uid and gid are correct. This is a temporary
1358	 * fix until fsck has been changed to do the update.
1359	 */
1360	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1361	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1362		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1363		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1364	}						/* XXX */
1365
1366#ifdef MAC
1367	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1368		/*
1369		 * If this vnode is already allocated, and we're running
1370		 * multi-label, attempt to perform a label association
1371		 * from the extended attributes on the inode.
1372		 */
1373		error = mac_associate_vnode_extattr(mp, vp);
1374		if (error) {
1375			/* ufs_inactive will release ip->i_devvp ref. */
1376			vput(vp);
1377			*vpp = NULL;
1378			return (error);
1379		}
1380	}
1381#endif
1382
1383	*vpp = vp;
1384	return (0);
1385}
1386
1387/*
1388 * File handle to vnode
1389 *
1390 * Have to be really careful about stale file handles:
1391 * - check that the inode number is valid
1392 * - call ffs_vget() to get the locked inode
1393 * - check for an unallocated inode (i_mode == 0)
1394 * - check that the given client host has export rights and return
1395 *   those rights via. exflagsp and credanonp
1396 */
1397int
1398ffs_fhtovp(mp, fhp, vpp)
1399	struct mount *mp;
1400	struct fid *fhp;
1401	struct vnode **vpp;
1402{
1403	struct ufid *ufhp;
1404	struct fs *fs;
1405
1406	ufhp = (struct ufid *)fhp;
1407	fs = VFSTOUFS(mp)->um_fs;
1408	if (ufhp->ufid_ino < ROOTINO ||
1409	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1410		return (ESTALE);
1411	return (ufs_fhtovp(mp, ufhp, vpp));
1412}
1413
1414/*
1415 * Vnode pointer to File handle
1416 */
1417/* ARGSUSED */
1418int
1419ffs_vptofh(vp, fhp)
1420	struct vnode *vp;
1421	struct fid *fhp;
1422{
1423	struct inode *ip;
1424	struct ufid *ufhp;
1425
1426	ip = VTOI(vp);
1427	ufhp = (struct ufid *)fhp;
1428	ufhp->ufid_len = sizeof(struct ufid);
1429	ufhp->ufid_ino = ip->i_number;
1430	ufhp->ufid_gen = ip->i_gen;
1431	return (0);
1432}
1433
/*
 * Initialize the filesystem: run softdep_initialize() first, then
 * ufs_init().  Returns the result of ufs_init().
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1445
/*
 * Undo the work of ffs_init(), in the opposite order: ufs_uninit()
 * first, then softdep_uninitialize().  Returns the result of
 * ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1459
1460/*
1461 * Write a superblock and associated information back to disk.
1462 */
1463static int
1464ffs_sbupdate(mp, waitfor)
1465	struct ufsmount *mp;
1466	int waitfor;
1467{
1468	struct fs *fs = mp->um_fs;
1469	struct buf *bp;
1470	int blks;
1471	void *space;
1472	int i, size, error, allerror = 0;
1473
1474	if (fs->fs_ronly == 1 &&
1475	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1476	    (MNT_RDONLY | MNT_UPDATE))
1477		panic("ffs_sbupdate: write read-only filesystem");
1478	/*
1479	 * First write back the summary information.
1480	 */
1481	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1482	space = fs->fs_csp;
1483	for (i = 0; i < blks; i += fs->fs_frag) {
1484		size = fs->fs_bsize;
1485		if (i + fs->fs_frag > blks)
1486			size = (blks - i) * fs->fs_fsize;
1487		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1488		    size, 0, 0, 0);
1489		bcopy(space, bp->b_data, (u_int)size);
1490		space = (char *)space + size;
1491		if (waitfor != MNT_WAIT)
1492			bawrite(bp);
1493		else if ((error = bwrite(bp)) != 0)
1494			allerror = error;
1495	}
1496	/*
1497	 * Now write back the superblock itself. If any errors occurred
1498	 * up to this point, then fail so that the superblock avoids
1499	 * being written out as clean.
1500	 */
1501	if (allerror)
1502		return (allerror);
1503	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1504	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1505		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1506		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1507		fs->fs_sblockloc = SBLOCK_UFS1;
1508	}
1509	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1510	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1511		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1512		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1513		fs->fs_sblockloc = SBLOCK_UFS2;
1514	}
1515	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1516	    0, 0, 0);
1517	fs->fs_fmod = 0;
1518	fs->fs_time = time_second;
1519	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1520	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1521	if (waitfor != MNT_WAIT)
1522		bawrite(bp);
1523	else if ((error = bwrite(bp)) != 0)
1524		allerror = error;
1525	return (allerror);
1526}
1527
/*
 * Extended attribute control entry point.  Forwards all arguments to
 * ufs_extattrctl() when the kernel is built with UFS_EXTATTR, and to
 * vfs_stdextattrctl() otherwise, returning the callee's result.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1541
1542static void
1543ffs_ifree(struct ufsmount *ump, struct inode *ip)
1544{
1545
1546	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1547		uma_zfree(uma_ufs1, ip->i_din1);
1548	else if (ip->i_din2 != NULL)
1549		uma_zfree(uma_ufs2, ip->i_din2);
1550	uma_zfree(uma_inode, ip);
1551}
1552