ffs_vfsops.c revision 128658
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 128658 2004-04-26 15:13:46Z bmilekic $");
34
35#include "opt_mac.h"
36#include "opt_quota.h"
37#include "opt_ufs.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/mac.h>
45#include <sys/vnode.h>
46#include <sys/mount.h>
47#include <sys/bio.h>
48#include <sys/buf.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <sys/disk.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <ufs/ufs/extattr.h>
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/ufsmount.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64#include <vm/vm.h>
65#include <vm/uma.h>
66#include <vm/vm_page.h>
67
68uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
69
70static int	ffs_sbupdate(struct ufsmount *, int);
71       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
72static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
73static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
74		    ufs2_daddr_t);
75static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
76static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
77static vfs_init_t ffs_init;
78static vfs_uninit_t ffs_uninit;
79static vfs_extattrctl_t ffs_extattrctl;
80
81static struct vfsops ufs_vfsops = {
82	.vfs_extattrctl =	ffs_extattrctl,
83	.vfs_fhtovp =		ffs_fhtovp,
84	.vfs_init =		ffs_init,
85	.vfs_mount =		ffs_mount,
86	.vfs_quotactl =		ufs_quotactl,
87	.vfs_root =		ufs_root,
88	.vfs_start =		ufs_start,
89	.vfs_statfs =		ffs_statfs,
90	.vfs_sync =		ffs_sync,
91	.vfs_uninit =		ffs_uninit,
92	.vfs_unmount =		ffs_unmount,
93	.vfs_vget =		ffs_vget,
94	.vfs_vptofh =		ffs_vptofh,
95};
96
97VFS_SET(ufs_vfsops, ufs, 0);
98
99/*
100 * ffs_mount
101 *
102 * Called when mounting local physical media
103 *
104 * PARAMETERS:
105 *		mountroot
106 *			mp	mount point structure
107 *			path	NULL (flag for root mount!!!)
108 *			data	<unused>
109 *			ndp	<unused>
110 *			p	process (user credentials check [statfs])
111 *
112 *		mount
113 *			mp	mount point structure
114 *			path	path to mount point
115 *			data	pointer to argument struct in user space
116 *			ndp	mount point namei() return (used for
117 *				credentials on reload), reused to look
118 *				up block device.
119 *			p	process (user credentials check)
120 *
121 * RETURNS:	0	Success
122 *		!0	error number (errno.h)
123 *
124 * LOCK STATE:
125 *
126 *		ENTRY
127 *			mount point is locked
128 *		EXIT
129 *			mount point is locked
130 *
131 * NOTES:
132 *		A NULL path can be used for a flag since the mount
133 *		system call will fail with EFAULT in copyinstr in
134 *		namei() if it is a genuine NULL from the user.
135 */
136int
137ffs_mount(mp, path, data, ndp, td)
138        struct mount		*mp;	/* mount struct pointer*/
139        char			*path;	/* path to mount point*/
140        caddr_t			data;	/* arguments to FS specific mount*/
141        struct nameidata	*ndp;	/* mount point credentials*/
142        struct thread		*td;	/* process requesting mount*/
143{
144	size_t size;
145	struct vnode *devvp;
146	struct ufs_args args;
147	struct ufsmount *ump = 0;
148	struct fs *fs;
149	int error, flags;
150	mode_t accessmode;
151
152	if (uma_inode == NULL) {
153		uma_inode = uma_zcreate("FFS inode",
154		    sizeof(struct inode), NULL, NULL, NULL, NULL,
155		    UMA_ALIGN_PTR, 0);
156		uma_ufs1 = uma_zcreate("FFS1 dinode",
157		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
158		    UMA_ALIGN_PTR, 0);
159		uma_ufs2 = uma_zcreate("FFS2 dinode",
160		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
161		    UMA_ALIGN_PTR, 0);
162	}
163	/*
164	 * Use NULL path to indicate we are mounting the root filesystem.
165	 */
166	if (path == NULL) {
167		if ((error = bdevvp(rootdev, &rootvp))) {
168			printf("ffs_mountroot: can't find rootvp\n");
169			return (error);
170		}
171
172		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
173			return (error);
174		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
175		return (0);
176	}
177
178	/*
179	 * Mounting non-root filesystem or updating a filesystem
180	 */
181	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
182		return (error);
183
184	/*
185	 * If updating, check whether changing from read-only to
186	 * read/write; if there is no device name, that's all we do.
187	 */
188	if (mp->mnt_flag & MNT_UPDATE) {
189		ump = VFSTOUFS(mp);
190		fs = ump->um_fs;
191		devvp = ump->um_devvp;
192		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
193			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
194				return (error);
195			/*
196			 * Flush any dirty data.
197			 */
198			if ((error = VFS_SYNC(mp, MNT_WAIT,
199			    td->td_ucred, td)) != 0) {
200				vn_finished_write(mp);
201				return (error);
202			}
203			/*
204			 * Check for and optionally get rid of files open
205			 * for writing.
206			 */
207			flags = WRITECLOSE;
208			if (mp->mnt_flag & MNT_FORCE)
209				flags |= FORCECLOSE;
210			if (mp->mnt_flag & MNT_SOFTDEP) {
211				error = softdep_flushfiles(mp, flags, td);
212			} else {
213				error = ffs_flushfiles(mp, flags, td);
214			}
215			if (error) {
216				vn_finished_write(mp);
217				return (error);
218			}
219			if (fs->fs_pendingblocks != 0 ||
220			    fs->fs_pendinginodes != 0) {
221				printf("%s: %s: blocks %jd files %d\n",
222				    fs->fs_fsmnt, "update error",
223				    (intmax_t)fs->fs_pendingblocks,
224				    fs->fs_pendinginodes);
225				fs->fs_pendingblocks = 0;
226				fs->fs_pendinginodes = 0;
227			}
228			fs->fs_ronly = 1;
229			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
230				fs->fs_clean = 1;
231			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
232				fs->fs_ronly = 0;
233				fs->fs_clean = 0;
234				vn_finished_write(mp);
235				return (error);
236			}
237			vn_finished_write(mp);
238		}
239		if ((mp->mnt_flag & MNT_RELOAD) &&
240		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
241			return (error);
242		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
243			/*
244			 * If upgrade to read-write by non-root, then verify
245			 * that user has necessary permissions on the device.
246			 */
247			if (suser(td)) {
248				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
249				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
250				    td->td_ucred, td)) != 0) {
251					VOP_UNLOCK(devvp, 0, td);
252					return (error);
253				}
254				VOP_UNLOCK(devvp, 0, td);
255			}
256			fs->fs_flags &= ~FS_UNCLEAN;
257			if (fs->fs_clean == 0) {
258				fs->fs_flags |= FS_UNCLEAN;
259				if ((mp->mnt_flag & MNT_FORCE) ||
260				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
261				     (fs->fs_flags & FS_DOSOFTDEP))) {
262					printf("WARNING: %s was not %s\n",
263					   fs->fs_fsmnt, "properly dismounted");
264				} else {
265					printf(
266"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
267					    fs->fs_fsmnt);
268					return (EPERM);
269				}
270			}
271			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
272				return (error);
273			fs->fs_ronly = 0;
274			fs->fs_clean = 0;
275			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
276				vn_finished_write(mp);
277				return (error);
278			}
279			/* check to see if we need to start softdep */
280			if ((fs->fs_flags & FS_DOSOFTDEP) &&
281			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
282				vn_finished_write(mp);
283				return (error);
284			}
285			if (fs->fs_snapinum[0] != 0)
286				ffs_snapshot_mount(mp);
287			vn_finished_write(mp);
288		}
289		/*
290		 * Soft updates is incompatible with "async",
291		 * so if we are doing softupdates stop the user
292		 * from setting the async flag in an update.
293		 * Softdep_mount() clears it in an initial mount
294		 * or ro->rw remount.
295		 */
296		if (mp->mnt_flag & MNT_SOFTDEP)
297			mp->mnt_flag &= ~MNT_ASYNC;
298		/*
299		 * If not updating name, process export requests.
300		 */
301		if (args.fspec == 0)
302			return (vfs_export(mp, &args.export));
303		/*
304		 * If this is a snapshot request, take the snapshot.
305		 */
306		if (mp->mnt_flag & MNT_SNAPSHOT)
307			return (ffs_snapshot(mp, args.fspec));
308	}
309
310	/*
311	 * Not an update, or updating the name: look up the name
312	 * and verify that it refers to a sensible disk device.
313	 */
314	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
315	if ((error = namei(ndp)) != 0)
316		return (error);
317	NDFREE(ndp, NDF_ONLY_PNBUF);
318	devvp = ndp->ni_vp;
319	if (!vn_isdisk(devvp, &error)) {
320		vrele(devvp);
321		return (error);
322	}
323
324	/*
325	 * If mount by non-root, then verify that user has necessary
326	 * permissions on the device.
327	 */
328	if (suser(td)) {
329		accessmode = VREAD;
330		if ((mp->mnt_flag & MNT_RDONLY) == 0)
331			accessmode |= VWRITE;
332		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
333		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
334			vput(devvp);
335			return (error);
336		}
337		VOP_UNLOCK(devvp, 0, td);
338	}
339
340	if (mp->mnt_flag & MNT_UPDATE) {
341		/*
342		 * Update only
343		 *
344		 * If it's not the same vnode, or at least the same device
345		 * then it's not correct.
346		 */
347
348		if (devvp != ump->um_devvp &&
349		    devvp->v_rdev != ump->um_devvp->v_rdev)
350			error = EINVAL;	/* needs translation */
351		vrele(devvp);
352		if (error)
353			return (error);
354	} else {
355		/*
356		 * New mount
357		 *
358		 * We need the name for the mount point (also used for
359		 * "last mounted on") copied in. If an error occurs,
360		 * the mount point is discarded by the upper level code.
361		 * Note that vfs_mount() populates f_mntonname for us.
362		 */
363		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
364			vrele(devvp);
365			return (error);
366		}
367	}
368	/*
369	 * Save "mounted from" device name info for mount point (NULL pad).
370	 */
371	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
372	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
373	/*
374	 * Initialize filesystem stat information in mount struct.
375	 */
376	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
377	return (0);
378}
379
380/*
381 * Reload all incore data for a filesystem (used after running fsck on
382 * the root filesystem and finding things to fix). The filesystem must
383 * be mounted read-only.
384 *
385 * Things to do to update the mount:
386 *	1) invalidate all cached meta-data.
387 *	2) re-read superblock from disk.
388 *	3) re-read summary information from disk.
389 *	4) invalidate all inactive vnodes.
390 *	5) invalidate all cached file data.
391 *	6) re-read inode data for all active vnodes.
392 */
393int
394ffs_reload(mp, cred, td)
395	struct mount *mp;
396	struct ucred *cred;
397	struct thread *td;
398{
399	struct vnode *vp, *nvp, *devvp;
400	struct inode *ip;
401	void *space;
402	struct buf *bp;
403	struct fs *fs, *newfs;
404	ufs2_daddr_t sblockloc;
405	int i, blks, size, error;
406	int32_t *lp;
407
408	if ((mp->mnt_flag & MNT_RDONLY) == 0)
409		return (EINVAL);
410	/*
411	 * Step 1: invalidate all cached meta-data.
412	 */
413	devvp = VFSTOUFS(mp)->um_devvp;
414	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
415	if (vinvalbuf(devvp, 0, cred, td, 0, 0) != 0)
416		panic("ffs_reload: dirty1");
417	/*
418	 * Only VMIO the backing device if the backing device is a real
419	 * disk device.  See ffs_mountfs() for more details.
420	 */
421	if (vn_isdisk(devvp, NULL))
422		vfs_object_create(devvp, td, td->td_ucred);
423	VOP_UNLOCK(devvp, 0, td);
424
425	/*
426	 * Step 2: re-read superblock from disk.
427	 */
428	fs = VFSTOUFS(mp)->um_fs;
429	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
430	    NOCRED, &bp)) != 0)
431		return (error);
432	newfs = (struct fs *)bp->b_data;
433	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
434	     newfs->fs_magic != FS_UFS2_MAGIC) ||
435	    newfs->fs_bsize > MAXBSIZE ||
436	    newfs->fs_bsize < sizeof(struct fs)) {
437			brelse(bp);
438			return (EIO);		/* XXX needs translation */
439	}
440	/*
441	 * Copy pointer fields back into superblock before copying in	XXX
442	 * new superblock. These should really be in the ufsmount.	XXX
443	 * Note that important parameters (eg fs_ncg) are unchanged.
444	 */
445	newfs->fs_csp = fs->fs_csp;
446	newfs->fs_maxcluster = fs->fs_maxcluster;
447	newfs->fs_contigdirs = fs->fs_contigdirs;
448	newfs->fs_active = fs->fs_active;
449	/* The file system is still read-only. */
450	newfs->fs_ronly = 1;
451	sblockloc = fs->fs_sblockloc;
452	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
453	brelse(bp);
454	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
455	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
456	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
457		printf("%s: reload pending error: blocks %jd files %d\n",
458		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
459		    fs->fs_pendinginodes);
460		fs->fs_pendingblocks = 0;
461		fs->fs_pendinginodes = 0;
462	}
463
464	/*
465	 * Step 3: re-read summary information from disk.
466	 */
467	blks = howmany(fs->fs_cssize, fs->fs_fsize);
468	space = fs->fs_csp;
469	for (i = 0; i < blks; i += fs->fs_frag) {
470		size = fs->fs_bsize;
471		if (i + fs->fs_frag > blks)
472			size = (blks - i) * fs->fs_fsize;
473		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
474		    NOCRED, &bp);
475		if (error)
476			return (error);
477		bcopy(bp->b_data, space, (u_int)size);
478		space = (char *)space + size;
479		brelse(bp);
480	}
481	/*
482	 * We no longer know anything about clusters per cylinder group.
483	 */
484	if (fs->fs_contigsumsize > 0) {
485		lp = fs->fs_maxcluster;
486		for (i = 0; i < fs->fs_ncg; i++)
487			*lp++ = fs->fs_contigsumsize;
488	}
489
490loop:
491	MNT_ILOCK(mp);
492	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
493		if (vp->v_mount != mp) {
494			MNT_IUNLOCK(mp);
495			goto loop;
496		}
497		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
498		VI_LOCK(vp);
499		if (vp->v_iflag & VI_XLOCK) {
500			VI_UNLOCK(vp);
501			continue;
502		}
503		MNT_IUNLOCK(mp);
504		/*
505		 * Step 4: invalidate all inactive vnodes.
506		 */
507		if (vp->v_usecount == 0) {
508			vgonel(vp, td);
509			goto loop;
510		}
511		/*
512		 * Step 5: invalidate all cached file data.
513		 */
514		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
515			goto loop;
516		}
517		if (vinvalbuf(vp, 0, cred, td, 0, 0))
518			panic("ffs_reload: dirty2");
519		/*
520		 * Step 6: re-read inode data for all active vnodes.
521		 */
522		ip = VTOI(vp);
523		error =
524		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
525		    (int)fs->fs_bsize, NOCRED, &bp);
526		if (error) {
527			VOP_UNLOCK(vp, 0, td);
528			vrele(vp);
529			return (error);
530		}
531		ffs_load_inode(bp, ip, fs, ip->i_number);
532		ip->i_effnlink = ip->i_nlink;
533		brelse(bp);
534		VOP_UNLOCK(vp, 0, td);
535		vrele(vp);
536		MNT_ILOCK(mp);
537	}
538	MNT_IUNLOCK(mp);
539	return (0);
540}
541
542/*
543 * Possible superblock locations ordered from most to least likely.
544 */
545static int sblock_try[] = SBLOCKSEARCH;
546
547/*
548 * Common code for mount and mountroot
549 */
550static int
551ffs_mountfs(devvp, mp, td)
552	struct vnode *devvp;
553	struct mount *mp;
554	struct thread *td;
555{
556	struct ufsmount *ump;
557	struct buf *bp;
558	struct fs *fs;
559	dev_t dev;
560	void *space;
561	ufs2_daddr_t sblockloc;
562	int error, i, blks, size, ronly;
563	int32_t *lp;
564	struct ucred *cred;
565	size_t strsize;
566
567	dev = devvp->v_rdev;
568	cred = td ? td->td_ucred : NOCRED;
569	/*
570	 * Disallow multiple mounts of the same device.
571	 * Disallow mounting of a device that is currently in use
572	 * (except for root, which might share swap device for miniroot).
573	 * Flush out any old buffers remaining from a previous use.
574	 */
575	error = vfs_mountedon(devvp);
576	if (error)
577		return (error);
578	if (vcount(devvp) > 1 && devvp != rootvp)
579		return (EBUSY);
580	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
581	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
582	if (error) {
583		VOP_UNLOCK(devvp, 0, td);
584		return (error);
585	}
586
587	/*
588	 * Only VMIO the backing device if the backing device is a real
589	 * disk device.
590	 * Note that it is optional that the backing device be VMIOed.  This
591	 * increases the opportunity for metadata caching.
592	 */
593	if (vn_isdisk(devvp, NULL))
594		vfs_object_create(devvp, td, cred);
595
596	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
597	/*
598	 * XXX: open the device with read and write access even if only
599	 * read access is needed now.  Write access is needed if the
600	 * filesystem is ever mounted read/write, and we don't change the
601	 * access mode for remounts.
602	 */
603#ifdef notyet
604	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1);
605#else
606	error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1);
607#endif
608	VOP_UNLOCK(devvp, 0, td);
609	if (error)
610		return (error);
611	if (devvp->v_rdev->si_iosize_max != 0)
612		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
613	if (mp->mnt_iosize_max > MAXPHYS)
614		mp->mnt_iosize_max = MAXPHYS;
615
616	bp = NULL;
617	ump = NULL;
618	fs = NULL;
619	sblockloc = 0;
620	/*
621	 * Try reading the superblock in each of its possible locations.
622	 */
623	for (i = 0; sblock_try[i] != -1; i++) {
624		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
625		    cred, &bp)) != 0)
626			goto out;
627		fs = (struct fs *)bp->b_data;
628		sblockloc = sblock_try[i];
629		if ((fs->fs_magic == FS_UFS1_MAGIC ||
630		     (fs->fs_magic == FS_UFS2_MAGIC &&
631		      (fs->fs_sblockloc == sblockloc ||
632		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
633		    fs->fs_bsize <= MAXBSIZE &&
634		    fs->fs_bsize >= sizeof(struct fs))
635			break;
636		brelse(bp);
637		bp = NULL;
638	}
639	if (sblock_try[i] == -1) {
640		error = EINVAL;		/* XXX needs translation */
641		goto out;
642	}
643	fs->fs_fmod = 0;
644	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
645	fs->fs_flags &= ~FS_UNCLEAN;
646	if (fs->fs_clean == 0) {
647		fs->fs_flags |= FS_UNCLEAN;
648		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
649		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
650		     (fs->fs_flags & FS_DOSOFTDEP))) {
651			printf(
652"WARNING: %s was not properly dismounted\n",
653			    fs->fs_fsmnt);
654		} else {
655			printf(
656"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
657			    fs->fs_fsmnt);
658			error = EPERM;
659			goto out;
660		}
661		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
662		    (mp->mnt_flag & MNT_FORCE)) {
663			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
664			    (intmax_t)fs->fs_pendingblocks,
665			    fs->fs_pendinginodes);
666			fs->fs_pendingblocks = 0;
667			fs->fs_pendinginodes = 0;
668		}
669	}
670	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
671		printf("%s: mount pending error: blocks %jd files %d\n",
672		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
673		    fs->fs_pendinginodes);
674		fs->fs_pendingblocks = 0;
675		fs->fs_pendinginodes = 0;
676	}
677	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
678	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
679	    M_WAITOK);
680	if (fs->fs_magic == FS_UFS1_MAGIC) {
681		ump->um_fstype = UFS1;
682		ump->um_balloc = ffs_balloc_ufs1;
683	} else {
684		ump->um_fstype = UFS2;
685		ump->um_balloc = ffs_balloc_ufs2;
686	}
687	ump->um_blkatoff = ffs_blkatoff;
688	ump->um_truncate = ffs_truncate;
689	ump->um_update = ffs_update;
690	ump->um_valloc = ffs_valloc;
691	ump->um_vfree = ffs_vfree;
692	ump->um_ifree = ffs_ifree;
693	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
694	if (fs->fs_sbsize < SBLOCKSIZE)
695		bp->b_flags |= B_INVAL | B_NOCACHE;
696	brelse(bp);
697	bp = NULL;
698	fs = ump->um_fs;
699	ffs_oldfscompat_read(fs, ump, sblockloc);
700	fs->fs_ronly = ronly;
701	size = fs->fs_cssize;
702	blks = howmany(size, fs->fs_fsize);
703	if (fs->fs_contigsumsize > 0)
704		size += fs->fs_ncg * sizeof(int32_t);
705	size += fs->fs_ncg * sizeof(u_int8_t);
706	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
707	fs->fs_csp = space;
708	for (i = 0; i < blks; i += fs->fs_frag) {
709		size = fs->fs_bsize;
710		if (i + fs->fs_frag > blks)
711			size = (blks - i) * fs->fs_fsize;
712		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
713		    cred, &bp)) != 0) {
714			free(fs->fs_csp, M_UFSMNT);
715			goto out;
716		}
717		bcopy(bp->b_data, space, (u_int)size);
718		space = (char *)space + size;
719		brelse(bp);
720		bp = NULL;
721	}
722	if (fs->fs_contigsumsize > 0) {
723		fs->fs_maxcluster = lp = space;
724		for (i = 0; i < fs->fs_ncg; i++)
725			*lp++ = fs->fs_contigsumsize;
726		space = lp;
727	}
728	size = fs->fs_ncg * sizeof(u_int8_t);
729	fs->fs_contigdirs = (u_int8_t *)space;
730	bzero(fs->fs_contigdirs, size);
731	fs->fs_active = NULL;
732	mp->mnt_data = (qaddr_t)ump;
733	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
734	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
735	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
736	    vfs_getvfs(&mp->mnt_stat.f_fsid))
737		vfs_getnewfsid(mp);
738	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
739	mp->mnt_flag |= MNT_LOCAL;
740	if ((fs->fs_flags & FS_MULTILABEL) != 0)
741#ifdef MAC
742		mp->mnt_flag |= MNT_MULTILABEL;
743#else
744		printf(
745"WARNING: %s: multilabel flag on fs but no MAC support\n",
746		    fs->fs_fsmnt);
747#endif
748	if ((fs->fs_flags & FS_ACLS) != 0)
749#ifdef UFS_ACL
750		mp->mnt_flag |= MNT_ACLS;
751#else
752		printf(
753"WARNING: %s: ACLs flag on fs but no ACLs support\n",
754		    fs->fs_fsmnt);
755#endif
756	ump->um_mountp = mp;
757	ump->um_dev = dev;
758	ump->um_devvp = devvp;
759	ump->um_nindir = fs->fs_nindir;
760	ump->um_bptrtodb = fs->fs_fsbtodb;
761	ump->um_seqinc = fs->fs_frag;
762	for (i = 0; i < MAXQUOTAS; i++)
763		ump->um_quotas[i] = NULLVP;
764#ifdef UFS_EXTATTR
765	ufs_extattr_uepm_init(&ump->um_extattr);
766#endif
767	devvp->v_rdev->si_mountpoint = mp;
768
769	/*
770	 * Set FS local "last mounted on" information (NULL pad)
771	 */
772	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
773			fs->fs_fsmnt,			/* copy area*/
774			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
775			&strsize);			/* real size*/
776	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
777
778	if( mp->mnt_flag & MNT_ROOTFS) {
779		/*
780		 * Root mount; update timestamp in mount structure.
781		 * this will be used by the common root mount code
782		 * to update the system clock.
783		 */
784		mp->mnt_time = fs->fs_time;
785	}
786
787	if (ronly == 0) {
788		if ((fs->fs_flags & FS_DOSOFTDEP) &&
789		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
790			free(fs->fs_csp, M_UFSMNT);
791			goto out;
792		}
793		if (fs->fs_snapinum[0] != 0)
794			ffs_snapshot_mount(mp);
795		fs->fs_fmod = 1;
796		fs->fs_clean = 0;
797		(void) ffs_sbupdate(ump, MNT_WAIT);
798	}
799#ifdef UFS_EXTATTR
800#ifdef UFS_EXTATTR_AUTOSTART
801	/*
802	 *
803	 * Auto-starting does the following:
804	 *	- check for /.attribute in the fs, and extattr_start if so
805	 *	- for each file in .attribute, enable that file with
806	 * 	  an attribute of the same name.
807	 * Not clear how to report errors -- probably eat them.
808	 * This would all happen while the filesystem was busy/not
809	 * available, so would effectively be "atomic".
810	 */
811	(void) ufs_extattr_autostart(mp, td);
812#endif /* !UFS_EXTATTR_AUTOSTART */
813#endif /* !UFS_EXTATTR */
814	return (0);
815out:
816	devvp->v_rdev->si_mountpoint = NULL;
817	if (bp)
818		brelse(bp);
819	/* XXX: see comment above VOP_OPEN. */
820#ifdef notyet
821	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, cred, td);
822#else
823	(void)VOP_CLOSE(devvp, FREAD | FWRITE, cred, td);
824#endif
825	if (ump) {
826		free(ump->um_fs, M_UFSMNT);
827		free(ump, M_UFSMNT);
828		mp->mnt_data = (qaddr_t)0;
829	}
830	return (error);
831}
832
833#include <sys/sysctl.h>
834int bigcgs = 0;
835SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
836
837/*
838 * Sanity checks for loading old filesystem superblocks.
839 * See ffs_oldfscompat_write below for unwound actions.
840 *
841 * XXX - Parts get retired eventually.
842 * Unfortunately new bits get added.
843 */
844static void
845ffs_oldfscompat_read(fs, ump, sblockloc)
846	struct fs *fs;
847	struct ufsmount *ump;
848	ufs2_daddr_t sblockloc;
849{
850	off_t maxfilesize;
851
852	/*
853	 * If not yet done, update fs_flags location and value of fs_sblockloc.
854	 */
855	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
856		fs->fs_flags = fs->fs_old_flags;
857		fs->fs_old_flags |= FS_FLAGS_UPDATED;
858		fs->fs_sblockloc = sblockloc;
859	}
860	/*
861	 * If not yet done, update UFS1 superblock with new wider fields.
862	 */
863	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
864		fs->fs_maxbsize = fs->fs_bsize;
865		fs->fs_time = fs->fs_old_time;
866		fs->fs_size = fs->fs_old_size;
867		fs->fs_dsize = fs->fs_old_dsize;
868		fs->fs_csaddr = fs->fs_old_csaddr;
869		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
870		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
871		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
872		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
873	}
874	if (fs->fs_magic == FS_UFS1_MAGIC &&
875	    fs->fs_old_inodefmt < FS_44INODEFMT) {
876		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
877		fs->fs_qbmask = ~fs->fs_bmask;
878		fs->fs_qfmask = ~fs->fs_fmask;
879	}
880	if (fs->fs_magic == FS_UFS1_MAGIC) {
881		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
882		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
883		if (fs->fs_maxfilesize > maxfilesize)
884			fs->fs_maxfilesize = maxfilesize;
885	}
886	/* Compatibility for old filesystems */
887	if (fs->fs_avgfilesize <= 0)
888		fs->fs_avgfilesize = AVFILESIZ;
889	if (fs->fs_avgfpdir <= 0)
890		fs->fs_avgfpdir = AFPDIR;
891	if (bigcgs) {
892		fs->fs_save_cgsize = fs->fs_cgsize;
893		fs->fs_cgsize = fs->fs_bsize;
894	}
895}
896
897/*
898 * Unwinding superblock updates for old filesystems.
899 * See ffs_oldfscompat_read above for details.
900 *
901 * XXX - Parts get retired eventually.
902 * Unfortunately new bits get added.
903 */
904static void
905ffs_oldfscompat_write(fs, ump)
906	struct fs *fs;
907	struct ufsmount *ump;
908{
909
910	/*
911	 * Copy back UFS2 updated fields that UFS1 inspects.
912	 */
913	if (fs->fs_magic == FS_UFS1_MAGIC) {
914		fs->fs_old_time = fs->fs_time;
915		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
916		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
917		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
918		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
919		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
920	}
921	if (bigcgs) {
922		fs->fs_cgsize = fs->fs_save_cgsize;
923		fs->fs_save_cgsize = 0;
924	}
925}
926
927/*
928 * unmount system call
929 */
930int
931ffs_unmount(mp, mntflags, td)
932	struct mount *mp;
933	int mntflags;
934	struct thread *td;
935{
936	struct ufsmount *ump = VFSTOUFS(mp);
937	struct fs *fs;
938	int error, flags;
939
940	flags = 0;
941	if (mntflags & MNT_FORCE) {
942		flags |= FORCECLOSE;
943	}
944#ifdef UFS_EXTATTR
945	if ((error = ufs_extattr_stop(mp, td))) {
946		if (error != EOPNOTSUPP)
947			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
948			    error);
949	} else {
950		ufs_extattr_uepm_destroy(&ump->um_extattr);
951	}
952#endif
953	if (mp->mnt_flag & MNT_SOFTDEP) {
954		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
955			return (error);
956	} else {
957		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
958			return (error);
959	}
960	fs = ump->um_fs;
961	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
962		printf("%s: unmount pending error: blocks %jd files %d\n",
963		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
964		    fs->fs_pendinginodes);
965		fs->fs_pendingblocks = 0;
966		fs->fs_pendinginodes = 0;
967	}
968	if (fs->fs_ronly == 0) {
969		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
970		error = ffs_sbupdate(ump, MNT_WAIT);
971		if (error) {
972			fs->fs_clean = 0;
973			return (error);
974		}
975	}
976	ump->um_devvp->v_rdev->si_mountpoint = NULL;
977
978	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
979	/* XXX: see comment above VOP_OPEN. */
980#ifdef notyet
981	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
982	    NOCRED, td);
983#else
984	error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td);
985#endif
986	vrele(ump->um_devvp);
987	free(fs->fs_csp, M_UFSMNT);
988	free(fs, M_UFSMNT);
989	free(ump, M_UFSMNT);
990	mp->mnt_data = (qaddr_t)0;
991	mp->mnt_flag &= ~MNT_LOCAL;
992	return (error);
993}
994
995/*
996 * Flush out all the files in a filesystem.
997 */
998int
999ffs_flushfiles(mp, flags, td)
1000	struct mount *mp;
1001	int flags;
1002	struct thread *td;
1003{
1004	struct ufsmount *ump;
1005	int error;
1006
1007	ump = VFSTOUFS(mp);
1008#ifdef QUOTA
1009	if (mp->mnt_flag & MNT_QUOTA) {
1010		int i;
1011		error = vflush(mp, 0, SKIPSYSTEM|flags);
1012		if (error)
1013			return (error);
1014		for (i = 0; i < MAXQUOTAS; i++) {
1015			if (ump->um_quotas[i] == NULLVP)
1016				continue;
1017			quotaoff(td, mp, i);
1018		}
1019		/*
1020		 * Here we fall through to vflush again to ensure
1021		 * that we have gotten rid of all the system vnodes.
1022		 */
1023	}
1024#endif
1025	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1026	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1027		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1028			return (error);
1029		ffs_snapshot_unmount(mp);
1030		/*
1031		 * Here we fall through to vflush again to ensure
1032		 * that we have gotten rid of all the system vnodes.
1033		 */
1034	}
1035        /*
1036	 * Flush all the files.
1037	 */
1038	if ((error = vflush(mp, 0, flags)) != 0)
1039		return (error);
1040	/*
1041	 * Flush filesystem metadata.
1042	 */
1043	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1044	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1045	VOP_UNLOCK(ump->um_devvp, 0, td);
1046	return (error);
1047}
1048
1049/*
1050 * Get filesystem statistics.
1051 */
1052int
1053ffs_statfs(mp, sbp, td)
1054	struct mount *mp;
1055	struct statfs *sbp;
1056	struct thread *td;
1057{
1058	struct ufsmount *ump;
1059	struct fs *fs;
1060
1061	ump = VFSTOUFS(mp);
1062	fs = ump->um_fs;
1063	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1064		panic("ffs_statfs");
1065	if (fs->fs_magic == FS_UFS2_MAGIC)
1066		bcopy((caddr_t)"ufs2",
1067			(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1068	sbp->f_version = STATFS_VERSION;
1069	sbp->f_bsize = fs->fs_fsize;
1070	sbp->f_iosize = fs->fs_bsize;
1071	sbp->f_blocks = fs->fs_dsize;
1072	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1073	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1074	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1075	    dbtofsb(fs, fs->fs_pendingblocks);
1076	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1077	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1078	sbp->f_namemax = NAME_MAX;
1079	if (sbp != &mp->mnt_stat) {
1080		sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1081		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1082		sbp->f_syncwrites = mp->mnt_stat.f_syncwrites;
1083		sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites;
1084		sbp->f_syncreads = mp->mnt_stat.f_syncreads;
1085		sbp->f_asyncreads = mp->mnt_stat.f_asyncreads;
1086		sbp->f_owner = mp->mnt_stat.f_owner;
1087		sbp->f_fsid = mp->mnt_stat.f_fsid;
1088		if (fs->fs_magic == FS_UFS2_MAGIC)
1089			bcopy((caddr_t)"ufs2",
1090				(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1091		else
1092			bcopy((caddr_t)mp->mnt_stat.f_fstypename,
1093				(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1094		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1095			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1096		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1097			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1098	}
1099	return (0);
1100}
1101
1102/*
1103 * Go through the disk queues to initiate sandbagged IO;
1104 * go through the inodes to write those that have been modified;
1105 * initiate the writing of the super block if it has been modified.
1106 *
1107 * Note: we are always called with the filesystem marked `MPBUSY'.
1108 */
1109int
1110ffs_sync(mp, waitfor, cred, td)
1111	struct mount *mp;
1112	int waitfor;
1113	struct ucred *cred;
1114	struct thread *td;
1115{
1116	struct vnode *nvp, *vp, *devvp;
1117	struct inode *ip;
1118	struct ufsmount *ump = VFSTOUFS(mp);
1119	struct fs *fs;
1120	int error, count, wait, lockreq, allerror = 0;
1121
1122	fs = ump->um_fs;
1123	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1124		printf("fs = %s\n", fs->fs_fsmnt);
1125		panic("ffs_sync: rofs mod");
1126	}
1127	/*
1128	 * Write back each (modified) inode.
1129	 */
1130	wait = 0;
1131	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1132	if (waitfor == MNT_WAIT) {
1133		wait = 1;
1134		lockreq = LK_EXCLUSIVE;
1135	}
1136	lockreq |= LK_INTERLOCK;
1137	MNT_ILOCK(mp);
1138loop:
1139	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1140		/*
1141		 * If the vnode that we are about to sync is no longer
1142		 * associated with this mount point, start over.
1143		 */
1144		if (vp->v_mount != mp)
1145			goto loop;
1146
1147		/*
1148		 * Depend on the mntvnode_slock to keep things stable enough
1149		 * for a quick test.  Since there might be hundreds of
1150		 * thousands of vnodes, we cannot afford even a subroutine
1151		 * call unless there's a good chance that we have work to do.
1152		 */
1153		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1154		VI_LOCK(vp);
1155		if (vp->v_iflag & VI_XLOCK) {
1156			VI_UNLOCK(vp);
1157			continue;
1158		}
1159		ip = VTOI(vp);
1160		if (vp->v_type == VNON || ((ip->i_flag &
1161		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1162		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1163			VI_UNLOCK(vp);
1164			continue;
1165		}
1166		MNT_IUNLOCK(mp);
1167		if ((error = vget(vp, lockreq, td)) != 0) {
1168			MNT_ILOCK(mp);
1169			if (error == ENOENT)
1170				goto loop;
1171			continue;
1172		}
1173		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1174			allerror = error;
1175		VOP_UNLOCK(vp, 0, td);
1176		vrele(vp);
1177		MNT_ILOCK(mp);
1178		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1179			goto loop;
1180	}
1181	MNT_IUNLOCK(mp);
1182	/*
1183	 * Force stale filesystem control information to be flushed.
1184	 */
1185	if (waitfor == MNT_WAIT) {
1186		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1187			allerror = error;
1188		/* Flushed work items may create new vnodes to clean */
1189		if (allerror == 0 && count) {
1190			MNT_ILOCK(mp);
1191			goto loop;
1192		}
1193	}
1194#ifdef QUOTA
1195	qsync(mp);
1196#endif
1197	devvp = ump->um_devvp;
1198	VI_LOCK(devvp);
1199	if (waitfor != MNT_LAZY &&
1200	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1201		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1202		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1203			allerror = error;
1204		VOP_UNLOCK(devvp, 0, td);
1205		if (allerror == 0 && waitfor == MNT_WAIT) {
1206			MNT_ILOCK(mp);
1207			goto loop;
1208		}
1209	} else
1210		VI_UNLOCK(devvp);
1211	/*
1212	 * Write back modified superblock.
1213	 */
1214	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1215		allerror = error;
1216	return (allerror);
1217}
1218
1219int
1220ffs_vget(mp, ino, flags, vpp)
1221	struct mount *mp;
1222	ino_t ino;
1223	int flags;
1224	struct vnode **vpp;
1225{
1226	struct thread *td = curthread; 		/* XXX */
1227	struct fs *fs;
1228	struct inode *ip;
1229	struct ufsmount *ump;
1230	struct buf *bp;
1231	struct vnode *vp;
1232	dev_t dev;
1233	int error;
1234
1235	ump = VFSTOUFS(mp);
1236	dev = ump->um_dev;
1237
1238	/*
1239	 * We do not lock vnode creation as it is believed to be too
1240	 * expensive for such rare case as simultaneous creation of vnode
1241	 * for same ino by different processes. We just allow them to race
1242	 * and check later to decide who wins. Let the race begin!
1243	 */
1244	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1245		return (error);
1246	if (*vpp != NULL)
1247		return (0);
1248
1249	/*
1250	 * If this MALLOC() is performed after the getnewvnode()
1251	 * it might block, leaving a vnode with a NULL v_data to be
1252	 * found by ffs_sync() if a sync happens to fire right then,
1253	 * which will cause a panic because ffs_sync() blindly
1254	 * dereferences vp->v_data (as well it should).
1255	 */
1256	ip = uma_zalloc(uma_inode, M_WAITOK);
1257
1258	/* Allocate a new vnode/inode. */
1259	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1260	if (error) {
1261		*vpp = NULL;
1262		uma_zfree(uma_inode, ip);
1263		return (error);
1264	}
1265	bzero((caddr_t)ip, sizeof(struct inode));
1266	/*
1267	 * FFS supports recursive locking.
1268	 */
1269	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1270	vp->v_data = ip;
1271	ip->i_vnode = vp;
1272	ip->i_ump = ump;
1273	ip->i_fs = fs = ump->um_fs;
1274	ip->i_dev = dev;
1275	ip->i_number = ino;
1276#ifdef QUOTA
1277	{
1278		int i;
1279		for (i = 0; i < MAXQUOTAS; i++)
1280			ip->i_dquot[i] = NODQUOT;
1281	}
1282#endif
1283	/*
1284	 * Exclusively lock the vnode before adding to hash. Note, that we
1285	 * must not release nor downgrade the lock (despite flags argument
1286	 * says) till it is fully initialized.
1287	 */
1288	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1289
1290	/*
1291	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1292	 * duplicate of vnode being created and add it to the hash. If a
1293	 * duplicate vnode was found, it will be vget()ed from hash for us.
1294	 */
1295	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1296		vput(vp);
1297		*vpp = NULL;
1298		return (error);
1299	}
1300
1301	/* We lost the race, then throw away our vnode and return existing */
1302	if (*vpp != NULL) {
1303		vput(vp);
1304		return (0);
1305	}
1306
1307	/* Read in the disk contents for the inode, copy into the inode. */
1308	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1309	    (int)fs->fs_bsize, NOCRED, &bp);
1310	if (error) {
1311		/*
1312		 * The inode does not contain anything useful, so it would
1313		 * be misleading to leave it on its hash chain. With mode
1314		 * still zero, it will be unlinked and returned to the free
1315		 * list by vput().
1316		 */
1317		brelse(bp);
1318		vput(vp);
1319		*vpp = NULL;
1320		return (error);
1321	}
1322	if (ip->i_ump->um_fstype == UFS1)
1323		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1324	else
1325		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1326	ffs_load_inode(bp, ip, fs, ino);
1327	if (DOINGSOFTDEP(vp))
1328		softdep_load_inodeblock(ip);
1329	else
1330		ip->i_effnlink = ip->i_nlink;
1331	bqrelse(bp);
1332
1333	/*
1334	 * Initialize the vnode from the inode, check for aliases.
1335	 * Note that the underlying vnode may have changed.
1336	 */
1337	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1338	if (error) {
1339		vput(vp);
1340		*vpp = NULL;
1341		return (error);
1342	}
1343	/*
1344	 * Finish inode initialization.
1345	 */
1346	VREF(ip->i_devvp);
1347	/*
1348	 * Set up a generation number for this inode if it does not
1349	 * already have one. This should only happen on old filesystems.
1350	 */
1351	if (ip->i_gen == 0) {
1352		ip->i_gen = arc4random() / 2 + 1;
1353		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1354			ip->i_flag |= IN_MODIFIED;
1355			DIP(ip, i_gen) = ip->i_gen;
1356		}
1357	}
1358	/*
1359	 * Ensure that uid and gid are correct. This is a temporary
1360	 * fix until fsck has been changed to do the update.
1361	 */
1362	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1363	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1364		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1365		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1366	}						/* XXX */
1367
1368#ifdef MAC
1369	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1370		/*
1371		 * If this vnode is already allocated, and we're running
1372		 * multi-label, attempt to perform a label association
1373		 * from the extended attributes on the inode.
1374		 */
1375		error = mac_associate_vnode_extattr(mp, vp);
1376		if (error) {
1377			/* ufs_inactive will release ip->i_devvp ref. */
1378			vput(vp);
1379			*vpp = NULL;
1380			return (error);
1381		}
1382	}
1383#endif
1384
1385	*vpp = vp;
1386	return (0);
1387}
1388
1389/*
1390 * File handle to vnode
1391 *
1392 * Have to be really careful about stale file handles:
1393 * - check that the inode number is valid
1394 * - call ffs_vget() to get the locked inode
1395 * - check for an unallocated inode (i_mode == 0)
1396 * - check that the given client host has export rights and return
1397 *   those rights via. exflagsp and credanonp
1398 */
1399int
1400ffs_fhtovp(mp, fhp, vpp)
1401	struct mount *mp;
1402	struct fid *fhp;
1403	struct vnode **vpp;
1404{
1405	struct ufid *ufhp;
1406	struct fs *fs;
1407
1408	ufhp = (struct ufid *)fhp;
1409	fs = VFSTOUFS(mp)->um_fs;
1410	if (ufhp->ufid_ino < ROOTINO ||
1411	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1412		return (ESTALE);
1413	return (ufs_fhtovp(mp, ufhp, vpp));
1414}
1415
1416/*
1417 * Vnode pointer to File handle
1418 */
1419/* ARGSUSED */
1420int
1421ffs_vptofh(vp, fhp)
1422	struct vnode *vp;
1423	struct fid *fhp;
1424{
1425	struct inode *ip;
1426	struct ufid *ufhp;
1427
1428	ip = VTOI(vp);
1429	ufhp = (struct ufid *)fhp;
1430	ufhp->ufid_len = sizeof(struct ufid);
1431	ufhp->ufid_ino = ip->i_number;
1432	ufhp->ufid_gen = ip->i_gen;
1433	return (0);
1434}
1435
/*
 * Initialize the filesystem: set up soft updates first, then run the
 * generic UFS initialization and return its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1447
/*
 * Undo the work of ffs_init(), in the reverse order: the generic UFS
 * teardown runs first and its result is returned after soft updates
 * have been torn down too.
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1461
1462/*
1463 * Write a superblock and associated information back to disk.
1464 */
1465static int
1466ffs_sbupdate(mp, waitfor)
1467	struct ufsmount *mp;
1468	int waitfor;
1469{
1470	struct fs *fs = mp->um_fs;
1471	struct buf *bp;
1472	int blks;
1473	void *space;
1474	int i, size, error, allerror = 0;
1475
1476	if (fs->fs_ronly == 1 &&
1477	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1478	    (MNT_RDONLY | MNT_UPDATE))
1479		panic("ffs_sbupdate: write read-only filesystem");
1480	/*
1481	 * First write back the summary information.
1482	 */
1483	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1484	space = fs->fs_csp;
1485	for (i = 0; i < blks; i += fs->fs_frag) {
1486		size = fs->fs_bsize;
1487		if (i + fs->fs_frag > blks)
1488			size = (blks - i) * fs->fs_fsize;
1489		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1490		    size, 0, 0, 0);
1491		bcopy(space, bp->b_data, (u_int)size);
1492		space = (char *)space + size;
1493		if (waitfor != MNT_WAIT)
1494			bawrite(bp);
1495		else if ((error = bwrite(bp)) != 0)
1496			allerror = error;
1497	}
1498	/*
1499	 * Now write back the superblock itself. If any errors occurred
1500	 * up to this point, then fail so that the superblock avoids
1501	 * being written out as clean.
1502	 */
1503	if (allerror)
1504		return (allerror);
1505	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1506	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1507		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1508		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1509		fs->fs_sblockloc = SBLOCK_UFS1;
1510	}
1511	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1512	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1513		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1514		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1515		fs->fs_sblockloc = SBLOCK_UFS2;
1516	}
1517	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1518	    0, 0, 0);
1519	fs->fs_fmod = 0;
1520	fs->fs_time = time_second;
1521	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1522	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1523	if (waitfor != MNT_WAIT)
1524		bawrite(bp);
1525	else if ((error = bwrite(bp)) != 0)
1526		allerror = error;
1527	return (allerror);
1528}
1529
/*
 * Extended attribute control: hand the request to the UFS implementation
 * when the kernel is built with UFS_EXTATTR, otherwise to the VFS
 * default.  The result of whichever routine is called is returned.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1543
1544static void
1545ffs_ifree(struct ufsmount *ump, struct inode *ip)
1546{
1547
1548	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1549		uma_zfree(uma_ufs1, ip->i_din1);
1550	else if (ip->i_din2 != NULL)
1551		uma_zfree(uma_ufs2, ip->i_din2);
1552	uma_zfree(uma_inode, ip);
1553}
1554