ffs_vfsops.c revision 30354
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $Id: ffs_vfsops.c,v 1.58 1997/10/11 18:31:36 phk Exp $
35 */
36
37#include "opt_quota.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/vnode.h>
45#include <sys/mount.h>
46#include <sys/buf.h>
47#include <sys/conf.h>
48#include <sys/fcntl.h>
49#include <sys/disklabel.h>
50#include <sys/malloc.h>
51
52#include <miscfs/specfs/specdev.h>
53
54#include <ufs/ufs/quota.h>
55#include <ufs/ufs/ufsmount.h>
56#include <ufs/ufs/inode.h>
57#include <ufs/ufs/ufs_extern.h>
58
59#include <ufs/ffs/fs.h>
60#include <ufs/ffs/ffs_extern.h>
61
62#include <vm/vm.h>
63#include <vm/vm_prot.h>
64#include <vm/vm_page.h>
65#include <vm/vm_extern.h>
66
/* Malloc type handed to ffs_mountfs() by ffs_mount() for this filesystem. */
static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

/* Forward declarations of the file-local routines defined below. */
static int	ffs_sbupdate __P((struct ufsmount *, int));
static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
static int	ffs_oldfscompat __P((struct fs *));
static int	ffs_mount __P((struct mount *, char *, caddr_t,
				struct nameidata *, struct proc *));
static int	ffs_init __P((struct vfsconf *));

/*
 * Filesystem operations table.  The initializer is positional, so the
 * entries must stay in the member order of struct vfsops (declared
 * elsewhere).  The ufs_* entries are not defined in this file.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
};

VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
91
92/*
93 * ffs_mount
94 *
95 * Called when mounting local physical media
96 *
97 * PARAMETERS:
98 *		mountroot
99 *			mp	mount point structure
100 *			path	NULL (flag for root mount!!!)
101 *			data	<unused>
102 *			ndp	<unused>
103 *			p	process (user credentials check [statfs])
104 *
105 *		mount
106 *			mp	mount point structure
107 *			path	path to mount point
108 *			data	pointer to argument struct in user space
109 *			ndp	mount point namei() return (used for
110 *				credentials on reload), reused to look
111 *				up block device.
112 *			p	process (user credentials check)
113 *
114 * RETURNS:	0	Success
115 *		!0	error number (errno.h)
116 *
117 * LOCK STATE:
118 *
119 *		ENTRY
120 *			mount point is locked
121 *		EXIT
122 *			mount point is locked
123 *
124 * NOTES:
125 *		A NULL path can be used for a flag since the mount
126 *		system call will fail with EFAULT in copyinstr in
127 *		namei() if it is a genuine NULL from the user.
128 */
129static int
130ffs_mount( mp, path, data, ndp, p)
131        struct mount		*mp;	/* mount struct pointer*/
132        char			*path;	/* path to mount point*/
133        caddr_t			data;	/* arguments to FS specific mount*/
134        struct nameidata	*ndp;	/* mount point credentials*/
135        struct proc		*p;	/* process requesting mount*/
136{
137	u_int		size;
138	int		err = 0;
139	struct vnode	*devvp;
140
141	struct ufs_args args;
142	struct ufsmount *ump = 0;
143	register struct fs *fs;
144	int flags;
145
146	/*
147	 * Use NULL path to flag a root mount
148	 */
149	if( path == NULL) {
150		/*
151		 ***
152		 * Mounting root file system
153		 ***
154		 */
155
156		if ((err = bdevvp(rootdev, &rootvp))) {
157			printf("ffs_mountroot: can't find rootvp");
158			return (err);
159		}
160
161		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR)
162			mp->mnt_flag |= MNT_NOCLUSTERR;
163		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW)
164			mp->mnt_flag |= MNT_NOCLUSTERW;
165		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
166			/* fs specific cleanup (if any)*/
167			goto error_1;
168		}
169
170		goto dostatfs;		/* success*/
171
172	}
173
174	/*
175	 ***
176	 * Mounting non-root file system or updating a file system
177	 ***
178	 */
179
180	/* copy in user arguments*/
181	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
182	if (err)
183		goto error_1;		/* can't get arguments*/
184
185	/*
186	 * If updating, check whether changing from read-only to
187	 * read/write; if there is no device name, that's all we do.
188	 * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags,
189	 * if block device requests.
190	 */
191	if (mp->mnt_flag & MNT_UPDATE) {
192		ump = VFSTOUFS(mp);
193		fs = ump->um_fs;
194		err = 0;
195		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR)
196			mp->mnt_flag |= MNT_NOCLUSTERR;
197		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW)
198			mp->mnt_flag |= MNT_NOCLUSTERW;
199		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
200			flags = WRITECLOSE;
201			if (mp->mnt_flag & MNT_FORCE)
202				flags |= FORCECLOSE;
203			err = ffs_flushfiles(mp, flags, p);
204		}
205		if (!err && (mp->mnt_flag & MNT_RELOAD))
206			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
207		if (err) {
208			goto error_1;
209		}
210		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
211			if (!fs->fs_clean) {
212				if (mp->mnt_flag & MNT_FORCE) {
213					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
214				} else {
215					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
216					    fs->fs_fsmnt);
217					err = EPERM;
218					goto error_1;
219				}
220			}
221			fs->fs_ronly = 0;
222		}
223		if (fs->fs_ronly == 0) {
224			fs->fs_clean = 0;
225			ffs_sbupdate(ump, MNT_WAIT);
226		}
227		/* if not updating name...*/
228		if (args.fspec == 0) {
229			/*
230			 * Process export requests.  Jumping to "success"
231			 * will return the vfs_export() error code.
232			 */
233			err = vfs_export(mp, &ump->um_export, &args.export);
234			goto success;
235		}
236	}
237
238	/*
239	 * Not an update, or updating the name: look up the name
240	 * and verify that it refers to a sensible block device.
241	 */
242	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
243	err = namei(ndp);
244	if (err) {
245		/* can't get devvp!*/
246		goto error_1;
247	}
248
249	devvp = ndp->ni_vp;
250
251	if (devvp->v_type != VBLK) {
252		err = ENOTBLK;
253		goto error_2;
254	}
255	if (major(devvp->v_rdev) >= nblkdev) {
256		err = ENXIO;
257		goto error_2;
258	}
259	if (mp->mnt_flag & MNT_UPDATE) {
260		/*
261		 ********************
262		 * UPDATE
263		 ********************
264		 */
265
266		if (devvp != ump->um_devvp)
267			err = EINVAL;	/* needs translation */
268		else
269			vrele(devvp);
270		/*
271		 * Update device name only on success
272		 */
273		if( !err) {
274			/* Save "mounted from" info for mount point (NULL pad)*/
275			copyinstr(	args.fspec,
276					mp->mnt_stat.f_mntfromname,
277					MNAMELEN - 1,
278					&size);
279			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
280		}
281	} else {
282		/*
283		 ********************
284		 * NEW MOUNT
285		 ********************
286		 */
287
288		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR)
289			mp->mnt_flag |= MNT_NOCLUSTERR;
290		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW)
291			mp->mnt_flag |= MNT_NOCLUSTERW;
292
293		/*
294		 * Since this is a new mount, we want the names for
295		 * the device and the mount point copied in.  If an
296		 * error occurs,  the mountpoint is discarded by the
297		 * upper level code.
298		 */
299		/* Save "last mounted on" info for mount point (NULL pad)*/
300		copyinstr(	path,				/* mount point*/
301				mp->mnt_stat.f_mntonname,	/* save area*/
302				MNAMELEN - 1,			/* max size*/
303				&size);				/* real size*/
304		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
305
306		/* Save "mounted from" info for mount point (NULL pad)*/
307		copyinstr(	args.fspec,			/* device name*/
308				mp->mnt_stat.f_mntfromname,	/* save area*/
309				MNAMELEN - 1,			/* max size*/
310				&size);				/* real size*/
311		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
312
313		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
314	}
315	if (err) {
316		goto error_2;
317	}
318
319dostatfs:
320	/*
321	 * Initialize FS stat information in mount struct; uses both
322	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
323	 *
324	 * This code is common to root and non-root mounts
325	 */
326	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
327
328	goto success;
329
330
331error_2:	/* error with devvp held*/
332
333	/* release devvp before failing*/
334	vrele(devvp);
335
336error_1:	/* no state to back out*/
337
338success:
339	return( err);
340}
341
342/*
343 * Reload all incore data for a filesystem (used after running fsck on
344 * the root filesystem and finding things to fix). The filesystem must
345 * be mounted read-only.
346 *
347 * Things to do to update the mount:
348 *	1) invalidate all cached meta-data.
349 *	2) re-read superblock from disk.
350 *	3) re-read summary information from disk.
351 *	4) invalidate all inactive vnodes.
352 *	5) invalidate all cached file data.
353 *	6) re-read inode data for all active vnodes.
354 */
355static int
356ffs_reload(mp, cred, p)
357	register struct mount *mp;
358	struct ucred *cred;
359	struct proc *p;
360{
361	register struct vnode *vp, *nvp, *devvp;
362	struct inode *ip;
363	struct csum *space;
364	struct buf *bp;
365	struct fs *fs, *newfs;
366	struct partinfo dpart;
367	int i, blks, size, error;
368	int32_t *lp;
369
370	if ((mp->mnt_flag & MNT_RDONLY) == 0)
371		return (EINVAL);
372	/*
373	 * Step 1: invalidate all cached meta-data.
374	 */
375	devvp = VFSTOUFS(mp)->um_devvp;
376	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
377		panic("ffs_reload: dirty1");
378	/*
379	 * Step 2: re-read superblock from disk.
380	 */
381	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
382		size = DEV_BSIZE;
383	else
384		size = dpart.disklab->d_secsize;
385	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
386		return (error);
387	newfs = (struct fs *)bp->b_data;
388	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
389		newfs->fs_bsize < sizeof(struct fs)) {
390			brelse(bp);
391			return (EIO);		/* XXX needs translation */
392	}
393	fs = VFSTOUFS(mp)->um_fs;
394	/*
395	 * Copy pointer fields back into superblock before copying in	XXX
396	 * new superblock. These should really be in the ufsmount.	XXX
397	 * Note that important parameters (eg fs_ncg) are unchanged.
398	 */
399	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
400	newfs->fs_maxcluster = fs->fs_maxcluster;
401	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
402	if (fs->fs_sbsize < SBSIZE)
403		bp->b_flags |= B_INVAL;
404	brelse(bp);
405	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
406	ffs_oldfscompat(fs);
407
408	/*
409	 * Step 3: re-read summary information from disk.
410	 */
411	blks = howmany(fs->fs_cssize, fs->fs_fsize);
412	space = fs->fs_csp[0];
413	for (i = 0; i < blks; i += fs->fs_frag) {
414		size = fs->fs_bsize;
415		if (i + fs->fs_frag > blks)
416			size = (blks - i) * fs->fs_fsize;
417		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
418		    NOCRED, &bp);
419		if (error)
420			return (error);
421		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
422		brelse(bp);
423	}
424	/*
425	 * We no longer know anything about clusters per cylinder group.
426	 */
427	if (fs->fs_contigsumsize > 0) {
428		lp = fs->fs_maxcluster;
429		for (i = 0; i < fs->fs_ncg; i++)
430			*lp++ = fs->fs_contigsumsize;
431	}
432
433loop:
434	simple_lock(&mntvnode_slock);
435	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
436		if (vp->v_mount != mp) {
437			simple_unlock(&mntvnode_slock);
438			goto loop;
439		}
440		nvp = vp->v_mntvnodes.le_next;
441		/*
442		 * Step 4: invalidate all inactive vnodes.
443		 */
444		if (vrecycle(vp, &mntvnode_slock, p))
445			goto loop;
446		/*
447		 * Step 5: invalidate all cached file data.
448		 */
449		simple_lock(&vp->v_interlock);
450		simple_unlock(&mntvnode_slock);
451		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
452			goto loop;
453		}
454		if (vinvalbuf(vp, 0, cred, p, 0, 0))
455			panic("ffs_reload: dirty2");
456		/*
457		 * Step 6: re-read inode data for all active vnodes.
458		 */
459		ip = VTOI(vp);
460		error =
461		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
462		    (int)fs->fs_bsize, NOCRED, &bp);
463		if (error) {
464			vput(vp);
465			return (error);
466		}
467		ip->i_din = *((struct dinode *)bp->b_data +
468		    ino_to_fsbo(fs, ip->i_number));
469		brelse(bp);
470		vput(vp);
471		simple_lock(&mntvnode_slock);
472	}
473	simple_unlock(&mntvnode_slock);
474	return (0);
475}
476
477/*
478 * Common code for mount and mountroot
479 */
480int
481ffs_mountfs(devvp, mp, p, malloctype)
482	register struct vnode *devvp;
483	struct mount *mp;
484	struct proc *p;
485	struct malloc_type *malloctype;
486{
487	register struct ufsmount *ump;
488	struct buf *bp;
489	register struct fs *fs;
490	dev_t dev;
491	struct partinfo dpart;
492	caddr_t base, space;
493	int error, i, blks, size, ronly;
494	int32_t *lp;
495	struct ucred *cred;
496	u_int64_t maxfilesize;					/* XXX */
497	u_int strsize;
498	int ncount;
499
500	dev = devvp->v_rdev;
501	cred = p ? p->p_ucred : NOCRED;
502	/*
503	 * Disallow multiple mounts of the same device.
504	 * Disallow mounting of a device that is currently in use
505	 * (except for root, which might share swap device for miniroot).
506	 * Flush out any old buffers remaining from a previous use.
507	 */
508	error = vfs_mountedon(devvp);
509	if (error)
510		return (error);
511	ncount = vcount(devvp);
512	if (devvp->v_object)
513		ncount -= 1;
514	if (ncount > 1 && devvp != rootvp)
515		return (EBUSY);
516	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
517		return (error);
518
519	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
520	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
521	if (error)
522		return (error);
523	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
524		size = DEV_BSIZE;
525	else
526		size = dpart.disklab->d_secsize;
527
528	bp = NULL;
529	ump = NULL;
530	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
531		goto out;
532	fs = (struct fs *)bp->b_data;
533	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
534	    fs->fs_bsize < sizeof(struct fs)) {
535		error = EINVAL;		/* XXX needs translation */
536		goto out;
537	}
538	fs->fs_fmod = 0;
539	if (!fs->fs_clean) {
540		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
541			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
542		} else {
543			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
544			error = EPERM;
545			goto out;
546		}
547	}
548	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
549	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
550		error = EROFS;          /* needs translation */
551		goto out;
552	}
553	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
554	bzero((caddr_t)ump, sizeof *ump);
555	ump->um_malloctype = malloctype;
556	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
557	    M_WAITOK);
558	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
559	if (fs->fs_sbsize < SBSIZE)
560		bp->b_flags |= B_INVAL;
561	brelse(bp);
562	bp = NULL;
563	fs = ump->um_fs;
564	fs->fs_ronly = ronly;
565	if (ronly == 0) {
566		fs->fs_fmod = 1;
567		fs->fs_clean = 0;
568	}
569	size = fs->fs_cssize;
570	blks = howmany(size, fs->fs_fsize);
571	if (fs->fs_contigsumsize > 0)
572		size += fs->fs_ncg * sizeof(int32_t);
573	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
574	for (i = 0; i < blks; i += fs->fs_frag) {
575		size = fs->fs_bsize;
576		if (i + fs->fs_frag > blks)
577			size = (blks - i) * fs->fs_fsize;
578		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
579		    cred, &bp)) {
580			free(base, M_UFSMNT);
581			goto out;
582		}
583		bcopy(bp->b_data, space, (u_int)size);
584		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
585		space += size;
586		brelse(bp);
587		bp = NULL;
588	}
589	if (fs->fs_contigsumsize > 0) {
590		fs->fs_maxcluster = lp = (int32_t *)space;
591		for (i = 0; i < fs->fs_ncg; i++)
592			*lp++ = fs->fs_contigsumsize;
593	}
594	mp->mnt_data = (qaddr_t)ump;
595	mp->mnt_stat.f_fsid.val[0] = (long)dev;
596	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
597		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
598	else
599		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
600	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
601	mp->mnt_flag |= MNT_LOCAL;
602	ump->um_mountp = mp;
603	ump->um_dev = dev;
604	ump->um_devvp = devvp;
605	ump->um_nindir = fs->fs_nindir;
606	ump->um_bptrtodb = fs->fs_fsbtodb;
607	ump->um_seqinc = fs->fs_frag;
608	for (i = 0; i < MAXQUOTAS; i++)
609		ump->um_quotas[i] = NULLVP;
610	devvp->v_specflags |= SI_MOUNTEDON;
611	ffs_oldfscompat(fs);
612
613	/*
614	 * Set FS local "last mounted on" information (NULL pad)
615	 */
616	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
617			fs->fs_fsmnt,			/* copy area*/
618			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
619			&strsize);			/* real size*/
620	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
621
622	if( mp->mnt_flag & MNT_ROOTFS) {
623		/*
624		 * Root mount; update timestamp in mount structure.
625		 * this will be used by the common root mount code
626		 * to update the system clock.
627		 */
628		mp->mnt_time = fs->fs_time;
629	}
630
631	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
632	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
633	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
634		fs->fs_maxfilesize = maxfilesize;		/* XXX */
635	if (ronly == 0) {
636		fs->fs_clean = 0;
637		(void) ffs_sbupdate(ump, MNT_WAIT);
638	}
639	/*
640	 * Only VMIO the backing device if the backing device is a real
641	 * block device.  This excludes the original MFS implementation.
642	 * Note that it is optional that the backing device be VMIOed.  This
643	 * increases the opportunity for metadata caching.
644	 */
645	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
646		vfs_object_create(devvp, p, p->p_ucred, 0);
647	}
648	return (0);
649out:
650	if (bp)
651		brelse(bp);
652	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
653	if (ump) {
654		free(ump->um_fs, M_UFSMNT);
655		free(ump, M_UFSMNT);
656		mp->mnt_data = (qaddr_t)0;
657	}
658	return (error);
659}
660
661/*
662 * Sanity checks for old file systems.
663 *
664 * XXX - goes away some day.
665 */
666static int
667ffs_oldfscompat(fs)
668	struct fs *fs;
669{
670
671	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
672	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
673	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
674		fs->fs_nrpos = 8;				/* XXX */
675	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
676#if 0
677		int i;						/* XXX */
678		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
679								/* XXX */
680		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
681		for (i = 0; i < NIADDR; i++) {			/* XXX */
682			sizepb *= NINDIR(fs);			/* XXX */
683			fs->fs_maxfilesize += sizepb;		/* XXX */
684		}						/* XXX */
685#endif
686		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
687		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
688		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
689	}							/* XXX */
690	return (0);
691}
692
693/*
694 * unmount system call
695 */
696int
697ffs_unmount(mp, mntflags, p)
698	struct mount *mp;
699	int mntflags;
700	struct proc *p;
701{
702	register struct ufsmount *ump;
703	register struct fs *fs;
704	int error, flags;
705
706	flags = 0;
707	if (mntflags & MNT_FORCE) {
708		flags |= FORCECLOSE;
709	}
710	error = ffs_flushfiles(mp, flags, p);
711	if (error)
712		return (error);
713	ump = VFSTOUFS(mp);
714	fs = ump->um_fs;
715	if (fs->fs_ronly == 0) {
716		fs->fs_clean = 1;
717		error = ffs_sbupdate(ump, MNT_WAIT);
718		if (error) {
719			fs->fs_clean = 0;
720			return (error);
721		}
722	}
723	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
724
725	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
726	vnode_pager_uncache(ump->um_devvp, p);
727	VOP_UNLOCK(ump->um_devvp, 0, p);
728
729	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
730		NOCRED, p);
731
732	vrele(ump->um_devvp);
733
734	free(fs->fs_csp[0], M_UFSMNT);
735	free(fs, M_UFSMNT);
736	free(ump, M_UFSMNT);
737	mp->mnt_data = (qaddr_t)0;
738	mp->mnt_flag &= ~MNT_LOCAL;
739	return (error);
740}
741
742/*
743 * Flush out all the files in a filesystem.
744 */
745int
746ffs_flushfiles(mp, flags, p)
747	register struct mount *mp;
748	int flags;
749	struct proc *p;
750{
751	register struct ufsmount *ump;
752	int error;
753
754	ump = VFSTOUFS(mp);
755#ifdef QUOTA
756	if (mp->mnt_flag & MNT_QUOTA) {
757		int i;
758		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
759		if (error)
760			return (error);
761		for (i = 0; i < MAXQUOTAS; i++) {
762			if (ump->um_quotas[i] == NULLVP)
763				continue;
764			quotaoff(p, mp, i);
765		}
766		/*
767		 * Here we fall through to vflush again to ensure
768		 * that we have gotten rid of all the system vnodes.
769		 */
770	}
771#endif
772	error = vflush(mp, NULLVP, flags);
773	return (error);
774}
775
776/*
777 * Get file system statistics.
778 */
779int
780ffs_statfs(mp, sbp, p)
781	struct mount *mp;
782	register struct statfs *sbp;
783	struct proc *p;
784{
785	register struct ufsmount *ump;
786	register struct fs *fs;
787
788	ump = VFSTOUFS(mp);
789	fs = ump->um_fs;
790	if (fs->fs_magic != FS_MAGIC)
791		panic("ffs_statfs");
792	sbp->f_bsize = fs->fs_fsize;
793	sbp->f_iosize = fs->fs_bsize;
794	sbp->f_blocks = fs->fs_dsize;
795	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
796		fs->fs_cstotal.cs_nffree;
797	sbp->f_bavail = freespace(fs, fs->fs_minfree);
798	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
799	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
800	if (sbp != &mp->mnt_stat) {
801		sbp->f_type = mp->mnt_vfc->vfc_typenum;
802		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
803			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
804		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
805			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
806	}
807	return (0);
808}
809
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * waitfor is passed on to VOP_FSYNC() and ffs_sbupdate().
 * Returns 0, or the most recent error seen while syncing vnodes,
 * the device or the superblock.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
ffs_sync(mp, waitfor, cred, p)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct proc *p;
{
	struct vnode *nvp, *vp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	struct timeval tv;
	int error, allerror = 0;

	fs = ump->um_fs;
	/* A read-only filesystem must never have a modified superblock. */
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	simple_lock(&mntvnode_slock);
loop:
	/* mntvnode_slock is held at the top of every iteration. */
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		simple_lock(&vp->v_interlock);
		nvp = vp->v_mntvnodes.le_next;
		ip = VTOI(vp);
		/* Skip vnodes with no pending inode flags and no dirty buffers. */
		if (((ip->i_flag &
		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
		    vp->v_dirtyblkhd.lh_first == NULL) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		if (vp->v_type != VCHR) {
			simple_unlock(&mntvnode_slock);
			/* LK_INTERLOCK hands the held v_interlock to vget(). */
			error =
			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
			if (error) {
				simple_lock(&mntvnode_slock);
				/* ENOENT restarts the scan; other errors skip vp. */
				if (error == ENOENT)
					goto loop;
				continue;
			}
			if (error = VOP_FSYNC(vp, cred, waitfor, p))
				allerror = error;
			VOP_UNLOCK(vp, 0, p);
			vrele(vp);
			simple_lock(&mntvnode_slock);
		} else {
			/* Character devices only get their inode times updated. */
			simple_unlock(&mntvnode_slock);
			simple_unlock(&vp->v_interlock);
			gettime(&tv);
			/* VOP_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
			VOP_UPDATE(vp, &tv, &tv, 0);
			simple_lock(&mntvnode_slock);
		}
	}
	simple_unlock(&mntvnode_slock);
	/*
	 * Force stale file system control information to be flushed.
	 */
	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
	if (error)
		allerror = error;
#ifdef QUOTA
	qsync(mp);
#endif
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0) {
		fs->fs_fmod = 0;
		fs->fs_time = time.tv_sec;
		if (error = ffs_sbupdate(ump, waitfor))
			allerror = error;
	}
	return (allerror);
}
902
/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 *
 * On success *vpp is set to the vnode and 0 is returned; on failure
 * *vpp is set to NULL and the error is returned.
 */

/*
 * Hand-rolled sleep lock serializing creation of new hash entries:
 * 0 = free, 1 = held, -1 = held and at least one process is sleeping
 * on it (the holder then issues a wakeup() when releasing).
 */
static int ffs_inode_hash_lock;

int
ffs_vget(mp, ino, vpp)
	struct mount *mp;
	ino_t ino;
	struct vnode **vpp;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
restart:
	/* Already in the inode hash: nothing more to do. */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
		return (0);

	/*
	 * Lock out the creation of new entries in the FFS hash table in
	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
	 * may occur!
	 */
	if (ffs_inode_hash_lock) {
		while (ffs_inode_hash_lock) {
			/* Tell the holder that somebody is waiting. */
			ffs_inode_hash_lock = -1;
			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
		}
		/* The inode may have been created meanwhile: look again. */
		goto restart;
	}
	ffs_inode_hash_lock = 1;

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	MALLOC(ip, struct inode *, sizeof(struct inode),
	    ump->um_malloctype, M_WAITOK);

	/* Allocate a new vnode/inode. */
	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
	if (error) {
		/* Release the hash lock, waking sleepers if there are any. */
		if (ffs_inode_hash_lock < 0)
			wakeup(&ffs_inode_hash_lock);
		ffs_inode_hash_lock = 0;
		*vpp = NULL;
		FREE(ip, ump->um_malloctype);
		return (error);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* The entry is hashed; release the hash lock and wake sleepers. */
	if (ffs_inode_hash_lock < 0)
		wakeup(&ffs_inode_hash_lock);
	ffs_inode_hash_lock = 0;

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* Copy this inode's dinode out of the block that holds it. */
	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = random() / 2 + 1;
		/* Mark the inode modified only on a writable mount. */
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */

	*vpp = vp;
	return (0);
}
1045
1046/*
1047 * File handle to vnode
1048 *
1049 * Have to be really careful about stale file handles:
1050 * - check that the inode number is valid
1051 * - call ffs_vget() to get the locked inode
1052 * - check for an unallocated inode (i_mode == 0)
1053 * - check that the given client host has export rights and return
1054 *   those rights via. exflagsp and credanonp
1055 */
1056int
1057ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1058	register struct mount *mp;
1059	struct fid *fhp;
1060	struct sockaddr *nam;
1061	struct vnode **vpp;
1062	int *exflagsp;
1063	struct ucred **credanonp;
1064{
1065	register struct ufid *ufhp;
1066	struct fs *fs;
1067
1068	ufhp = (struct ufid *)fhp;
1069	fs = VFSTOUFS(mp)->um_fs;
1070	if (ufhp->ufid_ino < ROOTINO ||
1071	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1072		return (ESTALE);
1073	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1074}
1075
1076/*
1077 * Vnode pointer to File handle
1078 */
1079/* ARGSUSED */
1080int
1081ffs_vptofh(vp, fhp)
1082	struct vnode *vp;
1083	struct fid *fhp;
1084{
1085	register struct inode *ip;
1086	register struct ufid *ufhp;
1087
1088	ip = VTOI(vp);
1089	ufhp = (struct ufid *)fhp;
1090	ufhp->ufid_len = sizeof(struct ufid);
1091	ufhp->ufid_ino = ip->i_number;
1092	ufhp->ufid_gen = ip->i_gen;
1093	return (0);
1094}
1095
/*
 * Initialize the filesystem.  FFS has nothing of its own to set up,
 * so this hands vfsp to ufs_init() and returns its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1106
1107/*
1108 * Write a superblock and associated information back to disk.
1109 */
1110static int
1111ffs_sbupdate(mp, waitfor)
1112	struct ufsmount *mp;
1113	int waitfor;
1114{
1115	register struct fs *dfs, *fs = mp->um_fs;
1116	register struct buf *bp;
1117	int blks;
1118	caddr_t space;
1119	int i, size, error, allerror = 0;
1120
1121	/*
1122	 * First write back the summary information.
1123	 */
1124	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1125	space = (caddr_t)fs->fs_csp[0];
1126	for (i = 0; i < blks; i += fs->fs_frag) {
1127		size = fs->fs_bsize;
1128		if (i + fs->fs_frag > blks)
1129			size = (blks - i) * fs->fs_fsize;
1130		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1131		    size, 0, 0);
1132		bcopy(space, bp->b_data, (u_int)size);
1133		space += size;
1134		if (waitfor != MNT_WAIT)
1135			bawrite(bp);
1136		else if (error = bwrite(bp))
1137			allerror = error;
1138	}
1139	/*
1140	 * Now write back the superblock itself. If any errors occurred
1141	 * up to this point, then fail so that the superblock avoids
1142	 * being written out as clean.
1143	 */
1144	if (allerror)
1145		return (allerror);
1146	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1147	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1148	/* Restore compatibility to old file systems.		   XXX */
1149	dfs = (struct fs *)bp->b_data;				/* XXX */
1150	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1151		dfs->fs_nrpos = -1;				/* XXX */
1152	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1153		int32_t *lp, tmp;				/* XXX */
1154								/* XXX */
1155		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1156		tmp = lp[4];					/* XXX */
1157		for (i = 4; i > 0; i--)				/* XXX */
1158			lp[i] = lp[i-1];			/* XXX */
1159		lp[0] = tmp;					/* XXX */
1160	}							/* XXX */
1161	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1162	if (waitfor != MNT_WAIT)
1163		bawrite(bp);
1164	else if (error = bwrite(bp))
1165		allerror = error;
1166	return (allerror);
1167}
1168