ffs_vfsops.c revision 23560
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $Id: ffs_vfsops.c,v 1.45 1997/02/22 09:47:08 peter Exp $
35 */
36
37#include "opt_quota.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/vnode.h>
45#include <sys/socket.h>
46#include <sys/mount.h>
47#include <sys/buf.h>
48#include <sys/mbuf.h>
49#include <sys/file.h>
50#include <sys/disklabel.h>
51#include <sys/ioctl.h>
52#include <sys/errno.h>
53#include <sys/malloc.h>
54
55#include <miscfs/specfs/specdev.h>
56
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/ufsmount.h>
59#include <ufs/ufs/inode.h>
60#include <ufs/ufs/ufs_extern.h>
61
62#include <ufs/ffs/fs.h>
63#include <ufs/ffs/ffs_extern.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/vm_prot.h>
68#include <vm/vm_page.h>
69#include <vm/vm_object.h>
70#include <vm/vm_extern.h>
71
72static int	ffs_sbupdate __P((struct ufsmount *, int));
73static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
74static int	ffs_oldfscompat __P((struct fs *));
75static int	ffs_mount __P((struct mount *, char *, caddr_t,
76				struct nameidata *, struct proc *));
77static int	ffs_init __P((struct vfsconf *));
78
79struct vfsops ufs_vfsops = {
80	ffs_mount,
81	ufs_start,
82	ffs_unmount,
83	ufs_root,
84	ufs_quotactl,
85	ffs_statfs,
86	ffs_sync,
87	ffs_vget,
88	ffs_fhtovp,
89	ffs_vptofh,
90	ffs_init,
91};
92
93VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
94
95extern u_long nextgennumber;
96
97/*
98 * ffs_mount
99 *
100 * Called when mounting local physical media
101 *
102 * PARAMETERS:
103 *		mountroot
104 *			mp	mount point structure
105 *			path	NULL (flag for root mount!!!)
106 *			data	<unused>
107 *			ndp	<unused>
108 *			p	process (user credentials check [statfs])
109 *
110 *		mount
111 *			mp	mount point structure
112 *			path	path to mount point
113 *			data	pointer to argument struct in user space
114 *			ndp	mount point namei() return (used for
115 *				credentials on reload), reused to look
116 *				up block device.
117 *			p	process (user credentials check)
118 *
119 * RETURNS:	0	Success
120 *		!0	error number (errno.h)
121 *
122 * LOCK STATE:
123 *
124 *		ENTRY
125 *			mount point is locked
126 *		EXIT
127 *			mount point is locked
128 *
129 * NOTES:
130 *		A NULL path can be used for a flag since the mount
131 *		system call will fail with EFAULT in copyinstr in
132 *		namei() if it is a genuine NULL from the user.
133 */
134static int
135ffs_mount( mp, path, data, ndp, p)
136        struct mount		*mp;	/* mount struct pointer*/
137        char			*path;	/* path to mount point*/
138        caddr_t			data;	/* arguments to FS specific mount*/
139        struct nameidata	*ndp;	/* mount point credentials*/
140        struct proc		*p;	/* process requesting mount*/
141{
142	u_int		size;
143	int		err = 0;
144	struct vnode	*devvp;
145
146	struct ufs_args args;
147	struct ufsmount *ump = 0;
148	register struct fs *fs;
149	int flags;
150
151	/*
152	 * Use NULL path to flag a root mount
153	 */
154	if( path == NULL) {
155		/*
156		 ***
157		 * Mounting root file system
158		 ***
159		 */
160
161		/* Get vnode for root device*/
162		if ((err = bdevvp( rootdev, &rootvp))) {
163			printf("ffs_mountroot: can't setup bdevvp for root");
164			return (err);
165		}
166
167		/*
168		 * Attempt mount
169		 */
170		if( ( err = ffs_mountfs(rootvp, mp, p)) != 0) {
171			/* fs specific cleanup (if any)*/
172			goto error_1;
173		}
174
175		goto dostatfs;		/* success*/
176
177	}
178
179	/*
180	 ***
181	 * Mounting non-root file system or updating a file system
182	 ***
183	 */
184
185	/* copy in user arguments*/
186	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
187	if (err)
188		goto error_1;		/* can't get arguments*/
189
190	/*
191	 * If updating, check whether changing from read-only to
192	 * read/write; if there is no device name, that's all we do.
193	 */
194	if (mp->mnt_flag & MNT_UPDATE) {
195		ump = VFSTOUFS(mp);
196		fs = ump->um_fs;
197		err = 0;
198		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
199			flags = WRITECLOSE;
200			if (mp->mnt_flag & MNT_FORCE)
201				flags |= FORCECLOSE;
202			err = ffs_flushfiles(mp, flags, p);
203		}
204		if (!err && (mp->mnt_flag & MNT_RELOAD))
205			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
206		if (err) {
207			goto error_1;
208		}
209		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
210			if (!fs->fs_clean) {
211				if (mp->mnt_flag & MNT_FORCE) {
212					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
213				} else {
214					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
215					    fs->fs_fsmnt);
216					err = EPERM;
217					goto error_1;
218				}
219			}
220			fs->fs_ronly = 0;
221		}
222		if (fs->fs_ronly == 0) {
223			fs->fs_clean = 0;
224			ffs_sbupdate(ump, MNT_WAIT);
225		}
226		/* if not updating name...*/
227		if (args.fspec == 0) {
228			/*
229			 * Process export requests.  Jumping to "success"
230			 * will return the vfs_export() error code.
231			 */
232			err = vfs_export(mp, &ump->um_export, &args.export);
233			goto success;
234		}
235	}
236
237	/*
238	 * Not an update, or updating the name: look up the name
239	 * and verify that it refers to a sensible block device.
240	 */
241	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
242	err = namei(ndp);
243	if (err) {
244		/* can't get devvp!*/
245		goto error_1;
246	}
247
248	devvp = ndp->ni_vp;
249
250	if (devvp->v_type != VBLK) {
251		err = ENOTBLK;
252		goto error_2;
253	}
254	if (major(devvp->v_rdev) >= nblkdev) {
255		err = ENXIO;
256		goto error_2;
257	}
258	if (mp->mnt_flag & MNT_UPDATE) {
259		/*
260		 ********************
261		 * UPDATE
262		 ********************
263		 */
264
265		if (devvp != ump->um_devvp)
266			err = EINVAL;	/* needs translation */
267		else
268			vrele(devvp);
269		/*
270		 * Update device name only on success
271		 */
272		if( !err) {
273			/* Save "mounted from" info for mount point (NULL pad)*/
274			copyinstr(	args.fspec,
275					mp->mnt_stat.f_mntfromname,
276					MNAMELEN - 1,
277					&size);
278			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
279		}
280	} else {
281		/*
282		 ********************
283		 * NEW MOUNT
284		 ********************
285		 */
286
287		/*
288		 * Since this is a new mount, we want the names for
289		 * the device and the mount point copied in.  If an
290		 * error occurs,  the mountpoint is discarded by the
291		 * upper level code.
292		 */
293		/* Save "last mounted on" info for mount point (NULL pad)*/
294		copyinstr(	path,				/* mount point*/
295				mp->mnt_stat.f_mntonname,	/* save area*/
296				MNAMELEN - 1,			/* max size*/
297				&size);				/* real size*/
298		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
299
300		/* Save "mounted from" info for mount point (NULL pad)*/
301		copyinstr(	args.fspec,			/* device name*/
302				mp->mnt_stat.f_mntfromname,	/* save area*/
303				MNAMELEN - 1,			/* max size*/
304				&size);				/* real size*/
305		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
306
307		err = ffs_mountfs(devvp, mp, p);
308	}
309	if (err) {
310		goto error_2;
311	}
312
313dostatfs:
314	/*
315	 * Initialize FS stat information in mount struct; uses both
316	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
317	 *
318	 * This code is common to root and non-root mounts
319	 */
320	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
321
322	goto success;
323
324
325error_2:	/* error with devvp held*/
326
327	/* release devvp before failing*/
328	vrele(devvp);
329
330error_1:	/* no state to back out*/
331
332success:
333	return( err);
334}
335
336/*
337 * Reload all incore data for a filesystem (used after running fsck on
338 * the root filesystem and finding things to fix). The filesystem must
339 * be mounted read-only.
340 *
341 * Things to do to update the mount:
342 *	1) invalidate all cached meta-data.
343 *	2) re-read superblock from disk.
344 *	3) re-read summary information from disk.
345 *	4) invalidate all inactive vnodes.
346 *	5) invalidate all cached file data.
347 *	6) re-read inode data for all active vnodes.
348 */
349static int
350ffs_reload(mp, cred, p)
351	register struct mount *mp;
352	struct ucred *cred;
353	struct proc *p;
354{
355	register struct vnode *vp, *nvp, *devvp;
356	struct inode *ip;
357	struct csum *space;
358	struct buf *bp;
359	struct fs *fs, *newfs;
360	struct partinfo dpart;
361	int i, blks, size, error;
362	int32_t *lp;
363
364	if ((mp->mnt_flag & MNT_RDONLY) == 0)
365		return (EINVAL);
366	/*
367	 * Step 1: invalidate all cached meta-data.
368	 */
369	devvp = VFSTOUFS(mp)->um_devvp;
370	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
371		panic("ffs_reload: dirty1");
372	/*
373	 * Step 2: re-read superblock from disk.
374	 */
375	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
376		size = DEV_BSIZE;
377	else
378		size = dpart.disklab->d_secsize;
379	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
380		return (error);
381	newfs = (struct fs *)bp->b_data;
382	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
383		newfs->fs_bsize < sizeof(struct fs)) {
384			brelse(bp);
385			return (EIO);		/* XXX needs translation */
386	}
387	fs = VFSTOUFS(mp)->um_fs;
388	/*
389	 * Copy pointer fields back into superblock before copying in	XXX
390	 * new superblock. These should really be in the ufsmount.	XXX
391	 * Note that important parameters (eg fs_ncg) are unchanged.
392	 */
393	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
394	newfs->fs_maxcluster = fs->fs_maxcluster;
395	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
396	if (fs->fs_sbsize < SBSIZE)
397		bp->b_flags |= B_INVAL;
398	brelse(bp);
399	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
400	ffs_oldfscompat(fs);
401
402	/*
403	 * Step 3: re-read summary information from disk.
404	 */
405	blks = howmany(fs->fs_cssize, fs->fs_fsize);
406	space = fs->fs_csp[0];
407	for (i = 0; i < blks; i += fs->fs_frag) {
408		size = fs->fs_bsize;
409		if (i + fs->fs_frag > blks)
410			size = (blks - i) * fs->fs_fsize;
411		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
412		    NOCRED, &bp);
413		if (error)
414			return (error);
415		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
416		brelse(bp);
417	}
418	/*
419	 * We no longer know anything about clusters per cylinder group.
420	 */
421	if (fs->fs_contigsumsize > 0) {
422		lp = fs->fs_maxcluster;
423		for (i = 0; i < fs->fs_ncg; i++)
424			*lp++ = fs->fs_contigsumsize;
425	}
426
427loop:
428	simple_lock(&mntvnode_slock);
429	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
430		if (vp->v_mount != mp) {
431			simple_unlock(&mntvnode_slock);
432			goto loop;
433		}
434		nvp = vp->v_mntvnodes.le_next;
435		/*
436		 * Step 4: invalidate all inactive vnodes.
437		 */
438		if (vrecycle(vp, &mntvnode_slock, p))
439			goto loop;
440		/*
441		 * Step 5: invalidate all cached file data.
442		 */
443		simple_lock(&vp->v_interlock);
444		simple_unlock(&mntvnode_slock);
445		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
446			goto loop;
447		}
448		if (vinvalbuf(vp, 0, cred, p, 0, 0))
449			panic("ffs_reload: dirty2");
450		/*
451		 * Step 6: re-read inode data for all active vnodes.
452		 */
453		ip = VTOI(vp);
454		error =
455		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
456		    (int)fs->fs_bsize, NOCRED, &bp);
457		if (error) {
458			vput(vp);
459			return (error);
460		}
461		ip->i_din = *((struct dinode *)bp->b_data +
462		    ino_to_fsbo(fs, ip->i_number));
463		brelse(bp);
464		vput(vp);
465		simple_lock(&mntvnode_slock);
466	}
467	simple_unlock(&mntvnode_slock);
468	return (0);
469}
470
471/*
472 * Common code for mount and mountroot
473 */
474int
475ffs_mountfs(devvp, mp, p)
476	register struct vnode *devvp;
477	struct mount *mp;
478	struct proc *p;
479{
480	register struct ufsmount *ump;
481	struct buf *bp;
482	register struct fs *fs;
483	dev_t dev;
484	struct partinfo dpart;
485	caddr_t base, space;
486	int error, i, blks, size, ronly;
487	int32_t *lp;
488	struct ucred *cred;
489	u_int64_t maxfilesize;					/* XXX */
490	u_int strsize;
491	int ncount;
492
493	dev = devvp->v_rdev;
494	cred = p ? p->p_ucred : NOCRED;
495	/*
496	 * Disallow multiple mounts of the same device.
497	 * Disallow mounting of a device that is currently in use
498	 * (except for root, which might share swap device for miniroot).
499	 * Flush out any old buffers remaining from a previous use.
500	 */
501	error = vfs_mountedon(devvp);
502	if (error)
503		return (error);
504	ncount = vcount(devvp);
505	if (devvp->v_object)
506		ncount -= 1;
507	if (ncount > 1 && devvp != rootvp)
508		return (EBUSY);
509	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
510		return (error);
511
512	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
513	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
514	if (error)
515		return (error);
516	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
517		size = DEV_BSIZE;
518	else
519		size = dpart.disklab->d_secsize;
520
521	bp = NULL;
522	ump = NULL;
523	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, cred, &bp))
524		goto out;
525	fs = (struct fs *)bp->b_data;
526	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
527	    fs->fs_bsize < sizeof(struct fs)) {
528		error = EINVAL;		/* XXX needs translation */
529		goto out;
530	}
531	fs->fs_fmod = 0;
532	if (!fs->fs_clean) {
533		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
534			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
535		} else {
536			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
537			error = EPERM;
538			goto out;
539		}
540	}
541	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
542	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
543		error = EROFS;          /* needs translation */
544		goto out;
545	}
546	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
547	bzero((caddr_t)ump, sizeof *ump);
548	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
549	    M_WAITOK);
550	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
551	if (fs->fs_sbsize < SBSIZE)
552		bp->b_flags |= B_INVAL;
553	brelse(bp);
554	bp = NULL;
555	fs = ump->um_fs;
556	fs->fs_ronly = ronly;
557	if (ronly == 0) {
558		fs->fs_fmod = 1;
559		fs->fs_clean = 0;
560	}
561	size = fs->fs_cssize;
562	blks = howmany(size, fs->fs_fsize);
563	if (fs->fs_contigsumsize > 0)
564		size += fs->fs_ncg * sizeof(int32_t);
565	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
566	for (i = 0; i < blks; i += fs->fs_frag) {
567		size = fs->fs_bsize;
568		if (i + fs->fs_frag > blks)
569			size = (blks - i) * fs->fs_fsize;
570		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
571		    cred, &bp)) {
572			free(base, M_UFSMNT);
573			goto out;
574		}
575		bcopy(bp->b_data, space, (u_int)size);
576		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
577		space += size;
578		brelse(bp);
579		bp = NULL;
580	}
581	if (fs->fs_contigsumsize > 0) {
582		fs->fs_maxcluster = lp = (int32_t *)space;
583		for (i = 0; i < fs->fs_ncg; i++)
584			*lp++ = fs->fs_contigsumsize;
585	}
586	mp->mnt_data = (qaddr_t)ump;
587	mp->mnt_stat.f_fsid.val[0] = (long)dev;
588	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
589	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
590	ump->um_mountp = mp;
591	ump->um_dev = dev;
592	ump->um_devvp = devvp;
593	ump->um_nindir = fs->fs_nindir;
594	ump->um_bptrtodb = fs->fs_fsbtodb;
595	ump->um_seqinc = fs->fs_frag;
596	for (i = 0; i < MAXQUOTAS; i++)
597		ump->um_quotas[i] = NULLVP;
598	devvp->v_specflags |= SI_MOUNTEDON;
599	ffs_oldfscompat(fs);
600
601	/*
602	 * Set FS local "last mounted on" information (NULL pad)
603	 */
604	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
605			fs->fs_fsmnt,			/* copy area*/
606			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
607			&strsize);			/* real size*/
608	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
609
610	if( mp->mnt_flag & MNT_ROOTFS) {
611		/*
612		 * Root mount; update timestamp in mount structure.
613		 * this will be used by the common root mount code
614		 * to update the system clock.
615		 */
616		mp->mnt_time = fs->fs_time;
617	}
618
619	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
620	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
621	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
622		fs->fs_maxfilesize = maxfilesize;		/* XXX */
623	if (ronly == 0) {
624		fs->fs_clean = 0;
625		(void) ffs_sbupdate(ump, MNT_WAIT);
626	}
627	/*
628	 * Only VMIO the backing device if the backing device is a real
629	 * block device.  This excludes the original MFS implementation.
630	 * Note that it is optional that the backing device be VMIOed.  This
631	 * increases the opportunity for metadata caching.
632	 */
633	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
634		vfs_object_create(devvp, p, p->p_ucred, 0);
635	}
636	return (0);
637out:
638	if (bp)
639		brelse(bp);
640	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
641	if (ump) {
642		free(ump->um_fs, M_UFSMNT);
643		free(ump, M_UFSMNT);
644		mp->mnt_data = (qaddr_t)0;
645	}
646	return (error);
647}
648
649/*
650 * Sanity checks for old file systems.
651 *
652 * XXX - goes away some day.
653 */
654static int
655ffs_oldfscompat(fs)
656	struct fs *fs;
657{
658
659	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
660	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
661	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
662		fs->fs_nrpos = 8;				/* XXX */
663	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
664#if 0
665		int i;						/* XXX */
666		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
667								/* XXX */
668		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
669		for (i = 0; i < NIADDR; i++) {			/* XXX */
670			sizepb *= NINDIR(fs);			/* XXX */
671			fs->fs_maxfilesize += sizepb;		/* XXX */
672		}						/* XXX */
673#endif
674		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
675		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
676		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
677	}							/* XXX */
678	return (0);
679}
680
681/*
682 * unmount system call
683 */
684int
685ffs_unmount(mp, mntflags, p)
686	struct mount *mp;
687	int mntflags;
688	struct proc *p;
689{
690	register struct ufsmount *ump;
691	register struct fs *fs;
692	int error, flags;
693
694	flags = 0;
695	if (mntflags & MNT_FORCE) {
696		flags |= FORCECLOSE;
697	}
698	error = ffs_flushfiles(mp, flags, p);
699	if (error)
700		return (error);
701	ump = VFSTOUFS(mp);
702	fs = ump->um_fs;
703	if (fs->fs_ronly == 0) {
704		fs->fs_clean = 1;
705		error = ffs_sbupdate(ump, MNT_WAIT);
706		if (error) {
707			fs->fs_clean = 0;
708			return (error);
709		}
710	}
711	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
712
713	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
714	vnode_pager_uncache(ump->um_devvp, p);
715	VOP_UNLOCK(ump->um_devvp, 0, p);
716
717	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
718		NOCRED, p);
719
720	vrele(ump->um_devvp);
721
722	free(fs->fs_csp[0], M_UFSMNT);
723	free(fs, M_UFSMNT);
724	free(ump, M_UFSMNT);
725	mp->mnt_data = (qaddr_t)0;
726	return (error);
727}
728
729/*
730 * Flush out all the files in a filesystem.
731 */
732int
733ffs_flushfiles(mp, flags, p)
734	register struct mount *mp;
735	int flags;
736	struct proc *p;
737{
738	register struct ufsmount *ump;
739	int error;
740
741	ump = VFSTOUFS(mp);
742#ifdef QUOTA
743	if (mp->mnt_flag & MNT_QUOTA) {
744		int i;
745		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
746		if (error)
747			return (error);
748		for (i = 0; i < MAXQUOTAS; i++) {
749			if (ump->um_quotas[i] == NULLVP)
750				continue;
751			quotaoff(p, mp, i);
752		}
753		/*
754		 * Here we fall through to vflush again to ensure
755		 * that we have gotten rid of all the system vnodes.
756		 */
757	}
758#endif
759	error = vflush(mp, NULLVP, flags);
760	return (error);
761}
762
763/*
764 * Get file system statistics.
765 */
766int
767ffs_statfs(mp, sbp, p)
768	struct mount *mp;
769	register struct statfs *sbp;
770	struct proc *p;
771{
772	register struct ufsmount *ump;
773	register struct fs *fs;
774
775	ump = VFSTOUFS(mp);
776	fs = ump->um_fs;
777	if (fs->fs_magic != FS_MAGIC)
778		panic("ffs_statfs");
779	sbp->f_bsize = fs->fs_fsize;
780	sbp->f_iosize = fs->fs_bsize;
781	sbp->f_blocks = fs->fs_dsize;
782	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
783		fs->fs_cstotal.cs_nffree;
784	sbp->f_bavail = freespace(fs, fs->fs_minfree);
785	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
786	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
787	if (sbp != &mp->mnt_stat) {
788		sbp->f_type = mp->mnt_vfc->vfc_typenum;
789		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
790			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
791		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
792			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
793	}
794	return (0);
795}
796
797/*
798 * Go through the disk queues to initiate sandbagged IO;
799 * go through the inodes to write those that have been modified;
800 * initiate the writing of the super block if it has been modified.
801 *
802 * Note: we are always called with the filesystem marked `MPBUSY'.
803 */
804int
805ffs_sync(mp, waitfor, cred, p)
806	struct mount *mp;
807	int waitfor;
808	struct ucred *cred;
809	struct proc *p;
810{
811	struct vnode *nvp, *vp;
812	struct inode *ip;
813	struct ufsmount *ump = VFSTOUFS(mp);
814	struct fs *fs;
815	struct timeval tv;
816	int error, allerror = 0;
817
818	fs = ump->um_fs;
819	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
820		printf("fs = %s\n", fs->fs_fsmnt);
821		panic("ffs_sync: rofs mod");
822	}
823	/*
824	 * Write back each (modified) inode.
825	 */
826	simple_lock(&mntvnode_slock);
827loop:
828	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
829		/*
830		 * If the vnode that we are about to sync is no longer
831		 * associated with this mount point, start over.
832		 */
833		if (vp->v_mount != mp)
834			goto loop;
835		simple_lock(&vp->v_interlock);
836		nvp = vp->v_mntvnodes.le_next;
837		ip = VTOI(vp);
838		if (((ip->i_flag &
839		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
840		    vp->v_dirtyblkhd.lh_first == NULL) {
841			simple_unlock(&vp->v_interlock);
842			continue;
843		}
844		if (vp->v_type != VCHR) {
845			simple_unlock(&mntvnode_slock);
846			error =
847			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
848			if (error) {
849				simple_lock(&mntvnode_slock);
850				if (error == ENOENT)
851					goto loop;
852				continue;
853			}
854			if (error = VOP_FSYNC(vp, cred, waitfor, p))
855				allerror = error;
856			VOP_UNLOCK(vp, 0, p);
857			vrele(vp);
858			simple_lock(&mntvnode_slock);
859		} else {
860			simple_unlock(&mntvnode_slock);
861			simple_unlock(&vp->v_interlock);
862			tv = time;
863			/* VOP_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
864			VOP_UPDATE(vp, &tv, &tv, 0);
865			simple_lock(&mntvnode_slock);
866		}
867	}
868	simple_unlock(&mntvnode_slock);
869	/*
870	 * Force stale file system control information to be flushed.
871	 */
872	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
873	if (error)
874		allerror = error;
875#ifdef QUOTA
876	qsync(mp);
877#endif
878	/*
879	 * Write back modified superblock.
880	 */
881	if (fs->fs_fmod != 0) {
882		fs->fs_fmod = 0;
883		fs->fs_time = time.tv_sec;
884		if (error = ffs_sbupdate(ump, waitfor))
885			allerror = error;
886	}
887	return (allerror);
888}
889
890/*
891 * Look up a FFS dinode number to find its incore vnode, otherwise read it
892 * in from disk.  If it is in core, wait for the lock bit to clear, then
893 * return the inode locked.  Detection and handling of mount points must be
894 * done by the calling routine.
895 */
896static int ffs_inode_hash_lock;
897
898int
899ffs_vget(mp, ino, vpp)
900	struct mount *mp;
901	ino_t ino;
902	struct vnode **vpp;
903{
904	struct fs *fs;
905	struct inode *ip;
906	struct ufsmount *ump;
907	struct buf *bp;
908	struct vnode *vp;
909	dev_t dev;
910	int type, error;
911
912	ump = VFSTOUFS(mp);
913	dev = ump->um_dev;
914restart:
915	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
916		return (0);
917
918	/*
919	 * Lock out the creation of new entries in the FFS hash table in
920	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
921	 * may occur!
922	 */
923	if (ffs_inode_hash_lock) {
924		while (ffs_inode_hash_lock) {
925			ffs_inode_hash_lock = -1;
926			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
927		}
928		goto restart;
929	}
930	ffs_inode_hash_lock = 1;
931
932	/*
933	 * If this MALLOC() is performed after the getnewvnode()
934	 * it might block, leaving a vnode with a NULL v_data to be
935	 * found by ffs_sync() if a sync happens to fire right then,
936	 * which will cause a panic because ffs_sync() blindly
937	 * dereferences vp->v_data (as well it should).
938	 */
939	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
940	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
941
942	/* Allocate a new vnode/inode. */
943	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
944	if (error) {
945		if (ffs_inode_hash_lock < 0)
946			wakeup(&ffs_inode_hash_lock);
947		ffs_inode_hash_lock = 0;
948		*vpp = NULL;
949		FREE(ip, type);
950		return (error);
951	}
952	bzero((caddr_t)ip, sizeof(struct inode));
953	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
954	vp->v_data = ip;
955	ip->i_vnode = vp;
956	ip->i_fs = fs = ump->um_fs;
957	ip->i_dev = dev;
958	ip->i_number = ino;
959#ifdef QUOTA
960	{
961		int i;
962		for (i = 0; i < MAXQUOTAS; i++)
963			ip->i_dquot[i] = NODQUOT;
964	}
965#endif
966	/*
967	 * Put it onto its hash chain and lock it so that other requests for
968	 * this inode will block if they arrive while we are sleeping waiting
969	 * for old data structures to be purged or for the contents of the
970	 * disk portion of this inode to be read.
971	 */
972	ufs_ihashins(ip);
973
974	if (ffs_inode_hash_lock < 0)
975		wakeup(&ffs_inode_hash_lock);
976	ffs_inode_hash_lock = 0;
977
978	/* Read in the disk contents for the inode, copy into the inode. */
979	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
980	    (int)fs->fs_bsize, NOCRED, &bp);
981	if (error) {
982		/*
983		 * The inode does not contain anything useful, so it would
984		 * be misleading to leave it on its hash chain. With mode
985		 * still zero, it will be unlinked and returned to the free
986		 * list by vput().
987		 */
988		brelse(bp);
989		vput(vp);
990		*vpp = NULL;
991		return (error);
992	}
993	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
994	bqrelse(bp);
995
996	/*
997	 * Initialize the vnode from the inode, check for aliases.
998	 * Note that the underlying vnode may have changed.
999	 */
1000	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1001	if (error) {
1002		vput(vp);
1003		*vpp = NULL;
1004		return (error);
1005	}
1006	/*
1007	 * Finish inode initialization now that aliasing has been resolved.
1008	 */
1009	ip->i_devvp = ump->um_devvp;
1010	VREF(ip->i_devvp);
1011	/*
1012	 * Set up a generation number for this inode if it does not
1013	 * already have one. This should only happen on old filesystems.
1014	 */
1015	if (ip->i_gen == 0) {
1016		if (++nextgennumber < (u_long)time.tv_sec)
1017			nextgennumber = time.tv_sec;
1018		ip->i_gen = nextgennumber;
1019		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1020			ip->i_flag |= IN_MODIFIED;
1021	}
1022	/*
1023	 * Ensure that uid and gid are correct. This is a temporary
1024	 * fix until fsck has been changed to do the update.
1025	 */
1026	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1027		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1028		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1029	}						/* XXX */
1030
1031	*vpp = vp;
1032	return (0);
1033}
1034
1035/*
1036 * File handle to vnode
1037 *
1038 * Have to be really careful about stale file handles:
1039 * - check that the inode number is valid
1040 * - call ffs_vget() to get the locked inode
1041 * - check for an unallocated inode (i_mode == 0)
1042 * - check that the given client host has export rights and return
1043 *   those rights via. exflagsp and credanonp
1044 */
1045int
1046ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1047	register struct mount *mp;
1048	struct fid *fhp;
1049	struct mbuf *nam;
1050	struct vnode **vpp;
1051	int *exflagsp;
1052	struct ucred **credanonp;
1053{
1054	register struct ufid *ufhp;
1055	struct fs *fs;
1056
1057	ufhp = (struct ufid *)fhp;
1058	fs = VFSTOUFS(mp)->um_fs;
1059	if (ufhp->ufid_ino < ROOTINO ||
1060	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1061		return (ESTALE);
1062	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1063}
1064
1065/*
1066 * Vnode pointer to File handle
1067 */
1068/* ARGSUSED */
1069int
1070ffs_vptofh(vp, fhp)
1071	struct vnode *vp;
1072	struct fid *fhp;
1073{
1074	register struct inode *ip;
1075	register struct ufid *ufhp;
1076
1077	ip = VTOI(vp);
1078	ufhp = (struct ufid *)fhp;
1079	ufhp->ufid_len = sizeof(struct ufid);
1080	ufhp->ufid_ino = ip->i_number;
1081	ufhp->ufid_gen = ip->i_gen;
1082	return (0);
1083}
1084
/*
 * Initialize the filesystem.  FFS has nothing of its own to set up,
 * so this hands vfsp to ufs_init() and returns its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1095
1096/*
1097 * Write a superblock and associated information back to disk.
1098 */
1099static int
1100ffs_sbupdate(mp, waitfor)
1101	struct ufsmount *mp;
1102	int waitfor;
1103{
1104	register struct fs *dfs, *fs = mp->um_fs;
1105	register struct buf *bp;
1106	int blks;
1107	caddr_t space;
1108	int i, size, error, allerror = 0;
1109
1110	/*
1111	 * First write back the summary information.
1112	 */
1113	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1114	space = (caddr_t)fs->fs_csp[0];
1115	for (i = 0; i < blks; i += fs->fs_frag) {
1116		size = fs->fs_bsize;
1117		if (i + fs->fs_frag > blks)
1118			size = (blks - i) * fs->fs_fsize;
1119		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1120		    size, 0, 0);
1121		bcopy(space, bp->b_data, (u_int)size);
1122		space += size;
1123		if (waitfor != MNT_WAIT)
1124			bawrite(bp);
1125		else if (error = bwrite(bp))
1126			allerror = error;
1127	}
1128	/*
1129	 * Now write back the superblock itself. If any errors occurred
1130	 * up to this point, then fail so that the superblock avoids
1131	 * being written out as clean.
1132	 */
1133	if (allerror)
1134		return (allerror);
1135	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1136	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1137	/* Restore compatibility to old file systems.		   XXX */
1138	dfs = (struct fs *)bp->b_data;				/* XXX */
1139	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1140		dfs->fs_nrpos = -1;				/* XXX */
1141	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1142		int32_t *lp, tmp;				/* XXX */
1143								/* XXX */
1144		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1145		tmp = lp[4];					/* XXX */
1146		for (i = 4; i > 0; i--)				/* XXX */
1147			lp[i] = lp[i-1];			/* XXX */
1148		lp[0] = tmp;					/* XXX */
1149	}							/* XXX */
1150	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1151	if (waitfor != MNT_WAIT)
1152		bawrite(bp);
1153	else if (error = bwrite(bp))
1154		allerror = error;
1155	return (allerror);
1156}
1157