ffs_vfsops.c revision 32071
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $Id: ffs_vfsops.c,v 1.62 1997/11/12 05:42:25 julian Exp $
35 */
36
37#include "opt_quota.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/vnode.h>
45#include <sys/mount.h>
46#include <sys/buf.h>
47#include <sys/conf.h>
48#include <sys/fcntl.h>
49#include <sys/disklabel.h>
50#include <sys/malloc.h>
51
52#include <miscfs/specfs/specdev.h>
53
54#include <ufs/ufs/quota.h>
55#include <ufs/ufs/ufsmount.h>
56#include <ufs/ufs/inode.h>
57#include <ufs/ufs/ufs_extern.h>
58
59#include <ufs/ffs/fs.h>
60#include <ufs/ffs/ffs_extern.h>
61
62#include <vm/vm.h>
63#include <vm/vm_prot.h>
64#include <vm/vm_page.h>
65#include <vm/vm_extern.h>
66
/*
 * Malloc type used for the per-vnode private data of FFS.  It is handed to
 * ffs_mountfs() by ffs_mount(), recorded in the ufsmount, and later used by
 * ffs_vget() when allocating in-core inodes.
 */
static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

/* Forward declarations of the file-local routines defined below. */
static int	ffs_sbupdate __P((struct ufsmount *, int));
static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
static int	ffs_oldfscompat __P((struct fs *));
static int	ffs_mount __P((struct mount *, char *, caddr_t,
				struct nameidata *, struct proc *));
static int	ffs_init __P((struct vfsconf *));

/*
 * VFS operations vector for this file system.  The initializer is
 * positional, so the order of the entries has to follow the member order
 * of struct vfsops (declared outside this file).  The ffs_* entries are
 * implemented in this file; the ufs_* entries are implemented elsewhere.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
};

/*
 * Hook the vector up under the name "ufs" / type MOUNT_UFS.
 * NOTE(review): the exact effect of VFS_SET is defined by its macro
 * definition, which is not part of this file.
 */
VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
91
92/*
93 * ffs_mount
94 *
95 * Called when mounting local physical media
96 *
97 * PARAMETERS:
98 *		mountroot
99 *			mp	mount point structure
100 *			path	NULL (flag for root mount!!!)
101 *			data	<unused>
102 *			ndp	<unused>
103 *			p	process (user credentials check [statfs])
104 *
105 *		mount
106 *			mp	mount point structure
107 *			path	path to mount point
108 *			data	pointer to argument struct in user space
109 *			ndp	mount point namei() return (used for
110 *				credentials on reload), reused to look
111 *				up block device.
112 *			p	process (user credentials check)
113 *
114 * RETURNS:	0	Success
115 *		!0	error number (errno.h)
116 *
117 * LOCK STATE:
118 *
119 *		ENTRY
120 *			mount point is locked
121 *		EXIT
122 *			mount point is locked
123 *
124 * NOTES:
125 *		A NULL path can be used for a flag since the mount
126 *		system call will fail with EFAULT in copyinstr in
127 *		namei() if it is a genuine NULL from the user.
128 */
129static int
130ffs_mount( mp, path, data, ndp, p)
131        struct mount		*mp;	/* mount struct pointer*/
132        char			*path;	/* path to mount point*/
133        caddr_t			data;	/* arguments to FS specific mount*/
134        struct nameidata	*ndp;	/* mount point credentials*/
135        struct proc		*p;	/* process requesting mount*/
136{
137	u_int		size;
138	int		err = 0;
139	struct vnode	*devvp;
140
141	struct ufs_args args;
142	struct ufsmount *ump = 0;
143	register struct fs *fs;
144	int flags;
145
146	/*
147	 * Use NULL path to flag a root mount
148	 */
149	if( path == NULL) {
150		/*
151		 ***
152		 * Mounting root file system
153		 ***
154		 */
155
156		if ((err = bdevvp(rootdev, &rootvp))) {
157			printf("ffs_mountroot: can't find rootvp");
158			return (err);
159		}
160
161		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR)
162			mp->mnt_flag |= MNT_NOCLUSTERR;
163		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW)
164			mp->mnt_flag |= MNT_NOCLUSTERW;
165		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
166			/* fs specific cleanup (if any)*/
167			goto error_1;
168		}
169
170		goto dostatfs;		/* success*/
171
172	}
173
174	/*
175	 ***
176	 * Mounting non-root file system or updating a file system
177	 ***
178	 */
179
180	/* copy in user arguments*/
181	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
182	if (err)
183		goto error_1;		/* can't get arguments*/
184
185	/*
186	 * If updating, check whether changing from read-only to
187	 * read/write; if there is no device name, that's all we do.
188	 * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags,
189	 * if block device requests.
190	 */
191	if (mp->mnt_flag & MNT_UPDATE) {
192		ump = VFSTOUFS(mp);
193		fs = ump->um_fs;
194		err = 0;
195		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR)
196			mp->mnt_flag |= MNT_NOCLUSTERR;
197		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW)
198			mp->mnt_flag |= MNT_NOCLUSTERW;
199		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
200			flags = WRITECLOSE;
201			if (mp->mnt_flag & MNT_FORCE)
202				flags |= FORCECLOSE;
203			err = ffs_flushfiles(mp, flags, p);
204		}
205		if (!err && (mp->mnt_flag & MNT_RELOAD))
206			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
207		if (err) {
208			goto error_1;
209		}
210		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
211			if (!fs->fs_clean) {
212				if (mp->mnt_flag & MNT_FORCE) {
213					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
214				} else {
215					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
216					    fs->fs_fsmnt);
217					err = EPERM;
218					goto error_1;
219				}
220			}
221			fs->fs_ronly = 0;
222		}
223		if (fs->fs_ronly == 0) {
224			fs->fs_clean = 0;
225			ffs_sbupdate(ump, MNT_WAIT);
226		}
227		/* if not updating name...*/
228		if (args.fspec == 0) {
229			/*
230			 * Process export requests.  Jumping to "success"
231			 * will return the vfs_export() error code.
232			 */
233			err = vfs_export(mp, &ump->um_export, &args.export);
234			goto success;
235		}
236	}
237
238	/*
239	 * Not an update, or updating the name: look up the name
240	 * and verify that it refers to a sensible block device.
241	 */
242	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
243	err = namei(ndp);
244	if (err) {
245		/* can't get devvp!*/
246		goto error_1;
247	}
248
249	devvp = ndp->ni_vp;
250
251	if (devvp->v_type != VBLK) {
252		err = ENOTBLK;
253		goto error_2;
254	}
255	if (major(devvp->v_rdev) >= nblkdev) {
256		err = ENXIO;
257		goto error_2;
258	}
259	if (mp->mnt_flag & MNT_UPDATE) {
260		/*
261		 ********************
262		 * UPDATE
263		 ********************
264		 */
265
266		if (devvp != ump->um_devvp)
267			err = EINVAL;	/* needs translation */
268		else
269			vrele(devvp);
270		/*
271		 * Update device name only on success
272		 */
273		if( !err) {
274			/* Save "mounted from" info for mount point (NULL pad)*/
275			copyinstr(	args.fspec,
276					mp->mnt_stat.f_mntfromname,
277					MNAMELEN - 1,
278					&size);
279			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
280		}
281	} else {
282		/*
283		 ********************
284		 * NEW MOUNT
285		 ********************
286		 */
287
288		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR)
289			mp->mnt_flag |= MNT_NOCLUSTERR;
290		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW)
291			mp->mnt_flag |= MNT_NOCLUSTERW;
292
293		/*
294		 * Since this is a new mount, we want the names for
295		 * the device and the mount point copied in.  If an
296		 * error occurs,  the mountpoint is discarded by the
297		 * upper level code.
298		 */
299		/* Save "last mounted on" info for mount point (NULL pad)*/
300		copyinstr(	path,				/* mount point*/
301				mp->mnt_stat.f_mntonname,	/* save area*/
302				MNAMELEN - 1,			/* max size*/
303				&size);				/* real size*/
304		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
305
306		/* Save "mounted from" info for mount point (NULL pad)*/
307		copyinstr(	args.fspec,			/* device name*/
308				mp->mnt_stat.f_mntfromname,	/* save area*/
309				MNAMELEN - 1,			/* max size*/
310				&size);				/* real size*/
311		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
312
313		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
314	}
315	if (err) {
316		goto error_2;
317	}
318
319dostatfs:
320	/*
321	 * Initialize FS stat information in mount struct; uses both
322	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
323	 *
324	 * This code is common to root and non-root mounts
325	 */
326	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
327
328	goto success;
329
330
331error_2:	/* error with devvp held*/
332
333	/* release devvp before failing*/
334	vrele(devvp);
335
336error_1:	/* no state to back out*/
337
338success:
339	return( err);
340}
341
342/*
343 * Reload all incore data for a filesystem (used after running fsck on
344 * the root filesystem and finding things to fix). The filesystem must
345 * be mounted read-only.
346 *
347 * Things to do to update the mount:
348 *	1) invalidate all cached meta-data.
349 *	2) re-read superblock from disk.
350 *	3) re-read summary information from disk.
351 *	4) invalidate all inactive vnodes.
352 *	5) invalidate all cached file data.
353 *	6) re-read inode data for all active vnodes.
354 */
355static int
356ffs_reload(mp, cred, p)
357	register struct mount *mp;
358	struct ucred *cred;
359	struct proc *p;
360{
361	register struct vnode *vp, *nvp, *devvp;
362	struct inode *ip;
363	struct csum *space;
364	struct buf *bp;
365	struct fs *fs, *newfs;
366	struct partinfo dpart;
367	int i, blks, size, error;
368	int32_t *lp;
369
370	if ((mp->mnt_flag & MNT_RDONLY) == 0)
371		return (EINVAL);
372	/*
373	 * Step 1: invalidate all cached meta-data.
374	 */
375	devvp = VFSTOUFS(mp)->um_devvp;
376	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
377		panic("ffs_reload: dirty1");
378	/*
379	 * Step 2: re-read superblock from disk.
380	 */
381	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
382		size = DEV_BSIZE;
383	else
384		size = dpart.disklab->d_secsize;
385	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
386		return (error);
387	newfs = (struct fs *)bp->b_data;
388	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
389		newfs->fs_bsize < sizeof(struct fs)) {
390			brelse(bp);
391			return (EIO);		/* XXX needs translation */
392	}
393	fs = VFSTOUFS(mp)->um_fs;
394	/*
395	 * Copy pointer fields back into superblock before copying in	XXX
396	 * new superblock. These should really be in the ufsmount.	XXX
397	 * Note that important parameters (eg fs_ncg) are unchanged.
398	 */
399	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
400	newfs->fs_maxcluster = fs->fs_maxcluster;
401	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
402	if (fs->fs_sbsize < SBSIZE)
403		bp->b_flags |= B_INVAL;
404	brelse(bp);
405	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
406	ffs_oldfscompat(fs);
407
408	/*
409	 * Step 3: re-read summary information from disk.
410	 */
411	blks = howmany(fs->fs_cssize, fs->fs_fsize);
412	space = fs->fs_csp[0];
413	for (i = 0; i < blks; i += fs->fs_frag) {
414		size = fs->fs_bsize;
415		if (i + fs->fs_frag > blks)
416			size = (blks - i) * fs->fs_fsize;
417		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
418		    NOCRED, &bp);
419		if (error)
420			return (error);
421		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
422		brelse(bp);
423	}
424	/*
425	 * We no longer know anything about clusters per cylinder group.
426	 */
427	if (fs->fs_contigsumsize > 0) {
428		lp = fs->fs_maxcluster;
429		for (i = 0; i < fs->fs_ncg; i++)
430			*lp++ = fs->fs_contigsumsize;
431	}
432
433loop:
434	simple_lock(&mntvnode_slock);
435	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
436		if (vp->v_mount != mp) {
437			simple_unlock(&mntvnode_slock);
438			goto loop;
439		}
440		nvp = vp->v_mntvnodes.le_next;
441		/*
442		 * Step 4: invalidate all inactive vnodes.
443		 */
444		if (vrecycle(vp, &mntvnode_slock, p))
445			goto loop;
446		/*
447		 * Step 5: invalidate all cached file data.
448		 */
449		simple_lock(&vp->v_interlock);
450		simple_unlock(&mntvnode_slock);
451		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
452			goto loop;
453		}
454		if (vinvalbuf(vp, 0, cred, p, 0, 0))
455			panic("ffs_reload: dirty2");
456		/*
457		 * Step 6: re-read inode data for all active vnodes.
458		 */
459		ip = VTOI(vp);
460		error =
461		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
462		    (int)fs->fs_bsize, NOCRED, &bp);
463		if (error) {
464			vput(vp);
465			return (error);
466		}
467		ip->i_din = *((struct dinode *)bp->b_data +
468		    ino_to_fsbo(fs, ip->i_number));
469		brelse(bp);
470		vput(vp);
471		simple_lock(&mntvnode_slock);
472	}
473	simple_unlock(&mntvnode_slock);
474	return (0);
475}
476
477/*
478 * Common code for mount and mountroot
479 */
480int
481ffs_mountfs(devvp, mp, p, malloctype)
482	register struct vnode *devvp;
483	struct mount *mp;
484	struct proc *p;
485	struct malloc_type *malloctype;
486{
487	register struct ufsmount *ump;
488	struct buf *bp;
489	register struct fs *fs;
490	dev_t dev;
491	struct partinfo dpart;
492	caddr_t base, space;
493	int error, i, blks, size, ronly;
494	int32_t *lp;
495	struct ucred *cred;
496	u_int64_t maxfilesize;					/* XXX */
497	u_int strsize;
498	int ncount;
499
500	dev = devvp->v_rdev;
501	cred = p ? p->p_ucred : NOCRED;
502	/*
503	 * Disallow multiple mounts of the same device.
504	 * Disallow mounting of a device that is currently in use
505	 * (except for root, which might share swap device for miniroot).
506	 * Flush out any old buffers remaining from a previous use.
507	 */
508	error = vfs_mountedon(devvp);
509	if (error)
510		return (error);
511	ncount = vcount(devvp);
512	if (devvp->v_object)
513		ncount -= 1;
514	if (ncount > 1 && devvp != rootvp)
515		return (EBUSY);
516	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
517		return (error);
518
519	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
520	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
521	if (error)
522		return (error);
523	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
524		size = DEV_BSIZE;
525	else
526		size = dpart.disklab->d_secsize;
527
528	bp = NULL;
529	ump = NULL;
530	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
531		goto out;
532	fs = (struct fs *)bp->b_data;
533	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
534	    fs->fs_bsize < sizeof(struct fs)) {
535		error = EINVAL;		/* XXX needs translation */
536		goto out;
537	}
538	fs->fs_fmod = 0;
539	if (!fs->fs_clean) {
540		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
541			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
542		} else {
543			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
544			error = EPERM;
545			goto out;
546		}
547	}
548	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
549	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
550		error = EROFS;          /* needs translation */
551		goto out;
552	}
553	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
554	bzero((caddr_t)ump, sizeof *ump);
555	ump->um_malloctype = malloctype;
556	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
557	    M_WAITOK);
558	ump->um_blkatoff = ffs_blkatoff;
559	ump->um_truncate = ffs_truncate;
560	ump->um_update = ffs_update;
561	ump->um_valloc = ffs_valloc;
562	ump->um_vfree = ffs_vfree;
563	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
564	if (fs->fs_sbsize < SBSIZE)
565		bp->b_flags |= B_INVAL;
566	brelse(bp);
567	bp = NULL;
568	fs = ump->um_fs;
569	fs->fs_ronly = ronly;
570	if (ronly == 0) {
571		fs->fs_fmod = 1;
572		fs->fs_clean = 0;
573	}
574	size = fs->fs_cssize;
575	blks = howmany(size, fs->fs_fsize);
576	if (fs->fs_contigsumsize > 0)
577		size += fs->fs_ncg * sizeof(int32_t);
578	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
579	for (i = 0; i < blks; i += fs->fs_frag) {
580		size = fs->fs_bsize;
581		if (i + fs->fs_frag > blks)
582			size = (blks - i) * fs->fs_fsize;
583		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
584		    cred, &bp)) {
585			free(base, M_UFSMNT);
586			goto out;
587		}
588		bcopy(bp->b_data, space, (u_int)size);
589		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
590		space += size;
591		brelse(bp);
592		bp = NULL;
593	}
594	if (fs->fs_contigsumsize > 0) {
595		fs->fs_maxcluster = lp = (int32_t *)space;
596		for (i = 0; i < fs->fs_ncg; i++)
597			*lp++ = fs->fs_contigsumsize;
598	}
599	mp->mnt_data = (qaddr_t)ump;
600	mp->mnt_stat.f_fsid.val[0] = (long)dev;
601	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
602		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
603	else
604		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
605	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
606	mp->mnt_flag |= MNT_LOCAL;
607	ump->um_mountp = mp;
608	ump->um_dev = dev;
609	ump->um_devvp = devvp;
610	ump->um_nindir = fs->fs_nindir;
611	ump->um_bptrtodb = fs->fs_fsbtodb;
612	ump->um_seqinc = fs->fs_frag;
613	for (i = 0; i < MAXQUOTAS; i++)
614		ump->um_quotas[i] = NULLVP;
615	devvp->v_specflags |= SI_MOUNTEDON;
616	ffs_oldfscompat(fs);
617
618	/*
619	 * Set FS local "last mounted on" information (NULL pad)
620	 */
621	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
622			fs->fs_fsmnt,			/* copy area*/
623			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
624			&strsize);			/* real size*/
625	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
626
627	if( mp->mnt_flag & MNT_ROOTFS) {
628		/*
629		 * Root mount; update timestamp in mount structure.
630		 * this will be used by the common root mount code
631		 * to update the system clock.
632		 */
633		mp->mnt_time = fs->fs_time;
634	}
635
636	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
637	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
638	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
639		fs->fs_maxfilesize = maxfilesize;		/* XXX */
640	if (ronly == 0) {
641		fs->fs_clean = 0;
642		(void) ffs_sbupdate(ump, MNT_WAIT);
643	}
644	/*
645	 * Only VMIO the backing device if the backing device is a real
646	 * block device.  This excludes the original MFS implementation.
647	 * Note that it is optional that the backing device be VMIOed.  This
648	 * increases the opportunity for metadata caching.
649	 */
650	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
651		vfs_object_create(devvp, p, p->p_ucred, 0);
652	}
653	return (0);
654out:
655	if (bp)
656		brelse(bp);
657	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
658	if (ump) {
659		free(ump->um_fs, M_UFSMNT);
660		free(ump, M_UFSMNT);
661		mp->mnt_data = (qaddr_t)0;
662	}
663	return (error);
664}
665
666/*
667 * Sanity checks for old file systems.
668 *
669 * XXX - goes away some day.
670 */
671static int
672ffs_oldfscompat(fs)
673	struct fs *fs;
674{
675
676	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
677	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
678	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
679		fs->fs_nrpos = 8;				/* XXX */
680	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
681#if 0
682		int i;						/* XXX */
683		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
684								/* XXX */
685		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
686		for (i = 0; i < NIADDR; i++) {			/* XXX */
687			sizepb *= NINDIR(fs);			/* XXX */
688			fs->fs_maxfilesize += sizepb;		/* XXX */
689		}						/* XXX */
690#endif
691		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
692		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
693		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
694	}							/* XXX */
695	return (0);
696}
697
698/*
699 * unmount system call
700 */
701int
702ffs_unmount(mp, mntflags, p)
703	struct mount *mp;
704	int mntflags;
705	struct proc *p;
706{
707	register struct ufsmount *ump;
708	register struct fs *fs;
709	int error, flags;
710
711	flags = 0;
712	if (mntflags & MNT_FORCE) {
713		flags |= FORCECLOSE;
714	}
715	error = ffs_flushfiles(mp, flags, p);
716	if (error)
717		return (error);
718	ump = VFSTOUFS(mp);
719	fs = ump->um_fs;
720	if (fs->fs_ronly == 0) {
721		fs->fs_clean = 1;
722		error = ffs_sbupdate(ump, MNT_WAIT);
723		if (error) {
724			fs->fs_clean = 0;
725			return (error);
726		}
727	}
728	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
729
730	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
731		NOCRED, p);
732
733	vrele(ump->um_devvp);
734
735	free(fs->fs_csp[0], M_UFSMNT);
736	free(fs, M_UFSMNT);
737	free(ump, M_UFSMNT);
738	mp->mnt_data = (qaddr_t)0;
739	mp->mnt_flag &= ~MNT_LOCAL;
740	return (error);
741}
742
743/*
744 * Flush out all the files in a filesystem.
745 */
746int
747ffs_flushfiles(mp, flags, p)
748	register struct mount *mp;
749	int flags;
750	struct proc *p;
751{
752	register struct ufsmount *ump;
753	int error;
754
755	ump = VFSTOUFS(mp);
756#ifdef QUOTA
757	if (mp->mnt_flag & MNT_QUOTA) {
758		int i;
759		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
760		if (error)
761			return (error);
762		for (i = 0; i < MAXQUOTAS; i++) {
763			if (ump->um_quotas[i] == NULLVP)
764				continue;
765			quotaoff(p, mp, i);
766		}
767		/*
768		 * Here we fall through to vflush again to ensure
769		 * that we have gotten rid of all the system vnodes.
770		 */
771	}
772#endif
773	error = vflush(mp, NULLVP, flags);
774	return (error);
775}
776
777/*
778 * Get file system statistics.
779 */
780int
781ffs_statfs(mp, sbp, p)
782	struct mount *mp;
783	register struct statfs *sbp;
784	struct proc *p;
785{
786	register struct ufsmount *ump;
787	register struct fs *fs;
788
789	ump = VFSTOUFS(mp);
790	fs = ump->um_fs;
791	if (fs->fs_magic != FS_MAGIC)
792		panic("ffs_statfs");
793	sbp->f_bsize = fs->fs_fsize;
794	sbp->f_iosize = fs->fs_bsize;
795	sbp->f_blocks = fs->fs_dsize;
796	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
797		fs->fs_cstotal.cs_nffree;
798	sbp->f_bavail = freespace(fs, fs->fs_minfree);
799	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
800	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
801	if (sbp != &mp->mnt_stat) {
802		sbp->f_type = mp->mnt_vfc->vfc_typenum;
803		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
804			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
805		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
806			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
807	}
808	return (0);
809}
810
811/*
812 * Go through the disk queues to initiate sandbagged IO;
813 * go through the inodes to write those that have been modified;
814 * initiate the writing of the super block if it has been modified.
815 *
816 * Note: we are always called with the filesystem marked `MPBUSY'.
817 */
818int
819ffs_sync(mp, waitfor, cred, p)
820	struct mount *mp;
821	int waitfor;
822	struct ucred *cred;
823	struct proc *p;
824{
825	struct vnode *nvp, *vp;
826	struct inode *ip;
827	struct ufsmount *ump = VFSTOUFS(mp);
828	struct fs *fs;
829	struct timeval tv;
830	int error, allerror = 0;
831
832	fs = ump->um_fs;
833	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
834		printf("fs = %s\n", fs->fs_fsmnt);
835		panic("ffs_sync: rofs mod");
836	}
837	/*
838	 * Write back each (modified) inode.
839	 */
840	simple_lock(&mntvnode_slock);
841loop:
842	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
843		/*
844		 * If the vnode that we are about to sync is no longer
845		 * associated with this mount point, start over.
846		 */
847		if (vp->v_mount != mp)
848			goto loop;
849		simple_lock(&vp->v_interlock);
850		nvp = vp->v_mntvnodes.le_next;
851		ip = VTOI(vp);
852		if (((ip->i_flag &
853		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
854		    vp->v_dirtyblkhd.lh_first == NULL) {
855			simple_unlock(&vp->v_interlock);
856			continue;
857		}
858		if (vp->v_type != VCHR) {
859			simple_unlock(&mntvnode_slock);
860			error =
861			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
862			if (error) {
863				simple_lock(&mntvnode_slock);
864				if (error == ENOENT)
865					goto loop;
866				continue;
867			}
868			if (error = VOP_FSYNC(vp, cred, waitfor, p))
869				allerror = error;
870			VOP_UNLOCK(vp, 0, p);
871			vrele(vp);
872			simple_lock(&mntvnode_slock);
873		} else {
874			simple_unlock(&mntvnode_slock);
875			simple_unlock(&vp->v_interlock);
876			gettime(&tv);
877			/* UFS_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
878			UFS_UPDATE(vp, &tv, &tv, 0);
879			simple_lock(&mntvnode_slock);
880		}
881	}
882	simple_unlock(&mntvnode_slock);
883	/*
884	 * Force stale file system control information to be flushed.
885	 */
886	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
887	if (error)
888		allerror = error;
889#ifdef QUOTA
890	qsync(mp);
891#endif
892	/*
893	 * Write back modified superblock.
894	 */
895	if (fs->fs_fmod != 0) {
896		fs->fs_fmod = 0;
897		fs->fs_time = time.tv_sec;
898		if (error = ffs_sbupdate(ump, waitfor))
899			allerror = error;
900	}
901	return (allerror);
902}
903
904/*
905 * Look up a FFS dinode number to find its incore vnode, otherwise read it
906 * in from disk.  If it is in core, wait for the lock bit to clear, then
907 * return the inode locked.  Detection and handling of mount points must be
908 * done by the calling routine.
909 */
910static int ffs_inode_hash_lock;
911
912int
913ffs_vget(mp, ino, vpp)
914	struct mount *mp;
915	ino_t ino;
916	struct vnode **vpp;
917{
918	struct fs *fs;
919	struct inode *ip;
920	struct ufsmount *ump;
921	struct buf *bp;
922	struct vnode *vp;
923	dev_t dev;
924	int error;
925
926	ump = VFSTOUFS(mp);
927	dev = ump->um_dev;
928restart:
929	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
930		return (0);
931
932	/*
933	 * Lock out the creation of new entries in the FFS hash table in
934	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
935	 * may occur!
936	 */
937	if (ffs_inode_hash_lock) {
938		while (ffs_inode_hash_lock) {
939			ffs_inode_hash_lock = -1;
940			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
941		}
942		goto restart;
943	}
944	ffs_inode_hash_lock = 1;
945
946	/*
947	 * If this MALLOC() is performed after the getnewvnode()
948	 * it might block, leaving a vnode with a NULL v_data to be
949	 * found by ffs_sync() if a sync happens to fire right then,
950	 * which will cause a panic because ffs_sync() blindly
951	 * dereferences vp->v_data (as well it should).
952	 */
953	MALLOC(ip, struct inode *, sizeof(struct inode),
954	    ump->um_malloctype, M_WAITOK);
955
956	/* Allocate a new vnode/inode. */
957	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
958	if (error) {
959		if (ffs_inode_hash_lock < 0)
960			wakeup(&ffs_inode_hash_lock);
961		ffs_inode_hash_lock = 0;
962		*vpp = NULL;
963		FREE(ip, ump->um_malloctype);
964		return (error);
965	}
966	bzero((caddr_t)ip, sizeof(struct inode));
967	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
968	vp->v_data = ip;
969	ip->i_vnode = vp;
970	ip->i_fs = fs = ump->um_fs;
971	ip->i_dev = dev;
972	ip->i_number = ino;
973#ifdef QUOTA
974	{
975		int i;
976		for (i = 0; i < MAXQUOTAS; i++)
977			ip->i_dquot[i] = NODQUOT;
978	}
979#endif
980	/*
981	 * Put it onto its hash chain and lock it so that other requests for
982	 * this inode will block if they arrive while we are sleeping waiting
983	 * for old data structures to be purged or for the contents of the
984	 * disk portion of this inode to be read.
985	 */
986	ufs_ihashins(ip);
987
988	if (ffs_inode_hash_lock < 0)
989		wakeup(&ffs_inode_hash_lock);
990	ffs_inode_hash_lock = 0;
991
992	/* Read in the disk contents for the inode, copy into the inode. */
993	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
994	    (int)fs->fs_bsize, NOCRED, &bp);
995	if (error) {
996		/*
997		 * The inode does not contain anything useful, so it would
998		 * be misleading to leave it on its hash chain. With mode
999		 * still zero, it will be unlinked and returned to the free
1000		 * list by vput().
1001		 */
1002		brelse(bp);
1003		vput(vp);
1004		*vpp = NULL;
1005		return (error);
1006	}
1007	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
1008	bqrelse(bp);
1009
1010	/*
1011	 * Initialize the vnode from the inode, check for aliases.
1012	 * Note that the underlying vnode may have changed.
1013	 */
1014	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1015	if (error) {
1016		vput(vp);
1017		*vpp = NULL;
1018		return (error);
1019	}
1020	/*
1021	 * Finish inode initialization now that aliasing has been resolved.
1022	 */
1023	ip->i_devvp = ump->um_devvp;
1024	VREF(ip->i_devvp);
1025	/*
1026	 * Set up a generation number for this inode if it does not
1027	 * already have one. This should only happen on old filesystems.
1028	 */
1029	if (ip->i_gen == 0) {
1030		ip->i_gen = random() / 2 + 1;
1031		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1032			ip->i_flag |= IN_MODIFIED;
1033	}
1034	/*
1035	 * Ensure that uid and gid are correct. This is a temporary
1036	 * fix until fsck has been changed to do the update.
1037	 */
1038	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1039		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1040		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1041	}						/* XXX */
1042
1043	*vpp = vp;
1044	return (0);
1045}
1046
1047/*
1048 * File handle to vnode
1049 *
1050 * Have to be really careful about stale file handles:
1051 * - check that the inode number is valid
1052 * - call ffs_vget() to get the locked inode
1053 * - check for an unallocated inode (i_mode == 0)
1054 * - check that the given client host has export rights and return
1055 *   those rights via. exflagsp and credanonp
1056 */
1057int
1058ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1059	register struct mount *mp;
1060	struct fid *fhp;
1061	struct sockaddr *nam;
1062	struct vnode **vpp;
1063	int *exflagsp;
1064	struct ucred **credanonp;
1065{
1066	register struct ufid *ufhp;
1067	struct fs *fs;
1068
1069	ufhp = (struct ufid *)fhp;
1070	fs = VFSTOUFS(mp)->um_fs;
1071	if (ufhp->ufid_ino < ROOTINO ||
1072	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1073		return (ESTALE);
1074	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1075}
1076
1077/*
1078 * Vnode pointer to File handle
1079 */
1080/* ARGSUSED */
1081int
1082ffs_vptofh(vp, fhp)
1083	struct vnode *vp;
1084	struct fid *fhp;
1085{
1086	register struct inode *ip;
1087	register struct ufid *ufhp;
1088
1089	ip = VTOI(vp);
1090	ufhp = (struct ufid *)fhp;
1091	ufhp->ufid_len = sizeof(struct ufid);
1092	ufhp->ufid_ino = ip->i_number;
1093	ufhp->ufid_gen = ip->i_gen;
1094	return (0);
1095}
1096
/*
 * Initialize the filesystem; just use ufs_init.
 *
 * Returns whatever ufs_init() returns for "vfsp".
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1107
1108/*
1109 * Write a superblock and associated information back to disk.
1110 */
1111static int
1112ffs_sbupdate(mp, waitfor)
1113	struct ufsmount *mp;
1114	int waitfor;
1115{
1116	register struct fs *dfs, *fs = mp->um_fs;
1117	register struct buf *bp;
1118	int blks;
1119	caddr_t space;
1120	int i, size, error, allerror = 0;
1121
1122	/*
1123	 * First write back the summary information.
1124	 */
1125	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1126	space = (caddr_t)fs->fs_csp[0];
1127	for (i = 0; i < blks; i += fs->fs_frag) {
1128		size = fs->fs_bsize;
1129		if (i + fs->fs_frag > blks)
1130			size = (blks - i) * fs->fs_fsize;
1131		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1132		    size, 0, 0);
1133		bcopy(space, bp->b_data, (u_int)size);
1134		space += size;
1135		if (waitfor != MNT_WAIT)
1136			bawrite(bp);
1137		else if (error = bwrite(bp))
1138			allerror = error;
1139	}
1140	/*
1141	 * Now write back the superblock itself. If any errors occurred
1142	 * up to this point, then fail so that the superblock avoids
1143	 * being written out as clean.
1144	 */
1145	if (allerror)
1146		return (allerror);
1147	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1148	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1149	/* Restore compatibility to old file systems.		   XXX */
1150	dfs = (struct fs *)bp->b_data;				/* XXX */
1151	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1152		dfs->fs_nrpos = -1;				/* XXX */
1153	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1154		int32_t *lp, tmp;				/* XXX */
1155								/* XXX */
1156		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1157		tmp = lp[4];					/* XXX */
1158		for (i = 4; i > 0; i--)				/* XXX */
1159			lp[i] = lp[i-1];			/* XXX */
1160		lp[0] = tmp;					/* XXX */
1161	}							/* XXX */
1162	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1163	if (waitfor != MNT_WAIT)
1164		bawrite(bp);
1165	else if (error = bwrite(bp))
1166		allerror = error;
1167	return (allerror);
1168}
1169