ffs_vfsops.c revision 24203
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $Id: ffs_vfsops.c,v 1.51 1997/03/23 20:08:19 guido Exp $
35 */
36
37#include "opt_quota.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/vnode.h>
45#include <sys/socket.h>
46#include <sys/mount.h>
47#include <sys/buf.h>
48#include <sys/mbuf.h>
49#include <sys/fcntl.h>
50#include <sys/disklabel.h>
51#include <sys/errno.h>
52#include <sys/malloc.h>
53
54#include <miscfs/specfs/specdev.h>
55
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/ufsmount.h>
58#include <ufs/ufs/inode.h>
59#include <ufs/ufs/ufs_extern.h>
60
61#include <ufs/ffs/fs.h>
62#include <ufs/ffs/ffs_extern.h>
63
64#include <vm/vm.h>
65#include <vm/vm_param.h>
66#include <vm/vm_prot.h>
67#include <vm/vm_page.h>
68#include <vm/vm_object.h>
69#include <vm/vm_extern.h>
70
/*
 * Forward declarations for the functions that are private to this file.
 */
static int	ffs_sbupdate __P((struct ufsmount *, int));
static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
static int	ffs_oldfscompat __P((struct fs *));
static int	ffs_mount __P((struct mount *, char *, caddr_t,
				struct nameidata *, struct proc *));
static int	ffs_init __P((struct vfsconf *));

/*
 * Table of file system entry points.  The initializer is positional, so
 * the order of the entries must match the member order of struct vfsops.
 * Entries named ufs_* are not defined in this file.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
};

/*
 * NOTE(review): presumably registers the table above with the VFS layer
 * under the name "ufs" -- confirm against the definition of VFS_SET.
 */
VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
93
94/*
95 * ffs_mount
96 *
97 * Called when mounting local physical media
98 *
99 * PARAMETERS:
100 *		mountroot
101 *			mp	mount point structure
102 *			path	NULL (flag for root mount!!!)
103 *			data	<unused>
104 *			ndp	<unused>
105 *			p	process (user credentials check [statfs])
106 *
107 *		mount
108 *			mp	mount point structure
109 *			path	path to mount point
110 *			data	pointer to argument struct in user space
111 *			ndp	mount point namei() return (used for
112 *				credentials on reload), reused to look
113 *				up block device.
114 *			p	process (user credentials check)
115 *
116 * RETURNS:	0	Success
117 *		!0	error number (errno.h)
118 *
119 * LOCK STATE:
120 *
121 *		ENTRY
122 *			mount point is locked
123 *		EXIT
124 *			mount point is locked
125 *
126 * NOTES:
127 *		A NULL path can be used for a flag since the mount
128 *		system call will fail with EFAULT in copyinstr in
129 *		namei() if it is a genuine NULL from the user.
130 */
131static int
132ffs_mount( mp, path, data, ndp, p)
133        struct mount		*mp;	/* mount struct pointer*/
134        char			*path;	/* path to mount point*/
135        caddr_t			data;	/* arguments to FS specific mount*/
136        struct nameidata	*ndp;	/* mount point credentials*/
137        struct proc		*p;	/* process requesting mount*/
138{
139	u_int		size;
140	int		err = 0;
141	struct vnode	*devvp;
142
143	struct ufs_args args;
144	struct ufsmount *ump = 0;
145	register struct fs *fs;
146	int flags;
147
148	/*
149	 * Use NULL path to flag a root mount
150	 */
151	if( path == NULL) {
152		/*
153		 ***
154		 * Mounting root file system
155		 ***
156		 */
157
158		/* Get vnode for root device*/
159		if ((err = bdevvp( rootdev, &rootvp))) {
160			printf("ffs_mountroot: can't setup bdevvp for root");
161			return (err);
162		}
163
164		/*
165		 * Attempt mount
166		 */
167		if( ( err = ffs_mountfs(rootvp, mp, p)) != 0) {
168			/* fs specific cleanup (if any)*/
169			goto error_1;
170		}
171
172		goto dostatfs;		/* success*/
173
174	}
175
176	/*
177	 ***
178	 * Mounting non-root file system or updating a file system
179	 ***
180	 */
181
182	/* copy in user arguments*/
183	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
184	if (err)
185		goto error_1;		/* can't get arguments*/
186
187	/*
188	 * If updating, check whether changing from read-only to
189	 * read/write; if there is no device name, that's all we do.
190	 */
191	if (mp->mnt_flag & MNT_UPDATE) {
192		ump = VFSTOUFS(mp);
193		fs = ump->um_fs;
194		err = 0;
195		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
196			flags = WRITECLOSE;
197			if (mp->mnt_flag & MNT_FORCE)
198				flags |= FORCECLOSE;
199			err = ffs_flushfiles(mp, flags, p);
200		}
201		if (!err && (mp->mnt_flag & MNT_RELOAD))
202			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
203		if (err) {
204			goto error_1;
205		}
206		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
207			if (!fs->fs_clean) {
208				if (mp->mnt_flag & MNT_FORCE) {
209					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
210				} else {
211					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
212					    fs->fs_fsmnt);
213					err = EPERM;
214					goto error_1;
215				}
216			}
217			fs->fs_ronly = 0;
218		}
219		if (fs->fs_ronly == 0) {
220			fs->fs_clean = 0;
221			ffs_sbupdate(ump, MNT_WAIT);
222		}
223		/* if not updating name...*/
224		if (args.fspec == 0) {
225			/*
226			 * Process export requests.  Jumping to "success"
227			 * will return the vfs_export() error code.
228			 */
229			err = vfs_export(mp, &ump->um_export, &args.export);
230			goto success;
231		}
232	}
233
234	/*
235	 * Not an update, or updating the name: look up the name
236	 * and verify that it refers to a sensible block device.
237	 */
238	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
239	err = namei(ndp);
240	if (err) {
241		/* can't get devvp!*/
242		goto error_1;
243	}
244
245	devvp = ndp->ni_vp;
246
247	if (devvp->v_type != VBLK) {
248		err = ENOTBLK;
249		goto error_2;
250	}
251	if (major(devvp->v_rdev) >= nblkdev) {
252		err = ENXIO;
253		goto error_2;
254	}
255	if (mp->mnt_flag & MNT_UPDATE) {
256		/*
257		 ********************
258		 * UPDATE
259		 ********************
260		 */
261
262		if (devvp != ump->um_devvp)
263			err = EINVAL;	/* needs translation */
264		else
265			vrele(devvp);
266		/*
267		 * Update device name only on success
268		 */
269		if( !err) {
270			/* Save "mounted from" info for mount point (NULL pad)*/
271			copyinstr(	args.fspec,
272					mp->mnt_stat.f_mntfromname,
273					MNAMELEN - 1,
274					&size);
275			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
276		}
277	} else {
278		/*
279		 ********************
280		 * NEW MOUNT
281		 ********************
282		 */
283
284		/*
285		 * Since this is a new mount, we want the names for
286		 * the device and the mount point copied in.  If an
287		 * error occurs,  the mountpoint is discarded by the
288		 * upper level code.
289		 */
290		/* Save "last mounted on" info for mount point (NULL pad)*/
291		copyinstr(	path,				/* mount point*/
292				mp->mnt_stat.f_mntonname,	/* save area*/
293				MNAMELEN - 1,			/* max size*/
294				&size);				/* real size*/
295		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
296
297		/* Save "mounted from" info for mount point (NULL pad)*/
298		copyinstr(	args.fspec,			/* device name*/
299				mp->mnt_stat.f_mntfromname,	/* save area*/
300				MNAMELEN - 1,			/* max size*/
301				&size);				/* real size*/
302		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
303
304		err = ffs_mountfs(devvp, mp, p);
305	}
306	if (err) {
307		goto error_2;
308	}
309
310dostatfs:
311	/*
312	 * Initialize FS stat information in mount struct; uses both
313	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
314	 *
315	 * This code is common to root and non-root mounts
316	 */
317	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
318
319	goto success;
320
321
322error_2:	/* error with devvp held*/
323
324	/* release devvp before failing*/
325	vrele(devvp);
326
327error_1:	/* no state to back out*/
328
329success:
330	return( err);
331}
332
333/*
334 * Reload all incore data for a filesystem (used after running fsck on
335 * the root filesystem and finding things to fix). The filesystem must
336 * be mounted read-only.
337 *
338 * Things to do to update the mount:
339 *	1) invalidate all cached meta-data.
340 *	2) re-read superblock from disk.
341 *	3) re-read summary information from disk.
342 *	4) invalidate all inactive vnodes.
343 *	5) invalidate all cached file data.
344 *	6) re-read inode data for all active vnodes.
345 */
346static int
347ffs_reload(mp, cred, p)
348	register struct mount *mp;
349	struct ucred *cred;
350	struct proc *p;
351{
352	register struct vnode *vp, *nvp, *devvp;
353	struct inode *ip;
354	struct csum *space;
355	struct buf *bp;
356	struct fs *fs, *newfs;
357	struct partinfo dpart;
358	int i, blks, size, error;
359	int32_t *lp;
360
361	if ((mp->mnt_flag & MNT_RDONLY) == 0)
362		return (EINVAL);
363	/*
364	 * Step 1: invalidate all cached meta-data.
365	 */
366	devvp = VFSTOUFS(mp)->um_devvp;
367	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
368		panic("ffs_reload: dirty1");
369	/*
370	 * Step 2: re-read superblock from disk.
371	 */
372	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
373		size = DEV_BSIZE;
374	else
375		size = dpart.disklab->d_secsize;
376	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
377		return (error);
378	newfs = (struct fs *)bp->b_data;
379	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
380		newfs->fs_bsize < sizeof(struct fs)) {
381			brelse(bp);
382			return (EIO);		/* XXX needs translation */
383	}
384	fs = VFSTOUFS(mp)->um_fs;
385	/*
386	 * Copy pointer fields back into superblock before copying in	XXX
387	 * new superblock. These should really be in the ufsmount.	XXX
388	 * Note that important parameters (eg fs_ncg) are unchanged.
389	 */
390	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
391	newfs->fs_maxcluster = fs->fs_maxcluster;
392	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
393	if (fs->fs_sbsize < SBSIZE)
394		bp->b_flags |= B_INVAL;
395	brelse(bp);
396	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
397	ffs_oldfscompat(fs);
398
399	/*
400	 * Step 3: re-read summary information from disk.
401	 */
402	blks = howmany(fs->fs_cssize, fs->fs_fsize);
403	space = fs->fs_csp[0];
404	for (i = 0; i < blks; i += fs->fs_frag) {
405		size = fs->fs_bsize;
406		if (i + fs->fs_frag > blks)
407			size = (blks - i) * fs->fs_fsize;
408		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
409		    NOCRED, &bp);
410		if (error)
411			return (error);
412		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
413		brelse(bp);
414	}
415	/*
416	 * We no longer know anything about clusters per cylinder group.
417	 */
418	if (fs->fs_contigsumsize > 0) {
419		lp = fs->fs_maxcluster;
420		for (i = 0; i < fs->fs_ncg; i++)
421			*lp++ = fs->fs_contigsumsize;
422	}
423
424loop:
425	simple_lock(&mntvnode_slock);
426	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
427		if (vp->v_mount != mp) {
428			simple_unlock(&mntvnode_slock);
429			goto loop;
430		}
431		nvp = vp->v_mntvnodes.le_next;
432		/*
433		 * Step 4: invalidate all inactive vnodes.
434		 */
435		if (vrecycle(vp, &mntvnode_slock, p))
436			goto loop;
437		/*
438		 * Step 5: invalidate all cached file data.
439		 */
440		simple_lock(&vp->v_interlock);
441		simple_unlock(&mntvnode_slock);
442		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
443			goto loop;
444		}
445		if (vinvalbuf(vp, 0, cred, p, 0, 0))
446			panic("ffs_reload: dirty2");
447		/*
448		 * Step 6: re-read inode data for all active vnodes.
449		 */
450		ip = VTOI(vp);
451		error =
452		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
453		    (int)fs->fs_bsize, NOCRED, &bp);
454		if (error) {
455			vput(vp);
456			return (error);
457		}
458		ip->i_din = *((struct dinode *)bp->b_data +
459		    ino_to_fsbo(fs, ip->i_number));
460		brelse(bp);
461		vput(vp);
462		simple_lock(&mntvnode_slock);
463	}
464	simple_unlock(&mntvnode_slock);
465	return (0);
466}
467
468/*
469 * Common code for mount and mountroot
470 */
471int
472ffs_mountfs(devvp, mp, p)
473	register struct vnode *devvp;
474	struct mount *mp;
475	struct proc *p;
476{
477	register struct ufsmount *ump;
478	struct buf *bp;
479	register struct fs *fs;
480	dev_t dev;
481	struct partinfo dpart;
482	caddr_t base, space;
483	int error, i, blks, size, ronly;
484	int32_t *lp;
485	struct ucred *cred;
486	u_int64_t maxfilesize;					/* XXX */
487	u_int strsize;
488	int ncount;
489
490	dev = devvp->v_rdev;
491	cred = p ? p->p_ucred : NOCRED;
492	/*
493	 * Disallow multiple mounts of the same device.
494	 * Disallow mounting of a device that is currently in use
495	 * (except for root, which might share swap device for miniroot).
496	 * Flush out any old buffers remaining from a previous use.
497	 */
498	error = vfs_mountedon(devvp);
499	if (error)
500		return (error);
501	ncount = vcount(devvp);
502	if (devvp->v_object)
503		ncount -= 1;
504	if (ncount > 1 && devvp != rootvp)
505		return (EBUSY);
506	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
507		return (error);
508
509	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
510	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
511	if (error)
512		return (error);
513	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
514		size = DEV_BSIZE;
515	else
516		size = dpart.disklab->d_secsize;
517
518	bp = NULL;
519	ump = NULL;
520	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
521		goto out;
522	fs = (struct fs *)bp->b_data;
523	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
524	    fs->fs_bsize < sizeof(struct fs)) {
525		error = EINVAL;		/* XXX needs translation */
526		goto out;
527	}
528	fs->fs_fmod = 0;
529	if (!fs->fs_clean) {
530		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
531			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
532		} else {
533			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
534			error = EPERM;
535			goto out;
536		}
537	}
538	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
539	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
540		error = EROFS;          /* needs translation */
541		goto out;
542	}
543	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
544	bzero((caddr_t)ump, sizeof *ump);
545	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
546	    M_WAITOK);
547	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
548	if (fs->fs_sbsize < SBSIZE)
549		bp->b_flags |= B_INVAL;
550	brelse(bp);
551	bp = NULL;
552	fs = ump->um_fs;
553	fs->fs_ronly = ronly;
554	if (ronly == 0) {
555		fs->fs_fmod = 1;
556		fs->fs_clean = 0;
557	}
558	size = fs->fs_cssize;
559	blks = howmany(size, fs->fs_fsize);
560	if (fs->fs_contigsumsize > 0)
561		size += fs->fs_ncg * sizeof(int32_t);
562	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
563	for (i = 0; i < blks; i += fs->fs_frag) {
564		size = fs->fs_bsize;
565		if (i + fs->fs_frag > blks)
566			size = (blks - i) * fs->fs_fsize;
567		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
568		    cred, &bp)) {
569			free(base, M_UFSMNT);
570			goto out;
571		}
572		bcopy(bp->b_data, space, (u_int)size);
573		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
574		space += size;
575		brelse(bp);
576		bp = NULL;
577	}
578	if (fs->fs_contigsumsize > 0) {
579		fs->fs_maxcluster = lp = (int32_t *)space;
580		for (i = 0; i < fs->fs_ncg; i++)
581			*lp++ = fs->fs_contigsumsize;
582	}
583	mp->mnt_data = (qaddr_t)ump;
584	mp->mnt_stat.f_fsid.val[0] = (long)dev;
585	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
586		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
587	else
588		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
589	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
590	mp->mnt_flag |= MNT_LOCAL;
591	ump->um_mountp = mp;
592	ump->um_dev = dev;
593	ump->um_devvp = devvp;
594	ump->um_nindir = fs->fs_nindir;
595	ump->um_bptrtodb = fs->fs_fsbtodb;
596	ump->um_seqinc = fs->fs_frag;
597	for (i = 0; i < MAXQUOTAS; i++)
598		ump->um_quotas[i] = NULLVP;
599	devvp->v_specflags |= SI_MOUNTEDON;
600	ffs_oldfscompat(fs);
601
602	/*
603	 * Set FS local "last mounted on" information (NULL pad)
604	 */
605	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
606			fs->fs_fsmnt,			/* copy area*/
607			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
608			&strsize);			/* real size*/
609	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
610
611	if( mp->mnt_flag & MNT_ROOTFS) {
612		/*
613		 * Root mount; update timestamp in mount structure.
614		 * this will be used by the common root mount code
615		 * to update the system clock.
616		 */
617		mp->mnt_time = fs->fs_time;
618	}
619
620	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
621	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
622	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
623		fs->fs_maxfilesize = maxfilesize;		/* XXX */
624	if (ronly == 0) {
625		fs->fs_clean = 0;
626		(void) ffs_sbupdate(ump, MNT_WAIT);
627	}
628	/*
629	 * Only VMIO the backing device if the backing device is a real
630	 * block device.  This excludes the original MFS implementation.
631	 * Note that it is optional that the backing device be VMIOed.  This
632	 * increases the opportunity for metadata caching.
633	 */
634	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
635		vfs_object_create(devvp, p, p->p_ucred, 0);
636	}
637	return (0);
638out:
639	if (bp)
640		brelse(bp);
641	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
642	if (ump) {
643		free(ump->um_fs, M_UFSMNT);
644		free(ump, M_UFSMNT);
645		mp->mnt_data = (qaddr_t)0;
646	}
647	return (error);
648}
649
650/*
651 * Sanity checks for old file systems.
652 *
653 * XXX - goes away some day.
654 */
655static int
656ffs_oldfscompat(fs)
657	struct fs *fs;
658{
659
660	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
661	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
662	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
663		fs->fs_nrpos = 8;				/* XXX */
664	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
665#if 0
666		int i;						/* XXX */
667		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
668								/* XXX */
669		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
670		for (i = 0; i < NIADDR; i++) {			/* XXX */
671			sizepb *= NINDIR(fs);			/* XXX */
672			fs->fs_maxfilesize += sizepb;		/* XXX */
673		}						/* XXX */
674#endif
675		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
676		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
677		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
678	}							/* XXX */
679	return (0);
680}
681
682/*
683 * unmount system call
684 */
685int
686ffs_unmount(mp, mntflags, p)
687	struct mount *mp;
688	int mntflags;
689	struct proc *p;
690{
691	register struct ufsmount *ump;
692	register struct fs *fs;
693	int error, flags;
694
695	flags = 0;
696	if (mntflags & MNT_FORCE) {
697		flags |= FORCECLOSE;
698	}
699	error = ffs_flushfiles(mp, flags, p);
700	if (error)
701		return (error);
702	ump = VFSTOUFS(mp);
703	fs = ump->um_fs;
704	if (fs->fs_ronly == 0) {
705		fs->fs_clean = 1;
706		error = ffs_sbupdate(ump, MNT_WAIT);
707		if (error) {
708			fs->fs_clean = 0;
709			return (error);
710		}
711	}
712	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
713
714	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
715	vnode_pager_uncache(ump->um_devvp, p);
716	VOP_UNLOCK(ump->um_devvp, 0, p);
717
718	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
719		NOCRED, p);
720
721	vrele(ump->um_devvp);
722
723	free(fs->fs_csp[0], M_UFSMNT);
724	free(fs, M_UFSMNT);
725	free(ump, M_UFSMNT);
726	mp->mnt_data = (qaddr_t)0;
727	mp->mnt_flag &= ~MNT_LOCAL;
728	return (error);
729}
730
731/*
732 * Flush out all the files in a filesystem.
733 */
734int
735ffs_flushfiles(mp, flags, p)
736	register struct mount *mp;
737	int flags;
738	struct proc *p;
739{
740	register struct ufsmount *ump;
741	int error;
742
743	ump = VFSTOUFS(mp);
744#ifdef QUOTA
745	if (mp->mnt_flag & MNT_QUOTA) {
746		int i;
747		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
748		if (error)
749			return (error);
750		for (i = 0; i < MAXQUOTAS; i++) {
751			if (ump->um_quotas[i] == NULLVP)
752				continue;
753			quotaoff(p, mp, i);
754		}
755		/*
756		 * Here we fall through to vflush again to ensure
757		 * that we have gotten rid of all the system vnodes.
758		 */
759	}
760#endif
761	error = vflush(mp, NULLVP, flags);
762	return (error);
763}
764
765/*
766 * Get file system statistics.
767 */
768int
769ffs_statfs(mp, sbp, p)
770	struct mount *mp;
771	register struct statfs *sbp;
772	struct proc *p;
773{
774	register struct ufsmount *ump;
775	register struct fs *fs;
776
777	ump = VFSTOUFS(mp);
778	fs = ump->um_fs;
779	if (fs->fs_magic != FS_MAGIC)
780		panic("ffs_statfs");
781	sbp->f_bsize = fs->fs_fsize;
782	sbp->f_iosize = fs->fs_bsize;
783	sbp->f_blocks = fs->fs_dsize;
784	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
785		fs->fs_cstotal.cs_nffree;
786	sbp->f_bavail = freespace(fs, fs->fs_minfree);
787	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
788	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
789	if (sbp != &mp->mnt_stat) {
790		sbp->f_type = mp->mnt_vfc->vfc_typenum;
791		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
792			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
793		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
794			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
795	}
796	return (0);
797}
798
799/*
800 * Go through the disk queues to initiate sandbagged IO;
801 * go through the inodes to write those that have been modified;
802 * initiate the writing of the super block if it has been modified.
803 *
804 * Note: we are always called with the filesystem marked `MPBUSY'.
805 */
806int
807ffs_sync(mp, waitfor, cred, p)
808	struct mount *mp;
809	int waitfor;
810	struct ucred *cred;
811	struct proc *p;
812{
813	struct vnode *nvp, *vp;
814	struct inode *ip;
815	struct ufsmount *ump = VFSTOUFS(mp);
816	struct fs *fs;
817	struct timeval tv;
818	int error, allerror = 0;
819
820	fs = ump->um_fs;
821	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
822		printf("fs = %s\n", fs->fs_fsmnt);
823		panic("ffs_sync: rofs mod");
824	}
825	/*
826	 * Write back each (modified) inode.
827	 */
828	simple_lock(&mntvnode_slock);
829loop:
830	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
831		/*
832		 * If the vnode that we are about to sync is no longer
833		 * associated with this mount point, start over.
834		 */
835		if (vp->v_mount != mp)
836			goto loop;
837		simple_lock(&vp->v_interlock);
838		nvp = vp->v_mntvnodes.le_next;
839		ip = VTOI(vp);
840		if (((ip->i_flag &
841		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
842		    vp->v_dirtyblkhd.lh_first == NULL) {
843			simple_unlock(&vp->v_interlock);
844			continue;
845		}
846		if (vp->v_type != VCHR) {
847			simple_unlock(&mntvnode_slock);
848			error =
849			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
850			if (error) {
851				simple_lock(&mntvnode_slock);
852				if (error == ENOENT)
853					goto loop;
854				continue;
855			}
856			if (error = VOP_FSYNC(vp, cred, waitfor, p))
857				allerror = error;
858			VOP_UNLOCK(vp, 0, p);
859			vrele(vp);
860			simple_lock(&mntvnode_slock);
861		} else {
862			simple_unlock(&mntvnode_slock);
863			simple_unlock(&vp->v_interlock);
864			gettime(&tv);
865			/* VOP_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
866			VOP_UPDATE(vp, &tv, &tv, 0);
867			simple_lock(&mntvnode_slock);
868		}
869	}
870	simple_unlock(&mntvnode_slock);
871	/*
872	 * Force stale file system control information to be flushed.
873	 */
874	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
875	if (error)
876		allerror = error;
877#ifdef QUOTA
878	qsync(mp);
879#endif
880	/*
881	 * Write back modified superblock.
882	 */
883	if (fs->fs_fmod != 0) {
884		fs->fs_fmod = 0;
885		fs->fs_time = time.tv_sec;
886		if (error = ffs_sbupdate(ump, waitfor))
887			allerror = error;
888	}
889	return (allerror);
890}
891
892/*
893 * Look up a FFS dinode number to find its incore vnode, otherwise read it
894 * in from disk.  If it is in core, wait for the lock bit to clear, then
895 * return the inode locked.  Detection and handling of mount points must be
896 * done by the calling routine.
897 */
898static int ffs_inode_hash_lock;
899
900int
901ffs_vget(mp, ino, vpp)
902	struct mount *mp;
903	ino_t ino;
904	struct vnode **vpp;
905{
906	struct fs *fs;
907	struct inode *ip;
908	struct ufsmount *ump;
909	struct buf *bp;
910	struct vnode *vp;
911	dev_t dev;
912	int type, error;
913
914	ump = VFSTOUFS(mp);
915	dev = ump->um_dev;
916restart:
917	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
918		return (0);
919
920	/*
921	 * Lock out the creation of new entries in the FFS hash table in
922	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
923	 * may occur!
924	 */
925	if (ffs_inode_hash_lock) {
926		while (ffs_inode_hash_lock) {
927			ffs_inode_hash_lock = -1;
928			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
929		}
930		goto restart;
931	}
932	ffs_inode_hash_lock = 1;
933
934	/*
935	 * If this MALLOC() is performed after the getnewvnode()
936	 * it might block, leaving a vnode with a NULL v_data to be
937	 * found by ffs_sync() if a sync happens to fire right then,
938	 * which will cause a panic because ffs_sync() blindly
939	 * dereferences vp->v_data (as well it should).
940	 */
941	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
942	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
943
944	/* Allocate a new vnode/inode. */
945	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
946	if (error) {
947		if (ffs_inode_hash_lock < 0)
948			wakeup(&ffs_inode_hash_lock);
949		ffs_inode_hash_lock = 0;
950		*vpp = NULL;
951		FREE(ip, type);
952		return (error);
953	}
954	bzero((caddr_t)ip, sizeof(struct inode));
955	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
956	vp->v_data = ip;
957	ip->i_vnode = vp;
958	ip->i_fs = fs = ump->um_fs;
959	ip->i_dev = dev;
960	ip->i_number = ino;
961#ifdef QUOTA
962	{
963		int i;
964		for (i = 0; i < MAXQUOTAS; i++)
965			ip->i_dquot[i] = NODQUOT;
966	}
967#endif
968	/*
969	 * Put it onto its hash chain and lock it so that other requests for
970	 * this inode will block if they arrive while we are sleeping waiting
971	 * for old data structures to be purged or for the contents of the
972	 * disk portion of this inode to be read.
973	 */
974	ufs_ihashins(ip);
975
976	if (ffs_inode_hash_lock < 0)
977		wakeup(&ffs_inode_hash_lock);
978	ffs_inode_hash_lock = 0;
979
980	/* Read in the disk contents for the inode, copy into the inode. */
981	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
982	    (int)fs->fs_bsize, NOCRED, &bp);
983	if (error) {
984		/*
985		 * The inode does not contain anything useful, so it would
986		 * be misleading to leave it on its hash chain. With mode
987		 * still zero, it will be unlinked and returned to the free
988		 * list by vput().
989		 */
990		brelse(bp);
991		vput(vp);
992		*vpp = NULL;
993		return (error);
994	}
995	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
996	bqrelse(bp);
997
998	/*
999	 * Initialize the vnode from the inode, check for aliases.
1000	 * Note that the underlying vnode may have changed.
1001	 */
1002	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1003	if (error) {
1004		vput(vp);
1005		*vpp = NULL;
1006		return (error);
1007	}
1008	/*
1009	 * Finish inode initialization now that aliasing has been resolved.
1010	 */
1011	ip->i_devvp = ump->um_devvp;
1012	VREF(ip->i_devvp);
1013	/*
1014	 * Set up a generation number for this inode if it does not
1015	 * already have one. This should only happen on old filesystems.
1016	 */
1017	if (ip->i_gen == 0) {
1018		ip->i_gen = random() / 2 + 1;
1019		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1020			ip->i_flag |= IN_MODIFIED;
1021	}
1022	/*
1023	 * Ensure that uid and gid are correct. This is a temporary
1024	 * fix until fsck has been changed to do the update.
1025	 */
1026	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1027		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1028		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1029	}						/* XXX */
1030
1031	*vpp = vp;
1032	return (0);
1033}
1034
1035/*
1036 * File handle to vnode
1037 *
1038 * Have to be really careful about stale file handles:
1039 * - check that the inode number is valid
1040 * - call ffs_vget() to get the locked inode
1041 * - check for an unallocated inode (i_mode == 0)
1042 * - check that the given client host has export rights and return
1043 *   those rights via. exflagsp and credanonp
1044 */
1045int
1046ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1047	register struct mount *mp;
1048	struct fid *fhp;
1049	struct mbuf *nam;
1050	struct vnode **vpp;
1051	int *exflagsp;
1052	struct ucred **credanonp;
1053{
1054	register struct ufid *ufhp;
1055	struct fs *fs;
1056
1057	ufhp = (struct ufid *)fhp;
1058	fs = VFSTOUFS(mp)->um_fs;
1059	if (ufhp->ufid_ino < ROOTINO ||
1060	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1061		return (ESTALE);
1062	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1063}
1064
1065/*
1066 * Vnode pointer to File handle
1067 */
1068/* ARGSUSED */
1069int
1070ffs_vptofh(vp, fhp)
1071	struct vnode *vp;
1072	struct fid *fhp;
1073{
1074	register struct inode *ip;
1075	register struct ufid *ufhp;
1076
1077	ip = VTOI(vp);
1078	ufhp = (struct ufid *)fhp;
1079	ufhp->ufid_len = sizeof(struct ufid);
1080	ufhp->ufid_ino = ip->i_number;
1081	ufhp->ufid_gen = ip->i_gen;
1082	return (0);
1083}
1084
/*
 * Initialize the filesystem.  FFS has nothing of its own to set up, so
 * the work is delegated to ufs_init() and its result is returned.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1095
1096/*
1097 * Write a superblock and associated information back to disk.
1098 */
1099static int
1100ffs_sbupdate(mp, waitfor)
1101	struct ufsmount *mp;
1102	int waitfor;
1103{
1104	register struct fs *dfs, *fs = mp->um_fs;
1105	register struct buf *bp;
1106	int blks;
1107	caddr_t space;
1108	int i, size, error, allerror = 0;
1109
1110	/*
1111	 * First write back the summary information.
1112	 */
1113	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1114	space = (caddr_t)fs->fs_csp[0];
1115	for (i = 0; i < blks; i += fs->fs_frag) {
1116		size = fs->fs_bsize;
1117		if (i + fs->fs_frag > blks)
1118			size = (blks - i) * fs->fs_fsize;
1119		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1120		    size, 0, 0);
1121		bcopy(space, bp->b_data, (u_int)size);
1122		space += size;
1123		if (waitfor != MNT_WAIT)
1124			bawrite(bp);
1125		else if (error = bwrite(bp))
1126			allerror = error;
1127	}
1128	/*
1129	 * Now write back the superblock itself. If any errors occurred
1130	 * up to this point, then fail so that the superblock avoids
1131	 * being written out as clean.
1132	 */
1133	if (allerror)
1134		return (allerror);
1135	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1136	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1137	/* Restore compatibility to old file systems.		   XXX */
1138	dfs = (struct fs *)bp->b_data;				/* XXX */
1139	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1140		dfs->fs_nrpos = -1;				/* XXX */
1141	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1142		int32_t *lp, tmp;				/* XXX */
1143								/* XXX */
1144		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1145		tmp = lp[4];					/* XXX */
1146		for (i = 4; i > 0; i--)				/* XXX */
1147			lp[i] = lp[i-1];			/* XXX */
1148		lp[0] = tmp;					/* XXX */
1149	}							/* XXX */
1150	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1151	if (waitfor != MNT_WAIT)
1152		bawrite(bp);
1153	else if (error = bwrite(bp))
1154		allerror = error;
1155	return (allerror);
1156}
1157