ffs_vfsops.c revision 52838
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 52838 1999-11-03 12:05:39Z bde $
35 */
36
37#include "opt_quota.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/vnode.h>
45#include <sys/mount.h>
46#include <sys/buf.h>
47#include <sys/conf.h>
48#include <sys/fcntl.h>
49#include <sys/disklabel.h>
50#include <sys/malloc.h>
51
52#include <ufs/ufs/quota.h>
53#include <ufs/ufs/ufsmount.h>
54#include <ufs/ufs/inode.h>
55#include <ufs/ufs/ufs_extern.h>
56
57#include <ufs/ffs/fs.h>
58#include <ufs/ffs/ffs_extern.h>
59
60#include <vm/vm.h>
61#include <vm/vm_page.h>
62
/*
 * Malloc type for in-core inodes.  ffs_mount() passes it to ffs_mountfs(),
 * which stores it in um_malloctype; ffs_vget() allocates and frees
 * struct inode with that type.
 */
static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

/* Prototypes for the routines that are private to this file. */
static int	ffs_sbupdate __P((struct ufsmount *, int));
static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
static int	ffs_oldfscompat __P((struct fs *));
static int	ffs_mount __P((struct mount *, char *, caddr_t,
				struct nameidata *, struct proc *));
static int	ffs_init __P((struct vfsconf *));

/*
 * Filesystem operations vector.  The initializers are positional, so
 * their order has to match the member order of struct vfsops (declared
 * outside this file).  Generic ufs_* routines are mixed with the
 * FFS-specific ffs_* ones.
 */
static struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ufs_check_export,
	ffs_vptofh,
	ffs_init,
};

/* Register the vector above under the filesystem name "ufs". */
VFS_SET(ufs_vfsops, ufs, 0);
88
/*
 * ffs_mount
 *
 * Called when mounting local physical media
 *
 * PARAMETERS:
 *		mountroot
 *			mp	mount point structure
 *			path	NULL (flag for root mount!!!)
 *			data	<unused>
 *			ndp	<unused>
 *			p	process (user credentials check [statfs])
 *
 *		mount
 *			mp	mount point structure
 *			path	path to mount point
 *			data	pointer to argument struct in user space
 *			ndp	mount point namei() return (used for
 *				credentials on reload), reused to look
 *				up block device.
 *			p	process (user credentials check)
 *
 * RETURNS:	0	Success
 *		!0	error number (errno.h)
 *
 * LOCK STATE:
 *
 *		ENTRY
 *			mount point is locked
 *		EXIT
 *			mount point is locked
 *
 * NOTES:
 *		A NULL path can be used for a flag since the mount
 *		system call will fail with EFAULT in copyinstr in
 *		namei() if it is a genuine NULL from the user.
 *
 *		Control flow: "err" carries the result through the
 *		error_2 / error_1 / success labels, which fall through
 *		into one another.  error_2 drops the devvp reference
 *		obtained from namei(); the shared epilogue at "success"
 *		only commits a read-only/read-write change when err is 0,
 *		path is non-NULL and MNT_UPDATE is set.  The two
 *		VOP_ACCESS() permission failures use the separate "error"
 *		variable and return directly, bypassing the labels.
 */
static int
ffs_mount( mp, path, data, ndp, p)
        struct mount		*mp;	/* mount struct pointer*/
        char			*path;	/* path to mount point*/
        caddr_t			data;	/* arguments to FS specific mount*/
        struct nameidata	*ndp;	/* mount point credentials*/
        struct proc		*p;	/* process requesting mount*/
{
	size_t		size;
	int		err = 0;
	struct vnode	*devvp;

	struct ufs_args args;
	struct ufsmount *ump = 0;
	register struct fs *fs;
	int error, flags, ronly = 0;
	mode_t accessmode;

	/*
	 * Use NULL path to flag a root mount
	 */
	if( path == NULL) {
		/*
		 ***
		 * Mounting root file system
		 ***
		 */

		/* Obtain a vnode for the root device into the global rootvp. */
		if ((err = bdevvp(rootdev, &rootvp))) {
			printf("ffs_mountroot: can't find rootvp\n");
			return (err);
		}

		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
			/* fs specific cleanup (if any)*/
			goto error_1;
		}

		goto dostatfs;		/* success*/

	}

	/*
	 ***
	 * Mounting non-root file system or updating a file system
	 ***
	 */

	/* copy in user arguments*/
	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
	if (err)
		goto error_1;		/* can't get arguments*/

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		err = 0;
		ronly = fs->fs_ronly;	/* MNT_RELOAD might change this */
		/* Downgrade read/write -> read-only: flush out open files. */
		if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP) {
				err = softdep_flushfiles(mp, flags, p);
			} else {
				err = ffs_flushfiles(mp, flags, p);
			}
			/*
			 * Only the local copy changes here; fs->fs_ronly
			 * is written at the "success" label, and only if
			 * err is still 0 by then.
			 */
			ronly = 1;
		}
		if (!err && (mp->mnt_flag & MNT_RELOAD))
			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
		if (err) {
			goto error_1;
		}
		/* Upgrade read-only -> read/write. */
		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			if (p->p_ucred->cr_uid != 0) {
				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
				    p->p_ucred, p)) != 0) {
					VOP_UNLOCK(devvp, 0, p);
					/*
					 * Direct return: devvp here is
					 * ump->um_devvp, so no reference
					 * was taken that needs dropping.
					 */
					return (error);
				}
				VOP_UNLOCK(devvp, 0, p);
			}

			/* An unclean filesystem goes read/write only if forced. */
			if (fs->fs_clean == 0) {
				if (mp->mnt_flag & MNT_FORCE) {
					printf(
"WARNING: %s was not properly dismounted\n",
					    fs->fs_fsmnt);
				} else {
					printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
					    fs->fs_fsmnt);
					err = EPERM;
					goto error_1;
				}
			}

			/* check to see if we need to start softdep */
			if (fs->fs_flags & FS_DOSOFTDEP) {
				err = softdep_mount(devvp, mp, fs, p->p_ucred);
				if (err)
					goto error_1;
			}

			ronly = 0;
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (mp->mnt_flag & MNT_SOFTDEP) {
			mp->mnt_flag &= ~MNT_ASYNC;
		}
		/* if not updating name...*/
		if (args.fspec == 0) {
			/*
			 * Process export requests.  Jumping to "success"
			 * will return the vfs_export() error code.
			 */
			err = vfs_export(mp, &ump->um_export, &args.export);
			goto success;
		}
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
	err = namei(ndp);
	if (err) {
		/* can't get devvp!*/
		goto error_1;
	}

	/* From here on failures must go through error_2 to release devvp. */
	devvp = ndp->ni_vp;

	if (!vn_isdisk(devvp)) {
		err = ENOTBLK;
		goto error_2;
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (p->p_ucred->cr_uid != 0) {
		accessmode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) {
			/*
			 * devvp is still locked on this path, so vput()
			 * is used rather than the VOP_UNLOCK()/vrele()
			 * pair; the labels below are bypassed.
			 */
			vput(devvp);
			return (error);
		}
		VOP_UNLOCK(devvp, 0, p);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 ********************
		 * UPDATE
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
		 ********************
		 */

		/*
		 * On a match the looked-up reference is dropped right here;
		 * on a mismatch it is kept so that error_2 can drop it.
		 */
		if (devvp != ump->um_devvp) {
			if ( devvp->v_rdev == ump->um_devvp->v_rdev) {
				vrele(devvp);
			} else {
				err = EINVAL;	/* needs translation */
			}
		} else
			vrele(devvp);
		/*
		 * Update device name only on success
		 */
		if( !err) {
			/* Save "mounted from" info for mount point (NULL pad)*/
			copyinstr(	args.fspec,
					mp->mnt_stat.f_mntfromname,
					MNAMELEN - 1,
					&size);
			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
		}
	} else {
		/*
		 ********************
		 * NEW MOUNT
		 ********************
		 */

		/*
		 * Since this is a new mount, we want the names for
		 * the device and the mount point copied in.  If an
		 * error occurs,  the mountpoint is discarded by the
		 * upper level code.
		 */
		/* Save "last mounted on" info for mount point (NULL pad)*/
		copyinstr(	path,				/* mount point*/
				mp->mnt_stat.f_mntonname,	/* save area*/
				MNAMELEN - 1,			/* max size*/
				&size);				/* real size*/
		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);

		/* Save "mounted from" info for mount point (NULL pad)*/
		copyinstr(	args.fspec,			/* device name*/
				mp->mnt_stat.f_mntfromname,	/* save area*/
				MNAMELEN - 1,			/* max size*/
				&size);				/* real size*/
		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);

		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
	}
	if (err) {
		goto error_2;
	}

dostatfs:
	/*
	 * Initialize FS stat information in mount struct; uses both
	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
	 *
	 * This code is common to root and non-root mounts
	 */
	(void)VFS_STATFS(mp, &mp->mnt_stat, p);

	goto success;


error_2:	/* error with devvp held*/

	/* release devvp before failing*/
	vrele(devvp);

error_1:	/* no state to back out*/

success:
	/*
	 * Shared epilogue for all three labels.  The read-only state is
	 * committed to the in-core superblock and written out only for
	 * an error-free update of a non-root mount.
	 */
	if (!err && path && (mp->mnt_flag & MNT_UPDATE)) {
		/* Update clean flag after changing read-onlyness. */
		fs = ump->um_fs;
		if (ronly != fs->fs_ronly) {
			fs->fs_ronly = ronly;
			/* Clean only if now read-only and not flagged unclean. */
			fs->fs_clean = ronly &&
			    (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0;
			ffs_sbupdate(ump, MNT_WAIT);
		}
	}
	return (err);
}
391
392/*
393 * Reload all incore data for a filesystem (used after running fsck on
394 * the root filesystem and finding things to fix). The filesystem must
395 * be mounted read-only.
396 *
397 * Things to do to update the mount:
398 *	1) invalidate all cached meta-data.
399 *	2) re-read superblock from disk.
400 *	3) re-read summary information from disk.
401 *	4) invalidate all inactive vnodes.
402 *	5) invalidate all cached file data.
403 *	6) re-read inode data for all active vnodes.
404 */
405static int
406ffs_reload(mp, cred, p)
407	register struct mount *mp;
408	struct ucred *cred;
409	struct proc *p;
410{
411	register struct vnode *vp, *nvp, *devvp;
412	struct inode *ip;
413	struct csum *space;
414	struct buf *bp;
415	struct fs *fs, *newfs;
416	struct partinfo dpart;
417	dev_t dev;
418	int i, blks, size, error;
419	int32_t *lp;
420
421	if ((mp->mnt_flag & MNT_RDONLY) == 0)
422		return (EINVAL);
423	/*
424	 * Step 1: invalidate all cached meta-data.
425	 */
426	devvp = VFSTOUFS(mp)->um_devvp;
427	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
428	error = vinvalbuf(devvp, 0, cred, p, 0, 0);
429	VOP_UNLOCK(devvp, 0, p);
430	if (error)
431		panic("ffs_reload: dirty1");
432
433	dev = devvp->v_rdev;
434
435	/*
436	 * Only VMIO the backing device if the backing device is a real
437	 * block device.  See ffs_mountmfs() for more details.
438	 */
439	if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) {
440		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
441		vfs_object_create(devvp, p, p->p_ucred);
442		simple_lock(&devvp->v_interlock);
443		VOP_UNLOCK(devvp, LK_INTERLOCK, p);
444	}
445
446	/*
447	 * Step 2: re-read superblock from disk.
448	 */
449	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
450		size = DEV_BSIZE;
451	else
452		size = dpart.disklab->d_secsize;
453	if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0)
454		return (error);
455	newfs = (struct fs *)bp->b_data;
456	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
457		newfs->fs_bsize < sizeof(struct fs)) {
458			brelse(bp);
459			return (EIO);		/* XXX needs translation */
460	}
461	fs = VFSTOUFS(mp)->um_fs;
462	/*
463	 * Copy pointer fields back into superblock before copying in	XXX
464	 * new superblock. These should really be in the ufsmount.	XXX
465	 * Note that important parameters (eg fs_ncg) are unchanged.
466	 */
467	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
468	newfs->fs_maxcluster = fs->fs_maxcluster;
469	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
470	if (fs->fs_sbsize < SBSIZE)
471		bp->b_flags |= B_INVAL;
472	brelse(bp);
473	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
474	ffs_oldfscompat(fs);
475
476	/*
477	 * Step 3: re-read summary information from disk.
478	 */
479	blks = howmany(fs->fs_cssize, fs->fs_fsize);
480	space = fs->fs_csp[0];
481	for (i = 0; i < blks; i += fs->fs_frag) {
482		size = fs->fs_bsize;
483		if (i + fs->fs_frag > blks)
484			size = (blks - i) * fs->fs_fsize;
485		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
486		    NOCRED, &bp);
487		if (error)
488			return (error);
489		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
490		brelse(bp);
491	}
492	/*
493	 * We no longer know anything about clusters per cylinder group.
494	 */
495	if (fs->fs_contigsumsize > 0) {
496		lp = fs->fs_maxcluster;
497		for (i = 0; i < fs->fs_ncg; i++)
498			*lp++ = fs->fs_contigsumsize;
499	}
500
501loop:
502	simple_lock(&mntvnode_slock);
503	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
504		if (vp->v_mount != mp) {
505			simple_unlock(&mntvnode_slock);
506			goto loop;
507		}
508		nvp = vp->v_mntvnodes.le_next;
509		/*
510		 * Step 4: invalidate all inactive vnodes.
511		 */
512		if (vrecycle(vp, &mntvnode_slock, p))
513			goto loop;
514		/*
515		 * Step 5: invalidate all cached file data.
516		 */
517		simple_lock(&vp->v_interlock);
518		simple_unlock(&mntvnode_slock);
519		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
520			goto loop;
521		}
522		if (vinvalbuf(vp, 0, cred, p, 0, 0))
523			panic("ffs_reload: dirty2");
524		/*
525		 * Step 6: re-read inode data for all active vnodes.
526		 */
527		ip = VTOI(vp);
528		error =
529		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
530		    (int)fs->fs_bsize, NOCRED, &bp);
531		if (error) {
532			vput(vp);
533			return (error);
534		}
535		ip->i_din = *((struct dinode *)bp->b_data +
536		    ino_to_fsbo(fs, ip->i_number));
537		ip->i_effnlink = ip->i_nlink;
538		brelse(bp);
539		vput(vp);
540		simple_lock(&mntvnode_slock);
541	}
542	simple_unlock(&mntvnode_slock);
543	return (0);
544}
545
546/*
547 * Common code for mount and mountroot
548 */
549int
550ffs_mountfs(devvp, mp, p, malloctype)
551	register struct vnode *devvp;
552	struct mount *mp;
553	struct proc *p;
554	struct malloc_type *malloctype;
555{
556	register struct ufsmount *ump;
557	struct buf *bp;
558	register struct fs *fs;
559	dev_t dev;
560	struct partinfo dpart;
561	caddr_t base, space;
562	int error, i, blks, size, ronly;
563	int32_t *lp;
564	struct ucred *cred;
565	u_int64_t maxfilesize;					/* XXX */
566	size_t strsize;
567	int ncount;
568
569	dev = devvp->v_rdev;
570	cred = p ? p->p_ucred : NOCRED;
571	/*
572	 * Disallow multiple mounts of the same device.
573	 * Disallow mounting of a device that is currently in use
574	 * (except for root, which might share swap device for miniroot).
575	 * Flush out any old buffers remaining from a previous use.
576	 */
577	error = vfs_mountedon(devvp);
578	if (error)
579		return (error);
580	ncount = vcount(devvp);
581
582	if (ncount > 1 && devvp != rootvp)
583		return (EBUSY);
584	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
585	error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
586	VOP_UNLOCK(devvp, 0, p);
587	if (error)
588		return (error);
589
590	/*
591	 * Only VMIO the backing device if the backing device is a real
592	 * block device.  This excludes the original MFS implementation.
593	 * Note that it is optional that the backing device be VMIOed.  This
594	 * increases the opportunity for metadata caching.
595	 */
596	if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) {
597		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
598		vfs_object_create(devvp, p, p->p_ucred);
599		simple_lock(&devvp->v_interlock);
600		VOP_UNLOCK(devvp, LK_INTERLOCK, p);
601	}
602
603	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
604	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
605	if (error)
606		return (error);
607	if (devvp->v_rdev->si_iosize_max > mp->mnt_iosize_max)
608		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
609	if (mp->mnt_iosize_max > MAXPHYS)
610		mp->mnt_iosize_max = MAXPHYS;
611
612	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
613		size = DEV_BSIZE;
614	else
615		size = dpart.disklab->d_secsize;
616
617	bp = NULL;
618	ump = NULL;
619	if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0)
620		goto out;
621	fs = (struct fs *)bp->b_data;
622	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
623	    fs->fs_bsize < sizeof(struct fs)) {
624		error = EINVAL;		/* XXX needs translation */
625		goto out;
626	}
627	fs->fs_fmod = 0;
628	fs->fs_flags &= ~FS_UNCLEAN;
629	if (fs->fs_clean == 0) {
630		fs->fs_flags |= FS_UNCLEAN;
631		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
632			printf(
633"WARNING: %s was not properly dismounted\n",
634			    fs->fs_fsmnt);
635		} else {
636			printf(
637"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
638			    fs->fs_fsmnt);
639			error = EPERM;
640			goto out;
641		}
642	}
643	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
644	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
645		error = EROFS;          /* needs translation */
646		goto out;
647	}
648	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
649	bzero((caddr_t)ump, sizeof *ump);
650	ump->um_malloctype = malloctype;
651	ump->um_i_effnlink_valid = 1;
652	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
653	    M_WAITOK);
654	ump->um_blkatoff = ffs_blkatoff;
655	ump->um_truncate = ffs_truncate;
656	ump->um_update = ffs_update;
657	ump->um_valloc = ffs_valloc;
658	ump->um_vfree = ffs_vfree;
659	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
660	if (fs->fs_sbsize < SBSIZE)
661		bp->b_flags |= B_INVAL;
662	brelse(bp);
663	bp = NULL;
664	fs = ump->um_fs;
665	fs->fs_ronly = ronly;
666	if (ronly == 0) {
667		fs->fs_fmod = 1;
668		fs->fs_clean = 0;
669	}
670	size = fs->fs_cssize;
671	blks = howmany(size, fs->fs_fsize);
672	if (fs->fs_contigsumsize > 0)
673		size += fs->fs_ncg * sizeof(int32_t);
674	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
675	for (i = 0; i < blks; i += fs->fs_frag) {
676		size = fs->fs_bsize;
677		if (i + fs->fs_frag > blks)
678			size = (blks - i) * fs->fs_fsize;
679		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
680		    cred, &bp)) != 0) {
681			free(base, M_UFSMNT);
682			goto out;
683		}
684		bcopy(bp->b_data, space, (u_int)size);
685		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
686		space += size;
687		brelse(bp);
688		bp = NULL;
689	}
690	if (fs->fs_contigsumsize > 0) {
691		fs->fs_maxcluster = lp = (int32_t *)space;
692		for (i = 0; i < fs->fs_ncg; i++)
693			*lp++ = fs->fs_contigsumsize;
694	}
695	mp->mnt_data = (qaddr_t)ump;
696	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
697	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
698	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
699	    vfs_getvfs(&mp->mnt_stat.f_fsid))
700		vfs_getnewfsid(mp);
701	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
702	mp->mnt_flag |= MNT_LOCAL;
703	ump->um_mountp = mp;
704	ump->um_dev = dev;
705	ump->um_devvp = devvp;
706	ump->um_nindir = fs->fs_nindir;
707	ump->um_bptrtodb = fs->fs_fsbtodb;
708	ump->um_seqinc = fs->fs_frag;
709	for (i = 0; i < MAXQUOTAS; i++)
710		ump->um_quotas[i] = NULLVP;
711	devvp->v_specmountpoint = mp;
712	ffs_oldfscompat(fs);
713
714	/*
715	 * Set FS local "last mounted on" information (NULL pad)
716	 */
717	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
718			fs->fs_fsmnt,			/* copy area*/
719			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
720			&strsize);			/* real size*/
721	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
722
723	if( mp->mnt_flag & MNT_ROOTFS) {
724		/*
725		 * Root mount; update timestamp in mount structure.
726		 * this will be used by the common root mount code
727		 * to update the system clock.
728		 */
729		mp->mnt_time = fs->fs_time;
730	}
731
732	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
733	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
734	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
735		fs->fs_maxfilesize = maxfilesize;		/* XXX */
736	if (ronly == 0) {
737		if ((fs->fs_flags & FS_DOSOFTDEP) &&
738		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
739			free(base, M_UFSMNT);
740			goto out;
741		}
742		fs->fs_clean = 0;
743		(void) ffs_sbupdate(ump, MNT_WAIT);
744	}
745	return (0);
746out:
747	devvp->v_specmountpoint = NULL;
748	if (bp)
749		brelse(bp);
750	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
751	if (ump) {
752		free(ump->um_fs, M_UFSMNT);
753		free(ump, M_UFSMNT);
754		mp->mnt_data = (qaddr_t)0;
755	}
756	return (error);
757}
758
759/*
760 * Sanity checks for old file systems.
761 *
762 * XXX - goes away some day.
763 */
764static int
765ffs_oldfscompat(fs)
766	struct fs *fs;
767{
768
769	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
770	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
771	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
772		fs->fs_nrpos = 8;				/* XXX */
773	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
774#if 0
775		int i;						/* XXX */
776		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
777								/* XXX */
778		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
779		for (i = 0; i < NIADDR; i++) {			/* XXX */
780			sizepb *= NINDIR(fs);			/* XXX */
781			fs->fs_maxfilesize += sizepb;		/* XXX */
782		}						/* XXX */
783#endif
784		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
785		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
786		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
787	}							/* XXX */
788	return (0);
789}
790
791/*
792 * unmount system call
793 */
794int
795ffs_unmount(mp, mntflags, p)
796	struct mount *mp;
797	int mntflags;
798	struct proc *p;
799{
800	register struct ufsmount *ump;
801	register struct fs *fs;
802	int error, flags;
803
804	flags = 0;
805	if (mntflags & MNT_FORCE) {
806		flags |= FORCECLOSE;
807	}
808	if (mp->mnt_flag & MNT_SOFTDEP) {
809		if ((error = softdep_flushfiles(mp, flags, p)) != 0)
810			return (error);
811	} else {
812		if ((error = ffs_flushfiles(mp, flags, p)) != 0)
813			return (error);
814	}
815	ump = VFSTOUFS(mp);
816	fs = ump->um_fs;
817	if (fs->fs_ronly == 0) {
818		fs->fs_clean = fs->fs_flags & FS_UNCLEAN ? 0 : 1;
819		error = ffs_sbupdate(ump, MNT_WAIT);
820		if (error) {
821			fs->fs_clean = 0;
822			return (error);
823		}
824	}
825	ump->um_devvp->v_specmountpoint = NULL;
826
827	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0);
828	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
829		NOCRED, p);
830
831	vrele(ump->um_devvp);
832
833	free(fs->fs_csp[0], M_UFSMNT);
834	free(fs, M_UFSMNT);
835	free(ump, M_UFSMNT);
836	mp->mnt_data = (qaddr_t)0;
837	mp->mnt_flag &= ~MNT_LOCAL;
838	return (error);
839}
840
841/*
842 * Flush out all the files in a filesystem.
843 */
844int
845ffs_flushfiles(mp, flags, p)
846	register struct mount *mp;
847	int flags;
848	struct proc *p;
849{
850	register struct ufsmount *ump;
851	int error;
852
853	ump = VFSTOUFS(mp);
854#ifdef QUOTA
855	if (mp->mnt_flag & MNT_QUOTA) {
856		int i;
857		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
858		if (error)
859			return (error);
860		for (i = 0; i < MAXQUOTAS; i++) {
861			if (ump->um_quotas[i] == NULLVP)
862				continue;
863			quotaoff(p, mp, i);
864		}
865		/*
866		 * Here we fall through to vflush again to ensure
867		 * that we have gotten rid of all the system vnodes.
868		 */
869	}
870#endif
871        /*
872	 * Flush all the files.
873	 */
874	if ((error = vflush(mp, NULL, flags)) != 0)
875		return (error);
876	/*
877	 * Flush filesystem metadata.
878	 */
879	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
880	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
881	VOP_UNLOCK(ump->um_devvp, 0, p);
882	return (error);
883}
884
885/*
886 * Get file system statistics.
887 */
888int
889ffs_statfs(mp, sbp, p)
890	struct mount *mp;
891	register struct statfs *sbp;
892	struct proc *p;
893{
894	register struct ufsmount *ump;
895	register struct fs *fs;
896
897	ump = VFSTOUFS(mp);
898	fs = ump->um_fs;
899	if (fs->fs_magic != FS_MAGIC)
900		panic("ffs_statfs");
901	sbp->f_bsize = fs->fs_fsize;
902	sbp->f_iosize = fs->fs_bsize;
903	sbp->f_blocks = fs->fs_dsize;
904	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
905		fs->fs_cstotal.cs_nffree;
906	sbp->f_bavail = freespace(fs, fs->fs_minfree);
907	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
908	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
909	if (sbp != &mp->mnt_stat) {
910		sbp->f_type = mp->mnt_vfc->vfc_typenum;
911		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
912			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
913		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
914			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
915	}
916	return (0);
917}
918
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * PARAMETERS:
 *	mp	mount to sync
 *	waitfor	MNT_WAIT, MNT_NOWAIT or MNT_LAZY; passed on to VOP_FSYNC()
 *		and ffs_sbupdate()
 *	cred	credentials passed to VOP_FSYNC()
 *	p	calling process
 *
 * RETURNS:	0 if everything succeeded, otherwise the last error seen
 *		(earlier errors are overwritten by later ones).
 */
int
ffs_sync(mp, waitfor, cred, p)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct proc *p;
{
	struct vnode *nvp, *vp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, allerror = 0;

	fs = ump->um_fs;
	/* A read-only filesystem must never have a modified superblock. */
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	simple_lock(&mntvnode_slock);
loop:
	/* mntvnode_slock is held at the top of every iteration. */
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		simple_lock(&vp->v_interlock);
		nvp = vp->v_mntvnodes.le_next;
		ip = VTOI(vp);
		/*
		 * Skip vnodes of type VNON, and vnodes whose inode has no
		 * pending flags and which either have no dirty buffers or
		 * are being visited by a lazy sync.
		 */
		if ((vp->v_type == VNON) || (((ip->i_flag &
		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		if (vp->v_type != VCHR) {
			/*
			 * Drop the list lock and try to get the vnode
			 * without sleeping (LK_NOWAIT); the interlock is
			 * handed to vget() via LK_INTERLOCK.
			 */
			simple_unlock(&mntvnode_slock);
			error =
			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
			if (error) {
				simple_lock(&mntvnode_slock);
				/* ENOENT: rescan the list; otherwise skip it. */
				if (error == ENOENT)
					goto loop;
				continue;
			}
			if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
				allerror = error;
			VOP_UNLOCK(vp, 0, p);
			vrele(vp);
			simple_lock(&mntvnode_slock);
		} else {
			/*
			 * Character devices: only the inode is updated, with
			 * a second argument of 0, and the vnode is neither
			 * locked nor referenced.
			 */
			simple_unlock(&mntvnode_slock);
			simple_unlock(&vp->v_interlock);
			/* UFS_UPDATE(vp, waitfor == MNT_WAIT); */
			UFS_UPDATE(vp, 0);
			simple_lock(&mntvnode_slock);
		}
	}
	simple_unlock(&mntvnode_slock);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		/*
		 * With soft updates the device fsync is issued as
		 * MNT_NOWAIT; the changed waitfor also applies to the
		 * superblock update below.
		 */
		if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
			waitfor = MNT_NOWAIT;
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0, p);
	}
#ifdef QUOTA
	qsync(mp);
#endif
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
		allerror = error;
	return (allerror);
}
1010
/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 *
 * PARAMETERS:
 *	mp	mount the inode belongs to
 *	ino	inode number to look up
 *	vpp	receives the vnode on success, NULL on failure
 *
 * RETURNS:	0 on success, otherwise the error from getnewvnode(),
 *		bread() or ufs_vinit().
 */

/*
 * Serializes creation of new hash entries in ffs_vget():
 *	 0	free
 *	 1	held
 *	-1	held, and at least one process is sleeping on it
 * The holder issues a wakeup() on release only when the value is negative.
 */
static int ffs_inode_hash_lock;

int
ffs_vget(mp, ino, vpp)
	struct mount *mp;
	ino_t ino;
	struct vnode **vpp;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
restart:
	/* Fast path: the inode is already in the hash table. */
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		return (0);
	}

	/*
	 * Lock out the creation of new entries in the FFS hash table in
	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
	 * may occur!
	 */
	if (ffs_inode_hash_lock) {
		/* Flag that someone is waiting, then sleep until released. */
		while (ffs_inode_hash_lock) {
			ffs_inode_hash_lock = -1;
			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
		}
		/* The previous holder may have created this very inode. */
		goto restart;
	}
	ffs_inode_hash_lock = 1;

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	MALLOC(ip, struct inode *, sizeof(struct inode),
	    ump->um_malloctype, M_WAITOK);

	/* Allocate a new vnode/inode. */
	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
	if (error) {
		/* Release the hash lock (waking waiters) and back out. */
		if (ffs_inode_hash_lock < 0)
			wakeup(&ffs_inode_hash_lock);
		ffs_inode_hash_lock = 0;
		*vpp = NULL;
		FREE(ip, ump->um_malloctype);
		return (error);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ufs_ihashins(ip);

	/* The entry is in the table now; let other creators proceed. */
	if (ffs_inode_hash_lock < 0)
		wakeup(&ffs_inode_hash_lock);
	ffs_inode_hash_lock = 0;

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* Copy this inode's on-disk dinode out of the block just read. */
	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
	/* Without soft updates the effective link count is the stored one. */
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = random() / 2 + 1;
		/* Only mark the inode modified if the mount is writable. */
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
	}						/* XXX */

	*vpp = vp;
	return (0);
}
1158
1159/*
1160 * File handle to vnode
1161 *
1162 * Have to be really careful about stale file handles:
1163 * - check that the inode number is valid
1164 * - call ffs_vget() to get the locked inode
1165 * - check for an unallocated inode (i_mode == 0)
1166 * - check that the given client host has export rights and return
1167 *   those rights via. exflagsp and credanonp
1168 */
1169int
1170ffs_fhtovp(mp, fhp, vpp)
1171	register struct mount *mp;
1172	struct fid *fhp;
1173	struct vnode **vpp;
1174{
1175	register struct ufid *ufhp;
1176	struct fs *fs;
1177
1178	ufhp = (struct ufid *)fhp;
1179	fs = VFSTOUFS(mp)->um_fs;
1180	if (ufhp->ufid_ino < ROOTINO ||
1181	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1182		return (ESTALE);
1183	return (ufs_fhtovp(mp, ufhp, vpp));
1184}
1185
1186/*
1187 * Vnode pointer to File handle
1188 */
1189/* ARGSUSED */
1190int
1191ffs_vptofh(vp, fhp)
1192	struct vnode *vp;
1193	struct fid *fhp;
1194{
1195	register struct inode *ip;
1196	register struct ufid *ufhp;
1197
1198	ip = VTOI(vp);
1199	ufhp = (struct ufid *)fhp;
1200	ufhp->ufid_len = sizeof(struct ufid);
1201	ufhp->ufid_ino = ip->i_number;
1202	ufhp->ufid_gen = ip->i_gen;
1203	return (0);
1204}
1205
1206/*
1207 * Initialize the filesystem; just use ufs_init.
1208 */
1209static int
1210ffs_init(vfsp)
1211	struct vfsconf *vfsp;
1212{
1213
1214	softdep_initialize();
1215	return (ufs_init(vfsp));
1216}
1217
1218/*
1219 * Write a superblock and associated information back to disk.
1220 */
1221static int
1222ffs_sbupdate(mp, waitfor)
1223	struct ufsmount *mp;
1224	int waitfor;
1225{
1226	register struct fs *dfs, *fs = mp->um_fs;
1227	register struct buf *bp;
1228	int blks;
1229	caddr_t space;
1230	int i, size, error, allerror = 0;
1231
1232	/*
1233	 * First write back the summary information.
1234	 */
1235	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1236	space = (caddr_t)fs->fs_csp[0];
1237	for (i = 0; i < blks; i += fs->fs_frag) {
1238		size = fs->fs_bsize;
1239		if (i + fs->fs_frag > blks)
1240			size = (blks - i) * fs->fs_fsize;
1241		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1242		    size, 0, 0);
1243		bcopy(space, bp->b_data, (u_int)size);
1244		space += size;
1245		if (waitfor != MNT_WAIT)
1246			bawrite(bp);
1247		else if ((error = bwrite(bp)) != 0)
1248			allerror = error;
1249	}
1250	/*
1251	 * Now write back the superblock itself. If any errors occurred
1252	 * up to this point, then fail so that the superblock avoids
1253	 * being written out as clean.
1254	 */
1255	if (allerror)
1256		return (allerror);
1257	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1258	fs->fs_fmod = 0;
1259	fs->fs_time = time_second;
1260	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1261	/* Restore compatibility to old file systems.		   XXX */
1262	dfs = (struct fs *)bp->b_data;				/* XXX */
1263	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1264		dfs->fs_nrpos = -1;				/* XXX */
1265	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1266		int32_t *lp, tmp;				/* XXX */
1267								/* XXX */
1268		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1269		tmp = lp[4];					/* XXX */
1270		for (i = 4; i > 0; i--)				/* XXX */
1271			lp[i] = lp[i-1];			/* XXX */
1272		lp[0] = tmp;					/* XXX */
1273	}							/* XXX */
1274	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1275	if (waitfor != MNT_WAIT)
1276		bawrite(bp);
1277	else if ((error = bwrite(bp)) != 0)
1278		allerror = error;
1279	return (allerror);
1280}
1281