ffs_vfsops.c revision 34961
1/*
2 * Copyright (c) 1989, 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34 * $Id: ffs_vfsops.c,v 1.77 1998/03/27 14:20:57 peter Exp $
35 */
36
37#include "opt_quota.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/kernel.h>
44#include <sys/vnode.h>
45#include <sys/mount.h>
46#include <sys/buf.h>
47#include <sys/conf.h>
48#include <sys/fcntl.h>
49#include <sys/disklabel.h>
50#include <sys/malloc.h>
51
52#include <miscfs/specfs/specdev.h>
53
54#include <ufs/ufs/quota.h>
55#include <ufs/ufs/ufsmount.h>
56#include <ufs/ufs/inode.h>
57#include <ufs/ufs/ufs_extern.h>
58
59#include <ufs/ffs/fs.h>
60#include <ufs/ffs/ffs_extern.h>
61
62#include <vm/vm.h>
63#include <vm/vm_prot.h>
64#include <vm/vm_page.h>
65#include <vm/vm_extern.h>
66#include <vm/vm_object.h>
67
68static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
69
70static int	ffs_sbupdate __P((struct ufsmount *, int));
71static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
72static int	ffs_oldfscompat __P((struct fs *));
73static int	ffs_mount __P((struct mount *, char *, caddr_t,
74				struct nameidata *, struct proc *));
75static int	ffs_init __P((struct vfsconf *));
76
77static struct vfsops ufs_vfsops = {
78	ffs_mount,
79	ufs_start,
80	ffs_unmount,
81	ufs_root,
82	ufs_quotactl,
83	ffs_statfs,
84	ffs_sync,
85	ffs_vget,
86	vfs_vrele,
87	ffs_fhtovp,
88	ffs_vptofh,
89	ffs_init,
90};
91
92VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
93
94/*
95 * ffs_mount
96 *
97 * Called when mounting local physical media
98 *
99 * PARAMETERS:
100 *		mountroot
101 *			mp	mount point structure
102 *			path	NULL (flag for root mount!!!)
103 *			data	<unused>
104 *			ndp	<unused>
105 *			p	process (user credentials check [statfs])
106 *
107 *		mount
108 *			mp	mount point structure
109 *			path	path to mount point
110 *			data	pointer to argument struct in user space
111 *			ndp	mount point namei() return (used for
112 *				credentials on reload), reused to look
113 *				up block device.
114 *			p	process (user credentials check)
115 *
116 * RETURNS:	0	Success
117 *		!0	error number (errno.h)
118 *
119 * LOCK STATE:
120 *
121 *		ENTRY
122 *			mount point is locked
123 *		EXIT
124 *			mount point is locked
125 *
126 * NOTES:
127 *		A NULL path can be used for a flag since the mount
128 *		system call will fail with EFAULT in copyinstr in
129 *		namei() if it is a genuine NULL from the user.
130 */
131static int
132ffs_mount( mp, path, data, ndp, p)
133        struct mount		*mp;	/* mount struct pointer*/
134        char			*path;	/* path to mount point*/
135        caddr_t			data;	/* arguments to FS specific mount*/
136        struct nameidata	*ndp;	/* mount point credentials*/
137        struct proc		*p;	/* process requesting mount*/
138{
139	u_int		size;
140	int		err = 0;
141	struct vnode	*devvp;
142
143	struct ufs_args args;
144	struct ufsmount *ump = 0;
145	register struct fs *fs;
146	int error, flags;
147	mode_t accessmode;
148
149	/*
150	 * Use NULL path to flag a root mount
151	 */
152	if( path == NULL) {
153		/*
154		 ***
155		 * Mounting root file system
156		 ***
157		 */
158
159		if ((err = bdevvp(rootdev, &rootvp))) {
160			printf("ffs_mountroot: can't find rootvp");
161			return (err);
162		}
163
164		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR)
165			mp->mnt_flag |= MNT_NOCLUSTERR;
166		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW)
167			mp->mnt_flag |= MNT_NOCLUSTERW;
168		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
169			/* fs specific cleanup (if any)*/
170			goto error_1;
171		}
172
173		goto dostatfs;		/* success*/
174
175	}
176
177	/*
178	 ***
179	 * Mounting non-root file system or updating a file system
180	 ***
181	 */
182
183	/* copy in user arguments*/
184	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
185	if (err)
186		goto error_1;		/* can't get arguments*/
187
188	/*
189	 * If updating, check whether changing from read-only to
190	 * read/write; if there is no device name, that's all we do.
191	 * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags,
192	 * if block device requests.
193	 */
194	if (mp->mnt_flag & MNT_UPDATE) {
195		ump = VFSTOUFS(mp);
196		fs = ump->um_fs;
197		devvp = ump->um_devvp;
198		err = 0;
199		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR)
200			mp->mnt_flag |= MNT_NOCLUSTERR;
201		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW)
202			mp->mnt_flag |= MNT_NOCLUSTERW;
203		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
204			flags = WRITECLOSE;
205			if (mp->mnt_flag & MNT_FORCE)
206				flags |= FORCECLOSE;
207			if (mp->mnt_flag & MNT_SOFTDEP) {
208				err = softdep_flushfiles(mp, flags, p);
209			} else {
210				err = ffs_flushfiles(mp, flags, p);
211			}
212		}
213		if (!err && (mp->mnt_flag & MNT_RELOAD))
214			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
215		if (err) {
216			goto error_1;
217		}
218		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
219			if (!fs->fs_clean) {
220				if (mp->mnt_flag & MNT_FORCE) {
221					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
222				} else {
223					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
224					    fs->fs_fsmnt);
225					err = EPERM;
226					goto error_1;
227				}
228			}
229
230			/*
231			 * If upgrade to read-write by non-root, then verify
232			 * that user has necessary permissions on the device.
233			 */
234			if (p->p_ucred->cr_uid != 0) {
235				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
236				if (error = VOP_ACCESS(devvp, VREAD | VWRITE,
237				    p->p_ucred, p)) {
238					VOP_UNLOCK(devvp, 0, p);
239					return (error);
240				}
241				VOP_UNLOCK(devvp, 0, p);
242			}
243
244			/* check to see if we need to start softdep */
245			if (fs->fs_flags & FS_DOSOFTDEP) {
246				err = softdep_mount(devvp, mp, fs, p->p_ucred);
247				if (err)
248					goto error_1;
249			}
250
251			fs->fs_ronly = 0;
252		}
253		if (fs->fs_ronly == 0) {
254			fs->fs_clean = 0;
255			ffs_sbupdate(ump, MNT_WAIT);
256		}
257		/* if not updating name...*/
258		if (args.fspec == 0) {
259			/*
260			 * Process export requests.  Jumping to "success"
261			 * will return the vfs_export() error code.
262			 */
263			err = vfs_export(mp, &ump->um_export, &args.export);
264			goto success;
265		}
266	}
267
268	/*
269	 * Not an update, or updating the name: look up the name
270	 * and verify that it refers to a sensible block device.
271	 */
272	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
273	err = namei(ndp);
274	if (err) {
275		/* can't get devvp!*/
276		goto error_1;
277	}
278
279	devvp = ndp->ni_vp;
280
281	if (devvp->v_type != VBLK) {
282		err = ENOTBLK;
283		goto error_2;
284	}
285	if (major(devvp->v_rdev) >= nblkdev) {
286		err = ENXIO;
287		goto error_2;
288	}
289
290	/*
291	 * If mount by non-root, then verify that user has necessary
292	 * permissions on the device.
293	 */
294	if (p->p_ucred->cr_uid != 0) {
295		accessmode = VREAD;
296		if ((mp->mnt_flag & MNT_RDONLY) == 0)
297			accessmode |= VWRITE;
298		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
299		if (error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) {
300			vput(devvp);
301			return (error);
302		}
303		VOP_UNLOCK(devvp, 0, p);
304	}
305
306	if (mp->mnt_flag & MNT_UPDATE) {
307		/*
308		 ********************
309		 * UPDATE
310		 ********************
311		 */
312
313		if (devvp != ump->um_devvp)
314			err = EINVAL;	/* needs translation */
315		else
316			vrele(devvp);
317		/*
318		 * Update device name only on success
319		 */
320		if( !err) {
321			/* Save "mounted from" info for mount point (NULL pad)*/
322			copyinstr(	args.fspec,
323					mp->mnt_stat.f_mntfromname,
324					MNAMELEN - 1,
325					&size);
326			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
327		}
328	} else {
329		/*
330		 ********************
331		 * NEW MOUNT
332		 ********************
333		 */
334
335		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR)
336			mp->mnt_flag |= MNT_NOCLUSTERR;
337		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW)
338			mp->mnt_flag |= MNT_NOCLUSTERW;
339
340		/*
341		 * Since this is a new mount, we want the names for
342		 * the device and the mount point copied in.  If an
343		 * error occurs,  the mountpoint is discarded by the
344		 * upper level code.
345		 */
346		/* Save "last mounted on" info for mount point (NULL pad)*/
347		copyinstr(	path,				/* mount point*/
348				mp->mnt_stat.f_mntonname,	/* save area*/
349				MNAMELEN - 1,			/* max size*/
350				&size);				/* real size*/
351		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
352
353		/* Save "mounted from" info for mount point (NULL pad)*/
354		copyinstr(	args.fspec,			/* device name*/
355				mp->mnt_stat.f_mntfromname,	/* save area*/
356				MNAMELEN - 1,			/* max size*/
357				&size);				/* real size*/
358		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
359
360		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
361	}
362	if (err) {
363		goto error_2;
364	}
365
366dostatfs:
367	/*
368	 * Initialize FS stat information in mount struct; uses both
369	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
370	 *
371	 * This code is common to root and non-root mounts
372	 */
373	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
374
375	goto success;
376
377
378error_2:	/* error with devvp held*/
379
380	/* release devvp before failing*/
381	vrele(devvp);
382
383error_1:	/* no state to back out*/
384
385success:
386	return( err);
387}
388
389/*
390 * Reload all incore data for a filesystem (used after running fsck on
391 * the root filesystem and finding things to fix). The filesystem must
392 * be mounted read-only.
393 *
394 * Things to do to update the mount:
395 *	1) invalidate all cached meta-data.
396 *	2) re-read superblock from disk.
397 *	3) re-read summary information from disk.
398 *	4) invalidate all inactive vnodes.
399 *	5) invalidate all cached file data.
400 *	6) re-read inode data for all active vnodes.
401 */
402static int
403ffs_reload(mp, cred, p)
404	register struct mount *mp;
405	struct ucred *cred;
406	struct proc *p;
407{
408	register struct vnode *vp, *nvp, *devvp;
409	struct inode *ip;
410	struct csum *space;
411	struct buf *bp;
412	struct fs *fs, *newfs;
413	struct partinfo dpart;
414	dev_t dev;
415	int i, blks, size, error;
416	int32_t *lp;
417
418	if ((mp->mnt_flag & MNT_RDONLY) == 0)
419		return (EINVAL);
420	/*
421	 * Step 1: invalidate all cached meta-data.
422	 */
423	devvp = VFSTOUFS(mp)->um_devvp;
424	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
425	error = vinvalbuf(devvp, 0, cred, p, 0, 0);
426	VOP_UNLOCK(devvp, 0, p);
427	if (error)
428		panic("ffs_reload: dirty1");
429
430	dev = devvp->v_rdev;
431	/*
432	 * Only VMIO the backing device if the backing device is a real
433	 * block device.  This excludes the original MFS implementation.
434	 * Note that it is optional that the backing device be VMIOed.  This
435	 * increases the opportunity for metadata caching.
436	 */
437	if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) {
438		simple_lock(&devvp->v_interlock);
439		vfs_object_create(devvp, p, p->p_ucred, 0);
440	}
441
442	/*
443	 * Step 2: re-read superblock from disk.
444	 */
445	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
446		size = DEV_BSIZE;
447	else
448		size = dpart.disklab->d_secsize;
449	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
450		return (error);
451	newfs = (struct fs *)bp->b_data;
452	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
453		newfs->fs_bsize < sizeof(struct fs)) {
454			brelse(bp);
455			return (EIO);		/* XXX needs translation */
456	}
457	fs = VFSTOUFS(mp)->um_fs;
458	/*
459	 * Copy pointer fields back into superblock before copying in	XXX
460	 * new superblock. These should really be in the ufsmount.	XXX
461	 * Note that important parameters (eg fs_ncg) are unchanged.
462	 */
463	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
464	newfs->fs_maxcluster = fs->fs_maxcluster;
465	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
466	if (fs->fs_sbsize < SBSIZE)
467		bp->b_flags |= B_INVAL;
468	brelse(bp);
469	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
470	ffs_oldfscompat(fs);
471
472	/*
473	 * Step 3: re-read summary information from disk.
474	 */
475	blks = howmany(fs->fs_cssize, fs->fs_fsize);
476	space = fs->fs_csp[0];
477	for (i = 0; i < blks; i += fs->fs_frag) {
478		size = fs->fs_bsize;
479		if (i + fs->fs_frag > blks)
480			size = (blks - i) * fs->fs_fsize;
481		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
482		    NOCRED, &bp);
483		if (error)
484			return (error);
485		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
486		brelse(bp);
487	}
488	/*
489	 * We no longer know anything about clusters per cylinder group.
490	 */
491	if (fs->fs_contigsumsize > 0) {
492		lp = fs->fs_maxcluster;
493		for (i = 0; i < fs->fs_ncg; i++)
494			*lp++ = fs->fs_contigsumsize;
495	}
496
497loop:
498	simple_lock(&mntvnode_slock);
499	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
500		if (vp->v_mount != mp) {
501			simple_unlock(&mntvnode_slock);
502			goto loop;
503		}
504		nvp = vp->v_mntvnodes.le_next;
505		/*
506		 * Step 4: invalidate all inactive vnodes.
507		 */
508		if (vrecycle(vp, &mntvnode_slock, p))
509			goto loop;
510		/*
511		 * Step 5: invalidate all cached file data.
512		 */
513		simple_lock(&vp->v_interlock);
514		simple_unlock(&mntvnode_slock);
515		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
516			goto loop;
517		}
518		if (vinvalbuf(vp, 0, cred, p, 0, 0))
519			panic("ffs_reload: dirty2");
520		/*
521		 * Step 6: re-read inode data for all active vnodes.
522		 */
523		ip = VTOI(vp);
524		error =
525		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
526		    (int)fs->fs_bsize, NOCRED, &bp);
527		if (error) {
528			vput(vp);
529			return (error);
530		}
531		ip->i_din = *((struct dinode *)bp->b_data +
532		    ino_to_fsbo(fs, ip->i_number));
533		ip->i_effnlink = ip->i_nlink;
534		brelse(bp);
535		vput(vp);
536		simple_lock(&mntvnode_slock);
537	}
538	simple_unlock(&mntvnode_slock);
539	return (0);
540}
541
542/*
543 * Common code for mount and mountroot
544 */
545int
546ffs_mountfs(devvp, mp, p, malloctype)
547	register struct vnode *devvp;
548	struct mount *mp;
549	struct proc *p;
550	struct malloc_type *malloctype;
551{
552	register struct ufsmount *ump;
553	struct buf *bp;
554	register struct fs *fs;
555	struct cg *cgp;
556	dev_t dev;
557	struct partinfo dpart;
558	struct csum cstotal;
559	caddr_t base, space;
560	int error, i, cyl, blks, size, ronly;
561	int32_t *lp;
562	struct ucred *cred;
563	u_int64_t maxfilesize;					/* XXX */
564	u_int strsize;
565	int ncount;
566
567	dev = devvp->v_rdev;
568	cred = p ? p->p_ucred : NOCRED;
569	/*
570	 * Disallow multiple mounts of the same device.
571	 * Disallow mounting of a device that is currently in use
572	 * (except for root, which might share swap device for miniroot).
573	 * Flush out any old buffers remaining from a previous use.
574	 */
575	error = vfs_mountedon(devvp);
576	if (error)
577		return (error);
578	ncount = vcount(devvp);
579
580	if (ncount > 1 && devvp != rootvp)
581		return (EBUSY);
582	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
583	error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
584	VOP_UNLOCK(devvp, 0, p);
585	if (error)
586		return (error);
587
588	/*
589	 * Only VMIO the backing device if the backing device is a real
590	 * block device.  This excludes the original MFS implementation.
591	 * Note that it is optional that the backing device be VMIOed.  This
592	 * increases the opportunity for metadata caching.
593	 */
594	if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) {
595		simple_lock(&devvp->v_interlock);
596		vfs_object_create(devvp, p, p->p_ucred, 0);
597	}
598
599
600	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
601	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
602	if (error)
603		return (error);
604
605	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
606		size = DEV_BSIZE;
607	else
608		size = dpart.disklab->d_secsize;
609
610	bp = NULL;
611	ump = NULL;
612	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
613		goto out;
614	fs = (struct fs *)bp->b_data;
615	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
616	    fs->fs_bsize < sizeof(struct fs)) {
617		error = EINVAL;		/* XXX needs translation */
618		goto out;
619	}
620	fs->fs_fmod = 0;
621	if (!fs->fs_clean) {
622		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
623			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
624		} else {
625			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
626			error = EPERM;
627			goto out;
628		}
629	}
630	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
631	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
632		error = EROFS;          /* needs translation */
633		goto out;
634	}
635	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
636	bzero((caddr_t)ump, sizeof *ump);
637	ump->um_malloctype = malloctype;
638	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
639	    M_WAITOK);
640	ump->um_blkatoff = ffs_blkatoff;
641	ump->um_truncate = ffs_truncate;
642	ump->um_update = ffs_update;
643	ump->um_valloc = ffs_valloc;
644	ump->um_vfree = ffs_vfree;
645	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
646	if (fs->fs_sbsize < SBSIZE)
647		bp->b_flags |= B_INVAL;
648	brelse(bp);
649	bp = NULL;
650	fs = ump->um_fs;
651	fs->fs_ronly = ronly;
652	if (ronly == 0) {
653		fs->fs_fmod = 1;
654		fs->fs_clean = 0;
655	}
656	size = fs->fs_cssize;
657	blks = howmany(size, fs->fs_fsize);
658	if (fs->fs_contigsumsize > 0)
659		size += fs->fs_ncg * sizeof(int32_t);
660	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
661	for (i = 0; i < blks; i += fs->fs_frag) {
662		size = fs->fs_bsize;
663		if (i + fs->fs_frag > blks)
664			size = (blks - i) * fs->fs_fsize;
665		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
666		    cred, &bp)) {
667			free(base, M_UFSMNT);
668			goto out;
669		}
670		bcopy(bp->b_data, space, (u_int)size);
671		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
672		space += size;
673		brelse(bp);
674		bp = NULL;
675	}
676	if (fs->fs_contigsumsize > 0) {
677		fs->fs_maxcluster = lp = (int32_t *)space;
678		for (i = 0; i < fs->fs_ncg; i++)
679			*lp++ = fs->fs_contigsumsize;
680	}
681	mp->mnt_data = (qaddr_t)ump;
682	mp->mnt_stat.f_fsid.val[0] = (long)dev;
683	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
684		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
685	else
686		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
687	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
688	mp->mnt_flag |= MNT_LOCAL;
689	ump->um_mountp = mp;
690	ump->um_dev = dev;
691	ump->um_devvp = devvp;
692	ump->um_nindir = fs->fs_nindir;
693	ump->um_bptrtodb = fs->fs_fsbtodb;
694	ump->um_seqinc = fs->fs_frag;
695	for (i = 0; i < MAXQUOTAS; i++)
696		ump->um_quotas[i] = NULLVP;
697	devvp->v_specmountpoint = mp;
698	ffs_oldfscompat(fs);
699
700	/*
701	 * Set FS local "last mounted on" information (NULL pad)
702	 */
703	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
704			fs->fs_fsmnt,			/* copy area*/
705			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
706			&strsize);			/* real size*/
707	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
708
709	if( mp->mnt_flag & MNT_ROOTFS) {
710		/*
711		 * Root mount; update timestamp in mount structure.
712		 * this will be used by the common root mount code
713		 * to update the system clock.
714		 */
715		mp->mnt_time = fs->fs_time;
716	}
717
718	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
719	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
720	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
721		fs->fs_maxfilesize = maxfilesize;		/* XXX */
722	if (ronly == 0) {
723		if ((fs->fs_flags & FS_DOSOFTDEP) &&
724		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
725			free(base, M_UFSMNT);
726			goto out;
727		}
728		fs->fs_clean = 0;
729		(void) ffs_sbupdate(ump, MNT_WAIT);
730	}
731	return (0);
732out:
733	devvp->v_specmountpoint = NULL;
734	if (bp)
735		brelse(bp);
736	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
737	if (ump) {
738		free(ump->um_fs, M_UFSMNT);
739		free(ump, M_UFSMNT);
740		mp->mnt_data = (qaddr_t)0;
741	}
742	return (error);
743}
744
745/*
746 * Sanity checks for old file systems.
747 *
748 * XXX - goes away some day.
749 */
750static int
751ffs_oldfscompat(fs)
752	struct fs *fs;
753{
754
755	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
756	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
757	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
758		fs->fs_nrpos = 8;				/* XXX */
759	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
760#if 0
761		int i;						/* XXX */
762		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
763								/* XXX */
764		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
765		for (i = 0; i < NIADDR; i++) {			/* XXX */
766			sizepb *= NINDIR(fs);			/* XXX */
767			fs->fs_maxfilesize += sizepb;		/* XXX */
768		}						/* XXX */
769#endif
770		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
771		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
772		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
773	}							/* XXX */
774	return (0);
775}
776
777/*
778 * unmount system call
779 */
780int
781ffs_unmount(mp, mntflags, p)
782	struct mount *mp;
783	int mntflags;
784	struct proc *p;
785{
786	register struct ufsmount *ump;
787	register struct fs *fs;
788	int error, flags;
789
790	flags = 0;
791	if (mntflags & MNT_FORCE) {
792		flags |= FORCECLOSE;
793	}
794	if (mp->mnt_flag & MNT_SOFTDEP) {
795		if ((error = softdep_flushfiles(mp, flags, p)) != 0)
796			return (error);
797	} else {
798		if ((error = ffs_flushfiles(mp, flags, p)) != 0)
799			return (error);
800	}
801	ump = VFSTOUFS(mp);
802	fs = ump->um_fs;
803	if (fs->fs_ronly == 0) {
804		fs->fs_clean = 1;
805		error = ffs_sbupdate(ump, MNT_WAIT);
806		if (error) {
807			fs->fs_clean = 0;
808			return (error);
809		}
810	}
811	ump->um_devvp->v_specmountpoint = NULL;
812
813	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0);
814	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
815		NOCRED, p);
816
817	vrele(ump->um_devvp);
818
819	free(fs->fs_csp[0], M_UFSMNT);
820	free(fs, M_UFSMNT);
821	free(ump, M_UFSMNT);
822	mp->mnt_data = (qaddr_t)0;
823	mp->mnt_flag &= ~MNT_LOCAL;
824	return (error);
825}
826
827/*
828 * Flush out all the files in a filesystem.
829 */
830int
831ffs_flushfiles(mp, flags, p)
832	register struct mount *mp;
833	int flags;
834	struct proc *p;
835{
836	register struct ufsmount *ump;
837	int error;
838
839	ump = VFSTOUFS(mp);
840#ifdef QUOTA
841	if (mp->mnt_flag & MNT_QUOTA) {
842		int i;
843		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
844		if (error)
845			return (error);
846		for (i = 0; i < MAXQUOTAS; i++) {
847			if (ump->um_quotas[i] == NULLVP)
848				continue;
849			quotaoff(p, mp, i);
850		}
851		/*
852		 * Here we fall through to vflush again to ensure
853		 * that we have gotten rid of all the system vnodes.
854		 */
855	}
856#endif
857        /*
858	 * Flush all the files.
859	 */
860	if ((error = vflush(mp, NULL, flags)) != 0)
861		return (error);
862	/*
863	 * Flush filesystem metadata.
864	 */
865	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
866	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
867	VOP_UNLOCK(ump->um_devvp, 0, p);
868	return (error);
869}
870
871/*
872 * Get file system statistics.
873 */
874int
875ffs_statfs(mp, sbp, p)
876	struct mount *mp;
877	register struct statfs *sbp;
878	struct proc *p;
879{
880	register struct ufsmount *ump;
881	register struct fs *fs;
882
883	ump = VFSTOUFS(mp);
884	fs = ump->um_fs;
885	if (fs->fs_magic != FS_MAGIC)
886		panic("ffs_statfs");
887	sbp->f_bsize = fs->fs_fsize;
888	sbp->f_iosize = fs->fs_bsize;
889	sbp->f_blocks = fs->fs_dsize;
890	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
891		fs->fs_cstotal.cs_nffree;
892	sbp->f_bavail = freespace(fs, fs->fs_minfree);
893	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
894	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
895	if (sbp != &mp->mnt_stat) {
896		sbp->f_type = mp->mnt_vfc->vfc_typenum;
897		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
898			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
899		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
900			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
901	}
902	return (0);
903}
904
905/*
906 * Go through the disk queues to initiate sandbagged IO;
907 * go through the inodes to write those that have been modified;
908 * initiate the writing of the super block if it has been modified.
909 *
910 * Note: we are always called with the filesystem marked `MPBUSY'.
911 */
912int
913ffs_sync(mp, waitfor, cred, p)
914	struct mount *mp;
915	int waitfor;
916	struct ucred *cred;
917	struct proc *p;
918{
919	struct vnode *nvp, *vp;
920	struct inode *ip;
921	struct ufsmount *ump = VFSTOUFS(mp);
922	struct fs *fs;
923	struct timeval tv;
924	int error, allerror = 0;
925
926	fs = ump->um_fs;
927	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
928		printf("fs = %s\n", fs->fs_fsmnt);
929		panic("ffs_sync: rofs mod");
930	}
931	/*
932	 * Write back each (modified) inode.
933	 */
934	simple_lock(&mntvnode_slock);
935loop:
936	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
937		/*
938		 * If the vnode that we are about to sync is no longer
939		 * associated with this mount point, start over.
940		 */
941		if (vp->v_mount != mp)
942			goto loop;
943		simple_lock(&vp->v_interlock);
944		nvp = vp->v_mntvnodes.le_next;
945		ip = VTOI(vp);
946		if ((vp->v_type == VNON) || ((ip->i_flag &
947		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
948		    ((vp->v_dirtyblkhd.lh_first == NULL) || (waitfor == MNT_LAZY))) {
949			simple_unlock(&vp->v_interlock);
950			continue;
951		}
952		if (vp->v_type != VCHR) {
953			simple_unlock(&mntvnode_slock);
954			error =
955			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
956			if (error) {
957				simple_lock(&mntvnode_slock);
958				if (error == ENOENT)
959					goto loop;
960				continue;
961			}
962			if (error = VOP_FSYNC(vp, cred, waitfor, p))
963				allerror = error;
964			VOP_UNLOCK(vp, 0, p);
965			vrele(vp);
966			simple_lock(&mntvnode_slock);
967		} else {
968			simple_unlock(&mntvnode_slock);
969			simple_unlock(&vp->v_interlock);
970			getmicrotime(&tv);
971			/* UFS_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
972			UFS_UPDATE(vp, &tv, &tv, 0);
973			simple_lock(&mntvnode_slock);
974		}
975	}
976	simple_unlock(&mntvnode_slock);
977	/*
978	 * Force stale file system control information to be flushed.
979	 */
980	if (waitfor != MNT_LAZY) {
981		if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
982			waitfor = MNT_NOWAIT;
983		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
984		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
985			allerror = error;
986		VOP_UNLOCK(ump->um_devvp, 0, p);
987	}
988#ifdef QUOTA
989	qsync(mp);
990#endif
991	/*
992	 * Write back modified superblock.
993	 */
994	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
995		allerror = error;
996	return (allerror);
997}
998
999/*
1000 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1001 * in from disk.  If it is in core, wait for the lock bit to clear, then
1002 * return the inode locked.  Detection and handling of mount points must be
1003 * done by the calling routine.
1004 */
1005static int ffs_inode_hash_lock;
1006
1007int
1008ffs_vget(mp, ino, vpp)
1009	struct mount *mp;
1010	ino_t ino;
1011	struct vnode **vpp;
1012{
1013	struct fs *fs;
1014	struct inode *ip;
1015	struct ufsmount *ump;
1016	struct buf *bp;
1017	struct vnode *vp;
1018	dev_t dev;
1019	int error;
1020
1021	ump = VFSTOUFS(mp);
1022	dev = ump->um_dev;
1023restart:
1024	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
1025		return (0);
1026	}
1027
1028	/*
1029	 * Lock out the creation of new entries in the FFS hash table in
1030	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
1031	 * may occur!
1032	 */
1033	if (ffs_inode_hash_lock) {
1034		while (ffs_inode_hash_lock) {
1035			ffs_inode_hash_lock = -1;
1036			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
1037		}
1038		goto restart;
1039	}
1040	ffs_inode_hash_lock = 1;
1041
1042	/*
1043	 * If this MALLOC() is performed after the getnewvnode()
1044	 * it might block, leaving a vnode with a NULL v_data to be
1045	 * found by ffs_sync() if a sync happens to fire right then,
1046	 * which will cause a panic because ffs_sync() blindly
1047	 * dereferences vp->v_data (as well it should).
1048	 */
1049	MALLOC(ip, struct inode *, sizeof(struct inode),
1050	    ump->um_malloctype, M_WAITOK);
1051
1052	/* Allocate a new vnode/inode. */
1053	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
1054	if (error) {
1055		if (ffs_inode_hash_lock < 0)
1056			wakeup(&ffs_inode_hash_lock);
1057		ffs_inode_hash_lock = 0;
1058		*vpp = NULL;
1059		FREE(ip, ump->um_malloctype);
1060		return (error);
1061	}
1062	bzero((caddr_t)ip, sizeof(struct inode));
1063	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
1064	vp->v_data = ip;
1065	ip->i_vnode = vp;
1066	ip->i_fs = fs = ump->um_fs;
1067	ip->i_dev = dev;
1068	ip->i_number = ino;
1069#ifdef QUOTA
1070	{
1071		int i;
1072		for (i = 0; i < MAXQUOTAS; i++)
1073			ip->i_dquot[i] = NODQUOT;
1074	}
1075#endif
1076	/*
1077	 * Put it onto its hash chain and lock it so that other requests for
1078	 * this inode will block if they arrive while we are sleeping waiting
1079	 * for old data structures to be purged or for the contents of the
1080	 * disk portion of this inode to be read.
1081	 */
1082	ufs_ihashins(ip);
1083
1084	if (ffs_inode_hash_lock < 0)
1085		wakeup(&ffs_inode_hash_lock);
1086	ffs_inode_hash_lock = 0;
1087
1088	/* Read in the disk contents for the inode, copy into the inode. */
1089	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1090	    (int)fs->fs_bsize, NOCRED, &bp);
1091	if (error) {
1092		/*
1093		 * The inode does not contain anything useful, so it would
1094		 * be misleading to leave it on its hash chain. With mode
1095		 * still zero, it will be unlinked and returned to the free
1096		 * list by vput().
1097		 */
1098		brelse(bp);
1099		vput(vp);
1100		*vpp = NULL;
1101		return (error);
1102	}
1103	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
1104	if (DOINGSOFTDEP(vp))
1105		softdep_load_inodeblock(ip);
1106	else
1107		ip->i_effnlink = ip->i_nlink;
1108	bqrelse(bp);
1109
1110	/*
1111	 * Initialize the vnode from the inode, check for aliases.
1112	 * Note that the underlying vnode may have changed.
1113	 */
1114	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1115	if (error) {
1116		vput(vp);
1117		*vpp = NULL;
1118		return (error);
1119	}
1120	/*
1121	 * Finish inode initialization now that aliasing has been resolved.
1122	 */
1123	ip->i_devvp = ump->um_devvp;
1124	VREF(ip->i_devvp);
1125	/*
1126	 * Set up a generation number for this inode if it does not
1127	 * already have one. This should only happen on old filesystems.
1128	 */
1129	if (ip->i_gen == 0) {
1130		ip->i_gen = random() / 2 + 1;
1131		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1132			ip->i_flag |= IN_MODIFIED;
1133	}
1134	/*
1135	 * Ensure that uid and gid are correct. This is a temporary
1136	 * fix until fsck has been changed to do the update.
1137	 */
1138	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1139		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1140		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1141	}						/* XXX */
1142
1143	*vpp = vp;
1144	return (0);
1145}
1146
1147/*
1148 * File handle to vnode
1149 *
1150 * Have to be really careful about stale file handles:
1151 * - check that the inode number is valid
1152 * - call ffs_vget() to get the locked inode
1153 * - check for an unallocated inode (i_mode == 0)
1154 * - check that the given client host has export rights and return
1155 *   those rights via. exflagsp and credanonp
1156 */
1157int
1158ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1159	register struct mount *mp;
1160	struct fid *fhp;
1161	struct sockaddr *nam;
1162	struct vnode **vpp;
1163	int *exflagsp;
1164	struct ucred **credanonp;
1165{
1166	register struct ufid *ufhp;
1167	struct fs *fs;
1168
1169	ufhp = (struct ufid *)fhp;
1170	fs = VFSTOUFS(mp)->um_fs;
1171	if (ufhp->ufid_ino < ROOTINO ||
1172	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1173		return (ESTALE);
1174	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1175}
1176
1177/*
1178 * Vnode pointer to File handle
1179 */
1180/* ARGSUSED */
1181int
1182ffs_vptofh(vp, fhp)
1183	struct vnode *vp;
1184	struct fid *fhp;
1185{
1186	register struct inode *ip;
1187	register struct ufid *ufhp;
1188
1189	ip = VTOI(vp);
1190	ufhp = (struct ufid *)fhp;
1191	ufhp->ufid_len = sizeof(struct ufid);
1192	ufhp->ufid_ino = ip->i_number;
1193	ufhp->ufid_gen = ip->i_gen;
1194	return (0);
1195}
1196
/*
 * Initialize the filesystem: call softdep_initialize() first, then
 * hand vfsp to ufs_init() and return whatever ufs_init() reports.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int status;

	softdep_initialize();
	status = ufs_init(vfsp);
	return (status);
}
1208
1209/*
1210 * Write a superblock and associated information back to disk.
1211 */
1212static int
1213ffs_sbupdate(mp, waitfor)
1214	struct ufsmount *mp;
1215	int waitfor;
1216{
1217	register struct fs *dfs, *fs = mp->um_fs;
1218	register struct buf *bp;
1219	int blks;
1220	caddr_t space;
1221	int i, size, error, allerror = 0;
1222
1223	/*
1224	 * First write back the summary information.
1225	 */
1226	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1227	space = (caddr_t)fs->fs_csp[0];
1228	for (i = 0; i < blks; i += fs->fs_frag) {
1229		size = fs->fs_bsize;
1230		if (i + fs->fs_frag > blks)
1231			size = (blks - i) * fs->fs_fsize;
1232		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1233		    size, 0, 0);
1234		bcopy(space, bp->b_data, (u_int)size);
1235		space += size;
1236		if (waitfor != MNT_WAIT)
1237			bawrite(bp);
1238		else if (error = bwrite(bp))
1239			allerror = error;
1240	}
1241	/*
1242	 * Now write back the superblock itself. If any errors occurred
1243	 * up to this point, then fail so that the superblock avoids
1244	 * being written out as clean.
1245	 */
1246	if (allerror)
1247		return (allerror);
1248	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1249	fs->fs_fmod = 0;
1250	fs->fs_time = time_second;
1251	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1252	/* Restore compatibility to old file systems.		   XXX */
1253	dfs = (struct fs *)bp->b_data;				/* XXX */
1254	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1255		dfs->fs_nrpos = -1;				/* XXX */
1256	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1257		int32_t *lp, tmp;				/* XXX */
1258								/* XXX */
1259		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1260		tmp = lp[4];					/* XXX */
1261		for (i = 4; i > 0; i--)				/* XXX */
1262			lp[i] = lp[i-1];			/* XXX */
1263		lp[0] = tmp;					/* XXX */
1264	}							/* XXX */
1265	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1266	if (waitfor != MNT_WAIT)
1267		bawrite(bp);
1268	else if (error = bwrite(bp))
1269		allerror = error;
1270	return (allerror);
1271}
1272