ext2_vfsops.c revision 111856
1/*
2 *  modified for EXT2FS support in Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*
8 * Copyright (c) 1989, 1991, 1993, 1994
9 *	The Regents of the University of California.  All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
40 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 111856 2003-03-04 00:04:44Z jeff $
41 */
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/kernel.h>
48#include <sys/vnode.h>
49#include <sys/mount.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/conf.h>
53#include <sys/fcntl.h>
54#include <sys/malloc.h>
55#include <sys/stat.h>
56#include <sys/mutex.h>
57
58#include <gnu/ext2fs/ext2_mount.h>
59#include <gnu/ext2fs/inode.h>
60
61#include <gnu/ext2fs/fs.h>
62#include <gnu/ext2fs/ext2_extern.h>
63#include <gnu/ext2fs/ext2_fs.h>
64#include <gnu/ext2fs/ext2_fs_sb.h>
65
66static int ext2_fhtovp(struct mount *, struct fid *, struct vnode **);
67static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
68static int ext2_init(struct vfsconf *);
69static int ext2_mount(struct mount *, struct nameidata *, struct thread *);
70static int ext2_mountfs(struct vnode *, struct mount *, struct thread *);
71static int ext2_reload(struct mount *mountp, struct ucred *cred,
72			struct thread *td);
73static int ext2_root(struct mount *, struct vnode **vpp);
74static int ext2_sbupdate(struct ext2mount *, int);
75static int ext2_statfs(struct mount *, struct statfs *, struct thread *);
76static int ext2_sync(struct mount *, int, struct ucred *, struct thread *);
77static int ext2_uninit(struct vfsconf *);
78static int ext2_unmount(struct mount *, int, struct thread *);
79static int ext2_vget(struct mount *, ino_t, int, struct vnode **);
80static int ext2_vptofh(struct vnode *, struct fid *);
81
82MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part");
83static MALLOC_DEFINE(M_EXT2MNT, "EXT2 mount", "EXT2 mount structure");
84
/*
 * VFS operations vector for ext2fs.
 *
 * The initializer is positional: each entry fills the next member of
 * struct vfsops in declaration order (the structure is declared outside
 * this file -- TODO confirm slot meanings against <sys/mount.h>).  The
 * first slot is left NULL and ext2_mount is supplied in the last slot.
 * Entries named vfs_std* are not defined in this file; presumably they
 * are the stock VFS implementations -- verify.
 */
static struct vfsops ext2fs_vfsops = {
	NULL,			/* first slot intentionally left empty */
	vfs_stdstart,
	ext2_unmount,
	ext2_root,		/* root inode via vget */
	vfs_stdquotactl,
	ext2_statfs,
	ext2_sync,
	ext2_vget,
	ext2_fhtovp,
	vfs_stdcheckexp,
	ext2_vptofh,
	ext2_init,
	ext2_uninit,
	vfs_stdextattrctl,
	ext2_mount,		/* mount entry point, last slot */
};
102
103VFS_SET(ext2fs_vfsops, ext2fs, 0);
104#define bsd_malloc malloc
105#define bsd_free free
106
107static int ext2fs_inode_hash_lock;
108
109static int	ext2_check_sb_compat(struct ext2_super_block *es, dev_t dev,
110		    int ronly);
111static int	compute_sb_data(struct vnode * devvp,
112		    struct ext2_super_block * es, struct ext2_sb_info * fs);
113
114#ifdef notyet
115static int ext2_mountroot(void);
116
117/*
118 * Called by main() when ext2fs is going to be mounted as root.
119 *
120 * Name is updated by mount(8) after booting.
121 */
122#define ROOTNAME	"root_device"
123
124static int
125ext2_mountroot()
126{
127	struct ext2_sb_info *fs;
128	struct mount *mp;
129	struct thread *td = curthread;
130	struct ext2mount *ump;
131	u_int size;
132	int error;
133
134	if ((error = bdevvp(rootdev, &rootvp))) {
135		printf("ext2_mountroot: can't find rootvp\n");
136		return (error);
137	}
138	mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
139	bzero((char *)mp, (u_long)sizeof(struct mount));
140	TAILQ_INIT(&mp->mnt_nvnodelist);
141	TAILQ_INIT(&mp->mnt_reservedvnlist);
142	mp->mnt_op = &ext2fs_vfsops;
143	mp->mnt_flag = MNT_RDONLY;
144	if (error = ext2_mountfs(rootvp, mp, td)) {
145		bsd_free(mp, M_MOUNT);
146		return (error);
147	}
148	if (error = vfs_lock(mp)) {
149		(void)ext2_unmount(mp, 0, td);
150		bsd_free(mp, M_MOUNT);
151		return (error);
152	}
153	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
154	mp->mnt_flag |= MNT_ROOTFS;
155	mp->mnt_vnodecovered = NULLVP;
156	ump = VFSTOEXT2(mp);
157	fs = ump->um_e2fs;
158	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
159	fs->fs_fsmnt[0] = '/';
160	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
161	    MNAMELEN);
162	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
163	    &size);
164	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
165	(void)ext2_statfs(mp, &mp->mnt_stat, td);
166	vfs_unlock(mp);
167	inittodr(fs->s_es->s_wtime);		/* this helps to set the time */
168	return (0);
169}
170#endif
171
172/*
173 * VFS Operations.
174 *
175 * mount system call
176 */
177static int
178ext2_mount(mp, ndp, td)
179	struct mount *mp;
180	struct nameidata *ndp;
181	struct thread *td;
182{
183	struct export_args *export;
184	struct vfsoptlist *opts;
185	struct vnode *devvp;
186	struct ext2mount *ump = 0;
187	struct ext2_sb_info *fs;
188	char *path, *fspec;
189	size_t size;
190	int error, flags, len;
191	mode_t accessmode;
192
193	opts = mp->mnt_optnew;
194
195	vfs_getopt(opts, "fspath", (void **)&path, NULL);
196	/* Double-check the length of path.. */
197	if (strlen(path) >= MAXMNTLEN - 1)
198		return (ENAMETOOLONG);
199
200	fspec = NULL;
201	error = vfs_getopt(opts, "from", (void **)&fspec, &len);
202	if (!error && fspec[len - 1] != '\0')
203		return (EINVAL);
204
205	/*
206	 * If updating, check whether changing from read-only to
207	 * read/write; if there is no device name, that's all we do.
208	 */
209	if (mp->mnt_flag & MNT_UPDATE) {
210		ump = VFSTOEXT2(mp);
211		fs = ump->um_e2fs;
212		error = 0;
213		if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) {
214			flags = WRITECLOSE;
215			if (mp->mnt_flag & MNT_FORCE)
216				flags |= FORCECLOSE;
217			if (vfs_busy(mp, LK_NOWAIT, 0, td))
218				return (EBUSY);
219			error = ext2_flushfiles(mp, flags, td);
220			vfs_unbusy(mp, td);
221			if (!error && fs->s_wasvalid) {
222				fs->s_es->s_state |= EXT2_VALID_FS;
223				ext2_sbupdate(ump, MNT_WAIT);
224			}
225			fs->s_rd_only = 1;
226		}
227		if (!error && (mp->mnt_flag & MNT_RELOAD))
228			error = ext2_reload(mp, ndp->ni_cnd.cn_cred, td);
229		if (error)
230			return (error);
231		devvp = ump->um_devvp;
232		if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev,
233		    (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0)
234			return (EPERM);
235		if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
236			/*
237			 * If upgrade to read-write by non-root, then verify
238			 * that user has necessary permissions on the device.
239			 */
240			if (suser(td)) {
241				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
242				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
243				    td->td_ucred, td)) != 0) {
244					VOP_UNLOCK(devvp, 0, td);
245					return (error);
246				}
247				VOP_UNLOCK(devvp, 0, td);
248			}
249
250			if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 ||
251			    (fs->s_es->s_state & EXT2_ERROR_FS)) {
252				if (mp->mnt_flag & MNT_FORCE) {
253					printf(
254"WARNING: %s was not properly dismounted\n",
255					    fs->fs_fsmnt);
256				} else {
257					printf(
258"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
259					    fs->fs_fsmnt);
260					return (EPERM);
261				}
262			}
263			fs->s_es->s_state &= ~EXT2_VALID_FS;
264			ext2_sbupdate(ump, MNT_WAIT);
265			fs->s_rd_only = 0;
266		}
267		if (fspec == NULL) {
268			error = vfs_getopt(opts, "export", (void **)&export,
269			    &len);
270			if (error || len != sizeof(struct export_args))
271				return (EINVAL);
272				/* Process export requests. */
273			return (vfs_export(mp, export));
274		}
275	}
276	/*
277	 * Not an update, or updating the name: look up the name
278	 * and verify that it refers to a sensible block device.
279	 */
280	if (fspec == NULL)
281		return (EINVAL);
282	NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
283	if ((error = namei(ndp)) != 0)
284		return (error);
285	NDFREE(ndp, NDF_ONLY_PNBUF);
286	devvp = ndp->ni_vp;
287
288	if (!vn_isdisk(devvp, &error)) {
289		vrele(devvp);
290		return (error);
291	}
292
293	/*
294	 * If mount by non-root, then verify that user has necessary
295	 * permissions on the device.
296	 */
297	if (suser(td)) {
298		accessmode = VREAD;
299		if ((mp->mnt_flag & MNT_RDONLY) == 0)
300			accessmode |= VWRITE;
301		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
302		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) {
303			vput(devvp);
304			return (error);
305		}
306		VOP_UNLOCK(devvp, 0, td);
307	}
308
309	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
310		error = ext2_mountfs(devvp, mp, td);
311	} else {
312		if (devvp != ump->um_devvp)
313			error = EINVAL;	/* needs translation */
314		else
315			vrele(devvp);
316	}
317	if (error) {
318		vrele(devvp);
319		return (error);
320	}
321	ump = VFSTOEXT2(mp);
322	fs = ump->um_e2fs;
323	/*
324	 * Note that this strncpy() is ok because of a check at the start
325	 * of ext2_mount().
326	 */
327	strncpy(fs->fs_fsmnt, path, MAXMNTLEN);
328	fs->fs_fsmnt[MAXMNTLEN - 1] = '\0';
329	(void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
330	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
331	(void)ext2_statfs(mp, &mp->mnt_stat, td);
332	return (0);
333}
334
335/*
336 * checks that the data in the descriptor blocks make sense
337 * this is taken from ext2/super.c
338 */
339static int ext2_check_descriptors (struct ext2_sb_info * sb)
340{
341        int i;
342        int desc_block = 0;
343        unsigned long block = sb->s_es->s_first_data_block;
344        struct ext2_group_desc * gdp = NULL;
345
346        /* ext2_debug ("Checking group descriptors"); */
347
348        for (i = 0; i < sb->s_groups_count; i++)
349        {
350		/* examine next descriptor block */
351                if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
352                        gdp = (struct ext2_group_desc *)
353				sb->s_group_desc[desc_block++]->b_data;
354                if (gdp->bg_block_bitmap < block ||
355                    gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
356                {
357                        printf ("ext2_check_descriptors: "
358                                    "Block bitmap for group %d"
359                                    " not in group (block %lu)!\n",
360                                    i, (unsigned long) gdp->bg_block_bitmap);
361                        return 0;
362                }
363                if (gdp->bg_inode_bitmap < block ||
364                    gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
365                {
366                        printf ("ext2_check_descriptors: "
367                                    "Inode bitmap for group %d"
368                                    " not in group (block %lu)!\n",
369                                    i, (unsigned long) gdp->bg_inode_bitmap);
370                        return 0;
371                }
372                if (gdp->bg_inode_table < block ||
373                    gdp->bg_inode_table + sb->s_itb_per_group >=
374                    block + EXT2_BLOCKS_PER_GROUP(sb))
375                {
376                        printf ("ext2_check_descriptors: "
377                                    "Inode table for group %d"
378                                    " not in group (block %lu)!\n",
379                                    i, (unsigned long) gdp->bg_inode_table);
380                        return 0;
381                }
382                block += EXT2_BLOCKS_PER_GROUP(sb);
383                gdp++;
384        }
385        return 1;
386}
387
388static int
389ext2_check_sb_compat(es, dev, ronly)
390	struct ext2_super_block *es;
391	dev_t dev;
392	int ronly;
393{
394
395	if (es->s_magic != EXT2_SUPER_MAGIC) {
396		printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
397		    devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC);
398		return (1);
399	}
400	if (es->s_rev_level > EXT2_GOOD_OLD_REV) {
401		if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) {
402			printf(
403"WARNING: mount of %s denied due to unsupported optional features\n",
404			    devtoname(dev));
405			return (1);
406		}
407		if (!ronly &&
408		    (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) {
409			printf(
410"WARNING: R/W mount of %s denied due to unsupported optional features\n",
411			    devtoname(dev));
412			return (1);
413		}
414	}
415	return (0);
416}
417
418/*
419 * this computes the fields of the  ext2_sb_info structure from the
420 * data in the ext2_super_block structure read in
421 */
422static int compute_sb_data(devvp, es, fs)
423	struct vnode * devvp;
424	struct ext2_super_block * es;
425	struct ext2_sb_info * fs;
426{
427    int db_count, error;
428    int i, j;
429    int logic_sb_block = 1;	/* XXX for now */
430
431#if 1
432#define V(v)
433#else
434#define V(v)  printf(#v"= %d\n", fs->v);
435#endif
436
437    fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size;
438    V(s_blocksize)
439    fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size;
440    V(s_bshift)
441    fs->s_fsbtodb = es->s_log_block_size + 1;
442    V(s_fsbtodb)
443    fs->s_qbmask = fs->s_blocksize - 1;
444    V(s_bmask)
445    fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es);
446    V(s_blocksize_bits)
447    fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size;
448    V(s_frag_size)
449    if (fs->s_frag_size)
450	fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size;
451    V(s_frags_per_block)
452    fs->s_blocks_per_group = es->s_blocks_per_group;
453    V(s_blocks_per_group)
454    fs->s_frags_per_group = es->s_frags_per_group;
455    V(s_frags_per_group)
456    fs->s_inodes_per_group = es->s_inodes_per_group;
457    V(s_inodes_per_group)
458    fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE;
459    V(s_inodes_per_block)
460    fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block;
461    V(s_itb_per_group)
462    fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc);
463    V(s_desc_per_block)
464    /* s_resuid / s_resgid ? */
465    fs->s_groups_count = (es->s_blocks_count -
466			  es->s_first_data_block +
467			  EXT2_BLOCKS_PER_GROUP(fs) - 1) /
468			 EXT2_BLOCKS_PER_GROUP(fs);
469    V(s_groups_count)
470    db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) /
471	EXT2_DESC_PER_BLOCK(fs);
472    fs->s_db_per_group = db_count;
473    V(s_db_per_group)
474
475    fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *),
476		M_EXT2MNT, M_WAITOK);
477
478    /* adjust logic_sb_block */
479    if(fs->s_blocksize > SBSIZE)
480	/* Godmar thinks: if the blocksize is greater than 1024, then
481	   the superblock is logically part of block zero.
482	 */
483        logic_sb_block = 0;
484
485    for (i = 0; i < db_count; i++) {
486	error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1),
487		fs->s_blocksize, NOCRED, &fs->s_group_desc[i]);
488	if(error) {
489	    for (j = 0; j < i; j++)
490		brelse(fs->s_group_desc[j]);
491	    bsd_free(fs->s_group_desc, M_EXT2MNT);
492	    printf("EXT2-fs: unable to read group descriptors (%d)\n", error);
493	    return EIO;
494	}
495	/* Set the B_LOCKED flag on the buffer, then brelse() it */
496	LCK_BUF(fs->s_group_desc[i])
497    }
498    if(!ext2_check_descriptors(fs)) {
499	    for (j = 0; j < db_count; j++)
500		    ULCK_BUF(fs->s_group_desc[j])
501	    bsd_free(fs->s_group_desc, M_EXT2MNT);
502	    printf("EXT2-fs: (ext2_check_descriptors failure) "
503		   "unable to read group descriptors\n");
504	    return EIO;
505    }
506
507    for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
508	    fs->s_inode_bitmap_number[i] = 0;
509	    fs->s_inode_bitmap[i] = NULL;
510	    fs->s_block_bitmap_number[i] = 0;
511	    fs->s_block_bitmap[i] = NULL;
512    }
513    fs->s_loaded_inode_bitmaps = 0;
514    fs->s_loaded_block_bitmaps = 0;
515    return 0;
516}
517
518/*
519 * Reload all incore data for a filesystem (used after running fsck on
520 * the root filesystem and finding things to fix). The filesystem must
521 * be mounted read-only.
522 *
523 * Things to do to update the mount:
524 *	1) invalidate all cached meta-data.
525 *	2) re-read superblock from disk.
526 *	3) re-read summary information from disk.
527 *	4) invalidate all inactive vnodes.
528 *	5) invalidate all cached file data.
529 *	6) re-read inode data for all active vnodes.
530 */
531static int
532ext2_reload(mountp, cred, td)
533	struct mount *mountp;
534	struct ucred *cred;
535	struct thread *td;
536{
537	struct vnode *vp, *nvp, *devvp;
538	struct inode *ip;
539	struct buf *bp;
540	struct ext2_super_block * es;
541	struct ext2_sb_info *fs;
542	int error;
543
544	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
545		return (EINVAL);
546	/*
547	 * Step 1: invalidate all cached meta-data.
548	 */
549	devvp = VFSTOEXT2(mountp)->um_devvp;
550	if (vinvalbuf(devvp, 0, cred, td, 0, 0))
551		panic("ext2_reload: dirty1");
552	/*
553	 * Step 2: re-read superblock from disk.
554	 * constants have been adjusted for ext2
555	 */
556	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
557		return (error);
558	es = (struct ext2_super_block *)bp->b_data;
559	if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
560		brelse(bp);
561		return (EIO);		/* XXX needs translation */
562	}
563	fs = VFSTOEXT2(mountp)->um_e2fs;
564	bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block));
565
566	if((error = compute_sb_data(devvp, es, fs)) != 0) {
567		brelse(bp);
568		return error;
569	}
570#ifdef UNKLAR
571	if (fs->fs_sbsize < SBSIZE)
572		bp->b_flags |= B_INVAL;
573#endif
574	brelse(bp);
575
576loop:
577	mtx_lock(&mntvnode_mtx);
578	for (vp = TAILQ_FIRST(&mountp->mnt_nvnodelist); vp != NULL; vp = nvp) {
579		if (vp->v_mount != mountp) {
580			mtx_unlock(&mntvnode_mtx);
581			goto loop;
582		}
583		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
584		mtx_unlock(&mntvnode_mtx);
585		/*
586		 * Step 4: invalidate all inactive vnodes.
587		 */
588  		if (vrecycle(vp, NULL, td))
589  			goto loop;
590		/*
591		 * Step 5: invalidate all cached file data.
592		 */
593		mtx_lock(&vp->v_interlock);
594		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
595			goto loop;
596		}
597		if (vinvalbuf(vp, 0, cred, td, 0, 0))
598			panic("ext2_reload: dirty2");
599		/*
600		 * Step 6: re-read inode data for all active vnodes.
601		 */
602		ip = VTOI(vp);
603		error =
604		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
605		    (int)fs->s_blocksize, NOCRED, &bp);
606		if (error) {
607			vput(vp);
608			return (error);
609		}
610		ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data +
611		    EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip);
612		brelse(bp);
613		vput(vp);
614		mtx_lock(&mntvnode_mtx);
615	}
616	mtx_unlock(&mntvnode_mtx);
617	return (0);
618}
619
620/*
621 * Common code for mount and mountroot
622 */
623static int
624ext2_mountfs(devvp, mp, td)
625	struct vnode *devvp;
626	struct mount *mp;
627	struct thread *td;
628{
629	struct ext2mount *ump;
630	struct buf *bp;
631	struct ext2_sb_info *fs;
632	struct ext2_super_block * es;
633	dev_t dev = devvp->v_rdev;
634	int error;
635	int ronly;
636
637	/*
638	 * Disallow multiple mounts of the same device.
639	 * Disallow mounting of a device that is currently in use
640	 * (except for root, which might share swap device for miniroot).
641	 * Flush out any old buffers remaining from a previous use.
642	 */
643	if ((error = vfs_mountedon(devvp)) != 0)
644		return (error);
645	if (vcount(devvp) > 1 && devvp != rootvp)
646		return (EBUSY);
647	if ((error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0)) != 0)
648		return (error);
649#ifdef READONLY
650/* turn on this to force it to be read-only */
651	mp->mnt_flag |= MNT_RDONLY;
652#endif
653
654	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
655	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
656	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
657	VOP_UNLOCK(devvp, 0, td);
658	if (error)
659		return (error);
660	if (devvp->v_rdev->si_iosize_max != 0)
661		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
662	if (mp->mnt_iosize_max > MAXPHYS)
663		mp->mnt_iosize_max = MAXPHYS;
664
665	bp = NULL;
666	ump = NULL;
667	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
668		goto out;
669	es = (struct ext2_super_block *)bp->b_data;
670	if (ext2_check_sb_compat(es, dev, ronly) != 0) {
671		error = EINVAL;		/* XXX needs translation */
672		goto out;
673	}
674	if ((es->s_state & EXT2_VALID_FS) == 0 ||
675	    (es->s_state & EXT2_ERROR_FS)) {
676		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
677			printf(
678"WARNING: Filesystem was not properly dismounted\n");
679		} else {
680			printf(
681"WARNING: R/W mount denied.  Filesystem is not clean - run fsck\n");
682			error = EPERM;
683			goto out;
684		}
685	}
686	ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK);
687	bzero((caddr_t)ump, sizeof *ump);
688	/* I don't know whether this is the right strategy. Note that
689	   we dynamically allocate both an ext2_sb_info and an ext2_super_block
690	   while Linux keeps the super block in a locked buffer
691	 */
692	ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info),
693		M_EXT2MNT, M_WAITOK);
694	ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block),
695		M_EXT2MNT, M_WAITOK);
696	bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block));
697	if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs)))
698		goto out;
699	/*
700	 * We don't free the group descriptors allocated by compute_sb_data()
701	 * until ext2_unmount().  This is OK since the mount will succeed.
702	 */
703	brelse(bp);
704	bp = NULL;
705	fs = ump->um_e2fs;
706	fs->s_rd_only = ronly;	/* ronly is set according to mnt_flags */
707	/* if the fs is not mounted read-only, make sure the super block is
708	   always written back on a sync()
709	 */
710	fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0;
711	if (ronly == 0) {
712		fs->s_dirt = 1;		/* mark it modified */
713		fs->s_es->s_state &= ~EXT2_VALID_FS;	/* set fs invalid */
714	}
715	mp->mnt_data = (qaddr_t)ump;
716	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
717	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
718	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
719	mp->mnt_flag |= MNT_LOCAL;
720	ump->um_mountp = mp;
721	ump->um_dev = dev;
722	ump->um_devvp = devvp;
723	/* setting those two parameters allowed us to use
724	   ufs_bmap w/o changse !
725	*/
726	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
727	ump->um_bptrtodb = fs->s_es->s_log_block_size + 1;
728	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
729	devvp->v_rdev->si_mountpoint = mp;
730	if (ronly == 0)
731		ext2_sbupdate(ump, MNT_WAIT);
732	return (0);
733out:
734	if (bp)
735		brelse(bp);
736	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, td);
737	if (ump) {
738		bsd_free(ump->um_e2fs->s_es, M_EXT2MNT);
739		bsd_free(ump->um_e2fs, M_EXT2MNT);
740		bsd_free(ump, M_EXT2MNT);
741		mp->mnt_data = (qaddr_t)0;
742	}
743	return (error);
744}
745
746/*
747 * unmount system call
748 */
749static int
750ext2_unmount(mp, mntflags, td)
751	struct mount *mp;
752	int mntflags;
753	struct thread *td;
754{
755	struct ext2mount *ump;
756	struct ext2_sb_info *fs;
757	int error, flags, ronly, i;
758
759	flags = 0;
760	if (mntflags & MNT_FORCE) {
761		if (mp->mnt_flag & MNT_ROOTFS)
762			return (EINVAL);
763		flags |= FORCECLOSE;
764	}
765	if ((error = ext2_flushfiles(mp, flags, td)) != 0)
766		return (error);
767	ump = VFSTOEXT2(mp);
768	fs = ump->um_e2fs;
769	ronly = fs->s_rd_only;
770	if (ronly == 0) {
771		if (fs->s_wasvalid)
772			fs->s_es->s_state |= EXT2_VALID_FS;
773		ext2_sbupdate(ump, MNT_WAIT);
774	}
775
776	/* release buffers containing group descriptors */
777	for(i = 0; i < fs->s_db_per_group; i++)
778		ULCK_BUF(fs->s_group_desc[i])
779	bsd_free(fs->s_group_desc, M_EXT2MNT);
780
781	/* release cached inode/block bitmaps */
782        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
783                if (fs->s_inode_bitmap[i])
784			ULCK_BUF(fs->s_inode_bitmap[i])
785
786        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
787                if (fs->s_block_bitmap[i])
788			ULCK_BUF(fs->s_block_bitmap[i])
789
790	ump->um_devvp->v_rdev->si_mountpoint = NULL;
791	error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE,
792		NOCRED, td);
793	vrele(ump->um_devvp);
794	bsd_free(fs->s_es, M_EXT2MNT);
795	bsd_free(fs, M_EXT2MNT);
796	bsd_free(ump, M_EXT2MNT);
797	mp->mnt_data = (qaddr_t)0;
798	mp->mnt_flag &= ~MNT_LOCAL;
799	return (error);
800}
801
/*
 * Flush out all the files in a filesystem.
 *
 * Thin wrapper around vflush(); `flags' is passed through unchanged and
 * `td' is not used.  Returns the vflush() result.
 */
static int
ext2_flushfiles(struct mount *mp, int flags, struct thread *td)
{

	return (vflush(mp, 0, flags));
}
816
817/*
818 * Get file system statistics.
819 * taken from ext2/super.c ext2_statfs
820 */
821static int
822ext2_statfs(mp, sbp, td)
823	struct mount *mp;
824	struct statfs *sbp;
825	struct thread *td;
826{
827        unsigned long overhead;
828	struct ext2mount *ump;
829	struct ext2_sb_info *fs;
830	struct ext2_super_block *es;
831	int i, nsb;
832
833	ump = VFSTOEXT2(mp);
834	fs = ump->um_e2fs;
835	es = fs->s_es;
836
837	if (es->s_magic != EXT2_SUPER_MAGIC)
838		panic("ext2_statfs - magic number spoiled");
839
840	/*
841	 * Compute the overhead (FS structures)
842	 */
843	if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) {
844		nsb = 0;
845		for (i = 0 ; i < fs->s_groups_count; i++)
846			if (ext2_group_sparse(i))
847				nsb++;
848	} else
849		nsb = fs->s_groups_count;
850	overhead = es->s_first_data_block +
851	    /* Superblocks and block group descriptors: */
852	    nsb * (1 + fs->s_db_per_group) +
853	    /* Inode bitmap, block bitmap, and inode table: */
854	    fs->s_groups_count * (1 + 1 + fs->s_itb_per_group);
855
856	sbp->f_bsize = EXT2_FRAG_SIZE(fs);
857	sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
858	sbp->f_blocks = es->s_blocks_count - overhead;
859	sbp->f_bfree = es->s_free_blocks_count;
860	sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count;
861	sbp->f_files = es->s_inodes_count;
862	sbp->f_ffree = es->s_free_inodes_count;
863	if (sbp != &mp->mnt_stat) {
864		sbp->f_type = mp->mnt_vfc->vfc_typenum;
865		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
866			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
867		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
868			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
869	}
870	return (0);
871}
872
873/*
874 * Go through the disk queues to initiate sandbagged IO;
875 * go through the inodes to write those that have been modified;
876 * initiate the writing of the super block if it has been modified.
877 *
878 * Note: we are always called with the filesystem marked `MPBUSY'.
879 */
880static int
881ext2_sync(mp, waitfor, cred, td)
882	struct mount *mp;
883	int waitfor;
884	struct ucred *cred;
885	struct thread *td;
886{
887	struct vnode *nvp, *vp;
888	struct inode *ip;
889	struct ext2mount *ump = VFSTOEXT2(mp);
890	struct ext2_sb_info *fs;
891	int error, allerror = 0;
892
893	fs = ump->um_e2fs;
894	if (fs->s_dirt != 0 && fs->s_rd_only != 0) {		/* XXX */
895		printf("fs = %s\n", fs->fs_fsmnt);
896		panic("ext2_sync: rofs mod");
897	}
898	/*
899	 * Write back each (modified) inode.
900	 */
901	mtx_lock(&mntvnode_mtx);
902loop:
903	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
904		/*
905		 * If the vnode that we are about to sync is no longer
906		 * associated with this mount point, start over.
907		 */
908		if (vp->v_mount != mp)
909			goto loop;
910		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
911		mtx_unlock(&mntvnode_mtx);
912		VI_LOCK(vp);
913		ip = VTOI(vp);
914		if (vp->v_type == VNON ||
915		    ((ip->i_flag &
916		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
917		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) {
918			VI_UNLOCK(vp);
919			mtx_lock(&mntvnode_mtx);
920			continue;
921		}
922		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
923		if (error) {
924			mtx_lock(&mntvnode_mtx);
925			if (error == ENOENT)
926				goto loop;
927			continue;
928		}
929		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
930			allerror = error;
931		VOP_UNLOCK(vp, 0, td);
932		vrele(vp);
933		mtx_lock(&mntvnode_mtx);
934	}
935	mtx_unlock(&mntvnode_mtx);
936	/*
937	 * Force stale file system control information to be flushed.
938	 */
939	if (waitfor != MNT_LAZY) {
940		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
941		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0)
942			allerror = error;
943		VOP_UNLOCK(ump->um_devvp, 0, td);
944	}
945	/*
946	 * Write back modified superblock.
947	 */
948	if (fs->s_dirt != 0) {
949		fs->s_dirt = 0;
950		fs->s_es->s_wtime = time_second;
951		if ((error = ext2_sbupdate(ump, waitfor)) != 0)
952			allerror = error;
953	}
954	return (allerror);
955}
956
957/*
958 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
959 * in from disk.  If it is in core, wait for the lock bit to clear, then
960 * return the inode locked.  Detection and handling of mount points must be
961 * done by the calling routine.
962 */
963static int
964ext2_vget(mp, ino, flags, vpp)
965	struct mount *mp;
966	ino_t ino;
967	int flags;
968	struct vnode **vpp;
969{
970	struct ext2_sb_info *fs;
971	struct inode *ip;
972	struct ext2mount *ump;
973	struct buf *bp;
974	struct vnode *vp;
975	dev_t dev;
976	int i, error;
977	int used_blocks;
978
979	ump = VFSTOEXT2(mp);
980	dev = ump->um_dev;
981restart:
982	if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0)
983		return (error);
984	if (*vpp != NULL)
985		return (0);
986
987	/*
988	 * Lock out the creation of new entries in the FFS hash table in
989	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
990	 * may occur!
991	 */
992	if (ext2fs_inode_hash_lock) {
993		while (ext2fs_inode_hash_lock) {
994			ext2fs_inode_hash_lock = -1;
995			tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0);
996		}
997		goto restart;
998	}
999	ext2fs_inode_hash_lock = 1;
1000
1001	/*
1002	 * If this MALLOC() is performed after the getnewvnode()
1003	 * it might block, leaving a vnode with a NULL v_data to be
1004	 * found by ext2_sync() if a sync happens to fire right then,
1005	 * which will cause a panic because ext2_sync() blindly
1006	 * dereferences vp->v_data (as well it should).
1007	 */
1008	MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK);
1009
1010	/* Allocate a new vnode/inode. */
1011	if ((error = getnewvnode("ext2fs", mp, ext2_vnodeop_p, &vp)) != 0) {
1012		if (ext2fs_inode_hash_lock < 0)
1013			wakeup(&ext2fs_inode_hash_lock);
1014		ext2fs_inode_hash_lock = 0;
1015		*vpp = NULL;
1016		FREE(ip, M_EXT2NODE);
1017		return (error);
1018	}
1019	bzero((caddr_t)ip, sizeof(struct inode));
1020	vp->v_data = ip;
1021	ip->i_vnode = vp;
1022	ip->i_e2fs = fs = ump->um_e2fs;
1023	ip->i_dev = dev;
1024	ip->i_number = ino;
1025	/*
1026	 * Put it onto its hash chain and lock it so that other requests for
1027	 * this inode will block if they arrive while we are sleeping waiting
1028	 * for old data structures to be purged or for the contents of the
1029	 * disk portion of this inode to be read.
1030	 */
1031	ext2_ihashins(ip);
1032
1033	if (ext2fs_inode_hash_lock < 0)
1034		wakeup(&ext2fs_inode_hash_lock);
1035	ext2fs_inode_hash_lock = 0;
1036
1037	/* Read in the disk contents for the inode, copy into the inode. */
1038#if 0
1039printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino)));
1040#endif
1041	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1042	    (int)fs->s_blocksize, NOCRED, &bp)) != 0) {
1043		/*
1044		 * The inode does not contain anything useful, so it would
1045		 * be misleading to leave it on its hash chain. With mode
1046		 * still zero, it will be unlinked and returned to the free
1047		 * list by vput().
1048		 */
1049		vput(vp);
1050		brelse(bp);
1051		*vpp = NULL;
1052		return (error);
1053	}
1054	/* convert ext2 inode to dinode */
1055	ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE *
1056			ino_to_fsbo(fs, ino)), ip);
1057	ip->i_block_group = ino_to_cg(fs, ino);
1058	ip->i_next_alloc_block = 0;
1059	ip->i_next_alloc_goal = 0;
1060	ip->i_prealloc_count = 0;
1061	ip->i_prealloc_block = 0;
1062        /* now we want to make sure that block pointers for unused
1063           blocks are zeroed out - ext2_balloc depends on this
1064	   although for regular files and directories only
1065	*/
1066	if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
1067		used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize;
1068		for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1069			ip->i_db[i] = 0;
1070	}
1071/*
1072	ext2_print_inode(ip);
1073*/
1074	brelse(bp);
1075
1076	/*
1077	 * Initialize the vnode from the inode, check for aliases.
1078	 * Note that the underlying vnode may have changed.
1079	 */
1080	if ((error = ext2_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) {
1081		vput(vp);
1082		*vpp = NULL;
1083		return (error);
1084	}
1085	/*
1086	 * Finish inode initialization now that aliasing has been resolved.
1087	 */
1088	ip->i_devvp = ump->um_devvp;
1089	VREF(ip->i_devvp);
1090	/*
1091	 * Set up a generation number for this inode if it does not
1092	 * already have one. This should only happen on old filesystems.
1093	 */
1094	if (ip->i_gen == 0) {
1095		ip->i_gen = random() / 2 + 1;
1096		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1097			ip->i_flag |= IN_MODIFIED;
1098	}
1099	*vpp = vp;
1100	return (0);
1101}
1102
1103/*
1104 * File handle to vnode
1105 *
1106 * Have to be really careful about stale file handles:
1107 * - check that the inode number is valid
1108 * - call ext2_vget() to get the locked inode
1109 * - check for an unallocated inode (i_mode == 0)
1110 * - check that the given client host has export rights and return
1111 *   those rights via. exflagsp and credanonp
1112 */
1113static int
1114ext2_fhtovp(mp, fhp, vpp)
1115	struct mount *mp;
1116	struct fid *fhp;
1117	struct vnode **vpp;
1118{
1119	struct inode *ip;
1120	struct ufid *ufhp;
1121	struct vnode *nvp;
1122	struct ext2_sb_info *fs;
1123	int error;
1124
1125	ufhp = (struct ufid *)fhp;
1126	fs = VFSTOEXT2(mp)->um_e2fs;
1127	if (ufhp->ufid_ino < ROOTINO ||
1128	    ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group)
1129		return (ESTALE);
1130
1131	error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
1132	if (error) {
1133		*vpp = NULLVP;
1134		return (error);
1135	}
1136	ip = VTOI(nvp);
1137	if (ip->i_mode == 0 ||
1138	    ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) {
1139		vput(nvp);
1140		*vpp = NULLVP;
1141		return (ESTALE);
1142	}
1143	*vpp = nvp;
1144	return (0);
1145}
1146
1147/*
1148 * Vnode pointer to File handle
1149 */
1150/* ARGSUSED */
1151static int
1152ext2_vptofh(vp, fhp)
1153	struct vnode *vp;
1154	struct fid *fhp;
1155{
1156	struct inode *ip;
1157	struct ufid *ufhp;
1158
1159	ip = VTOI(vp);
1160	ufhp = (struct ufid *)fhp;
1161	ufhp->ufid_len = sizeof(struct ufid);
1162	ufhp->ufid_ino = ip->i_number;
1163	ufhp->ufid_gen = ip->i_gen;
1164	return (0);
1165}
1166
1167/*
1168 * Write a superblock and associated information back to disk.
1169 */
1170static int
1171ext2_sbupdate(mp, waitfor)
1172	struct ext2mount *mp;
1173	int waitfor;
1174{
1175	struct ext2_sb_info *fs = mp->um_e2fs;
1176	struct ext2_super_block *es = fs->s_es;
1177	struct buf *bp;
1178	int error = 0;
1179/*
1180printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no");
1181*/
1182	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0);
1183	bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block));
1184	if (waitfor == MNT_WAIT)
1185		error = bwrite(bp);
1186	else
1187		bawrite(bp);
1188
1189	/*
1190	 * The buffers for group descriptors, inode bitmaps and block bitmaps
1191	 * are not busy at this point and are (hopefully) written by the
1192	 * usual sync mechanism. No need to write them here
1193		 */
1194
1195	return (error);
1196}
1197
1198/*
1199 * Return the root of a filesystem.
1200 */
1201static int
1202ext2_root(mp, vpp)
1203	struct mount *mp;
1204	struct vnode **vpp;
1205{
1206	struct vnode *nvp;
1207	int error;
1208
1209	error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp);
1210	if (error)
1211		return (error);
1212	*vpp = nvp;
1213	return (0);
1214}
1215
/*
 * Initialization hook: calls ext2_ihashinit() to set up the inode hash.
 * The vfsconf argument is not used.  Always returns 0.
 */
static int
ext2_init(struct vfsconf *vfsp)
{
	ext2_ihashinit();

	return (0);
}
1223
/*
 * Teardown hook: calls ext2_ihashuninit() to dispose of the inode hash.
 * The vfsconf argument is not used.  Always returns 0.
 */
static int
ext2_uninit(struct vfsconf *vfsp)
{
	ext2_ihashuninit();

	return (0);
}
1231