ext2_vfsops.c revision 96881
1/*
2 *  modified for EXT2FS support in Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*
8 * Copyright (c) 1989, 1991, 1993, 1994
9 *	The Regents of the University of California.  All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
40 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 96881 2002-05-18 22:18:17Z iedowse $
41 */
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/kernel.h>
48#include <sys/vnode.h>
49#include <sys/mount.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/conf.h>
53#include <sys/fcntl.h>
54#include <sys/malloc.h>
55#include <sys/stat.h>
56#include <sys/mutex.h>
57
58#include <gnu/ext2fs/ext2_mount.h>
59#include <gnu/ext2fs/inode.h>
60
61#include <gnu/ext2fs/fs.h>
62#include <gnu/ext2fs/ext2_extern.h>
63#include <gnu/ext2fs/ext2_fs.h>
64#include <gnu/ext2fs/ext2_fs_sb.h>
65
66static int ext2_fhtovp(struct mount *, struct fid *, struct vnode **);
67static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
68static int ext2_init(struct vfsconf *);
69static int ext2_mount(struct mount *,
70	    char *, caddr_t, struct nameidata *, struct thread *);
71static int ext2_mountfs(struct vnode *, struct mount *, struct thread *);
72static int ext2_reload(struct mount *mountp, struct ucred *cred,
73			struct thread *td);
74static int ext2_root(struct mount *, struct vnode **vpp);
75static int ext2_sbupdate(struct ext2mount *, int);
76static int ext2_statfs(struct mount *, struct statfs *, struct thread *);
77static int ext2_sync(struct mount *, int, struct ucred *, struct thread *);
78static int ext2_uninit(struct vfsconf *);
79static int ext2_unmount(struct mount *, int, struct thread *);
80static int ext2_vget(struct mount *, ino_t, int, struct vnode **);
81static int ext2_vptofh(struct vnode *, struct fid *);
82
83MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part");
84static MALLOC_DEFINE(M_EXT2MNT, "EXT2 mount", "EXT2 mount structure");
85
86static struct vfsops ext2fs_vfsops = {
87	ext2_mount,
88	vfs_stdstart,
89	ext2_unmount,
90	ext2_root,		/* root inode via vget */
91	vfs_stdquotactl,
92	ext2_statfs,
93	ext2_sync,
94	ext2_vget,
95	ext2_fhtovp,
96	vfs_stdcheckexp,
97	ext2_vptofh,
98	ext2_init,
99	ext2_uninit,
100	vfs_stdextattrctl,
101};
102
103VFS_SET(ext2fs_vfsops, ext2fs, 0);
104#define bsd_malloc malloc
105#define bsd_free free
106
107static int ext2fs_inode_hash_lock;
108
109static int	ext2_check_sb_compat(struct ext2_super_block *es, dev_t dev,
110		    int ronly);
111static int	compute_sb_data(struct vnode * devvp,
112		    struct ext2_super_block * es, struct ext2_sb_info * fs);
113
#ifdef notyet
static int ext2_mountroot(void);

/*
 * Mount an ext2 filesystem as the root filesystem (called by main()).
 * This code is only compiled when "notyet" is defined.
 *
 * The device name recorded here is a placeholder; it is updated by
 * mount(8) after booting.
 */
#define ROOTNAME	"root_device"

static int
ext2_mountroot()
{
	struct thread *td = curthread;
	struct ext2_sb_info *sbi;
	struct ext2mount *ump;
	struct mount *mp;
	u_int len;
	int error;

	error = bdevvp(rootdev, &rootvp);
	if (error) {
		printf("ext2_mountroot: can't find rootvp\n");
		return (error);
	}

	/* Hand-build a zeroed mount structure, initially read-only. */
	mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	TAILQ_INIT(&mp->mnt_nvnodelist);
	TAILQ_INIT(&mp->mnt_reservedvnlist);
	mp->mnt_op = &ext2fs_vfsops;
	mp->mnt_flag = MNT_RDONLY;

	error = ext2_mountfs(rootvp, mp, td);
	if (error) {
		bsd_free(mp, M_MOUNT);
		return (error);
	}
	error = vfs_lock(mp);
	if (error) {
		/* Undo the mount before giving the structure back. */
		(void)ext2_unmount(mp, 0, td);
		bsd_free(mp, M_MOUNT);
		return (error);
	}

	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
	mp->mnt_flag |= MNT_ROOTFS;
	mp->mnt_vnodecovered = NULLVP;

	/* Record "/" as the mount point and ROOTNAME as the device. */
	ump = VFSTOEXT2(mp);
	sbi = ump->um_e2fs;
	bzero(sbi->fs_fsmnt, sizeof(sbi->fs_fsmnt));
	sbi->fs_fsmnt[0] = '/';
	bcopy((caddr_t)sbi->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
	    MNAMELEN);
	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
	    &len);
	bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len);

	(void)ext2_statfs(mp, &mp->mnt_stat, td);
	vfs_unlock(mp);
	inittodr(sbi->s_es->s_wtime);		/* this helps to set the time */
	return (0);
}
#endif
171
172/*
173 * VFS Operations.
174 *
175 * mount system call
176 */
177static int
178ext2_mount(mp, path, data, ndp, td)
179	struct mount *mp;
180	char *path;
181	caddr_t data;		/* this is actually a (struct ext2_args *) */
182	struct nameidata *ndp;
183	struct thread *td;
184{
185	struct vnode *devvp;
186	struct ext2_args args;
187	struct ext2mount *ump = 0;
188	struct ext2_sb_info *fs;
189	size_t size;
190	int error, flags;
191	mode_t accessmode;
192
193	/* Double-check the length of path.. */
194	if (strlen(path) >= MAXMNTLEN - 1)
195		return (ENAMETOOLONG);
196	error = copyin(data, (caddr_t)&args, sizeof (struct ext2_args));
197	if (error != 0)
198		return (error);
199	/*
200	 * If updating, check whether changing from read-only to
201	 * read/write; if there is no device name, that's all we do.
202	 */
203	if (mp->mnt_flag & MNT_UPDATE) {
204		ump = VFSTOEXT2(mp);
205		fs = ump->um_e2fs;
206		error = 0;
207		if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) {
208			flags = WRITECLOSE;
209			if (mp->mnt_flag & MNT_FORCE)
210				flags |= FORCECLOSE;
211			if (vfs_busy(mp, LK_NOWAIT, 0, td))
212				return (EBUSY);
213			error = ext2_flushfiles(mp, flags, td);
214			vfs_unbusy(mp, td);
215			if (!error && fs->s_wasvalid) {
216				fs->s_es->s_state |= EXT2_VALID_FS;
217				ext2_sbupdate(ump, MNT_WAIT);
218			}
219			fs->s_rd_only = 1;
220		}
221		if (!error && (mp->mnt_flag & MNT_RELOAD))
222			error = ext2_reload(mp, ndp->ni_cnd.cn_cred, td);
223		if (error)
224			return (error);
225		devvp = ump->um_devvp;
226		if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev,
227		    (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0)
228			return (EPERM);
229		if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
230			/*
231			 * If upgrade to read-write by non-root, then verify
232			 * that user has necessary permissions on the device.
233			 */
234			if (suser(td)) {
235				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
236				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
237				    td->td_ucred, td)) != 0) {
238					VOP_UNLOCK(devvp, 0, td);
239					return (error);
240				}
241				VOP_UNLOCK(devvp, 0, td);
242			}
243
244			if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 ||
245			    (fs->s_es->s_state & EXT2_ERROR_FS)) {
246				if (mp->mnt_flag & MNT_FORCE) {
247					printf(
248"WARNING: %s was not properly dismounted\n",
249					    fs->fs_fsmnt);
250				} else {
251					printf(
252"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
253					    fs->fs_fsmnt);
254					return (EPERM);
255				}
256			}
257			fs->s_es->s_state &= ~EXT2_VALID_FS;
258			ext2_sbupdate(ump, MNT_WAIT);
259			fs->s_rd_only = 0;
260		}
261		if (args.fspec == 0) {
262			/*
263			 * Process export requests.
264			 */
265			return (vfs_export(mp, &args.export));
266		}
267	}
268	/*
269	 * Not an update, or updating the name: look up the name
270	 * and verify that it refers to a sensible block device.
271	 */
272	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
273	if ((error = namei(ndp)) != 0)
274		return (error);
275	NDFREE(ndp, NDF_ONLY_PNBUF);
276	devvp = ndp->ni_vp;
277
278	if (!vn_isdisk(devvp, &error)) {
279		vrele(devvp);
280		return (error);
281	}
282
283	/*
284	 * If mount by non-root, then verify that user has necessary
285	 * permissions on the device.
286	 */
287	if (suser(td)) {
288		accessmode = VREAD;
289		if ((mp->mnt_flag & MNT_RDONLY) == 0)
290			accessmode |= VWRITE;
291		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
292		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) {
293			vput(devvp);
294			return (error);
295		}
296		VOP_UNLOCK(devvp, 0, td);
297	}
298
299	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
300		error = ext2_mountfs(devvp, mp, td);
301	} else {
302		if (devvp != ump->um_devvp)
303			error = EINVAL;	/* needs translation */
304		else
305			vrele(devvp);
306	}
307	if (error) {
308		vrele(devvp);
309		return (error);
310	}
311	ump = VFSTOEXT2(mp);
312	fs = ump->um_e2fs;
313	/*
314	 * Note that this strncpy() is ok because of a check at the start
315	 * of ext2_mount().
316	 */
317	strncpy(fs->fs_fsmnt, path, MAXMNTLEN);
318	fs->fs_fsmnt[MAXMNTLEN - 1] = '\0';
319	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
320	    &size);
321	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
322	(void)ext2_statfs(mp, &mp->mnt_stat, td);
323	return (0);
324}
325
326/*
327 * checks that the data in the descriptor blocks make sense
328 * this is taken from ext2/super.c
329 */
330static int ext2_check_descriptors (struct ext2_sb_info * sb)
331{
332        int i;
333        int desc_block = 0;
334        unsigned long block = sb->s_es->s_first_data_block;
335        struct ext2_group_desc * gdp = NULL;
336
337        /* ext2_debug ("Checking group descriptors"); */
338
339        for (i = 0; i < sb->s_groups_count; i++)
340        {
341		/* examine next descriptor block */
342                if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
343                        gdp = (struct ext2_group_desc *)
344				sb->s_group_desc[desc_block++]->b_data;
345                if (gdp->bg_block_bitmap < block ||
346                    gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
347                {
348                        printf ("ext2_check_descriptors: "
349                                    "Block bitmap for group %d"
350                                    " not in group (block %lu)!\n",
351                                    i, (unsigned long) gdp->bg_block_bitmap);
352                        return 0;
353                }
354                if (gdp->bg_inode_bitmap < block ||
355                    gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
356                {
357                        printf ("ext2_check_descriptors: "
358                                    "Inode bitmap for group %d"
359                                    " not in group (block %lu)!\n",
360                                    i, (unsigned long) gdp->bg_inode_bitmap);
361                        return 0;
362                }
363                if (gdp->bg_inode_table < block ||
364                    gdp->bg_inode_table + sb->s_itb_per_group >=
365                    block + EXT2_BLOCKS_PER_GROUP(sb))
366                {
367                        printf ("ext2_check_descriptors: "
368                                    "Inode table for group %d"
369                                    " not in group (block %lu)!\n",
370                                    i, (unsigned long) gdp->bg_inode_table);
371                        return 0;
372                }
373                block += EXT2_BLOCKS_PER_GROUP(sb);
374                gdp++;
375        }
376        return 1;
377}
378
379static int
380ext2_check_sb_compat(es, dev, ronly)
381	struct ext2_super_block *es;
382	dev_t dev;
383	int ronly;
384{
385
386	if (es->s_magic != EXT2_SUPER_MAGIC) {
387		printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
388		    devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC);
389		return (1);
390	}
391	if (es->s_rev_level > EXT2_GOOD_OLD_REV) {
392		if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) {
393			printf(
394"WARNING: mount of %s denied due to unsupported optional features\n",
395			    devtoname(dev));
396			return (1);
397		}
398		if (!ronly &&
399		    (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) {
400			printf(
401"WARNING: R/W mount of %s denied due to unsupported optional features\n",
402			    devtoname(dev));
403			return (1);
404		}
405	}
406	return (0);
407}
408
409/*
410 * this computes the fields of the  ext2_sb_info structure from the
411 * data in the ext2_super_block structure read in
412 */
413static int compute_sb_data(devvp, es, fs)
414	struct vnode * devvp;
415	struct ext2_super_block * es;
416	struct ext2_sb_info * fs;
417{
418    int db_count, error;
419    int i, j;
420    int logic_sb_block = 1;	/* XXX for now */
421
422#if 1
423#define V(v)
424#else
425#define V(v)  printf(#v"= %d\n", fs->v);
426#endif
427
428    fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size;
429    V(s_blocksize)
430    fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size;
431    V(s_bshift)
432    fs->s_fsbtodb = es->s_log_block_size + 1;
433    V(s_fsbtodb)
434    fs->s_qbmask = fs->s_blocksize - 1;
435    V(s_bmask)
436    fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es);
437    V(s_blocksize_bits)
438    fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size;
439    V(s_frag_size)
440    if (fs->s_frag_size)
441	fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size;
442    V(s_frags_per_block)
443    fs->s_blocks_per_group = es->s_blocks_per_group;
444    V(s_blocks_per_group)
445    fs->s_frags_per_group = es->s_frags_per_group;
446    V(s_frags_per_group)
447    fs->s_inodes_per_group = es->s_inodes_per_group;
448    V(s_inodes_per_group)
449    fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE;
450    V(s_inodes_per_block)
451    fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block;
452    V(s_itb_per_group)
453    fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc);
454    V(s_desc_per_block)
455    /* s_resuid / s_resgid ? */
456    fs->s_groups_count = (es->s_blocks_count -
457			  es->s_first_data_block +
458			  EXT2_BLOCKS_PER_GROUP(fs) - 1) /
459			 EXT2_BLOCKS_PER_GROUP(fs);
460    V(s_groups_count)
461    db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) /
462	EXT2_DESC_PER_BLOCK(fs);
463    fs->s_db_per_group = db_count;
464    V(s_db_per_group)
465
466    fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *),
467		M_EXT2MNT, M_WAITOK);
468
469    /* adjust logic_sb_block */
470    if(fs->s_blocksize > SBSIZE)
471	/* Godmar thinks: if the blocksize is greater than 1024, then
472	   the superblock is logically part of block zero.
473	 */
474        logic_sb_block = 0;
475
476    for (i = 0; i < db_count; i++) {
477	error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1),
478		fs->s_blocksize, NOCRED, &fs->s_group_desc[i]);
479	if(error) {
480	    for (j = 0; j < i; j++)
481		brelse(fs->s_group_desc[j]);
482	    bsd_free(fs->s_group_desc, M_EXT2MNT);
483	    printf("EXT2-fs: unable to read group descriptors (%d)\n", error);
484	    return EIO;
485	}
486	/* Set the B_LOCKED flag on the buffer, then brelse() it */
487	LCK_BUF(fs->s_group_desc[i])
488    }
489    if(!ext2_check_descriptors(fs)) {
490	    for (j = 0; j < db_count; j++)
491		    ULCK_BUF(fs->s_group_desc[j])
492	    bsd_free(fs->s_group_desc, M_EXT2MNT);
493	    printf("EXT2-fs: (ext2_check_descriptors failure) "
494		   "unable to read group descriptors\n");
495	    return EIO;
496    }
497
498    for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
499	    fs->s_inode_bitmap_number[i] = 0;
500	    fs->s_inode_bitmap[i] = NULL;
501	    fs->s_block_bitmap_number[i] = 0;
502	    fs->s_block_bitmap[i] = NULL;
503    }
504    fs->s_loaded_inode_bitmaps = 0;
505    fs->s_loaded_block_bitmaps = 0;
506    return 0;
507}
508
509/*
510 * Reload all incore data for a filesystem (used after running fsck on
511 * the root filesystem and finding things to fix). The filesystem must
512 * be mounted read-only.
513 *
514 * Things to do to update the mount:
515 *	1) invalidate all cached meta-data.
516 *	2) re-read superblock from disk.
517 *	3) re-read summary information from disk.
518 *	4) invalidate all inactive vnodes.
519 *	5) invalidate all cached file data.
520 *	6) re-read inode data for all active vnodes.
521 */
522static int
523ext2_reload(mountp, cred, td)
524	struct mount *mountp;
525	struct ucred *cred;
526	struct thread *td;
527{
528	struct vnode *vp, *nvp, *devvp;
529	struct inode *ip;
530	struct buf *bp;
531	struct ext2_super_block * es;
532	struct ext2_sb_info *fs;
533	int error;
534
535	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
536		return (EINVAL);
537	/*
538	 * Step 1: invalidate all cached meta-data.
539	 */
540	devvp = VFSTOEXT2(mountp)->um_devvp;
541	if (vinvalbuf(devvp, 0, cred, td, 0, 0))
542		panic("ext2_reload: dirty1");
543	/*
544	 * Step 2: re-read superblock from disk.
545	 * constants have been adjusted for ext2
546	 */
547	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
548		return (error);
549	es = (struct ext2_super_block *)bp->b_data;
550	if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
551		brelse(bp);
552		return (EIO);		/* XXX needs translation */
553	}
554	fs = VFSTOEXT2(mountp)->um_e2fs;
555	bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block));
556
557	if((error = compute_sb_data(devvp, es, fs)) != 0) {
558		brelse(bp);
559		return error;
560	}
561#ifdef UNKLAR
562	if (fs->fs_sbsize < SBSIZE)
563		bp->b_flags |= B_INVAL;
564#endif
565	brelse(bp);
566
567loop:
568	mtx_lock(&mntvnode_mtx);
569	for (vp = TAILQ_FIRST(&mountp->mnt_nvnodelist); vp != NULL; vp = nvp) {
570		if (vp->v_mount != mountp) {
571			mtx_unlock(&mntvnode_mtx);
572			goto loop;
573		}
574		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
575		mtx_unlock(&mntvnode_mtx);
576		/*
577		 * Step 4: invalidate all inactive vnodes.
578		 */
579  		if (vrecycle(vp, NULL, td))
580  			goto loop;
581		/*
582		 * Step 5: invalidate all cached file data.
583		 */
584		mtx_lock(&vp->v_interlock);
585		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
586			goto loop;
587		}
588		if (vinvalbuf(vp, 0, cred, td, 0, 0))
589			panic("ext2_reload: dirty2");
590		/*
591		 * Step 6: re-read inode data for all active vnodes.
592		 */
593		ip = VTOI(vp);
594		error =
595		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
596		    (int)fs->s_blocksize, NOCRED, &bp);
597		if (error) {
598			vput(vp);
599			return (error);
600		}
601		ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data +
602		    EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip);
603		brelse(bp);
604		vput(vp);
605		mtx_lock(&mntvnode_mtx);
606	}
607	mtx_unlock(&mntvnode_mtx);
608	return (0);
609}
610
611/*
612 * Common code for mount and mountroot
613 */
614static int
615ext2_mountfs(devvp, mp, td)
616	struct vnode *devvp;
617	struct mount *mp;
618	struct thread *td;
619{
620	struct ext2mount *ump;
621	struct buf *bp;
622	struct ext2_sb_info *fs;
623	struct ext2_super_block * es;
624	dev_t dev = devvp->v_rdev;
625	int error;
626	int ronly;
627
628	/*
629	 * Disallow multiple mounts of the same device.
630	 * Disallow mounting of a device that is currently in use
631	 * (except for root, which might share swap device for miniroot).
632	 * Flush out any old buffers remaining from a previous use.
633	 */
634	if ((error = vfs_mountedon(devvp)) != 0)
635		return (error);
636	if (vcount(devvp) > 1 && devvp != rootvp)
637		return (EBUSY);
638	if ((error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0)) != 0)
639		return (error);
640#ifdef READONLY
641/* turn on this to force it to be read-only */
642	mp->mnt_flag |= MNT_RDONLY;
643#endif
644
645	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
646	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
647	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
648	VOP_UNLOCK(devvp, 0, td);
649	if (error)
650		return (error);
651	if (devvp->v_rdev->si_iosize_max != 0)
652		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
653	if (mp->mnt_iosize_max > MAXPHYS)
654		mp->mnt_iosize_max = MAXPHYS;
655
656	bp = NULL;
657	ump = NULL;
658	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
659		goto out;
660	es = (struct ext2_super_block *)bp->b_data;
661	if (ext2_check_sb_compat(es, dev, ronly) != 0) {
662		error = EINVAL;		/* XXX needs translation */
663		goto out;
664	}
665	if ((es->s_state & EXT2_VALID_FS) == 0 ||
666	    (es->s_state & EXT2_ERROR_FS)) {
667		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
668			printf(
669"WARNING: Filesystem was not properly dismounted\n");
670		} else {
671			printf(
672"WARNING: R/W mount denied.  Filesystem is not clean - run fsck\n");
673			error = EPERM;
674			goto out;
675		}
676	}
677	ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK);
678	bzero((caddr_t)ump, sizeof *ump);
679	/* I don't know whether this is the right strategy. Note that
680	   we dynamically allocate both a ext2_sb_info and a ext2_super_block
681	   while Linux keeps the super block in a locked buffer
682	 */
683	ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info),
684		M_EXT2MNT, M_WAITOK);
685	ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block),
686		M_EXT2MNT, M_WAITOK);
687	bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block));
688	if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs)))
689		goto out;
690	/*
691	 * We don't free the group descriptors allocated by compute_sb_data()
692	 * until ext2_unmount().  This is OK since the mount will succeed.
693	 */
694	brelse(bp);
695	bp = NULL;
696	fs = ump->um_e2fs;
697	fs->s_rd_only = ronly;	/* ronly is set according to mnt_flags */
698	/* if the fs is not mounted read-only, make sure the super block is
699	   always written back on a sync()
700	 */
701	fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0;
702	if (ronly == 0) {
703		fs->s_dirt = 1;		/* mark it modified */
704		fs->s_es->s_state &= ~EXT2_VALID_FS;	/* set fs invalid */
705	}
706	mp->mnt_data = (qaddr_t)ump;
707	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
708	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
709	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
710	mp->mnt_flag |= MNT_LOCAL;
711	ump->um_mountp = mp;
712	ump->um_dev = dev;
713	ump->um_devvp = devvp;
714	/* setting those two parameters allowed us to use
715	   ufs_bmap w/o changse !
716	*/
717	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
718	ump->um_bptrtodb = fs->s_es->s_log_block_size + 1;
719	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
720	devvp->v_rdev->si_mountpoint = mp;
721	if (ronly == 0)
722		ext2_sbupdate(ump, MNT_WAIT);
723	return (0);
724out:
725	if (bp)
726		brelse(bp);
727	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, td);
728	if (ump) {
729		bsd_free(ump->um_e2fs->s_es, M_EXT2MNT);
730		bsd_free(ump->um_e2fs, M_EXT2MNT);
731		bsd_free(ump, M_EXT2MNT);
732		mp->mnt_data = (qaddr_t)0;
733	}
734	return (error);
735}
736
737/*
738 * unmount system call
739 */
740static int
741ext2_unmount(mp, mntflags, td)
742	struct mount *mp;
743	int mntflags;
744	struct thread *td;
745{
746	struct ext2mount *ump;
747	struct ext2_sb_info *fs;
748	int error, flags, ronly, i;
749
750	flags = 0;
751	if (mntflags & MNT_FORCE) {
752		if (mp->mnt_flag & MNT_ROOTFS)
753			return (EINVAL);
754		flags |= FORCECLOSE;
755	}
756	if ((error = ext2_flushfiles(mp, flags, td)) != 0)
757		return (error);
758	ump = VFSTOEXT2(mp);
759	fs = ump->um_e2fs;
760	ronly = fs->s_rd_only;
761	if (ronly == 0) {
762		if (fs->s_wasvalid)
763			fs->s_es->s_state |= EXT2_VALID_FS;
764		ext2_sbupdate(ump, MNT_WAIT);
765	}
766
767	/* release buffers containing group descriptors */
768	for(i = 0; i < fs->s_db_per_group; i++)
769		ULCK_BUF(fs->s_group_desc[i])
770	bsd_free(fs->s_group_desc, M_EXT2MNT);
771
772	/* release cached inode/block bitmaps */
773        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
774                if (fs->s_inode_bitmap[i])
775			ULCK_BUF(fs->s_inode_bitmap[i])
776
777        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
778                if (fs->s_block_bitmap[i])
779			ULCK_BUF(fs->s_block_bitmap[i])
780
781	ump->um_devvp->v_rdev->si_mountpoint = NULL;
782	error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE,
783		NOCRED, td);
784	vrele(ump->um_devvp);
785	bsd_free(fs->s_es, M_EXT2MNT);
786	bsd_free(fs, M_EXT2MNT);
787	bsd_free(ump, M_EXT2MNT);
788	mp->mnt_data = (qaddr_t)0;
789	mp->mnt_flag &= ~MNT_LOCAL;
790	return (error);
791}
792
/*
 * Flush out all the files in a filesystem.
 *
 * Thin wrapper around vflush(); "flags" is handed through unchanged and
 * "td" is not used.  Returns whatever vflush() returns.
 */
static int
ext2_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{

	return (vflush(mp, 0, flags));
}
807
808/*
809 * Get file system statistics.
810 * taken from ext2/super.c ext2_statfs
811 */
812static int
813ext2_statfs(mp, sbp, td)
814	struct mount *mp;
815	struct statfs *sbp;
816	struct thread *td;
817{
818        unsigned long overhead;
819	struct ext2mount *ump;
820	struct ext2_sb_info *fs;
821	struct ext2_super_block *es;
822	int i, nsb;
823
824	ump = VFSTOEXT2(mp);
825	fs = ump->um_e2fs;
826	es = fs->s_es;
827
828	if (es->s_magic != EXT2_SUPER_MAGIC)
829		panic("ext2_statfs - magic number spoiled");
830
831	/*
832	 * Compute the overhead (FS structures)
833	 */
834	if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) {
835		nsb = 0;
836		for (i = 0 ; i < fs->s_groups_count; i++)
837			if (ext2_group_sparse(i))
838				nsb++;
839	} else
840		nsb = fs->s_groups_count;
841	overhead = es->s_first_data_block +
842	    /* Superblocks and block group descriptors: */
843	    nsb * (1 + fs->s_db_per_group) +
844	    /* Inode bitmap, block bitmap, and inode table: */
845	    fs->s_groups_count * (1 + 1 + fs->s_itb_per_group);
846
847	sbp->f_bsize = EXT2_FRAG_SIZE(fs);
848	sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
849	sbp->f_blocks = es->s_blocks_count - overhead;
850	sbp->f_bfree = es->s_free_blocks_count;
851	sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count;
852	sbp->f_files = es->s_inodes_count;
853	sbp->f_ffree = es->s_free_inodes_count;
854	if (sbp != &mp->mnt_stat) {
855		sbp->f_type = mp->mnt_vfc->vfc_typenum;
856		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
857			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
858		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
859			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
860	}
861	return (0);
862}
863
864/*
865 * Go through the disk queues to initiate sandbagged IO;
866 * go through the inodes to write those that have been modified;
867 * initiate the writing of the super block if it has been modified.
868 *
869 * Note: we are always called with the filesystem marked `MPBUSY'.
870 */
871static int
872ext2_sync(mp, waitfor, cred, td)
873	struct mount *mp;
874	int waitfor;
875	struct ucred *cred;
876	struct thread *td;
877{
878	struct vnode *nvp, *vp;
879	struct inode *ip;
880	struct ext2mount *ump = VFSTOEXT2(mp);
881	struct ext2_sb_info *fs;
882	int error, allerror = 0;
883
884	fs = ump->um_e2fs;
885	if (fs->s_dirt != 0 && fs->s_rd_only != 0) {		/* XXX */
886		printf("fs = %s\n", fs->fs_fsmnt);
887		panic("ext2_sync: rofs mod");
888	}
889	/*
890	 * Write back each (modified) inode.
891	 */
892	mtx_lock(&mntvnode_mtx);
893loop:
894	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
895		/*
896		 * If the vnode that we are about to sync is no longer
897		 * associated with this mount point, start over.
898		 */
899		if (vp->v_mount != mp)
900			goto loop;
901		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
902		mtx_unlock(&mntvnode_mtx);
903		mtx_lock(&vp->v_interlock);
904		ip = VTOI(vp);
905		if (vp->v_type == VNON ||
906		    ((ip->i_flag &
907		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
908		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) {
909			mtx_unlock(&vp->v_interlock);
910			mtx_lock(&mntvnode_mtx);
911			continue;
912		}
913		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
914		if (error) {
915			mtx_lock(&mntvnode_mtx);
916			if (error == ENOENT)
917				goto loop;
918			continue;
919		}
920		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
921			allerror = error;
922		VOP_UNLOCK(vp, 0, td);
923		vrele(vp);
924		mtx_lock(&mntvnode_mtx);
925	}
926	mtx_unlock(&mntvnode_mtx);
927	/*
928	 * Force stale file system control information to be flushed.
929	 */
930	if (waitfor != MNT_LAZY) {
931		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
932		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0)
933			allerror = error;
934		VOP_UNLOCK(ump->um_devvp, 0, td);
935	}
936	/*
937	 * Write back modified superblock.
938	 */
939	if (fs->s_dirt != 0) {
940		fs->s_dirt = 0;
941		fs->s_es->s_wtime = time_second;
942		if ((error = ext2_sbupdate(ump, waitfor)) != 0)
943			allerror = error;
944	}
945	return (allerror);
946}
947
948/*
949 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
950 * in from disk.  If it is in core, wait for the lock bit to clear, then
951 * return the inode locked.  Detection and handling of mount points must be
952 * done by the calling routine.
953 */
954static int
955ext2_vget(mp, ino, flags, vpp)
956	struct mount *mp;
957	ino_t ino;
958	int flags;
959	struct vnode **vpp;
960{
961	struct ext2_sb_info *fs;
962	struct inode *ip;
963	struct ext2mount *ump;
964	struct buf *bp;
965	struct vnode *vp;
966	dev_t dev;
967	int i, error;
968	int used_blocks;
969
970	ump = VFSTOEXT2(mp);
971	dev = ump->um_dev;
972restart:
973	if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0)
974		return (error);
975	if (*vpp != NULL)
976		return (0);
977
978	/*
979	 * Lock out the creation of new entries in the FFS hash table in
980	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
981	 * may occur!
982	 */
983	if (ext2fs_inode_hash_lock) {
984		while (ext2fs_inode_hash_lock) {
985			ext2fs_inode_hash_lock = -1;
986			tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0);
987		}
988		goto restart;
989	}
990	ext2fs_inode_hash_lock = 1;
991
992	/*
993	 * If this MALLOC() is performed after the getnewvnode()
994	 * it might block, leaving a vnode with a NULL v_data to be
995	 * found by ext2_sync() if a sync happens to fire right then,
996	 * which will cause a panic because ext2_sync() blindly
997	 * dereferences vp->v_data (as well it should).
998	 */
999	MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK);
1000
1001	/* Allocate a new vnode/inode. */
1002	if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) {
1003		if (ext2fs_inode_hash_lock < 0)
1004			wakeup(&ext2fs_inode_hash_lock);
1005		ext2fs_inode_hash_lock = 0;
1006		*vpp = NULL;
1007		FREE(ip, M_EXT2NODE);
1008		return (error);
1009	}
1010	bzero((caddr_t)ip, sizeof(struct inode));
1011	lockinit(&vp->v_lock, PINOD, "ext2in", 0, 0);
1012	vp->v_data = ip;
1013	ip->i_vnode = vp;
1014	ip->i_e2fs = fs = ump->um_e2fs;
1015	ip->i_dev = dev;
1016	ip->i_number = ino;
1017	/*
1018	 * Put it onto its hash chain and lock it so that other requests for
1019	 * this inode will block if they arrive while we are sleeping waiting
1020	 * for old data structures to be purged or for the contents of the
1021	 * disk portion of this inode to be read.
1022	 */
1023	ext2_ihashins(ip);
1024
1025	if (ext2fs_inode_hash_lock < 0)
1026		wakeup(&ext2fs_inode_hash_lock);
1027	ext2fs_inode_hash_lock = 0;
1028
1029	/* Read in the disk contents for the inode, copy into the inode. */
1030#if 0
1031printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino)));
1032#endif
1033	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1034	    (int)fs->s_blocksize, NOCRED, &bp)) != 0) {
1035		/*
1036		 * The inode does not contain anything useful, so it would
1037		 * be misleading to leave it on its hash chain. With mode
1038		 * still zero, it will be unlinked and returned to the free
1039		 * list by vput().
1040		 */
1041		vput(vp);
1042		brelse(bp);
1043		*vpp = NULL;
1044		return (error);
1045	}
1046	/* convert ext2 inode to dinode */
1047	ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE *
1048			ino_to_fsbo(fs, ino)), ip);
1049	ip->i_block_group = ino_to_cg(fs, ino);
1050	ip->i_next_alloc_block = 0;
1051	ip->i_next_alloc_goal = 0;
1052	ip->i_prealloc_count = 0;
1053	ip->i_prealloc_block = 0;
1054        /* now we want to make sure that block pointers for unused
1055           blocks are zeroed out - ext2_balloc depends on this
1056	   although for regular files and directories only
1057	*/
1058	if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
1059		used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize;
1060		for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1061			ip->i_db[i] = 0;
1062	}
1063/*
1064	ext2_print_inode(ip);
1065*/
1066	brelse(bp);
1067
1068	/*
1069	 * Initialize the vnode from the inode, check for aliases.
1070	 * Note that the underlying vnode may have changed.
1071	 */
1072	if ((error = ext2_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) {
1073		vput(vp);
1074		*vpp = NULL;
1075		return (error);
1076	}
1077	/*
1078	 * Finish inode initialization now that aliasing has been resolved.
1079	 */
1080	ip->i_devvp = ump->um_devvp;
1081	VREF(ip->i_devvp);
1082	/*
1083	 * Set up a generation number for this inode if it does not
1084	 * already have one. This should only happen on old filesystems.
1085	 */
1086	if (ip->i_gen == 0) {
1087		ip->i_gen = random() / 2 + 1;
1088		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1089			ip->i_flag |= IN_MODIFIED;
1090	}
1091	*vpp = vp;
1092	return (0);
1093}
1094
1095/*
1096 * File handle to vnode
1097 *
1098 * Have to be really careful about stale file handles:
1099 * - check that the inode number is valid
1100 * - call ext2_vget() to get the locked inode
1101 * - check for an unallocated inode (i_mode == 0)
1102 * - check that the given client host has export rights and return
1103 *   those rights via. exflagsp and credanonp
1104 */
1105static int
1106ext2_fhtovp(mp, fhp, vpp)
1107	struct mount *mp;
1108	struct fid *fhp;
1109	struct vnode **vpp;
1110{
1111	struct inode *ip;
1112	struct ufid *ufhp;
1113	struct vnode *nvp;
1114	struct ext2_sb_info *fs;
1115	int error;
1116
1117	ufhp = (struct ufid *)fhp;
1118	fs = VFSTOEXT2(mp)->um_e2fs;
1119	if (ufhp->ufid_ino < ROOTINO ||
1120	    ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group)
1121		return (ESTALE);
1122
1123	error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
1124	if (error) {
1125		*vpp = NULLVP;
1126		return (error);
1127	}
1128	ip = VTOI(nvp);
1129	if (ip->i_mode == 0 ||
1130	    ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) {
1131		vput(nvp);
1132		*vpp = NULLVP;
1133		return (ESTALE);
1134	}
1135	*vpp = nvp;
1136	return (0);
1137}
1138
1139/*
1140 * Vnode pointer to File handle
1141 */
1142/* ARGSUSED */
1143static int
1144ext2_vptofh(vp, fhp)
1145	struct vnode *vp;
1146	struct fid *fhp;
1147{
1148	struct inode *ip;
1149	struct ufid *ufhp;
1150
1151	ip = VTOI(vp);
1152	ufhp = (struct ufid *)fhp;
1153	ufhp->ufid_len = sizeof(struct ufid);
1154	ufhp->ufid_ino = ip->i_number;
1155	ufhp->ufid_gen = ip->i_gen;
1156	return (0);
1157}
1158
1159/*
1160 * Write a superblock and associated information back to disk.
1161 */
1162static int
1163ext2_sbupdate(mp, waitfor)
1164	struct ext2mount *mp;
1165	int waitfor;
1166{
1167	struct ext2_sb_info *fs = mp->um_e2fs;
1168	struct ext2_super_block *es = fs->s_es;
1169	struct buf *bp;
1170	int error = 0;
1171/*
1172printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no");
1173*/
1174	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
1175	bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block));
1176	if (waitfor == MNT_WAIT)
1177		error = bwrite(bp);
1178	else
1179		bawrite(bp);
1180
1181	/*
1182	 * The buffers for group descriptors, inode bitmaps and block bitmaps
1183	 * are not busy at this point and are (hopefully) written by the
1184	 * usual sync mechanism. No need to write them here
1185		 */
1186
1187	return (error);
1188}
1189
1190/*
1191 * Return the root of a filesystem.
1192 */
1193static int
1194ext2_root(mp, vpp)
1195	struct mount *mp;
1196	struct vnode **vpp;
1197{
1198	struct vnode *nvp;
1199	int error;
1200
1201	error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp);
1202	if (error)
1203		return (error);
1204	*vpp = nvp;
1205	return (0);
1206}
1207
/*
 * Initialisation routine for ext2fs: sets up the inode hash by calling
 * ext2_ihashinit().  The vfsconf argument is not used.  Always returns 0.
 */
static int
ext2_init(struct vfsconf *vfsp)
{

	ext2_ihashinit();

	return (0);
}
1215
/*
 * Counterpart of ext2_init(): tears the inode hash down again by calling
 * ext2_ihashuninit().  The vfsconf argument is not used.  Always returns 0.
 */
static int
ext2_uninit(struct vfsconf *vfsp)
{

	ext2_ihashuninit();

	return (0);
}
1223