ext2_vfsops.c revision 13260
1/*
2 *  modified for EXT2FS support in Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*
8 * Copyright (c) 1989, 1991, 1993, 1994
9 *	The Regents of the University of California.  All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
40 */
41
42#if !defined(__FreeBSD__)
43#include "quota.h"
44#else
45#include "opt_quota.h"
46#endif
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/namei.h>
51#include <sys/proc.h>
52#include <sys/kernel.h>
53#include <sys/vnode.h>
54#include <sys/socket.h>
55#include <sys/mount.h>
56#include <sys/buf.h>
57#include <sys/mbuf.h>
58#include <sys/file.h>
59#include <sys/disklabel.h>
60#include <sys/ioctl.h>
61#include <sys/errno.h>
62#include <sys/malloc.h>
63#include <sys/stat.h>
64
65#include <miscfs/specfs/specdev.h>
66
67#include <ufs/ufs/quota.h>
68#include <ufs/ufs/ufsmount.h>
69#include <ufs/ufs/inode.h>
70#include <ufs/ufs/ufs_extern.h>
71
72#include <gnu/ext2fs/fs.h>
73#include <gnu/ext2fs/ext2_extern.h>
74#include <gnu/ext2fs/ext2_fs.h>
75#include <gnu/ext2fs/ext2_fs_sb.h>
76
77static int ext2_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
78	    struct vnode **, int *, struct ucred **));
79static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p));
80static int ext2_mount __P((struct mount *,
81	    char *, caddr_t, struct nameidata *, struct proc *));
82static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *));
83static int ext2_mountroot __P((void));
84static int ext2_reload __P((struct mount *mountp, struct ucred *cred,
85			struct proc *p));
86static int ext2_sbupdate __P((struct ufsmount *, int));
87static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *));
88static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *));
89static int ext2_unmount __P((struct mount *, int, struct proc *));
90static int ext2_vget __P((struct mount *, ino_t, struct vnode **));
91static int ext2_vptofh __P((struct vnode *, struct fid *));
92
/*
 * Filesystem operations vector for ext2.
 *
 * The ufs_* entries are taken unchanged from the UFS layer; the ext2_*
 * entries are the ext2-specific routines.  All of them except ext2_init
 * are defined in this file (ext2_init is declared elsewhere --
 * presumably in ext2_extern.h; confirm).
 */
static struct vfsops ext2fs_vfsops = {
	ext2_mount,		/* mount system call */
	ufs_start,		/* empty function */
	ext2_unmount,		/* unmount system call */
	ufs_root,		/* root inode via vget */
	ufs_quotactl,		/* does operations associated with quotas */
	ext2_statfs,		/* file system statistics */
	ext2_sync,		/* write back superblock and modified inodes */
	ext2_vget,		/* inode number to locked vnode */
	ext2_fhtovp,		/* file handle to vnode */
	ext2_vptofh,		/* vnode to file handle */
	ext2_init,		/* not defined in this file */
};
106
107#if defined(__FreeBSD__)
108VFS_SET(ext2fs_vfsops, ext2fs, MOUNT_EXT2FS, 0);
109#define bsd_malloc malloc
110#define bsd_free free
111#endif
112
113extern u_long nextgennumber;
114#ifdef __FreeBSD__
115static int ext2fs_inode_hash_lock;
116#endif
117
118/*
119 * Called by main() when ufs is going to be mounted as root.
120 *
121 * Name is updated by mount(8) after booting.
122 */
123#define ROOTNAME	"root_device"
124
125static int	compute_sb_data __P((struct vnode * devvp,
126				     struct ext2_super_block * es,
127				     struct ext2_sb_info * fs));
128
129static int
130ext2_mountroot()
131{
132#if !defined(__FreeBSD__)
133	extern struct vnode *rootvp;
134#endif
135	register struct ext2_sb_info *fs;
136	register struct mount *mp;
137#if defined(__FreeBSD__)
138	struct proc *p = curproc;
139#else
140	struct proc *p = get_proc();	/* XXX */
141#endif
142	struct ufsmount *ump;
143	u_int size;
144	int error;
145
146	/*
147	 * Get vnodes for swapdev and rootdev.
148	 */
149	if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
150		panic("ext2_mountroot: can't setup bdevvp's");
151
152	mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
153	bzero((char *)mp, (u_long)sizeof(struct mount));
154	mp->mnt_op = &ext2fs_vfsops;
155	mp->mnt_flag = MNT_RDONLY;
156	if (error = ext2_mountfs(rootvp, mp, p)) {
157		bsd_free(mp, M_MOUNT);
158		return (error);
159	}
160	if (error = vfs_lock(mp)) {
161		(void)ext2_unmount(mp, 0, p);
162		bsd_free(mp, M_MOUNT);
163		return (error);
164	}
165#if defined(__FreeBSD__)
166	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
167#else
168	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
169#endif
170	mp->mnt_flag |= MNT_ROOTFS;
171	mp->mnt_vnodecovered = NULLVP;
172	ump = VFSTOUFS(mp);
173	fs = ump->um_e2fs;
174	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
175	fs->fs_fsmnt[0] = '/';
176	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
177	    MNAMELEN);
178	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
179	    &size);
180	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
181	(void)ext2_statfs(mp, &mp->mnt_stat, p);
182	vfs_unlock(mp);
183	inittodr(fs->s_es->s_wtime);		/* this helps to set the time */
184	return (0);
185}
186
187/*
188 * VFS Operations.
189 *
190 * mount system call
191 */
192static int
193ext2_mount(mp, path, data, ndp, p)
194	register struct mount *mp;
195	char *path;
196	caddr_t data;		/* this is actually a (struct ufs_args *) */
197	struct nameidata *ndp;
198	struct proc *p;
199{
200	struct vnode *devvp;
201	struct ufs_args args;
202	struct ufsmount *ump = 0;
203	register struct ext2_sb_info *fs;
204	u_int size;
205	int error, flags;
206
207	if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)))
208		return (error);
209	/*
210	 * If updating, check whether changing from read-only to
211	 * read/write; if there is no device name, that's all we do.
212	 */
213	if (mp->mnt_flag & MNT_UPDATE) {
214		ump = VFSTOUFS(mp);
215		fs = ump->um_e2fs;
216		error = 0;
217		if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) {
218			flags = WRITECLOSE;
219			if (mp->mnt_flag & MNT_FORCE)
220				flags |= FORCECLOSE;
221			if (vfs_busy(mp))
222				return (EBUSY);
223			error = ext2_flushfiles(mp, flags, p);
224			vfs_unbusy(mp);
225		}
226		if (!error && (mp->mnt_flag & MNT_RELOAD))
227			error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p);
228		if (error)
229			return (error);
230		if (fs->s_rd_only && (mp->mnt_flag & MNT_WANTRDWR))
231			fs->s_rd_only = 0;
232		if (fs->s_rd_only == 0) {
233			/* don't say it's clean */
234			fs->s_es->s_state &= ~EXT2_VALID_FS;
235			ext2_sbupdate(ump, MNT_WAIT);
236		}
237		if (args.fspec == 0) {
238			/*
239			 * Process export requests.
240			 */
241			return (vfs_export(mp, &ump->um_export, &args.export));
242		}
243	}
244	/*
245	 * Not an update, or updating the name: look up the name
246	 * and verify that it refers to a sensible block device.
247	 */
248	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
249	if (error = namei(ndp))
250		return (error);
251	devvp = ndp->ni_vp;
252
253	if (devvp->v_type != VBLK) {
254		vrele(devvp);
255		return (ENOTBLK);
256	}
257	if (major(devvp->v_rdev) >= nblkdev) {
258		vrele(devvp);
259		return (ENXIO);
260	}
261	if ((mp->mnt_flag & MNT_UPDATE) == 0)
262		error = ext2_mountfs(devvp, mp, p);
263	else {
264		if (devvp != ump->um_devvp)
265			error = EINVAL;	/* needs translation */
266		else
267			vrele(devvp);
268	}
269	if (error) {
270		vrele(devvp);
271		return (error);
272	}
273	ump = VFSTOUFS(mp);
274	fs = ump->um_e2fs;
275	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
276	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
277	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
278	    MNAMELEN);
279	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
280	    &size);
281	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
282	(void)ext2_statfs(mp, &mp->mnt_stat, p);
283	return (0);
284}
285
286/*
287 * checks that the data in the descriptor blocks make sense
288 * this is taken from ext2/super.c
289 */
290static int ext2_check_descriptors (struct ext2_sb_info * sb)
291{
292        int i;
293        int desc_block = 0;
294        unsigned long block = sb->s_es->s_first_data_block;
295        struct ext2_group_desc * gdp = NULL;
296
297        /* ext2_debug ("Checking group descriptors"); */
298
299        for (i = 0; i < sb->s_groups_count; i++)
300        {
301		/* examine next descriptor block */
302                if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
303                        gdp = (struct ext2_group_desc *)
304				sb->s_group_desc[desc_block++]->b_data;
305                if (gdp->bg_block_bitmap < block ||
306                    gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
307                {
308                        printf ("ext2_check_descriptors: "
309                                    "Block bitmap for group %d"
310                                    " not in group (block %lu)!",
311                                    i, (unsigned long) gdp->bg_block_bitmap);
312                        return 0;
313                }
314                if (gdp->bg_inode_bitmap < block ||
315                    gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
316                {
317                        printf ("ext2_check_descriptors: "
318                                    "Inode bitmap for group %d"
319                                    " not in group (block %lu)!",
320                                    i, (unsigned long) gdp->bg_inode_bitmap);
321                        return 0;
322                }
323                if (gdp->bg_inode_table < block ||
324                    gdp->bg_inode_table + sb->s_itb_per_group >=
325                    block + EXT2_BLOCKS_PER_GROUP(sb))
326                {
327                        printf ("ext2_check_descriptors: "
328                                    "Inode table for group %d"
329                                    " not in group (block %lu)!",
330                                    i, (unsigned long) gdp->bg_inode_table);
331                        return 0;
332                }
333                block += EXT2_BLOCKS_PER_GROUP(sb);
334                gdp++;
335        }
336        return 1;
337}
338
339/*
340 * this computes the fields of the  ext2_sb_info structure from the
341 * data in the ext2_super_block structure read in
342 */
343static int compute_sb_data(devvp, es, fs)
344	struct vnode * devvp;
345	struct ext2_super_block * es;
346	struct ext2_sb_info * fs;
347{
348    int db_count, error;
349    int i, j;
350    int logic_sb_block = 1;	/* XXX for now */
351
352#if 1
353#define V(v)
354#else
355#define V(v)  printf(#v"= %d\n", fs->v);
356#endif
357
358    fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size;
359    V(s_blocksize)
360    fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size;
361    V(s_bshift)
362    fs->s_fsbtodb = es->s_log_block_size + 1;
363    V(s_fsbtodb)
364    fs->s_qbmask = fs->s_blocksize - 1;
365    V(s_bmask)
366    fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es);
367    V(s_blocksize_bits)
368    fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size;
369    V(s_frag_size)
370    if (fs->s_frag_size)
371	fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size;
372    V(s_frags_per_block)
373    fs->s_blocks_per_group = es->s_blocks_per_group;
374    V(s_blocks_per_group)
375    fs->s_frags_per_group = es->s_frags_per_group;
376    V(s_frags_per_group)
377    fs->s_inodes_per_group = es->s_inodes_per_group;
378    V(s_inodes_per_group)
379    fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE;
380    V(s_inodes_per_block)
381    fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block;
382    V(s_itb_per_group)
383    fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc);
384    V(s_desc_per_block)
385    /* s_resuid / s_resgid ? */
386    fs->s_groups_count = (es->s_blocks_count -
387			  es->s_first_data_block +
388			  EXT2_BLOCKS_PER_GROUP(fs) - 1) /
389			 EXT2_BLOCKS_PER_GROUP(fs);
390    V(s_groups_count)
391    db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) /
392	EXT2_DESC_PER_BLOCK(fs);
393    fs->s_db_per_group = db_count;
394    V(s_db_per_group)
395
396    fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *),
397		M_UFSMNT, M_WAITOK);
398
399    /* adjust logic_sb_block */
400    if(fs->s_blocksize > SBSIZE)
401	/* Godmar thinks: if the blocksize is greater than 1024, then
402	   the superblock is logically part of block zero.
403	 */
404        logic_sb_block = 0;
405
406    for (i = 0; i < db_count; i++) {
407	error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1),
408		fs->s_blocksize, NOCRED, &fs->s_group_desc[i]);
409	if(error) {
410	    for (j = 0; j < i; j++)
411		brelse(fs->s_group_desc[j]);
412	    bsd_free(fs->s_group_desc, M_UFSMNT);
413	    printf("EXT2-fs: unable to read group descriptors (%d)\n", error);
414	    return EIO;
415	}
416    }
417    if(!ext2_check_descriptors(fs)) {
418	    for (j = 0; j < db_count; j++)
419		brelse(fs->s_group_desc[j]);
420	    bsd_free(fs->s_group_desc, M_UFSMNT);
421	    printf("EXT2-fs: (ext2_check_descriptors failure) "
422		   "unable to read group descriptors\n");
423	    return EIO;
424    }
425
426    for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
427	    fs->s_inode_bitmap_number[i] = 0;
428	    fs->s_inode_bitmap[i] = NULL;
429	    fs->s_block_bitmap_number[i] = 0;
430	    fs->s_block_bitmap[i] = NULL;
431    }
432    fs->s_loaded_inode_bitmaps = 0;
433    fs->s_loaded_block_bitmaps = 0;
434    return 0;
435}
436
437/*
438 * Reload all incore data for a filesystem (used after running fsck on
439 * the root filesystem and finding things to fix). The filesystem must
440 * be mounted read-only.
441 *
442 * Things to do to update the mount:
443 *	1) invalidate all cached meta-data.
444 *	2) re-read superblock from disk.
445 *	3) re-read summary information from disk.
446 *	4) invalidate all inactive vnodes.
447 *	5) invalidate all cached file data.
448 *	6) re-read inode data for all active vnodes.
449 */
450static int
451ext2_reload(mountp, cred, p)
452	register struct mount *mountp;
453	struct ucred *cred;
454	struct proc *p;
455{
456	register struct vnode *vp, *nvp, *devvp;
457	struct inode *ip;
458	struct buf *bp;
459	struct ext2_super_block * es;
460	struct ext2_sb_info *fs;
461	int error;
462
463	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
464		return (EINVAL);
465	/*
466	 * Step 1: invalidate all cached meta-data.
467	 */
468	devvp = VFSTOUFS(mountp)->um_devvp;
469	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
470		panic("ext2_reload: dirty1");
471	/*
472	 * Step 2: re-read superblock from disk.
473	 * constants have been adjusted for ext2
474	 */
475	if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp))
476		return (error);
477	es = (struct ext2_super_block *)bp->b_data;
478	if (es->s_magic != EXT2_SUPER_MAGIC) {
479		if(es->s_magic == EXT2_PRE_02B_MAGIC)
480		    printf("This filesystem bears the magic number of a pre "
481			   "0.2b version of ext2. This is not supported by "
482			   "Lites.\n");
483		else
484		    printf("Wrong magic number: %x (expected %x for ext2 fs\n",
485			es->s_magic, EXT2_SUPER_MAGIC);
486		brelse(bp);
487		return (EIO);		/* XXX needs translation */
488	}
489	fs = VFSTOUFS(mountp)->um_e2fs;
490	bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block));
491
492	if(error = compute_sb_data(devvp, es, fs)) {
493		brelse(bp);
494		return error;
495	}
496#ifdef UNKLAR
497	if (fs->fs_sbsize < SBSIZE)
498		bp->b_flags |= B_INVAL;
499#endif
500	brelse(bp);
501
502loop:
503	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
504		nvp = vp->v_mntvnodes.le_next;
505		/*
506		 * Step 4: invalidate all inactive vnodes.
507		 */
508		if (vp->v_usecount == 0) {
509			vgone(vp);
510			continue;
511		}
512		/*
513		 * Step 5: invalidate all cached file data.
514		 */
515		if (vget(vp, 1))
516			goto loop;
517		if (vinvalbuf(vp, 0, cred, p, 0, 0))
518			panic("ext2_reload: dirty2");
519		/*
520		 * Step 6: re-read inode data for all active vnodes.
521		 */
522		ip = VTOI(vp);
523		if (error =
524		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
525		    (int)fs->s_blocksize, NOCRED, &bp)) {
526			vput(vp);
527			return (error);
528		}
529		ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data +
530			EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)),
531			&ip->i_din);
532		brelse(bp);
533		vput(vp);
534		if (vp->v_mount != mountp)
535			goto loop;
536	}
537	return (0);
538}
539
540/*
541 * Common code for mount and mountroot
542 */
543static int
544ext2_mountfs(devvp, mp, p)
545	register struct vnode *devvp;
546	struct mount *mp;
547	struct proc *p;
548{
549	register struct ufsmount *ump;
550	struct buf *bp;
551	register struct ext2_sb_info *fs;
552	struct ext2_super_block * es;
553	dev_t dev = devvp->v_rdev;
554	struct partinfo dpart;
555	int havepart = 0;
556	int error, i, size;
557	int ronly;
558#if !defined(__FreeBSD__)
559	extern struct vnode *rootvp;
560#endif
561
562	/*
563	 * Disallow multiple mounts of the same device.
564	 * Disallow mounting of a device that is currently in use
565	 * (except for root, which might share swap device for miniroot).
566	 * Flush out any old buffers remaining from a previous use.
567	 */
568	if (error = vfs_mountedon(devvp))
569		return (error);
570	if (vcount(devvp) > 1 && devvp != rootvp)
571		return (EBUSY);
572	if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
573		return (error);
574#ifdef READONLY
575/* turn on this to force it to be read-only */
576	mp->mnt_flag |= MNT_RDONLY;
577#endif
578
579	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
580	if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
581		return (error);
582	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
583		size = DEV_BSIZE;
584	else {
585		havepart = 1;
586		size = dpart.disklab->d_secsize;
587	}
588
589	bp = NULL;
590	ump = NULL;
591	if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp))
592		goto out;
593	es = (struct ext2_super_block *)bp->b_data;
594	if (es->s_magic != EXT2_SUPER_MAGIC) {
595		if(es->s_magic == EXT2_PRE_02B_MAGIC)
596		    printf("This filesystem bears the magic number of a pre "
597			   "0.2b version of ext2. This is not supported by "
598			   "Lites.\n");
599		else
600		    printf("Wrong magic number: %x (expected %x for EXT2FS)\n",
601			es->s_magic, EXT2_SUPER_MAGIC);
602		error = EINVAL;		/* XXX needs translation */
603		goto out;
604	}
605	ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
606	bzero((caddr_t)ump, sizeof *ump);
607	/* I don't know whether this is the right strategy. Note that
608	   we dynamically allocate both a ext2_sb_info and a ext2_super_block
609	   while Linux keeps the super block in a locked buffer
610	 */
611	ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info),
612		M_UFSMNT, M_WAITOK);
613	ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block),
614		M_UFSMNT, M_WAITOK);
615	bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block));
616	if(error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs)) {
617		brelse(bp);
618		return error;
619	}
620	brelse(bp);
621	bp = NULL;
622	fs = ump->um_e2fs;
623	fs->s_rd_only = ronly;	/* ronly is set according to mnt_flags */
624	if (!(fs->s_es->s_state & EXT2_VALID_FS)) {
625		printf("WARNING: %s was not properly dismounted\n",
626			fs->fs_fsmnt);
627	}
628	/* if the fs is not mounted read-only, make sure the super block is
629	   always written back on a sync()
630	 */
631	if (ronly == 0) {
632		fs->s_dirt = 1;		/* mark it modified */
633		fs->s_es->s_state &= ~EXT2_VALID_FS;	/* set fs invalid */
634	}
635	mp->mnt_data = (qaddr_t)ump;
636	mp->mnt_stat.f_fsid.val[0] = (long)dev;
637	mp->mnt_stat.f_fsid.val[1] = MOUNT_EXT2FS;
638	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
639	mp->mnt_flag |= MNT_LOCAL;
640	ump->um_mountp = mp;
641	ump->um_dev = dev;
642	ump->um_devvp = devvp;
643	/* setting those two parameters allows us to use
644	   ufs_bmap w/o changse !
645	*/
646	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
647	ump->um_bptrtodb = fs->s_es->s_log_block_size + 1;
648	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
649	for (i = 0; i < MAXQUOTAS; i++)
650		ump->um_quotas[i] = NULLVP;
651		devvp->v_specflags |= SI_MOUNTEDON;
652		if (ronly == 0)
653			ext2_sbupdate(ump, MNT_WAIT);
654	return (0);
655out:
656	if (bp)
657		brelse(bp);
658	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
659	if (ump) {
660		bsd_free(ump->um_fs, M_UFSMNT);
661		bsd_free(ump, M_UFSMNT);
662		mp->mnt_data = (qaddr_t)0;
663	}
664	return (error);
665}
666
667/*
668 * unmount system call
669 */
670static int
671ext2_unmount(mp, mntflags, p)
672	struct mount *mp;
673	int mntflags;
674	struct proc *p;
675{
676	register struct ufsmount *ump;
677	register struct ext2_sb_info *fs;
678	int error, flags, ronly, i;
679
680	flags = 0;
681	if (mntflags & MNT_FORCE) {
682		if (mp->mnt_flag & MNT_ROOTFS)
683			return (EINVAL);
684		flags |= FORCECLOSE;
685	}
686	if (error = ext2_flushfiles(mp, flags, p))
687		return (error);
688	ump = VFSTOUFS(mp);
689	fs = ump->um_e2fs;
690	ronly = fs->s_rd_only;
691	if (!ronly) {
692		fs->s_es->s_state |= EXT2_VALID_FS;	/* was fs_clean = 1 */
693		ext2_sbupdate(ump, MNT_WAIT);
694	}
695	/* release buffers containing group descriptors */
696	for(i = 0; i < fs->s_db_per_group; i++)
697		brelse(fs->s_group_desc[i]);
698	/* release cached inode/block bitmaps */
699        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
700                if (fs->s_inode_bitmap[i])
701                        brelse (fs->s_inode_bitmap[i]);
702        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
703                if (fs->s_block_bitmap[i])
704                        brelse (fs->s_block_bitmap[i]);
705
706	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
707	error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE,
708		NOCRED, p);
709	vrele(ump->um_devvp);
710	bsd_free(fs->s_es, M_UFSMNT);
711	bsd_free(fs, M_UFSMNT);
712	bsd_free(ump, M_UFSMNT);
713	mp->mnt_data = (qaddr_t)0;
714	mp->mnt_flag &= ~MNT_LOCAL;
715	return (error);
716}
717
718/*
719 * Flush out all the files in a filesystem.
720 */
721static int
722ext2_flushfiles(mp, flags, p)
723	register struct mount *mp;
724	int flags;
725	struct proc *p;
726{
727#if !defined(__FreeBSD__)
728	extern int doforce;
729#endif
730	register struct ufsmount *ump;
731	int error;
732#if QUOTA
733	int i;
734#endif
735
736	if (!doforce)
737		flags &= ~FORCECLOSE;
738	ump = VFSTOUFS(mp);
739#if QUOTA
740	if (mp->mnt_flag & MNT_QUOTA) {
741		if (error = vflush(mp, NULLVP, SKIPSYSTEM|flags))
742			return (error);
743		for (i = 0; i < MAXQUOTAS; i++) {
744			if (ump->um_quotas[i] == NULLVP)
745				continue;
746			quotaoff(p, mp, i);
747		}
748		/*
749		 * Here we fall through to vflush again to ensure
750		 * that we have gotten rid of all the system vnodes.
751		 */
752	}
753#endif
754	error = vflush(mp, NULLVP, flags);
755	return (error);
756}
757
758/*
759 * Get file system statistics.
760 * taken from ext2/super.c ext2_statfs
761 */
762static int
763ext2_statfs(mp, sbp, p)
764	struct mount *mp;
765	register struct statfs *sbp;
766	struct proc *p;
767{
768        unsigned long overhead;
769	unsigned long overhead_per_group;
770
771	register struct ufsmount *ump;
772	register struct ext2_sb_info *fs;
773	register struct ext2_super_block *es;
774
775	ump = VFSTOUFS(mp);
776	fs = ump->um_e2fs;
777	es = fs->s_es;
778
779	if (es->s_magic != EXT2_SUPER_MAGIC)
780		panic("ext2_statfs - magic number spoiled");
781
782	/*
783	 * Compute the overhead (FS structures)
784	 */
785	overhead_per_group = 1 /* super block */ +
786			     fs->s_db_per_group +
787			     1 /* block bitmap */ +
788			     1 /* inode bitmap */ +
789			     fs->s_itb_per_group;
790	overhead = es->s_first_data_block +
791		   fs->s_groups_count * overhead_per_group;
792
793	sbp->f_type = MOUNT_EXT2FS;
794	sbp->f_bsize = EXT2_FRAG_SIZE(fs);
795	sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
796	sbp->f_blocks = es->s_blocks_count - overhead;
797	sbp->f_bfree = es->s_free_blocks_count;
798	sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count;
799	sbp->f_files = es->s_inodes_count;
800	sbp->f_ffree = es->s_free_inodes_count;
801	if (sbp != &mp->mnt_stat) {
802		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
803			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
804		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
805			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
806	}
807	return (0);
808}
809
810/*
811 * Go through the disk queues to initiate sandbagged IO;
812 * go through the inodes to write those that have been modified;
813 * initiate the writing of the super block if it has been modified.
814 *
815 * Note: we are always called with the filesystem marked `MPBUSY'.
816 */
817static int
818ext2_sync(mp, waitfor, cred, p)
819	struct mount *mp;
820	int waitfor;
821	struct ucred *cred;
822	struct proc *p;
823{
824	register struct vnode *vp;
825	register struct inode *ip;
826	register struct ufsmount *ump = VFSTOUFS(mp);
827	register struct ext2_sb_info *fs;
828	int error, allerror = 0;
829
830	fs = ump->um_e2fs;
831	/*
832	 * Write back modified superblock.
833	 * Consistency check that the superblock
834	 * is still in the buffer cache.
835	 */
836	if (fs->s_dirt) {
837#if !defined(__FreeBSD__)
838		struct timeval time;
839#endif
840
841		if (fs->s_rd_only != 0) {		/* XXX */
842			printf("fs = %s\n", fs->fs_fsmnt);
843			panic("update: rofs mod");
844		}
845		fs->s_dirt = 0;
846#if !defined(__FreeBSD__)
847		get_time(&time);
848#endif
849		fs->s_es->s_wtime = time.tv_sec;
850		allerror = ext2_sbupdate(ump, waitfor);
851	}
852	/*
853	 * Write back each (modified) inode.
854	 */
855loop:
856	for (vp = mp->mnt_vnodelist.lh_first;
857	     vp != NULL;
858	     vp = vp->v_mntvnodes.le_next) {
859		/*
860		 * If the vnode that we are about to sync is no longer
861		 * associated with this mount point, start over.
862		 */
863		if (vp->v_mount != mp)
864			goto loop;
865		if (VOP_ISLOCKED(vp))
866			continue;
867		ip = VTOI(vp);
868		if ((ip->i_flag &
869		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
870		    vp->v_dirtyblkhd.lh_first == NULL)
871			continue;
872		if (vget(vp, 1))
873			goto loop;
874		if (error = VOP_FSYNC(vp, cred, waitfor, p))
875			allerror = error;
876		vput(vp);
877	}
878	/*
879	 * Force stale file system control information to be flushed.
880	 */
881	if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p))
882		allerror = error;
883#if QUOTA
884	qsync(mp);
885#endif
886	return (allerror);
887}
888
889/*
890 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
891 * in from disk.  If it is in core, wait for the lock bit to clear, then
892 * return the inode locked.  Detection and handling of mount points must be
893 * done by the calling routine.
894 */
895static int
896ext2_vget(mp, ino, vpp)
897	struct mount *mp;
898	ino_t ino;
899	struct vnode **vpp;
900{
901	register struct ext2_sb_info *fs;
902	register struct inode *ip;
903	struct ufsmount *ump;
904	struct buf *bp;
905	struct vnode *vp;
906	dev_t dev;
907	int i, type, error;
908	int used_blocks;
909
910	ump = VFSTOUFS(mp);
911	dev = ump->um_dev;
912restart:
913	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
914		return (0);
915
916#ifdef __FreeBSD__
917	/*
918	 * Lock out the creation of new entries in the FFS hash table in
919	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
920	 * may occur!
921	 */
922	if (ext2fs_inode_hash_lock) {
923		while (ext2fs_inode_hash_lock) {
924			ext2fs_inode_hash_lock = -1;
925			tsleep(&ext2fs_inode_hash_lock, PVM, "ffsvgt", 0);
926		}
927		goto restart;
928	}
929	ext2fs_inode_hash_lock = 1;
930#endif
931
932	/* Allocate a new vnode/inode. */
933	if (error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) {
934		*vpp = NULL;
935		return (error);
936	}
937	/* I don't really know what this 'type' does. I suppose it's some kind
938	 * of memory accounting. Let's just book this memory on FFS's account
939	 * If I'm not mistaken, this stuff isn't implemented anyway in Lites
940	 */
941	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
942	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
943#ifndef __FreeBSD__
944	insmntque(vp, mp);
945#endif
946	bzero((caddr_t)ip, sizeof(struct inode));
947	vp->v_data = ip;
948	ip->i_vnode = vp;
949	ip->i_e2fs = fs = ump->um_e2fs;
950	ip->i_dev = dev;
951	ip->i_number = ino;
952#if QUOTA
953	for (i = 0; i < MAXQUOTAS; i++)
954		ip->i_dquot[i] = NODQUOT;
955#endif
956	/*
957	 * Put it onto its hash chain and lock it so that other requests for
958	 * this inode will block if they arrive while we are sleeping waiting
959	 * for old data structures to be purged or for the contents of the
960	 * disk portion of this inode to be read.
961	 */
962	ufs_ihashins(ip);
963
964#ifdef __FreeBSD__
965	if (ext2fs_inode_hash_lock < 0)
966		wakeup(&ext2fs_inode_hash_lock);
967	ext2fs_inode_hash_lock = 0;
968#endif
969
970	/* Read in the disk contents for the inode, copy into the inode. */
971	/* Read in the disk contents for the inode, copy into the inode. */
972#if 0
973printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino)));
974#endif
975	if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
976	    (int)fs->s_blocksize, NOCRED, &bp)) {
977		/*
978		 * The inode does not contain anything useful, so it would
979		 * be misleading to leave it on its hash chain. With mode
980		 * still zero, it will be unlinked and returned to the free
981		 * list by vput().
982		 */
983		vput(vp);
984		brelse(bp);
985		*vpp = NULL;
986		return (error);
987	}
988	/* convert ext2 inode to dinode */
989	ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE *
990			ino_to_fsbo(fs, ino)), &ip->i_din);
991	ip->i_block_group = ino_to_cg(fs, ino);
992	ip->i_next_alloc_block = 0;
993	ip->i_next_alloc_goal = 0;
994	ip->i_prealloc_count = 0;
995	ip->i_prealloc_block = 0;
996        /* now we want to make sure that block pointers for unused
997           blocks are zeroed out - ext2_balloc depends on this
998	   although for regular files and directories only
999	*/
1000	if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
1001		used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize;
1002		for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1003			ip->i_db[i] = 0;
1004	}
1005/*
1006	ext2_print_inode(ip);
1007*/
1008	brelse(bp);
1009
1010	/*
1011	 * Initialize the vnode from the inode, check for aliases.
1012	 * Note that the underlying vnode may have changed.
1013	 */
1014	if (error = ufs_vinit(mp, ext2_specop_p, EXT2_FIFOOPS, &vp)) {
1015		vput(vp);
1016		*vpp = NULL;
1017		return (error);
1018	}
1019	/*
1020	 * Finish inode initialization now that aliasing has been resolved.
1021	 */
1022	ip->i_devvp = ump->um_devvp;
1023	VREF(ip->i_devvp);
1024	/*
1025	 * Set up a generation number for this inode if it does not
1026	 * already have one. This should only happen on old filesystems.
1027	 */
1028	if (ip->i_gen == 0) {
1029#if !defined(__FreeBSD__)
1030		struct timeval time;
1031		get_time(&time);
1032#endif
1033		if (++nextgennumber < (u_long)time.tv_sec)
1034			nextgennumber = time.tv_sec;
1035		ip->i_gen = nextgennumber;
1036		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1037			ip->i_flag |= IN_MODIFIED;
1038	}
1039	*vpp = vp;
1040	return (0);
1041}
1042
1043/*
1044 * File handle to vnode
1045 *
1046 * Have to be really careful about stale file handles:
1047 * - check that the inode number is valid
1048 * - call ext2_vget() to get the locked inode
1049 * - check for an unallocated inode (i_mode == 0)
1050 * - check that the given client host has export rights and return
1051 *   those rights via. exflagsp and credanonp
1052 */
1053static int
1054ext2_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1055	register struct mount *mp;
1056	struct fid *fhp;
1057	struct mbuf *nam;
1058	struct vnode **vpp;
1059	int *exflagsp;
1060	struct ucred **credanonp;
1061{
1062	register struct ufid *ufhp;
1063	struct ext2_sb_info *fs;
1064
1065	ufhp = (struct ufid *)fhp;
1066	fs = VFSTOUFS(mp)->um_e2fs;
1067	if (ufhp->ufid_ino < ROOTINO ||
1068	    ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group)
1069		return (ESTALE);
1070	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1071}
1072
1073/*
1074 * Vnode pointer to File handle
1075 */
1076/* ARGSUSED */
1077static int
1078ext2_vptofh(vp, fhp)
1079	struct vnode *vp;
1080	struct fid *fhp;
1081{
1082	register struct inode *ip;
1083	register struct ufid *ufhp;
1084
1085	ip = VTOI(vp);
1086	ufhp = (struct ufid *)fhp;
1087	ufhp->ufid_len = sizeof(struct ufid);
1088	ufhp->ufid_ino = ip->i_number;
1089	ufhp->ufid_gen = ip->i_gen;
1090	return (0);
1091}
1092
1093/*
1094 * Write a superblock and associated information back to disk.
1095 */
1096static int
1097ext2_sbupdate(mp, waitfor)
1098	struct ufsmount *mp;
1099	int waitfor;
1100{
1101	register struct ext2_sb_info *fs = mp->um_e2fs;
1102	register struct ext2_super_block *es = fs->s_es;
1103	register struct buf *bp;
1104	int i, error = 0;
1105/*
1106printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no");
1107*/
1108	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
1109	bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block));
1110	if (waitfor == MNT_WAIT)
1111		error = bwrite(bp);
1112	else
1113		bawrite(bp);
1114
1115	/* write group descriptors back on disk */
1116	for(i = 0; i < fs->s_db_per_group; i++)
1117		/* Godmar thinks: we must avoid using any of the b*write
1118		 * functions here: we want to keep the buffer locked
1119		 * so we use my 'housemade' write routine:
1120		 */
1121		error |= ll_w_block(fs->s_group_desc[i], waitfor == MNT_WAIT);
1122
1123        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
1124                if (fs->s_inode_bitmap[i])
1125                        ll_w_block (fs->s_inode_bitmap[i], 1);
1126        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
1127                if (fs->s_block_bitmap[i])
1128                        ll_w_block (fs->s_block_bitmap[i], 1);
1129
1130	return (error);
1131}
1132