ext2_vfsops.c revision 59794
1/*
2 *  modified for EXT2FS support in Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*
8 * Copyright (c) 1989, 1991, 1993, 1994
9 *	The Regents of the University of California.  All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
40 *	$FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 59794 2000-04-30 18:52:11Z phk $
41 */
42
43#include "opt_quota.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/namei.h>
48#include <sys/proc.h>
49#include <sys/kernel.h>
50#include <sys/vnode.h>
51#include <sys/mount.h>
52#include <sys/buf.h>
53#include <sys/conf.h>
54#include <sys/fcntl.h>
55#include <sys/disklabel.h>
56#include <sys/malloc.h>
57#include <sys/stat.h>
58
59#include <ufs/ufs/extattr.h>
60#include <ufs/ufs/quota.h>
61#include <ufs/ufs/ufsmount.h>
62#include <ufs/ufs/inode.h>
63#include <ufs/ufs/ufs_extern.h>
64
65
66#include <gnu/ext2fs/fs.h>
67#include <gnu/ext2fs/ext2_extern.h>
68#include <gnu/ext2fs/ext2_fs.h>
69#include <gnu/ext2fs/ext2_fs_sb.h>
70
/*
 * Forward declarations for the file-local VFS entry points and helpers.
 * The ext2_* entry points among them are installed in ext2fs_vfsops below.
 */
71static int ext2_fhtovp __P((struct mount *, struct fid *, struct vnode **));
72static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p));
73static int ext2_mount __P((struct mount *,
74	    char *, caddr_t, struct nameidata *, struct proc *));
75static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *));
76static int ext2_reload __P((struct mount *mountp, struct ucred *cred,
77			struct proc *p));
78static int ext2_sbupdate __P((struct ufsmount *, int));
79static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *));
80static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *));
81static int ext2_unmount __P((struct mount *, int, struct proc *));
82static int ext2_vget __P((struct mount *, ino_t, struct vnode **));
83static int ext2_vptofh __P((struct vnode *, struct fid *));
84
/*
 * Malloc type for the in-core inode allocated in ext2_vget(); it is also
 * recorded in ump->um_malloctype by ext2_mountfs().
 */
85static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part");
86
/*
 * VFS operations vector for ext2fs.  The initializer is positional, so the
 * order of the entries must follow the member order of struct vfsops
 * (declared outside this file -- NOTE(review): confirm against
 * <sys/mount.h> before reordering).  ext2-specific routines come from this
 * file; the ufs_* and vfs_std* entries are shared implementations.
 */
87static struct vfsops ext2fs_vfsops = {
88	ext2_mount,
89	ufs_start,		/* empty function */
90	ext2_unmount,
91	ufs_root,		/* root inode via vget */
92	ufs_quotactl,		/* does operations associated with quotas */
93	ext2_statfs,
94	ext2_sync,
95	ext2_vget,
96	ext2_fhtovp,
97	ufs_check_export,
98	ext2_vptofh,
99	ext2_init,
100	vfs_stduninit,
101	vfs_stdextattrctl,
102};
103
/* Register the operations vector under the filesystem name "ext2fs". */
104VFS_SET(ext2fs_vfsops, ext2fs, 0);
/* Local aliases: the rest of this file allocates through bsd_malloc/bsd_free. */
105#define bsd_malloc malloc
106#define bsd_free free
107
/*
 * Guards insertion of new inodes into the inode hash in ext2_vget():
 * 0 means free, 1 means held, and a waiter sets it to -1 before sleeping
 * so that the holder knows to issue a wakeup() when it releases the lock.
 */
108static int ext2fs_inode_hash_lock;
109
/* Superblock validation and in-core superblock setup helpers. */
110static int	ext2_check_sb_compat __P((struct ext2_super_block *es,
111					  dev_t dev, int ronly));
112static int	compute_sb_data __P((struct vnode * devvp,
113				     struct ext2_super_block * es,
114				     struct ext2_sb_info * fs));
115
116#ifdef notyet
117static int ext2_mountroot __P((void));
118
119/*
120 * Called by main() when ext2fs is going to be mounted as root.
121 *
122 * Name is updated by mount(8) after booting.
123 */
124#define ROOTNAME	"root_device"
125
126static int
127ext2_mountroot()
128{
129	register struct ext2_sb_info *fs;
130	register struct mount *mp;
131	struct proc *p = curproc;
132	struct ufsmount *ump;
133	u_int size;
134	int error;
135
136	if ((error = bdevvp(rootdev, &rootvp))) {
137		printf("ext2_mountroot: can't find rootvp\n");
138		return (error);
139	}
140	mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
141	bzero((char *)mp, (u_long)sizeof(struct mount));
142	mp->mnt_op = &ext2fs_vfsops;
143	mp->mnt_flag = MNT_RDONLY;
144	if (error = ext2_mountfs(rootvp, mp, p)) {
145		bsd_free(mp, M_MOUNT);
146		return (error);
147	}
148	if (error = vfs_lock(mp)) {
149		(void)ext2_unmount(mp, 0, p);
150		bsd_free(mp, M_MOUNT);
151		return (error);
152	}
153	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
154	mp->mnt_flag |= MNT_ROOTFS;
155	mp->mnt_vnodecovered = NULLVP;
156	ump = VFSTOUFS(mp);
157	fs = ump->um_e2fs;
158	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
159	fs->fs_fsmnt[0] = '/';
160	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
161	    MNAMELEN);
162	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
163	    &size);
164	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
165	(void)ext2_statfs(mp, &mp->mnt_stat, p);
166	vfs_unlock(mp);
167	inittodr(fs->s_es->s_wtime);		/* this helps to set the time */
168	return (0);
169}
170#endif
171
172/*
173 * VFS Operations.
174 *
175 * mount system call
176 */
177static int
178ext2_mount(mp, path, data, ndp, p)
179	register struct mount *mp;
180	char *path;
181	caddr_t data;		/* this is actually a (struct ufs_args *) */
182	struct nameidata *ndp;
183	struct proc *p;
184{
185	struct vnode *devvp;
186	struct ufs_args args;
187	struct ufsmount *ump = 0;
188	register struct ext2_sb_info *fs;
189	u_int size;
190	int error, flags;
191	mode_t accessmode;
192
193	if ((error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) != 0)
194		return (error);
195	/*
196	 * If updating, check whether changing from read-only to
197	 * read/write; if there is no device name, that's all we do.
198	 */
199	if (mp->mnt_flag & MNT_UPDATE) {
200		ump = VFSTOUFS(mp);
201		fs = ump->um_e2fs;
202		error = 0;
203		if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) {
204			flags = WRITECLOSE;
205			if (mp->mnt_flag & MNT_FORCE)
206				flags |= FORCECLOSE;
207			if (vfs_busy(mp, LK_NOWAIT, 0, p))
208				return (EBUSY);
209			error = ext2_flushfiles(mp, flags, p);
210			vfs_unbusy(mp, p);
211			if (!error && fs->s_wasvalid) {
212				fs->s_es->s_state |= EXT2_VALID_FS;
213				ext2_sbupdate(ump, MNT_WAIT);
214			}
215			fs->s_rd_only = 1;
216		}
217		if (!error && (mp->mnt_flag & MNT_RELOAD))
218			error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p);
219		if (error)
220			return (error);
221		devvp = ump->um_devvp;
222		if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev,
223		    (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0)
224			return (EPERM);
225		if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
226			/*
227			 * If upgrade to read-write by non-root, then verify
228			 * that user has necessary permissions on the device.
229			 */
230			if (p->p_ucred->cr_uid != 0) {
231				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
232				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
233				    p->p_ucred, p)) != 0) {
234					VOP_UNLOCK(devvp, 0, p);
235					return (error);
236				}
237				VOP_UNLOCK(devvp, 0, p);
238			}
239
240			if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 ||
241			    (fs->s_es->s_state & EXT2_ERROR_FS)) {
242				if (mp->mnt_flag & MNT_FORCE) {
243					printf(
244"WARNING: %s was not properly dismounted\n",
245					    fs->fs_fsmnt);
246				} else {
247					printf(
248"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
249					    fs->fs_fsmnt);
250					return (EPERM);
251				}
252			}
253			fs->s_es->s_state &= ~EXT2_VALID_FS;
254			ext2_sbupdate(ump, MNT_WAIT);
255			fs->s_rd_only = 0;
256		}
257		if (args.fspec == 0) {
258			/*
259			 * Process export requests.
260			 */
261			return (vfs_export(mp, &ump->um_export, &args.export));
262		}
263	}
264	/*
265	 * Not an update, or updating the name: look up the name
266	 * and verify that it refers to a sensible block device.
267	 */
268	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
269	if ((error = namei(ndp)) != 0)
270		return (error);
271	NDFREE(ndp, NDF_ONLY_PNBUF);
272	devvp = ndp->ni_vp;
273
274	if (!vn_isdisk(devvp, &error)) {
275		vrele(devvp);
276		return (error);
277	}
278
279	/*
280	 * If mount by non-root, then verify that user has necessary
281	 * permissions on the device.
282	 */
283	if (p->p_ucred->cr_uid != 0) {
284		accessmode = VREAD;
285		if ((mp->mnt_flag & MNT_RDONLY) == 0)
286			accessmode |= VWRITE;
287		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
288		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) {
289			vput(devvp);
290			return (error);
291		}
292		VOP_UNLOCK(devvp, 0, p);
293	}
294
295	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
296		error = ext2_mountfs(devvp, mp, p);
297	} else {
298		if (devvp != ump->um_devvp)
299			error = EINVAL;	/* needs translation */
300		else
301			vrele(devvp);
302	}
303	if (error) {
304		vrele(devvp);
305		return (error);
306	}
307	ump = VFSTOUFS(mp);
308	fs = ump->um_e2fs;
309	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
310	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
311	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
312	    MNAMELEN);
313	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
314	    &size);
315	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
316	(void)ext2_statfs(mp, &mp->mnt_stat, p);
317	return (0);
318}
319
320/*
321 * checks that the data in the descriptor blocks make sense
322 * this is taken from ext2/super.c
323 */
324static int ext2_check_descriptors (struct ext2_sb_info * sb)
325{
326        int i;
327        int desc_block = 0;
328        unsigned long block = sb->s_es->s_first_data_block;
329        struct ext2_group_desc * gdp = NULL;
330
331        /* ext2_debug ("Checking group descriptors"); */
332
333        for (i = 0; i < sb->s_groups_count; i++)
334        {
335		/* examine next descriptor block */
336                if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
337                        gdp = (struct ext2_group_desc *)
338				sb->s_group_desc[desc_block++]->b_data;
339                if (gdp->bg_block_bitmap < block ||
340                    gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
341                {
342                        printf ("ext2_check_descriptors: "
343                                    "Block bitmap for group %d"
344                                    " not in group (block %lu)!\n",
345                                    i, (unsigned long) gdp->bg_block_bitmap);
346                        return 0;
347                }
348                if (gdp->bg_inode_bitmap < block ||
349                    gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
350                {
351                        printf ("ext2_check_descriptors: "
352                                    "Inode bitmap for group %d"
353                                    " not in group (block %lu)!\n",
354                                    i, (unsigned long) gdp->bg_inode_bitmap);
355                        return 0;
356                }
357                if (gdp->bg_inode_table < block ||
358                    gdp->bg_inode_table + sb->s_itb_per_group >=
359                    block + EXT2_BLOCKS_PER_GROUP(sb))
360                {
361                        printf ("ext2_check_descriptors: "
362                                    "Inode table for group %d"
363                                    " not in group (block %lu)!\n",
364                                    i, (unsigned long) gdp->bg_inode_table);
365                        return 0;
366                }
367                block += EXT2_BLOCKS_PER_GROUP(sb);
368                gdp++;
369        }
370        return 1;
371}
372
373static int
374ext2_check_sb_compat(es, dev, ronly)
375	struct ext2_super_block *es;
376	dev_t dev;
377	int ronly;
378{
379
380	if (es->s_magic != EXT2_SUPER_MAGIC) {
381		printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
382		    devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC);
383		return (1);
384	}
385	if (es->s_rev_level > EXT2_GOOD_OLD_REV) {
386		if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) {
387			printf(
388"WARNING: mount of %s denied due to unsupported optional features\n",
389			    devtoname(dev));
390			return (1);
391		}
392		if (!ronly &&
393		    (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) {
394			printf(
395"WARNING: R/W mount of %s denied due to unsupported optional features\n",
396			    devtoname(dev));
397			return (1);
398		}
399	}
400	return (0);
401}
402
403/*
404 * this computes the fields of the  ext2_sb_info structure from the
405 * data in the ext2_super_block structure read in
406 */
407static int compute_sb_data(devvp, es, fs)
408	struct vnode * devvp;
409	struct ext2_super_block * es;
410	struct ext2_sb_info * fs;
411{
412    int db_count, error;
413    int i, j;
414    int logic_sb_block = 1;	/* XXX for now */
415
416#if 1
417#define V(v)
418#else
419#define V(v)  printf(#v"= %d\n", fs->v);
420#endif
421
422    fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size;
423    V(s_blocksize)
424    fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size;
425    V(s_bshift)
426    fs->s_fsbtodb = es->s_log_block_size + 1;
427    V(s_fsbtodb)
428    fs->s_qbmask = fs->s_blocksize - 1;
429    V(s_bmask)
430    fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es);
431    V(s_blocksize_bits)
432    fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size;
433    V(s_frag_size)
434    if (fs->s_frag_size)
435	fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size;
436    V(s_frags_per_block)
437    fs->s_blocks_per_group = es->s_blocks_per_group;
438    V(s_blocks_per_group)
439    fs->s_frags_per_group = es->s_frags_per_group;
440    V(s_frags_per_group)
441    fs->s_inodes_per_group = es->s_inodes_per_group;
442    V(s_inodes_per_group)
443    fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE;
444    V(s_inodes_per_block)
445    fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block;
446    V(s_itb_per_group)
447    fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc);
448    V(s_desc_per_block)
449    /* s_resuid / s_resgid ? */
450    fs->s_groups_count = (es->s_blocks_count -
451			  es->s_first_data_block +
452			  EXT2_BLOCKS_PER_GROUP(fs) - 1) /
453			 EXT2_BLOCKS_PER_GROUP(fs);
454    V(s_groups_count)
455    db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) /
456	EXT2_DESC_PER_BLOCK(fs);
457    fs->s_db_per_group = db_count;
458    V(s_db_per_group)
459
460    fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *),
461		M_UFSMNT, M_WAITOK);
462
463    /* adjust logic_sb_block */
464    if(fs->s_blocksize > SBSIZE)
465	/* Godmar thinks: if the blocksize is greater than 1024, then
466	   the superblock is logically part of block zero.
467	 */
468        logic_sb_block = 0;
469
470    for (i = 0; i < db_count; i++) {
471	error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1),
472		fs->s_blocksize, NOCRED, &fs->s_group_desc[i]);
473	if(error) {
474	    for (j = 0; j < i; j++)
475		brelse(fs->s_group_desc[j]);
476	    bsd_free(fs->s_group_desc, M_UFSMNT);
477	    printf("EXT2-fs: unable to read group descriptors (%d)\n", error);
478	    return EIO;
479	}
480	/* Set the B_LOCKED flag on the buffer, then brelse() it */
481	LCK_BUF(fs->s_group_desc[i])
482    }
483    if(!ext2_check_descriptors(fs)) {
484	    for (j = 0; j < db_count; j++)
485		    ULCK_BUF(fs->s_group_desc[j])
486	    bsd_free(fs->s_group_desc, M_UFSMNT);
487	    printf("EXT2-fs: (ext2_check_descriptors failure) "
488		   "unable to read group descriptors\n");
489	    return EIO;
490    }
491
492    for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
493	    fs->s_inode_bitmap_number[i] = 0;
494	    fs->s_inode_bitmap[i] = NULL;
495	    fs->s_block_bitmap_number[i] = 0;
496	    fs->s_block_bitmap[i] = NULL;
497    }
498    fs->s_loaded_inode_bitmaps = 0;
499    fs->s_loaded_block_bitmaps = 0;
500    return 0;
501}
502
503/*
504 * Reload all incore data for a filesystem (used after running fsck on
505 * the root filesystem and finding things to fix). The filesystem must
506 * be mounted read-only.
507 *
508 * Things to do to update the mount:
509 *	1) invalidate all cached meta-data.
510 *	2) re-read superblock from disk.
511 *	3) re-read summary information from disk.
512 *	4) invalidate all inactive vnodes.
513 *	5) invalidate all cached file data.
514 *	6) re-read inode data for all active vnodes.
515 */
/*
 * ext2_reload: re-read the superblock and every in-core inode of a
 * read-only mount from disk.
 *
 *	mountp	mount to reload; must have MNT_RDONLY set
 *	cred	credentials passed to vinvalbuf()
 *	p	calling process
 *
 * Returns 0 on success; EINVAL if the mount is not read-only; EIO if the
 * re-read superblock fails ext2_check_sb_compat(); otherwise the error
 * from bread() or compute_sb_data().  Panics if vinvalbuf() reports
 * failure for the device or for a vnode.
 */
516static int
517ext2_reload(mountp, cred, p)
518	register struct mount *mountp;
519	struct ucred *cred;
520	struct proc *p;
521{
522	register struct vnode *vp, *nvp, *devvp;
523	struct inode *ip;
524	struct buf *bp;
525	struct ext2_super_block * es;
526	struct ext2_sb_info *fs;
527	int error;
528
529	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
530		return (EINVAL);
531	/*
532	 * Step 1: invalidate all cached meta-data.
533	 */
534	devvp = VFSTOUFS(mountp)->um_devvp;
535	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
536		panic("ext2_reload: dirty1");
537	/*
538	 * Step 2: re-read superblock from disk.
539	 * constants have been adjusted for ext2
540	 */
541	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
542		return (error);
543	es = (struct ext2_super_block *)bp->b_data;
	/* Validated as if for a read/write mount (ronly argument is 0). */
544	if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
545		brelse(bp);
546		return (EIO);		/* XXX needs translation */
547	}
548	fs = VFSTOUFS(mountp)->um_e2fs;
	/* Overwrite the private in-core superblock copy with the disk copy. */
549	bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block));
550
	/*
	 * Recompute the derived fields and re-read the group descriptors.
	 * NOTE(review): compute_sb_data() allocates a new s_group_desc array
	 * and resets the bitmap caches; nothing here releases the previous
	 * array or cached bitmaps first -- confirm whether that leaks.
	 */
551	if((error = compute_sb_data(devvp, es, fs)) != 0) {
552		brelse(bp);
553		return error;
554	}
555#ifdef UNKLAR
556	if (fs->fs_sbsize < SBSIZE)
557		bp->b_flags |= B_INVAL;
558#endif
559	brelse(bp);
560
	/*
	 * Walk the mount's vnode list.  The list lock is dropped while a
	 * vnode is processed, so the scan restarts from the head whenever
	 * the current vnode has left this mount or could not be obtained.
	 */
561loop:
562	simple_lock(&mntvnode_slock);
563	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
564		if (vp->v_mount != mountp) {
565			simple_unlock(&mntvnode_slock);
566			goto loop;
567		}
		/* Remember the successor before vp can be recycled. */
568		nvp = vp->v_mntvnodes.le_next;
569		/*
570		 * Step 4: invalidate all inactive vnodes.
571		 */
572  		if (vrecycle(vp, &mntvnode_slock, p))
573  			goto loop;
574		/*
575		 * Step 5: invalidate all cached file data.
576		 */
		/* Take the vnode interlock before dropping the list lock. */
577		simple_lock(&vp->v_interlock);
578		simple_unlock(&mntvnode_slock);
579		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
580			goto loop;
581		}
582		if (vinvalbuf(vp, 0, cred, p, 0, 0))
583			panic("ext2_reload: dirty2");
584		/*
585		 * Step 6: re-read inode data for all active vnodes.
586		 */
587		ip = VTOI(vp);
		/* Read the filesystem block that holds this inode. */
588		error =
589		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
590		    (int)fs->s_blocksize, NOCRED, &bp);
591		if (error) {
592			vput(vp);
593			return (error);
594		}
		/* Convert the on-disk inode at its offset in the block. */
595		ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data +
596		    EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)),
597		    &ip->i_din);
598		brelse(bp);
599		vput(vp);
		/* Retake the list lock before advancing to nvp. */
600		simple_lock(&mntvnode_slock);
601	}
602	simple_unlock(&mntvnode_slock);
603	return (0);
604}
605
606/*
607 * Common code for mount and mountroot
608 */
609static int
610ext2_mountfs(devvp, mp, p)
611	register struct vnode *devvp;
612	struct mount *mp;
613	struct proc *p;
614{
615	register struct ufsmount *ump;
616	struct buf *bp;
617	register struct ext2_sb_info *fs;
618	struct ext2_super_block * es;
619	dev_t dev = devvp->v_rdev;
620	struct partinfo dpart;
621	int havepart = 0;
622	int error, i, size;
623	int ronly;
624
625	/*
626	 * Disallow multiple mounts of the same device.
627	 * Disallow mounting of a device that is currently in use
628	 * (except for root, which might share swap device for miniroot).
629	 * Flush out any old buffers remaining from a previous use.
630	 */
631	if ((error = vfs_mountedon(devvp)) != 0)
632		return (error);
633	if (vcount(devvp) > 1 && devvp != rootvp)
634		return (EBUSY);
635	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0)
636		return (error);
637#ifdef READONLY
638/* turn on this to force it to be read-only */
639	mp->mnt_flag |= MNT_RDONLY;
640#endif
641
642	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
643	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
644	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
645	VOP_UNLOCK(devvp, 0, p);
646	if (error)
647		return (error);
648	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
649		size = DEV_BSIZE;
650	else {
651		havepart = 1;
652		size = dpart.disklab->d_secsize;
653	}
654
655	bp = NULL;
656	ump = NULL;
657	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
658		goto out;
659	es = (struct ext2_super_block *)bp->b_data;
660	if (ext2_check_sb_compat(es, dev, ronly) != 0) {
661		error = EINVAL;		/* XXX needs translation */
662		goto out;
663	}
664	if ((es->s_state & EXT2_VALID_FS) == 0 ||
665	    (es->s_state & EXT2_ERROR_FS)) {
666		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
667			printf(
668"WARNING: Filesystem was not properly dismounted\n");
669		} else {
670			printf(
671"WARNING: R/W mount denied.  Filesystem is not clean - run fsck\n");
672			error = EPERM;
673			goto out;
674		}
675	}
676	ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
677	bzero((caddr_t)ump, sizeof *ump);
678	ump->um_malloctype = M_EXT2NODE;
679	ump->um_blkatoff = ext2_blkatoff;
680	ump->um_truncate = ext2_truncate;
681	ump->um_update = ext2_update;
682	ump->um_valloc = ext2_valloc;
683	ump->um_vfree = ext2_vfree;
684	/* I don't know whether this is the right strategy. Note that
685	   we dynamically allocate both a ext2_sb_info and a ext2_super_block
686	   while Linux keeps the super block in a locked buffer
687	 */
688	ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info),
689		M_UFSMNT, M_WAITOK);
690	ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block),
691		M_UFSMNT, M_WAITOK);
692	bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block));
693	if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs)))
694		goto out;
695	/*
696	 * We don't free the group descriptors allocated by compute_sb_data()
697	 * until ext2_unmount().  This is OK since the mount will succeed.
698	 */
699	brelse(bp);
700	bp = NULL;
701	fs = ump->um_e2fs;
702	fs->s_rd_only = ronly;	/* ronly is set according to mnt_flags */
703	/* if the fs is not mounted read-only, make sure the super block is
704	   always written back on a sync()
705	 */
706	fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0;
707	if (ronly == 0) {
708		fs->s_dirt = 1;		/* mark it modified */
709		fs->s_es->s_state &= ~EXT2_VALID_FS;	/* set fs invalid */
710	}
711	mp->mnt_data = (qaddr_t)ump;
712	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
713	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
714	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
715	mp->mnt_flag |= MNT_LOCAL;
716	ump->um_mountp = mp;
717	ump->um_dev = dev;
718	ump->um_devvp = devvp;
719	/* setting those two parameters allows us to use
720	   ufs_bmap w/o changse !
721	*/
722	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
723	ump->um_bptrtodb = fs->s_es->s_log_block_size + 1;
724	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
725	for (i = 0; i < MAXQUOTAS; i++)
726		ump->um_quotas[i] = NULLVP;
727	devvp->v_specmountpoint = mp;
728	if (ronly == 0)
729		ext2_sbupdate(ump, MNT_WAIT);
730	return (0);
731out:
732	if (bp)
733		brelse(bp);
734	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
735	if (ump) {
736		bsd_free(ump->um_e2fs->s_es, M_UFSMNT);
737		bsd_free(ump->um_e2fs, M_UFSMNT);
738		bsd_free(ump, M_UFSMNT);
739		mp->mnt_data = (qaddr_t)0;
740	}
741	return (error);
742}
743
744/*
745 * unmount system call
746 */
747static int
748ext2_unmount(mp, mntflags, p)
749	struct mount *mp;
750	int mntflags;
751	struct proc *p;
752{
753	register struct ufsmount *ump;
754	register struct ext2_sb_info *fs;
755	int error, flags, ronly, i;
756
757	flags = 0;
758	if (mntflags & MNT_FORCE) {
759		if (mp->mnt_flag & MNT_ROOTFS)
760			return (EINVAL);
761		flags |= FORCECLOSE;
762	}
763	if ((error = ext2_flushfiles(mp, flags, p)) != 0)
764		return (error);
765	ump = VFSTOUFS(mp);
766	fs = ump->um_e2fs;
767	ronly = fs->s_rd_only;
768	if (ronly == 0) {
769		if (fs->s_wasvalid)
770			fs->s_es->s_state |= EXT2_VALID_FS;
771		ext2_sbupdate(ump, MNT_WAIT);
772	}
773
774	/* release buffers containing group descriptors */
775	for(i = 0; i < fs->s_db_per_group; i++)
776		ULCK_BUF(fs->s_group_desc[i])
777	bsd_free(fs->s_group_desc, M_UFSMNT);
778
779	/* release cached inode/block bitmaps */
780        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
781                if (fs->s_inode_bitmap[i])
782			ULCK_BUF(fs->s_inode_bitmap[i])
783
784        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
785                if (fs->s_block_bitmap[i])
786			ULCK_BUF(fs->s_block_bitmap[i])
787
788	ump->um_devvp->v_specmountpoint = NULL;
789	error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE,
790		NOCRED, p);
791	vrele(ump->um_devvp);
792	bsd_free(fs->s_es, M_UFSMNT);
793	bsd_free(fs, M_UFSMNT);
794	bsd_free(ump, M_UFSMNT);
795	mp->mnt_data = (qaddr_t)0;
796	mp->mnt_flag &= ~MNT_LOCAL;
797	return (error);
798}
799
800/*
801 * Flush out all the files in a filesystem.
802 */
803static int
804ext2_flushfiles(mp, flags, p)
805	register struct mount *mp;
806	int flags;
807	struct proc *p;
808{
809	register struct ufsmount *ump;
810	int error;
811#if QUOTA
812	int i;
813#endif
814
815	ump = VFSTOUFS(mp);
816#if QUOTA
817	if (mp->mnt_flag & MNT_QUOTA) {
818		if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
819			return (error);
820		for (i = 0; i < MAXQUOTAS; i++) {
821			if (ump->um_quotas[i] == NULLVP)
822				continue;
823			quotaoff(p, mp, i);
824		}
825		/*
826		 * Here we fall through to vflush again to ensure
827		 * that we have gotten rid of all the system vnodes.
828		 */
829	}
830#endif
831	error = vflush(mp, NULLVP, flags);
832	return (error);
833}
834
835/*
836 * Get file system statistics.
837 * taken from ext2/super.c ext2_statfs
838 */
839static int
840ext2_statfs(mp, sbp, p)
841	struct mount *mp;
842	register struct statfs *sbp;
843	struct proc *p;
844{
845        unsigned long overhead;
846	unsigned long overhead_per_group;
847
848	register struct ufsmount *ump;
849	register struct ext2_sb_info *fs;
850	register struct ext2_super_block *es;
851
852	ump = VFSTOUFS(mp);
853	fs = ump->um_e2fs;
854	es = fs->s_es;
855
856	if (es->s_magic != EXT2_SUPER_MAGIC)
857		panic("ext2_statfs - magic number spoiled");
858
859	/*
860	 * Compute the overhead (FS structures)
861	 */
862	overhead_per_group = 1 /* super block */ +
863			     fs->s_db_per_group +
864			     1 /* block bitmap */ +
865			     1 /* inode bitmap */ +
866			     fs->s_itb_per_group;
867	overhead = es->s_first_data_block +
868		   fs->s_groups_count * overhead_per_group;
869
870	sbp->f_bsize = EXT2_FRAG_SIZE(fs);
871	sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
872	sbp->f_blocks = es->s_blocks_count - overhead;
873	sbp->f_bfree = es->s_free_blocks_count;
874	sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count;
875	sbp->f_files = es->s_inodes_count;
876	sbp->f_ffree = es->s_free_inodes_count;
877	if (sbp != &mp->mnt_stat) {
878		sbp->f_type = mp->mnt_vfc->vfc_typenum;
879		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
880			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
881		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
882			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
883	}
884	return (0);
885}
886
887/*
888 * Go through the disk queues to initiate sandbagged IO;
889 * go through the inodes to write those that have been modified;
890 * initiate the writing of the super block if it has been modified.
891 *
892 * Note: we are always called with the filesystem marked `MPBUSY'.
893 */
/*
 * ext2_sync: write back modified inodes, then the device vnode, then the
 * superblock if it is marked dirty.
 *
 *	mp	mount to sync
 *	waitfor	MNT_WAIT / MNT_LAZY style request; with MNT_LAZY, vnodes
 *		without inode flag changes and the device vnode are skipped
 *	cred	credentials passed to VOP_FSYNC()
 *	p	calling process
 *
 * Returns 0, or the most recent error reported by VOP_FSYNC() or
 * ext2_sbupdate().  Panics if the superblock is dirty on a read-only
 * filesystem.
 */
894static int
895ext2_sync(mp, waitfor, cred, p)
896	struct mount *mp;
897	int waitfor;
898	struct ucred *cred;
899	struct proc *p;
900{
901	struct vnode *nvp, *vp;
902	struct inode *ip;
903	struct ufsmount *ump = VFSTOUFS(mp);
904	struct ext2_sb_info *fs;
905	int error, allerror = 0;
906
907	fs = ump->um_e2fs;
908	if (fs->s_dirt != 0 && fs->s_rd_only != 0) {		/* XXX */
909		printf("fs = %s\n", fs->fs_fsmnt);
910		panic("ext2_sync: rofs mod");
911	}
912	/*
913	 * Write back each (modified) inode.
914	 */
	/* The list lock is held at the top of every iteration and at 'loop'. */
915	simple_lock(&mntvnode_slock);
916loop:
917	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
918		/*
919		 * If the vnode that we are about to sync is no longer
920		 * associated with this mount point, start over.
921		 */
922		if (vp->v_mount != mp)
923			goto loop;
924		simple_lock(&vp->v_interlock);
925		nvp = vp->v_mntvnodes.le_next;
926		ip = VTOI(vp);
		/*
		 * Skip vnodes with nothing to write: no type, or no pending
		 * inode flag changes together with either no dirty buffers
		 * or a lazy sync request.
		 */
927		if (vp->v_type == VNON ||
928		    ((ip->i_flag &
929		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
930		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) {
931			simple_unlock(&vp->v_interlock);
932			continue;
933		}
		/* Drop the list lock; vget() consumes the vnode interlock. */
934		simple_unlock(&mntvnode_slock);
935		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
936		if (error) {
			/*
			 * Could not get the vnode without sleeping: retake
			 * the list lock, restart the scan on ENOENT, and
			 * otherwise just move on to the next vnode.
			 */
937			simple_lock(&mntvnode_slock);
938			if (error == ENOENT)
939				goto loop;
940			continue;
941		}
942		if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
943			allerror = error;
944		VOP_UNLOCK(vp, 0, p);
945		vrele(vp);
946		simple_lock(&mntvnode_slock);
947	}
948	simple_unlock(&mntvnode_slock);
949	/*
950	 * Force stale file system control information to be flushed.
951	 */
952	if (waitfor != MNT_LAZY) {
953		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
954		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
955			allerror = error;
956		VOP_UNLOCK(ump->um_devvp, 0, p);
957	}
958#if QUOTA
959	qsync(mp);
960#endif
961	/*
962	 * Write back modified superblock.
963	 */
964	if (fs->s_dirt != 0) {
		/* Clear the dirty flag and stamp the write time first. */
965		fs->s_dirt = 0;
966		fs->s_es->s_wtime = time_second;
967		if ((error = ext2_sbupdate(ump, waitfor)) != 0)
968			allerror = error;
969	}
970	return (allerror);
971}
972
973/*
974 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
975 * in from disk.  If it is in core, wait for the lock bit to clear, then
976 * return the inode locked.  Detection and handling of mount points must be
977 * done by the calling routine.
978 */
979static int
980ext2_vget(mp, ino, vpp)
981	struct mount *mp;
982	ino_t ino;
983	struct vnode **vpp;
984{
985	register struct ext2_sb_info *fs;
986	register struct inode *ip;
987	struct ufsmount *ump;
988	struct buf *bp;
989	struct vnode *vp;
990	dev_t dev;
991	int i, error;
992	int used_blocks;
993
994	ump = VFSTOUFS(mp);
995	dev = ump->um_dev;
996restart:
997	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
998		return (0);
999
1000	/*
1001	 * Lock out the creation of new entries in the FFS hash table in
1002	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
1003	 * may occur!
1004	 */
1005	if (ext2fs_inode_hash_lock) {
1006		while (ext2fs_inode_hash_lock) {
1007			ext2fs_inode_hash_lock = -1;
1008			tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0);
1009		}
1010		goto restart;
1011	}
1012	ext2fs_inode_hash_lock = 1;
1013
1014	/*
1015	 * If this MALLOC() is performed after the getnewvnode()
1016	 * it might block, leaving a vnode with a NULL v_data to be
1017	 * found by ext2_sync() if a sync happens to fire right then,
1018	 * which will cause a panic because ext2_sync() blindly
1019	 * dereferences vp->v_data (as well it should).
1020	 */
1021	MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK);
1022
1023	/* Allocate a new vnode/inode. */
1024	if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) {
1025		if (ext2fs_inode_hash_lock < 0)
1026			wakeup(&ext2fs_inode_hash_lock);
1027		ext2fs_inode_hash_lock = 0;
1028		*vpp = NULL;
1029		FREE(ip, M_EXT2NODE);
1030		return (error);
1031	}
1032	bzero((caddr_t)ip, sizeof(struct inode));
1033	lockinit(&ip->i_lock, PINOD, "ext2in", 0, 0);
1034	vp->v_data = ip;
1035	ip->i_vnode = vp;
1036	ip->i_e2fs = fs = ump->um_e2fs;
1037	ip->i_dev = dev;
1038	ip->i_number = ino;
1039#if QUOTA
1040	for (i = 0; i < MAXQUOTAS; i++)
1041		ip->i_dquot[i] = NODQUOT;
1042#endif
1043	/*
1044	 * Put it onto its hash chain and lock it so that other requests for
1045	 * this inode will block if they arrive while we are sleeping waiting
1046	 * for old data structures to be purged or for the contents of the
1047	 * disk portion of this inode to be read.
1048	 */
1049	ufs_ihashins(ip);
1050
1051	if (ext2fs_inode_hash_lock < 0)
1052		wakeup(&ext2fs_inode_hash_lock);
1053	ext2fs_inode_hash_lock = 0;
1054
1055	/* Read in the disk contents for the inode, copy into the inode. */
1056#if 0
1057printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino)));
1058#endif
1059	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1060	    (int)fs->s_blocksize, NOCRED, &bp)) != 0) {
1061		/*
1062		 * The inode does not contain anything useful, so it would
1063		 * be misleading to leave it on its hash chain. With mode
1064		 * still zero, it will be unlinked and returned to the free
1065		 * list by vput().
1066		 */
1067		vput(vp);
1068		brelse(bp);
1069		*vpp = NULL;
1070		return (error);
1071	}
1072	/* convert ext2 inode to dinode */
1073	ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE *
1074			ino_to_fsbo(fs, ino)), &ip->i_din);
1075	ip->i_block_group = ino_to_cg(fs, ino);
1076	ip->i_next_alloc_block = 0;
1077	ip->i_next_alloc_goal = 0;
1078	ip->i_prealloc_count = 0;
1079	ip->i_prealloc_block = 0;
1080        /* now we want to make sure that block pointers for unused
1081           blocks are zeroed out - ext2_balloc depends on this
1082	   although for regular files and directories only
1083	*/
1084	if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
1085		used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize;
1086		for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1087			ip->i_db[i] = 0;
1088	}
1089/*
1090	ext2_print_inode(ip);
1091*/
1092	brelse(bp);
1093
1094	/*
1095	 * Initialize the vnode from the inode, check for aliases.
1096	 * Note that the underlying vnode may have changed.
1097	 */
1098	if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) {
1099		vput(vp);
1100		*vpp = NULL;
1101		return (error);
1102	}
1103	/*
1104	 * Finish inode initialization now that aliasing has been resolved.
1105	 */
1106	ip->i_devvp = ump->um_devvp;
1107	VREF(ip->i_devvp);
1108	/*
1109	 * Set up a generation number for this inode if it does not
1110	 * already have one. This should only happen on old filesystems.
1111	 */
1112	if (ip->i_gen == 0) {
1113		ip->i_gen = random() / 2 + 1;
1114		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1115			ip->i_flag |= IN_MODIFIED;
1116	}
1117	*vpp = vp;
1118	return (0);
1119}
1120
1121/*
1122 * File handle to vnode
1123 *
1124 * Have to be really careful about stale file handles:
1125 * - check that the inode number is valid
1126 * - call ext2_vget() to get the locked inode
1127 * - check for an unallocated inode (i_mode == 0)
1128 * - check that the given client host has export rights and return
1129 *   those rights via. exflagsp and credanonp
1130 */
1131static int
1132ext2_fhtovp(mp, fhp, vpp)
1133	register struct mount *mp;
1134	struct fid *fhp;
1135	struct vnode **vpp;
1136{
1137	register struct ufid *ufhp;
1138	struct ext2_sb_info *fs;
1139
1140	ufhp = (struct ufid *)fhp;
1141	fs = VFSTOUFS(mp)->um_e2fs;
1142	if (ufhp->ufid_ino < ROOTINO ||
1143	    ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group)
1144		return (ESTALE);
1145	return (ufs_fhtovp(mp, ufhp, vpp));
1146}
1147
1148/*
1149 * Vnode pointer to File handle
1150 */
1151/* ARGSUSED */
1152static int
1153ext2_vptofh(vp, fhp)
1154	struct vnode *vp;
1155	struct fid *fhp;
1156{
1157	register struct inode *ip;
1158	register struct ufid *ufhp;
1159
1160	ip = VTOI(vp);
1161	ufhp = (struct ufid *)fhp;
1162	ufhp->ufid_len = sizeof(struct ufid);
1163	ufhp->ufid_ino = ip->i_number;
1164	ufhp->ufid_gen = ip->i_gen;
1165	return (0);
1166}
1167
1168/*
1169 * Write a superblock and associated information back to disk.
1170 */
1171static int
1172ext2_sbupdate(mp, waitfor)
1173	struct ufsmount *mp;
1174	int waitfor;
1175{
1176	register struct ext2_sb_info *fs = mp->um_e2fs;
1177	register struct ext2_super_block *es = fs->s_es;
1178	register struct buf *bp;
1179	int error = 0;
1180/*
1181printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no");
1182*/
1183	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
1184	bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block));
1185	if (waitfor == MNT_WAIT)
1186		error = bwrite(bp);
1187	else
1188		bawrite(bp);
1189
1190	/*
1191	 * The buffers for group descriptors, inode bitmaps and block bitmaps
1192	 * are not busy at this point and are (hopefully) written by the
1193	 * usual sync mechanism. No need to write them here
1194		 */
1195
1196	return (error);
1197}
1198