ext2_vfsops.c revision 91406
1/*
2 *  modified for EXT2FS support in Lites 1.1
3 *
4 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
5 *  University of Utah, Department of Computer Science
6 */
7/*
8 * Copyright (c) 1989, 1991, 1993, 1994
9 *	The Regents of the University of California.  All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
40 *	$FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 91406 2002-02-27 18:32:23Z jhb $
41 */
42
43#include "opt_quota.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/namei.h>
48#include <sys/proc.h>
49#include <sys/kernel.h>
50#include <sys/vnode.h>
51#include <sys/mount.h>
52#include <sys/bio.h>
53#include <sys/buf.h>
54#include <sys/conf.h>
55#include <sys/fcntl.h>
56#include <sys/disklabel.h>
57#include <sys/malloc.h>
58#include <sys/stat.h>
59#include <sys/mutex.h>
60
61#include <ufs/ufs/extattr.h>
62#include <ufs/ufs/quota.h>
63#include <ufs/ufs/ufsmount.h>
64#include <ufs/ufs/inode.h>
65#include <ufs/ufs/ufs_extern.h>
66
67
68#include <gnu/ext2fs/fs.h>
69#include <gnu/ext2fs/ext2_extern.h>
70#include <gnu/ext2fs/ext2_fs.h>
71#include <gnu/ext2fs/ext2_fs_sb.h>
72
73static int ext2_fhtovp __P((struct mount *, struct fid *, struct vnode **));
74static int ext2_flushfiles __P((struct mount *mp, int flags, struct thread *td));
75static int ext2_mount __P((struct mount *,
76	    char *, caddr_t, struct nameidata *, struct thread *));
77static int ext2_mountfs __P((struct vnode *, struct mount *, struct thread *));
78static int ext2_reload __P((struct mount *mountp, struct ucred *cred,
79			struct thread *td));
80static int ext2_sbupdate __P((struct ufsmount *, int));
81static int ext2_statfs __P((struct mount *, struct statfs *, struct thread *));
82static int ext2_sync __P((struct mount *, int, struct ucred *, struct thread *));
83static int ext2_unmount __P((struct mount *, int, struct thread *));
84static int ext2_vget __P((struct mount *, ino_t, struct vnode **));
85static int ext2_vptofh __P((struct vnode *, struct fid *));
86
87static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part");
88
89static struct vfsops ext2fs_vfsops = {
90	ext2_mount,
91	ufs_start,		/* empty function */
92	ext2_unmount,
93	ufs_root,		/* root inode via vget */
94	ufs_quotactl,		/* does operations associated with quotas */
95	ext2_statfs,
96	ext2_sync,
97	ext2_vget,
98	ext2_fhtovp,
99	vfs_stdcheckexp,
100	ext2_vptofh,
101	ext2_init,
102	vfs_stduninit,
103	vfs_stdextattrctl,
104};
105
106VFS_SET(ext2fs_vfsops, ext2fs, 0);
107#define bsd_malloc malloc
108#define bsd_free free
109
110static int ext2fs_inode_hash_lock;
111
112static int	ext2_check_sb_compat __P((struct ext2_super_block *es,
113					  dev_t dev, int ronly));
114static int	compute_sb_data __P((struct vnode * devvp,
115				     struct ext2_super_block * es,
116				     struct ext2_sb_info * fs));
117
#ifdef notyet
static int ext2_mountroot __P((void));

/*
 * Called by main() when ext2fs is going to be mounted as root.
 *
 * Name is updated by mount(8) after booting.
 */
#define ROOTNAME	"root_device"

/*
 * Mount the device named by rootdev read-only as the root file system.
 *
 * Allocates and zeroes a struct mount, mounts it through ext2_mountfs(),
 * links it at the head of mountlist and fills in the mount point names
 * ("/" and ROOTNAME).  Returns 0 on success or the errno value of the
 * step that failed; the mount structure is freed on failure.
 *
 * NOTE(review): this code is compiled out (notyet) and has not been kept
 * in sync with the rest of the kernel; confirm vfs_lock()/vfs_unlock()
 * still exist before enabling it.
 */
static int
ext2_mountroot()
{
	register struct ext2_sb_info *fs;
	register struct mount *mp;
	struct thread *td = curthread;
	struct ufsmount *ump;
	size_t size;		/* length out-parameter of copystr() */
	int error;

	if ((error = bdevvp(rootdev, &rootvp)) != 0) {
		printf("ext2_mountroot: can't find rootvp\n");
		return (error);
	}
	mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	TAILQ_INIT(&mp->mnt_nvnodelist);
	TAILQ_INIT(&mp->mnt_reservedvnlist);
	mp->mnt_op = &ext2fs_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	if ((error = ext2_mountfs(rootvp, mp, td)) != 0) {
		bsd_free(mp, M_MOUNT);
		return (error);
	}
	if ((error = vfs_lock(mp)) != 0) {
		(void)ext2_unmount(mp, 0, td);
		bsd_free(mp, M_MOUNT);
		return (error);
	}
	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
	mp->mnt_flag |= MNT_ROOTFS;
	mp->mnt_vnodecovered = NULLVP;
	ump = VFSTOUFS(mp);
	fs = ump->um_e2fs;
	/* The root file system is always mounted on "/". */
	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
	fs->fs_fsmnt[0] = '/';
	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
	    MNAMELEN);
	/* Record the placeholder device name and zero-pad the remainder. */
	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
	    &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	(void)ext2_statfs(mp, &mp->mnt_stat, td);
	vfs_unlock(mp);
	inittodr(fs->s_es->s_wtime);		/* this helps to set the time */
	return (0);
}
#endif
175
176/*
177 * VFS Operations.
178 *
179 * mount system call
180 */
181static int
182ext2_mount(mp, path, data, ndp, td)
183	register struct mount *mp;
184	char *path;
185	caddr_t data;		/* this is actually a (struct ufs_args *) */
186	struct nameidata *ndp;
187	struct thread *td;
188{
189	struct vnode *devvp;
190	struct ufs_args args;
191	struct ufsmount *ump = 0;
192	register struct ext2_sb_info *fs;
193	size_t size;
194	int error, flags;
195	mode_t accessmode;
196
197	/* Double-check the length of path.. */
198	if (strlen(path) >= MAXMNTLEN - 1)
199		return (ENAMETOOLONG);
200	error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
201	if (error != 0)
202		return (error);
203	/*
204	 * If updating, check whether changing from read-only to
205	 * read/write; if there is no device name, that's all we do.
206	 */
207	if (mp->mnt_flag & MNT_UPDATE) {
208		ump = VFSTOUFS(mp);
209		fs = ump->um_e2fs;
210		error = 0;
211		if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) {
212			flags = WRITECLOSE;
213			if (mp->mnt_flag & MNT_FORCE)
214				flags |= FORCECLOSE;
215			if (vfs_busy(mp, LK_NOWAIT, 0, td))
216				return (EBUSY);
217			error = ext2_flushfiles(mp, flags, td);
218			vfs_unbusy(mp, td);
219			if (!error && fs->s_wasvalid) {
220				fs->s_es->s_state |= EXT2_VALID_FS;
221				ext2_sbupdate(ump, MNT_WAIT);
222			}
223			fs->s_rd_only = 1;
224		}
225		if (!error && (mp->mnt_flag & MNT_RELOAD))
226			error = ext2_reload(mp, ndp->ni_cnd.cn_cred, td);
227		if (error)
228			return (error);
229		devvp = ump->um_devvp;
230		if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev,
231		    (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0)
232			return (EPERM);
233		if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
234			/*
235			 * If upgrade to read-write by non-root, then verify
236			 * that user has necessary permissions on the device.
237			 */
238			if (suser_td(td)) {
239				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
240				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
241				    td->td_ucred, td)) != 0) {
242					VOP_UNLOCK(devvp, 0, td);
243					return (error);
244				}
245				VOP_UNLOCK(devvp, 0, td);
246			}
247
248			if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 ||
249			    (fs->s_es->s_state & EXT2_ERROR_FS)) {
250				if (mp->mnt_flag & MNT_FORCE) {
251					printf(
252"WARNING: %s was not properly dismounted\n",
253					    fs->fs_fsmnt);
254				} else {
255					printf(
256"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
257					    fs->fs_fsmnt);
258					return (EPERM);
259				}
260			}
261			fs->s_es->s_state &= ~EXT2_VALID_FS;
262			ext2_sbupdate(ump, MNT_WAIT);
263			fs->s_rd_only = 0;
264		}
265		if (args.fspec == 0) {
266			/*
267			 * Process export requests.
268			 */
269			return (vfs_export(mp, &args.export));
270		}
271	}
272	/*
273	 * Not an update, or updating the name: look up the name
274	 * and verify that it refers to a sensible block device.
275	 */
276	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
277	if ((error = namei(ndp)) != 0)
278		return (error);
279	NDFREE(ndp, NDF_ONLY_PNBUF);
280	devvp = ndp->ni_vp;
281
282	if (!vn_isdisk(devvp, &error)) {
283		vrele(devvp);
284		return (error);
285	}
286
287	/*
288	 * If mount by non-root, then verify that user has necessary
289	 * permissions on the device.
290	 */
291	if (suser_td(td)) {
292		accessmode = VREAD;
293		if ((mp->mnt_flag & MNT_RDONLY) == 0)
294			accessmode |= VWRITE;
295		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
296		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) {
297			vput(devvp);
298			return (error);
299		}
300		VOP_UNLOCK(devvp, 0, td);
301	}
302
303	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
304		error = ext2_mountfs(devvp, mp, td);
305	} else {
306		if (devvp != ump->um_devvp)
307			error = EINVAL;	/* needs translation */
308		else
309			vrele(devvp);
310	}
311	if (error) {
312		vrele(devvp);
313		return (error);
314	}
315	ump = VFSTOUFS(mp);
316	fs = ump->um_e2fs;
317	/*
318	 * Note that this strncpy() is ok because of a check at the start
319	 * of ext2_mount().
320	 */
321	strncpy(fs->fs_fsmnt, path, MAXMNTLEN);
322	fs->fs_fsmnt[MAXMNTLEN - 1] = '\0';
323	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
324	    &size);
325	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
326	(void)ext2_statfs(mp, &mp->mnt_stat, td);
327	return (0);
328}
329
330/*
331 * checks that the data in the descriptor blocks make sense
332 * this is taken from ext2/super.c
333 */
334static int ext2_check_descriptors (struct ext2_sb_info * sb)
335{
336        int i;
337        int desc_block = 0;
338        unsigned long block = sb->s_es->s_first_data_block;
339        struct ext2_group_desc * gdp = NULL;
340
341        /* ext2_debug ("Checking group descriptors"); */
342
343        for (i = 0; i < sb->s_groups_count; i++)
344        {
345		/* examine next descriptor block */
346                if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0)
347                        gdp = (struct ext2_group_desc *)
348				sb->s_group_desc[desc_block++]->b_data;
349                if (gdp->bg_block_bitmap < block ||
350                    gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
351                {
352                        printf ("ext2_check_descriptors: "
353                                    "Block bitmap for group %d"
354                                    " not in group (block %lu)!\n",
355                                    i, (unsigned long) gdp->bg_block_bitmap);
356                        return 0;
357                }
358                if (gdp->bg_inode_bitmap < block ||
359                    gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb))
360                {
361                        printf ("ext2_check_descriptors: "
362                                    "Inode bitmap for group %d"
363                                    " not in group (block %lu)!\n",
364                                    i, (unsigned long) gdp->bg_inode_bitmap);
365                        return 0;
366                }
367                if (gdp->bg_inode_table < block ||
368                    gdp->bg_inode_table + sb->s_itb_per_group >=
369                    block + EXT2_BLOCKS_PER_GROUP(sb))
370                {
371                        printf ("ext2_check_descriptors: "
372                                    "Inode table for group %d"
373                                    " not in group (block %lu)!\n",
374                                    i, (unsigned long) gdp->bg_inode_table);
375                        return 0;
376                }
377                block += EXT2_BLOCKS_PER_GROUP(sb);
378                gdp++;
379        }
380        return 1;
381}
382
383static int
384ext2_check_sb_compat(es, dev, ronly)
385	struct ext2_super_block *es;
386	dev_t dev;
387	int ronly;
388{
389
390	if (es->s_magic != EXT2_SUPER_MAGIC) {
391		printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
392		    devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC);
393		return (1);
394	}
395	if (es->s_rev_level > EXT2_GOOD_OLD_REV) {
396		if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) {
397			printf(
398"WARNING: mount of %s denied due to unsupported optional features\n",
399			    devtoname(dev));
400			return (1);
401		}
402		if (!ronly &&
403		    (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) {
404			printf(
405"WARNING: R/W mount of %s denied due to unsupported optional features\n",
406			    devtoname(dev));
407			return (1);
408		}
409	}
410	return (0);
411}
412
413/*
414 * this computes the fields of the  ext2_sb_info structure from the
415 * data in the ext2_super_block structure read in
416 */
417static int compute_sb_data(devvp, es, fs)
418	struct vnode * devvp;
419	struct ext2_super_block * es;
420	struct ext2_sb_info * fs;
421{
422    int db_count, error;
423    int i, j;
424    int logic_sb_block = 1;	/* XXX for now */
425
426#if 1
427#define V(v)
428#else
429#define V(v)  printf(#v"= %d\n", fs->v);
430#endif
431
432    fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size;
433    V(s_blocksize)
434    fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size;
435    V(s_bshift)
436    fs->s_fsbtodb = es->s_log_block_size + 1;
437    V(s_fsbtodb)
438    fs->s_qbmask = fs->s_blocksize - 1;
439    V(s_bmask)
440    fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es);
441    V(s_blocksize_bits)
442    fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size;
443    V(s_frag_size)
444    if (fs->s_frag_size)
445	fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size;
446    V(s_frags_per_block)
447    fs->s_blocks_per_group = es->s_blocks_per_group;
448    V(s_blocks_per_group)
449    fs->s_frags_per_group = es->s_frags_per_group;
450    V(s_frags_per_group)
451    fs->s_inodes_per_group = es->s_inodes_per_group;
452    V(s_inodes_per_group)
453    fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE;
454    V(s_inodes_per_block)
455    fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block;
456    V(s_itb_per_group)
457    fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc);
458    V(s_desc_per_block)
459    /* s_resuid / s_resgid ? */
460    fs->s_groups_count = (es->s_blocks_count -
461			  es->s_first_data_block +
462			  EXT2_BLOCKS_PER_GROUP(fs) - 1) /
463			 EXT2_BLOCKS_PER_GROUP(fs);
464    V(s_groups_count)
465    db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) /
466	EXT2_DESC_PER_BLOCK(fs);
467    fs->s_db_per_group = db_count;
468    V(s_db_per_group)
469
470    fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *),
471		M_UFSMNT, M_WAITOK);
472
473    /* adjust logic_sb_block */
474    if(fs->s_blocksize > SBSIZE)
475	/* Godmar thinks: if the blocksize is greater than 1024, then
476	   the superblock is logically part of block zero.
477	 */
478        logic_sb_block = 0;
479
480    for (i = 0; i < db_count; i++) {
481	error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1),
482		fs->s_blocksize, NOCRED, &fs->s_group_desc[i]);
483	if(error) {
484	    for (j = 0; j < i; j++)
485		brelse(fs->s_group_desc[j]);
486	    bsd_free(fs->s_group_desc, M_UFSMNT);
487	    printf("EXT2-fs: unable to read group descriptors (%d)\n", error);
488	    return EIO;
489	}
490	/* Set the B_LOCKED flag on the buffer, then brelse() it */
491	LCK_BUF(fs->s_group_desc[i])
492    }
493    if(!ext2_check_descriptors(fs)) {
494	    for (j = 0; j < db_count; j++)
495		    ULCK_BUF(fs->s_group_desc[j])
496	    bsd_free(fs->s_group_desc, M_UFSMNT);
497	    printf("EXT2-fs: (ext2_check_descriptors failure) "
498		   "unable to read group descriptors\n");
499	    return EIO;
500    }
501
502    for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
503	    fs->s_inode_bitmap_number[i] = 0;
504	    fs->s_inode_bitmap[i] = NULL;
505	    fs->s_block_bitmap_number[i] = 0;
506	    fs->s_block_bitmap[i] = NULL;
507    }
508    fs->s_loaded_inode_bitmaps = 0;
509    fs->s_loaded_block_bitmaps = 0;
510    return 0;
511}
512
513/*
514 * Reload all incore data for a filesystem (used after running fsck on
515 * the root filesystem and finding things to fix). The filesystem must
516 * be mounted read-only.
517 *
518 * Things to do to update the mount:
519 *	1) invalidate all cached meta-data.
520 *	2) re-read superblock from disk.
521 *	3) re-read summary information from disk.
522 *	4) invalidate all inactive vnodes.
523 *	5) invalidate all cached file data.
524 *	6) re-read inode data for all active vnodes.
525 */
526static int
527ext2_reload(mountp, cred, td)
528	register struct mount *mountp;
529	struct ucred *cred;
530	struct thread *td;
531{
532	register struct vnode *vp, *nvp, *devvp;
533	struct inode *ip;
534	struct buf *bp;
535	struct ext2_super_block * es;
536	struct ext2_sb_info *fs;
537	int error;
538
539	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
540		return (EINVAL);
541	/*
542	 * Step 1: invalidate all cached meta-data.
543	 */
544	devvp = VFSTOUFS(mountp)->um_devvp;
545	if (vinvalbuf(devvp, 0, cred, td, 0, 0))
546		panic("ext2_reload: dirty1");
547	/*
548	 * Step 2: re-read superblock from disk.
549	 * constants have been adjusted for ext2
550	 */
551	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
552		return (error);
553	es = (struct ext2_super_block *)bp->b_data;
554	if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
555		brelse(bp);
556		return (EIO);		/* XXX needs translation */
557	}
558	fs = VFSTOUFS(mountp)->um_e2fs;
559	bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block));
560
561	if((error = compute_sb_data(devvp, es, fs)) != 0) {
562		brelse(bp);
563		return error;
564	}
565#ifdef UNKLAR
566	if (fs->fs_sbsize < SBSIZE)
567		bp->b_flags |= B_INVAL;
568#endif
569	brelse(bp);
570
571loop:
572	mtx_lock(&mntvnode_mtx);
573	for (vp = TAILQ_FIRST(&mountp->mnt_nvnodelist); vp != NULL; vp = nvp) {
574		if (vp->v_mount != mountp) {
575			mtx_unlock(&mntvnode_mtx);
576			goto loop;
577		}
578		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
579		mtx_unlock(&mntvnode_mtx);
580		/*
581		 * Step 4: invalidate all inactive vnodes.
582		 */
583  		if (vrecycle(vp, NULL, td))
584  			goto loop;
585		/*
586		 * Step 5: invalidate all cached file data.
587		 */
588		mtx_lock(&vp->v_interlock);
589		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
590			goto loop;
591		}
592		if (vinvalbuf(vp, 0, cred, td, 0, 0))
593			panic("ext2_reload: dirty2");
594		/*
595		 * Step 6: re-read inode data for all active vnodes.
596		 */
597		ip = VTOI(vp);
598		error =
599		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
600		    (int)fs->s_blocksize, NOCRED, &bp);
601		if (error) {
602			vput(vp);
603			return (error);
604		}
605		ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data +
606		    EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)),
607		    &ip->i_din);
608		brelse(bp);
609		vput(vp);
610		mtx_lock(&mntvnode_mtx);
611	}
612	mtx_unlock(&mntvnode_mtx);
613	return (0);
614}
615
616/*
617 * Common code for mount and mountroot
618 */
619static int
620ext2_mountfs(devvp, mp, td)
621	register struct vnode *devvp;
622	struct mount *mp;
623	struct thread *td;
624{
625	register struct ufsmount *ump;
626	struct buf *bp;
627	register struct ext2_sb_info *fs;
628	struct ext2_super_block * es;
629	dev_t dev = devvp->v_rdev;
630	struct partinfo dpart;
631	int havepart = 0;
632	int error, i, size;
633	int ronly;
634
635	/*
636	 * Disallow multiple mounts of the same device.
637	 * Disallow mounting of a device that is currently in use
638	 * (except for root, which might share swap device for miniroot).
639	 * Flush out any old buffers remaining from a previous use.
640	 */
641	if ((error = vfs_mountedon(devvp)) != 0)
642		return (error);
643	if (vcount(devvp) > 1 && devvp != rootvp)
644		return (EBUSY);
645	if ((error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0)) != 0)
646		return (error);
647#ifdef READONLY
648/* turn on this to force it to be read-only */
649	mp->mnt_flag |= MNT_RDONLY;
650#endif
651
652	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
653	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
654	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
655	VOP_UNLOCK(devvp, 0, td);
656	if (error)
657		return (error);
658	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, td) != 0)
659		size = DEV_BSIZE;
660	else {
661		havepart = 1;
662		size = dpart.disklab->d_secsize;
663	}
664
665	bp = NULL;
666	ump = NULL;
667	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
668		goto out;
669	es = (struct ext2_super_block *)bp->b_data;
670	if (ext2_check_sb_compat(es, dev, ronly) != 0) {
671		error = EINVAL;		/* XXX needs translation */
672		goto out;
673	}
674	if ((es->s_state & EXT2_VALID_FS) == 0 ||
675	    (es->s_state & EXT2_ERROR_FS)) {
676		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
677			printf(
678"WARNING: Filesystem was not properly dismounted\n");
679		} else {
680			printf(
681"WARNING: R/W mount denied.  Filesystem is not clean - run fsck\n");
682			error = EPERM;
683			goto out;
684		}
685	}
686	ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
687	bzero((caddr_t)ump, sizeof *ump);
688	ump->um_malloctype = M_EXT2NODE;
689	ump->um_blkatoff = ext2_blkatoff;
690	ump->um_truncate = ext2_truncate;
691	ump->um_update = ext2_update;
692	ump->um_valloc = ext2_valloc;
693	ump->um_vfree = ext2_vfree;
694	/* I don't know whether this is the right strategy. Note that
695	   we dynamically allocate both a ext2_sb_info and a ext2_super_block
696	   while Linux keeps the super block in a locked buffer
697	 */
698	ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info),
699		M_UFSMNT, M_WAITOK);
700	ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block),
701		M_UFSMNT, M_WAITOK);
702	bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block));
703	if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs)))
704		goto out;
705	/*
706	 * We don't free the group descriptors allocated by compute_sb_data()
707	 * until ext2_unmount().  This is OK since the mount will succeed.
708	 */
709	brelse(bp);
710	bp = NULL;
711	fs = ump->um_e2fs;
712	fs->s_rd_only = ronly;	/* ronly is set according to mnt_flags */
713	/* if the fs is not mounted read-only, make sure the super block is
714	   always written back on a sync()
715	 */
716	fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0;
717	if (ronly == 0) {
718		fs->s_dirt = 1;		/* mark it modified */
719		fs->s_es->s_state &= ~EXT2_VALID_FS;	/* set fs invalid */
720	}
721	mp->mnt_data = (qaddr_t)ump;
722	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
723	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
724	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
725	mp->mnt_flag |= MNT_LOCAL;
726	ump->um_mountp = mp;
727	ump->um_dev = dev;
728	ump->um_devvp = devvp;
729	/* setting those two parameters allows us to use
730	   ufs_bmap w/o changse !
731	*/
732	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
733	ump->um_bptrtodb = fs->s_es->s_log_block_size + 1;
734	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
735	for (i = 0; i < MAXQUOTAS; i++)
736		ump->um_quotas[i] = NULLVP;
737	devvp->v_rdev->si_mountpoint = mp;
738	if (ronly == 0)
739		ext2_sbupdate(ump, MNT_WAIT);
740	return (0);
741out:
742	if (bp)
743		brelse(bp);
744	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, td);
745	if (ump) {
746		bsd_free(ump->um_e2fs->s_es, M_UFSMNT);
747		bsd_free(ump->um_e2fs, M_UFSMNT);
748		bsd_free(ump, M_UFSMNT);
749		mp->mnt_data = (qaddr_t)0;
750	}
751	return (error);
752}
753
754/*
755 * unmount system call
756 */
757static int
758ext2_unmount(mp, mntflags, td)
759	struct mount *mp;
760	int mntflags;
761	struct thread *td;
762{
763	register struct ufsmount *ump;
764	register struct ext2_sb_info *fs;
765	int error, flags, ronly, i;
766
767	flags = 0;
768	if (mntflags & MNT_FORCE) {
769		if (mp->mnt_flag & MNT_ROOTFS)
770			return (EINVAL);
771		flags |= FORCECLOSE;
772	}
773	if ((error = ext2_flushfiles(mp, flags, td)) != 0)
774		return (error);
775	ump = VFSTOUFS(mp);
776	fs = ump->um_e2fs;
777	ronly = fs->s_rd_only;
778	if (ronly == 0) {
779		if (fs->s_wasvalid)
780			fs->s_es->s_state |= EXT2_VALID_FS;
781		ext2_sbupdate(ump, MNT_WAIT);
782	}
783
784	/* release buffers containing group descriptors */
785	for(i = 0; i < fs->s_db_per_group; i++)
786		ULCK_BUF(fs->s_group_desc[i])
787	bsd_free(fs->s_group_desc, M_UFSMNT);
788
789	/* release cached inode/block bitmaps */
790        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
791                if (fs->s_inode_bitmap[i])
792			ULCK_BUF(fs->s_inode_bitmap[i])
793
794        for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
795                if (fs->s_block_bitmap[i])
796			ULCK_BUF(fs->s_block_bitmap[i])
797
798	ump->um_devvp->v_rdev->si_mountpoint = NULL;
799	error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE,
800		NOCRED, td);
801	vrele(ump->um_devvp);
802	bsd_free(fs->s_es, M_UFSMNT);
803	bsd_free(fs, M_UFSMNT);
804	bsd_free(ump, M_UFSMNT);
805	mp->mnt_data = (qaddr_t)0;
806	mp->mnt_flag &= ~MNT_LOCAL;
807	return (error);
808}
809
/*
 * Flush out all the files in a filesystem.
 *
 * `flags' is passed on to vflush().  With QUOTA configured and quotas
 * enabled on the mount, the non-system vnodes are flushed first and the
 * quota files are turned off before the final flush.
 *
 * Returns 0 on success or the error reported by vflush().
 */
static int
ext2_flushfiles(mp, flags, td)
	register struct mount *mp;
	int flags;
	struct thread *td;
{
	register struct ufsmount *ump;
	int error;
#if QUOTA
	int type;
#endif

	ump = VFSTOUFS(mp);
#if QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		error = vflush(mp, 0, SKIPSYSTEM|flags);
		if (error != 0)
			return (error);
		for (type = 0; type < MAXQUOTAS; type++) {
			if (ump->um_quotas[type] != NULLVP)
				quotaoff(td, mp, type);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	error = vflush(mp, 0, flags);
	return (error);
}
844
845/*
846 * Get file system statistics.
847 * taken from ext2/super.c ext2_statfs
848 */
849static int
850ext2_statfs(mp, sbp, td)
851	struct mount *mp;
852	register struct statfs *sbp;
853	struct thread *td;
854{
855        unsigned long overhead;
856	register struct ufsmount *ump;
857	register struct ext2_sb_info *fs;
858	register struct ext2_super_block *es;
859	int i, nsb;
860
861	ump = VFSTOUFS(mp);
862	fs = ump->um_e2fs;
863	es = fs->s_es;
864
865	if (es->s_magic != EXT2_SUPER_MAGIC)
866		panic("ext2_statfs - magic number spoiled");
867
868	/*
869	 * Compute the overhead (FS structures)
870	 */
871	if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) {
872		nsb = 0;
873		for (i = 0 ; i < fs->s_groups_count; i++)
874			if (ext2_group_sparse(i))
875				nsb++;
876	} else
877		nsb = fs->s_groups_count;
878	overhead = es->s_first_data_block +
879	    /* Superblocks and block group descriptors: */
880	    nsb * (1 + fs->s_db_per_group) +
881	    /* Inode bitmap, block bitmap, and inode table: */
882	    fs->s_groups_count * (1 + 1 + fs->s_itb_per_group);
883
884	sbp->f_bsize = EXT2_FRAG_SIZE(fs);
885	sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
886	sbp->f_blocks = es->s_blocks_count - overhead;
887	sbp->f_bfree = es->s_free_blocks_count;
888	sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count;
889	sbp->f_files = es->s_inodes_count;
890	sbp->f_ffree = es->s_free_inodes_count;
891	if (sbp != &mp->mnt_stat) {
892		sbp->f_type = mp->mnt_vfc->vfc_typenum;
893		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
894			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
895		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
896			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
897	}
898	return (0);
899}
900
901/*
902 * Go through the disk queues to initiate sandbagged IO;
903 * go through the inodes to write those that have been modified;
904 * initiate the writing of the super block if it has been modified.
905 *
906 * Note: we are always called with the filesystem marked `MPBUSY'.
907 */
908static int
909ext2_sync(mp, waitfor, cred, td)
910	struct mount *mp;
911	int waitfor;
912	struct ucred *cred;
913	struct thread *td;
914{
915	struct vnode *nvp, *vp;
916	struct inode *ip;
917	struct ufsmount *ump = VFSTOUFS(mp);
918	struct ext2_sb_info *fs;
919	int error, allerror = 0;
920
921	fs = ump->um_e2fs;
922	if (fs->s_dirt != 0 && fs->s_rd_only != 0) {		/* XXX */
923		printf("fs = %s\n", fs->fs_fsmnt);
924		panic("ext2_sync: rofs mod");
925	}
926	/*
927	 * Write back each (modified) inode.
928	 */
929	mtx_lock(&mntvnode_mtx);
930loop:
931	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
932		/*
933		 * If the vnode that we are about to sync is no longer
934		 * associated with this mount point, start over.
935		 */
936		if (vp->v_mount != mp)
937			goto loop;
938		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
939		mtx_unlock(&mntvnode_mtx);
940		mtx_lock(&vp->v_interlock);
941		ip = VTOI(vp);
942		if (vp->v_type == VNON ||
943		    ((ip->i_flag &
944		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
945		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) {
946			mtx_unlock(&vp->v_interlock);
947			mtx_lock(&mntvnode_mtx);
948			continue;
949		}
950		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
951		if (error) {
952			mtx_lock(&mntvnode_mtx);
953			if (error == ENOENT)
954				goto loop;
955			continue;
956		}
957		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
958			allerror = error;
959		VOP_UNLOCK(vp, 0, td);
960		vrele(vp);
961		mtx_lock(&mntvnode_mtx);
962	}
963	mtx_unlock(&mntvnode_mtx);
964	/*
965	 * Force stale file system control information to be flushed.
966	 */
967	if (waitfor != MNT_LAZY) {
968		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
969		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0)
970			allerror = error;
971		VOP_UNLOCK(ump->um_devvp, 0, td);
972	}
973#if QUOTA
974	qsync(mp);
975#endif
976	/*
977	 * Write back modified superblock.
978	 */
979	if (fs->s_dirt != 0) {
980		fs->s_dirt = 0;
981		fs->s_es->s_wtime = time_second;
982		if ((error = ext2_sbupdate(ump, waitfor)) != 0)
983			allerror = error;
984	}
985	return (allerror);
986}
987
988/*
989 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
990 * in from disk.  If it is in core, wait for the lock bit to clear, then
991 * return the inode locked.  Detection and handling of mount points must be
992 * done by the calling routine.
993 */
994static int
995ext2_vget(mp, ino, vpp)
996	struct mount *mp;
997	ino_t ino;
998	struct vnode **vpp;
999{
1000	register struct ext2_sb_info *fs;
1001	register struct inode *ip;
1002	struct ufsmount *ump;
1003	struct buf *bp;
1004	struct vnode *vp;
1005	dev_t dev;
1006	int i, error;
1007	int used_blocks;
1008
1009	ump = VFSTOUFS(mp);
1010	dev = ump->um_dev;
1011restart:
1012	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
1013		return (0);
1014
1015	/*
1016	 * Lock out the creation of new entries in the FFS hash table in
1017	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
1018	 * may occur!
1019	 */
1020	if (ext2fs_inode_hash_lock) {
1021		while (ext2fs_inode_hash_lock) {
1022			ext2fs_inode_hash_lock = -1;
1023			tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0);
1024		}
1025		goto restart;
1026	}
1027	ext2fs_inode_hash_lock = 1;
1028
1029	/*
1030	 * If this MALLOC() is performed after the getnewvnode()
1031	 * it might block, leaving a vnode with a NULL v_data to be
1032	 * found by ext2_sync() if a sync happens to fire right then,
1033	 * which will cause a panic because ext2_sync() blindly
1034	 * dereferences vp->v_data (as well it should).
1035	 */
1036	MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK);
1037
1038	/* Allocate a new vnode/inode. */
1039	if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) {
1040		if (ext2fs_inode_hash_lock < 0)
1041			wakeup(&ext2fs_inode_hash_lock);
1042		ext2fs_inode_hash_lock = 0;
1043		*vpp = NULL;
1044		FREE(ip, M_EXT2NODE);
1045		return (error);
1046	}
1047	bzero((caddr_t)ip, sizeof(struct inode));
1048	lockinit(&vp->v_lock, PINOD, "ext2in", 0, 0);
1049	vp->v_data = ip;
1050	ip->i_vnode = vp;
1051	ip->i_e2fs = fs = ump->um_e2fs;
1052	ip->i_dev = dev;
1053	ip->i_number = ino;
1054#if QUOTA
1055	for (i = 0; i < MAXQUOTAS; i++)
1056		ip->i_dquot[i] = NODQUOT;
1057#endif
1058	/*
1059	 * Put it onto its hash chain and lock it so that other requests for
1060	 * this inode will block if they arrive while we are sleeping waiting
1061	 * for old data structures to be purged or for the contents of the
1062	 * disk portion of this inode to be read.
1063	 */
1064	ufs_ihashins(ip);
1065
1066	if (ext2fs_inode_hash_lock < 0)
1067		wakeup(&ext2fs_inode_hash_lock);
1068	ext2fs_inode_hash_lock = 0;
1069
1070	/* Read in the disk contents for the inode, copy into the inode. */
1071#if 0
1072printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino)));
1073#endif
1074	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1075	    (int)fs->s_blocksize, NOCRED, &bp)) != 0) {
1076		/*
1077		 * The inode does not contain anything useful, so it would
1078		 * be misleading to leave it on its hash chain. With mode
1079		 * still zero, it will be unlinked and returned to the free
1080		 * list by vput().
1081		 */
1082		vput(vp);
1083		brelse(bp);
1084		*vpp = NULL;
1085		return (error);
1086	}
1087	/* convert ext2 inode to dinode */
1088	ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE *
1089			ino_to_fsbo(fs, ino)), &ip->i_din);
1090	ip->i_block_group = ino_to_cg(fs, ino);
1091	ip->i_next_alloc_block = 0;
1092	ip->i_next_alloc_goal = 0;
1093	ip->i_prealloc_count = 0;
1094	ip->i_prealloc_block = 0;
1095        /* now we want to make sure that block pointers for unused
1096           blocks are zeroed out - ext2_balloc depends on this
1097	   although for regular files and directories only
1098	*/
1099	if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
1100		used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize;
1101		for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
1102			ip->i_db[i] = 0;
1103	}
1104/*
1105	ext2_print_inode(ip);
1106*/
1107	brelse(bp);
1108
1109	/*
1110	 * Initialize the vnode from the inode, check for aliases.
1111	 * Note that the underlying vnode may have changed.
1112	 */
1113	if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) {
1114		vput(vp);
1115		*vpp = NULL;
1116		return (error);
1117	}
1118	/*
1119	 * Finish inode initialization now that aliasing has been resolved.
1120	 */
1121	ip->i_devvp = ump->um_devvp;
1122	VREF(ip->i_devvp);
1123	/*
1124	 * Set up a generation number for this inode if it does not
1125	 * already have one. This should only happen on old filesystems.
1126	 */
1127	if (ip->i_gen == 0) {
1128		ip->i_gen = random() / 2 + 1;
1129		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1130			ip->i_flag |= IN_MODIFIED;
1131	}
1132	*vpp = vp;
1133	return (0);
1134}
1135
1136/*
1137 * File handle to vnode
1138 *
1139 * Have to be really careful about stale file handles:
1140 * - check that the inode number is valid
1141 * - call ext2_vget() to get the locked inode
1142 * - check for an unallocated inode (i_mode == 0)
1143 * - check that the given client host has export rights and return
1144 *   those rights via. exflagsp and credanonp
1145 */
1146static int
1147ext2_fhtovp(mp, fhp, vpp)
1148	register struct mount *mp;
1149	struct fid *fhp;
1150	struct vnode **vpp;
1151{
1152	register struct ufid *ufhp;
1153	struct ext2_sb_info *fs;
1154
1155	ufhp = (struct ufid *)fhp;
1156	fs = VFSTOUFS(mp)->um_e2fs;
1157	if (ufhp->ufid_ino < ROOTINO ||
1158	    ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group)
1159		return (ESTALE);
1160	return (ufs_fhtovp(mp, ufhp, vpp));
1161}
1162
1163/*
1164 * Vnode pointer to File handle
1165 */
1166/* ARGSUSED */
1167static int
1168ext2_vptofh(vp, fhp)
1169	struct vnode *vp;
1170	struct fid *fhp;
1171{
1172	register struct inode *ip;
1173	register struct ufid *ufhp;
1174
1175	ip = VTOI(vp);
1176	ufhp = (struct ufid *)fhp;
1177	ufhp->ufid_len = sizeof(struct ufid);
1178	ufhp->ufid_ino = ip->i_number;
1179	ufhp->ufid_gen = ip->i_gen;
1180	return (0);
1181}
1182
1183/*
1184 * Write a superblock and associated information back to disk.
1185 */
1186static int
1187ext2_sbupdate(mp, waitfor)
1188	struct ufsmount *mp;
1189	int waitfor;
1190{
1191	register struct ext2_sb_info *fs = mp->um_e2fs;
1192	register struct ext2_super_block *es = fs->s_es;
1193	register struct buf *bp;
1194	int error = 0;
1195/*
1196printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no");
1197*/
1198	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
1199	bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block));
1200	if (waitfor == MNT_WAIT)
1201		error = bwrite(bp);
1202	else
1203		bawrite(bp);
1204
1205	/*
1206	 * The buffers for group descriptors, inode bitmaps and block bitmaps
1207	 * are not busy at this point and are (hopefully) written by the
1208	 * usual sync mechanism. No need to write them here
1209		 */
1210
1211	return (error);
1212}
1213