/* ext2_vfsops.c revision 111856 */
1/* 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 40 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 111856 2003-03-04 00:04:44Z jeff $ 41 */ 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/namei.h> 46#include <sys/proc.h> 47#include <sys/kernel.h> 48#include <sys/vnode.h> 49#include <sys/mount.h> 50#include <sys/bio.h> 51#include <sys/buf.h> 52#include <sys/conf.h> 53#include <sys/fcntl.h> 54#include <sys/malloc.h> 55#include <sys/stat.h> 56#include <sys/mutex.h> 57 58#include <gnu/ext2fs/ext2_mount.h> 59#include <gnu/ext2fs/inode.h> 60 61#include <gnu/ext2fs/fs.h> 62#include <gnu/ext2fs/ext2_extern.h> 63#include <gnu/ext2fs/ext2_fs.h> 64#include <gnu/ext2fs/ext2_fs_sb.h> 65 66static int ext2_fhtovp(struct mount *, struct fid *, struct vnode **); 67static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 68static int ext2_init(struct vfsconf *); 69static int ext2_mount(struct mount *, struct nameidata *, struct thread *); 70static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 71static int ext2_reload(struct mount *mountp, struct ucred *cred, 72 struct thread *td); 73static int ext2_root(struct mount *, struct vnode **vpp); 74static int ext2_sbupdate(struct ext2mount *, int); 75static int ext2_statfs(struct mount *, struct statfs *, struct thread *); 76static int ext2_sync(struct mount *, int, struct ucred *, struct thread *); 77static int ext2_uninit(struct vfsconf *); 
78static int ext2_unmount(struct mount *, int, struct thread *); 79static int ext2_vget(struct mount *, ino_t, int, struct vnode **); 80static int ext2_vptofh(struct vnode *, struct fid *); 81 82MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 83static MALLOC_DEFINE(M_EXT2MNT, "EXT2 mount", "EXT2 mount structure"); 84 85static struct vfsops ext2fs_vfsops = { 86 NULL, 87 vfs_stdstart, 88 ext2_unmount, 89 ext2_root, /* root inode via vget */ 90 vfs_stdquotactl, 91 ext2_statfs, 92 ext2_sync, 93 ext2_vget, 94 ext2_fhtovp, 95 vfs_stdcheckexp, 96 ext2_vptofh, 97 ext2_init, 98 ext2_uninit, 99 vfs_stdextattrctl, 100 ext2_mount, 101}; 102 103VFS_SET(ext2fs_vfsops, ext2fs, 0); 104#define bsd_malloc malloc 105#define bsd_free free 106 107static int ext2fs_inode_hash_lock; 108 109static int ext2_check_sb_compat(struct ext2_super_block *es, dev_t dev, 110 int ronly); 111static int compute_sb_data(struct vnode * devvp, 112 struct ext2_super_block * es, struct ext2_sb_info * fs); 113 114#ifdef notyet 115static int ext2_mountroot(void); 116 117/* 118 * Called by main() when ext2fs is going to be mounted as root. 119 * 120 * Name is updated by mount(8) after booting. 
121 */ 122#define ROOTNAME "root_device" 123 124static int 125ext2_mountroot() 126{ 127 struct ext2_sb_info *fs; 128 struct mount *mp; 129 struct thread *td = curthread; 130 struct ext2mount *ump; 131 u_int size; 132 int error; 133 134 if ((error = bdevvp(rootdev, &rootvp))) { 135 printf("ext2_mountroot: can't find rootvp\n"); 136 return (error); 137 } 138 mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 139 bzero((char *)mp, (u_long)sizeof(struct mount)); 140 TAILQ_INIT(&mp->mnt_nvnodelist); 141 TAILQ_INIT(&mp->mnt_reservedvnlist); 142 mp->mnt_op = &ext2fs_vfsops; 143 mp->mnt_flag = MNT_RDONLY; 144 if (error = ext2_mountfs(rootvp, mp, td)) { 145 bsd_free(mp, M_MOUNT); 146 return (error); 147 } 148 if (error = vfs_lock(mp)) { 149 (void)ext2_unmount(mp, 0, td); 150 bsd_free(mp, M_MOUNT); 151 return (error); 152 } 153 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 154 mp->mnt_flag |= MNT_ROOTFS; 155 mp->mnt_vnodecovered = NULLVP; 156 ump = VFSTOEXT2(mp); 157 fs = ump->um_e2fs; 158 bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); 159 fs->fs_fsmnt[0] = '/'; 160 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 161 MNAMELEN); 162 (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 163 &size); 164 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 165 (void)ext2_statfs(mp, &mp->mnt_stat, td); 166 vfs_unlock(mp); 167 inittodr(fs->s_es->s_wtime); /* this helps to set the time */ 168 return (0); 169} 170#endif 171 172/* 173 * VFS Operations. 
174 * 175 * mount system call 176 */ 177static int 178ext2_mount(mp, ndp, td) 179 struct mount *mp; 180 struct nameidata *ndp; 181 struct thread *td; 182{ 183 struct export_args *export; 184 struct vfsoptlist *opts; 185 struct vnode *devvp; 186 struct ext2mount *ump = 0; 187 struct ext2_sb_info *fs; 188 char *path, *fspec; 189 size_t size; 190 int error, flags, len; 191 mode_t accessmode; 192 193 opts = mp->mnt_optnew; 194 195 vfs_getopt(opts, "fspath", (void **)&path, NULL); 196 /* Double-check the length of path.. */ 197 if (strlen(path) >= MAXMNTLEN - 1) 198 return (ENAMETOOLONG); 199 200 fspec = NULL; 201 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 202 if (!error && fspec[len - 1] != '\0') 203 return (EINVAL); 204 205 /* 206 * If updating, check whether changing from read-only to 207 * read/write; if there is no device name, that's all we do. 208 */ 209 if (mp->mnt_flag & MNT_UPDATE) { 210 ump = VFSTOEXT2(mp); 211 fs = ump->um_e2fs; 212 error = 0; 213 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 214 flags = WRITECLOSE; 215 if (mp->mnt_flag & MNT_FORCE) 216 flags |= FORCECLOSE; 217 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 218 return (EBUSY); 219 error = ext2_flushfiles(mp, flags, td); 220 vfs_unbusy(mp, td); 221 if (!error && fs->s_wasvalid) { 222 fs->s_es->s_state |= EXT2_VALID_FS; 223 ext2_sbupdate(ump, MNT_WAIT); 224 } 225 fs->s_rd_only = 1; 226 } 227 if (!error && (mp->mnt_flag & MNT_RELOAD)) 228 error = ext2_reload(mp, ndp->ni_cnd.cn_cred, td); 229 if (error) 230 return (error); 231 devvp = ump->um_devvp; 232 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 233 (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) 234 return (EPERM); 235 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 236 /* 237 * If upgrade to read-write by non-root, then verify 238 * that user has necessary permissions on the device. 
239 */ 240 if (suser(td)) { 241 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 242 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 243 td->td_ucred, td)) != 0) { 244 VOP_UNLOCK(devvp, 0, td); 245 return (error); 246 } 247 VOP_UNLOCK(devvp, 0, td); 248 } 249 250 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 251 (fs->s_es->s_state & EXT2_ERROR_FS)) { 252 if (mp->mnt_flag & MNT_FORCE) { 253 printf( 254"WARNING: %s was not properly dismounted\n", 255 fs->fs_fsmnt); 256 } else { 257 printf( 258"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 259 fs->fs_fsmnt); 260 return (EPERM); 261 } 262 } 263 fs->s_es->s_state &= ~EXT2_VALID_FS; 264 ext2_sbupdate(ump, MNT_WAIT); 265 fs->s_rd_only = 0; 266 } 267 if (fspec == NULL) { 268 error = vfs_getopt(opts, "export", (void **)&export, 269 &len); 270 if (error || len != sizeof(struct export_args)) 271 return (EINVAL); 272 /* Process export requests. */ 273 return (vfs_export(mp, export)); 274 } 275 } 276 /* 277 * Not an update, or updating the name: look up the name 278 * and verify that it refers to a sensible block device. 279 */ 280 if (fspec == NULL) 281 return (EINVAL); 282 NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); 283 if ((error = namei(ndp)) != 0) 284 return (error); 285 NDFREE(ndp, NDF_ONLY_PNBUF); 286 devvp = ndp->ni_vp; 287 288 if (!vn_isdisk(devvp, &error)) { 289 vrele(devvp); 290 return (error); 291 } 292 293 /* 294 * If mount by non-root, then verify that user has necessary 295 * permissions on the device. 
296 */ 297 if (suser(td)) { 298 accessmode = VREAD; 299 if ((mp->mnt_flag & MNT_RDONLY) == 0) 300 accessmode |= VWRITE; 301 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 302 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 303 vput(devvp); 304 return (error); 305 } 306 VOP_UNLOCK(devvp, 0, td); 307 } 308 309 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 310 error = ext2_mountfs(devvp, mp, td); 311 } else { 312 if (devvp != ump->um_devvp) 313 error = EINVAL; /* needs translation */ 314 else 315 vrele(devvp); 316 } 317 if (error) { 318 vrele(devvp); 319 return (error); 320 } 321 ump = VFSTOEXT2(mp); 322 fs = ump->um_e2fs; 323 /* 324 * Note that this strncpy() is ok because of a check at the start 325 * of ext2_mount(). 326 */ 327 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 328 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 329 (void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 330 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 331 (void)ext2_statfs(mp, &mp->mnt_stat, td); 332 return (0); 333} 334 335/* 336 * checks that the data in the descriptor blocks make sense 337 * this is taken from ext2/super.c 338 */ 339static int ext2_check_descriptors (struct ext2_sb_info * sb) 340{ 341 int i; 342 int desc_block = 0; 343 unsigned long block = sb->s_es->s_first_data_block; 344 struct ext2_group_desc * gdp = NULL; 345 346 /* ext2_debug ("Checking group descriptors"); */ 347 348 for (i = 0; i < sb->s_groups_count; i++) 349 { 350 /* examine next descriptor block */ 351 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 352 gdp = (struct ext2_group_desc *) 353 sb->s_group_desc[desc_block++]->b_data; 354 if (gdp->bg_block_bitmap < block || 355 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 356 { 357 printf ("ext2_check_descriptors: " 358 "Block bitmap for group %d" 359 " not in group (block %lu)!\n", 360 i, (unsigned long) gdp->bg_block_bitmap); 361 return 0; 362 } 363 if (gdp->bg_inode_bitmap < block || 364 gdp->bg_inode_bitmap >= block + 
EXT2_BLOCKS_PER_GROUP(sb)) 365 { 366 printf ("ext2_check_descriptors: " 367 "Inode bitmap for group %d" 368 " not in group (block %lu)!\n", 369 i, (unsigned long) gdp->bg_inode_bitmap); 370 return 0; 371 } 372 if (gdp->bg_inode_table < block || 373 gdp->bg_inode_table + sb->s_itb_per_group >= 374 block + EXT2_BLOCKS_PER_GROUP(sb)) 375 { 376 printf ("ext2_check_descriptors: " 377 "Inode table for group %d" 378 " not in group (block %lu)!\n", 379 i, (unsigned long) gdp->bg_inode_table); 380 return 0; 381 } 382 block += EXT2_BLOCKS_PER_GROUP(sb); 383 gdp++; 384 } 385 return 1; 386} 387 388static int 389ext2_check_sb_compat(es, dev, ronly) 390 struct ext2_super_block *es; 391 dev_t dev; 392 int ronly; 393{ 394 395 if (es->s_magic != EXT2_SUPER_MAGIC) { 396 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 397 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 398 return (1); 399 } 400 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 401 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 402 printf( 403"WARNING: mount of %s denied due to unsupported optional features\n", 404 devtoname(dev)); 405 return (1); 406 } 407 if (!ronly && 408 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 409 printf( 410"WARNING: R/W mount of %s denied due to unsupported optional features\n", 411 devtoname(dev)); 412 return (1); 413 } 414 } 415 return (0); 416} 417 418/* 419 * this computes the fields of the ext2_sb_info structure from the 420 * data in the ext2_super_block structure read in 421 */ 422static int compute_sb_data(devvp, es, fs) 423 struct vnode * devvp; 424 struct ext2_super_block * es; 425 struct ext2_sb_info * fs; 426{ 427 int db_count, error; 428 int i, j; 429 int logic_sb_block = 1; /* XXX for now */ 430 431#if 1 432#define V(v) 433#else 434#define V(v) printf(#v"= %d\n", fs->v); 435#endif 436 437 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 438 V(s_blocksize) 439 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 440 
V(s_bshift) 441 fs->s_fsbtodb = es->s_log_block_size + 1; 442 V(s_fsbtodb) 443 fs->s_qbmask = fs->s_blocksize - 1; 444 V(s_bmask) 445 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 446 V(s_blocksize_bits) 447 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 448 V(s_frag_size) 449 if (fs->s_frag_size) 450 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 451 V(s_frags_per_block) 452 fs->s_blocks_per_group = es->s_blocks_per_group; 453 V(s_blocks_per_group) 454 fs->s_frags_per_group = es->s_frags_per_group; 455 V(s_frags_per_group) 456 fs->s_inodes_per_group = es->s_inodes_per_group; 457 V(s_inodes_per_group) 458 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 459 V(s_inodes_per_block) 460 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 461 V(s_itb_per_group) 462 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 463 V(s_desc_per_block) 464 /* s_resuid / s_resgid ? */ 465 fs->s_groups_count = (es->s_blocks_count - 466 es->s_first_data_block + 467 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 468 EXT2_BLOCKS_PER_GROUP(fs); 469 V(s_groups_count) 470 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 471 EXT2_DESC_PER_BLOCK(fs); 472 fs->s_db_per_group = db_count; 473 V(s_db_per_group) 474 475 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 476 M_EXT2MNT, M_WAITOK); 477 478 /* adjust logic_sb_block */ 479 if(fs->s_blocksize > SBSIZE) 480 /* Godmar thinks: if the blocksize is greater than 1024, then 481 the superblock is logically part of block zero. 
482 */ 483 logic_sb_block = 0; 484 485 for (i = 0; i < db_count; i++) { 486 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 487 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 488 if(error) { 489 for (j = 0; j < i; j++) 490 brelse(fs->s_group_desc[j]); 491 bsd_free(fs->s_group_desc, M_EXT2MNT); 492 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 493 return EIO; 494 } 495 /* Set the B_LOCKED flag on the buffer, then brelse() it */ 496 LCK_BUF(fs->s_group_desc[i]) 497 } 498 if(!ext2_check_descriptors(fs)) { 499 for (j = 0; j < db_count; j++) 500 ULCK_BUF(fs->s_group_desc[j]) 501 bsd_free(fs->s_group_desc, M_EXT2MNT); 502 printf("EXT2-fs: (ext2_check_descriptors failure) " 503 "unable to read group descriptors\n"); 504 return EIO; 505 } 506 507 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 508 fs->s_inode_bitmap_number[i] = 0; 509 fs->s_inode_bitmap[i] = NULL; 510 fs->s_block_bitmap_number[i] = 0; 511 fs->s_block_bitmap[i] = NULL; 512 } 513 fs->s_loaded_inode_bitmaps = 0; 514 fs->s_loaded_block_bitmaps = 0; 515 return 0; 516} 517 518/* 519 * Reload all incore data for a filesystem (used after running fsck on 520 * the root filesystem and finding things to fix). The filesystem must 521 * be mounted read-only. 522 * 523 * Things to do to update the mount: 524 * 1) invalidate all cached meta-data. 525 * 2) re-read superblock from disk. 526 * 3) re-read summary information from disk. 527 * 4) invalidate all inactive vnodes. 528 * 5) invalidate all cached file data. 529 * 6) re-read inode data for all active vnodes. 530 */ 531static int 532ext2_reload(mountp, cred, td) 533 struct mount *mountp; 534 struct ucred *cred; 535 struct thread *td; 536{ 537 struct vnode *vp, *nvp, *devvp; 538 struct inode *ip; 539 struct buf *bp; 540 struct ext2_super_block * es; 541 struct ext2_sb_info *fs; 542 int error; 543 544 if ((mountp->mnt_flag & MNT_RDONLY) == 0) 545 return (EINVAL); 546 /* 547 * Step 1: invalidate all cached meta-data. 
548 */ 549 devvp = VFSTOEXT2(mountp)->um_devvp; 550 if (vinvalbuf(devvp, 0, cred, td, 0, 0)) 551 panic("ext2_reload: dirty1"); 552 /* 553 * Step 2: re-read superblock from disk. 554 * constants have been adjusted for ext2 555 */ 556 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 557 return (error); 558 es = (struct ext2_super_block *)bp->b_data; 559 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 560 brelse(bp); 561 return (EIO); /* XXX needs translation */ 562 } 563 fs = VFSTOEXT2(mountp)->um_e2fs; 564 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 565 566 if((error = compute_sb_data(devvp, es, fs)) != 0) { 567 brelse(bp); 568 return error; 569 } 570#ifdef UNKLAR 571 if (fs->fs_sbsize < SBSIZE) 572 bp->b_flags |= B_INVAL; 573#endif 574 brelse(bp); 575 576loop: 577 mtx_lock(&mntvnode_mtx); 578 for (vp = TAILQ_FIRST(&mountp->mnt_nvnodelist); vp != NULL; vp = nvp) { 579 if (vp->v_mount != mountp) { 580 mtx_unlock(&mntvnode_mtx); 581 goto loop; 582 } 583 nvp = TAILQ_NEXT(vp, v_nmntvnodes); 584 mtx_unlock(&mntvnode_mtx); 585 /* 586 * Step 4: invalidate all inactive vnodes. 587 */ 588 if (vrecycle(vp, NULL, td)) 589 goto loop; 590 /* 591 * Step 5: invalidate all cached file data. 592 */ 593 mtx_lock(&vp->v_interlock); 594 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 595 goto loop; 596 } 597 if (vinvalbuf(vp, 0, cred, td, 0, 0)) 598 panic("ext2_reload: dirty2"); 599 /* 600 * Step 6: re-read inode data for all active vnodes. 
601 */ 602 ip = VTOI(vp); 603 error = 604 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 605 (int)fs->s_blocksize, NOCRED, &bp); 606 if (error) { 607 vput(vp); 608 return (error); 609 } 610 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 611 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 612 brelse(bp); 613 vput(vp); 614 mtx_lock(&mntvnode_mtx); 615 } 616 mtx_unlock(&mntvnode_mtx); 617 return (0); 618} 619 620/* 621 * Common code for mount and mountroot 622 */ 623static int 624ext2_mountfs(devvp, mp, td) 625 struct vnode *devvp; 626 struct mount *mp; 627 struct thread *td; 628{ 629 struct ext2mount *ump; 630 struct buf *bp; 631 struct ext2_sb_info *fs; 632 struct ext2_super_block * es; 633 dev_t dev = devvp->v_rdev; 634 int error; 635 int ronly; 636 637 /* 638 * Disallow multiple mounts of the same device. 639 * Disallow mounting of a device that is currently in use 640 * (except for root, which might share swap device for miniroot). 641 * Flush out any old buffers remaining from a previous use. 642 */ 643 if ((error = vfs_mountedon(devvp)) != 0) 644 return (error); 645 if (vcount(devvp) > 1 && devvp != rootvp) 646 return (EBUSY); 647 if ((error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) 648 return (error); 649#ifdef READONLY 650/* turn on this to force it to be read-only */ 651 mp->mnt_flag |= MNT_RDONLY; 652#endif 653 654 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 655 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 656 error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, td); 657 VOP_UNLOCK(devvp, 0, td); 658 if (error) 659 return (error); 660 if (devvp->v_rdev->si_iosize_max != 0) 661 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 662 if (mp->mnt_iosize_max > MAXPHYS) 663 mp->mnt_iosize_max = MAXPHYS; 664 665 bp = NULL; 666 ump = NULL; 667 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 668 goto out; 669 es = (struct ext2_super_block *)bp->b_data; 670 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 671 error = EINVAL; /* XXX needs translation */ 672 goto out; 673 } 674 if ((es->s_state & EXT2_VALID_FS) == 0 || 675 (es->s_state & EXT2_ERROR_FS)) { 676 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 677 printf( 678"WARNING: Filesystem was not properly dismounted\n"); 679 } else { 680 printf( 681"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 682 error = EPERM; 683 goto out; 684 } 685 } 686 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 687 bzero((caddr_t)ump, sizeof *ump); 688 /* I don't know whether this is the right strategy. Note that 689 we dynamically allocate both an ext2_sb_info and an ext2_super_block 690 while Linux keeps the super block in a locked buffer 691 */ 692 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 693 M_EXT2MNT, M_WAITOK); 694 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 695 M_EXT2MNT, M_WAITOK); 696 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 697 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 698 goto out; 699 /* 700 * We don't free the group descriptors allocated by compute_sb_data() 701 * until ext2_unmount(). This is OK since the mount will succeed. 702 */ 703 brelse(bp); 704 bp = NULL; 705 fs = ump->um_e2fs; 706 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 707 /* if the fs is not mounted read-only, make sure the super block is 708 always written back on a sync() 709 */ 710 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 
1 : 0; 711 if (ronly == 0) { 712 fs->s_dirt = 1; /* mark it modified */ 713 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 714 } 715 mp->mnt_data = (qaddr_t)ump; 716 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 717 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 718 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 719 mp->mnt_flag |= MNT_LOCAL; 720 ump->um_mountp = mp; 721 ump->um_dev = dev; 722 ump->um_devvp = devvp; 723 /* setting those two parameters allowed us to use 724 ufs_bmap w/o changse ! 725 */ 726 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 727 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 728 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 729 devvp->v_rdev->si_mountpoint = mp; 730 if (ronly == 0) 731 ext2_sbupdate(ump, MNT_WAIT); 732 return (0); 733out: 734 if (bp) 735 brelse(bp); 736 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, td); 737 if (ump) { 738 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 739 bsd_free(ump->um_e2fs, M_EXT2MNT); 740 bsd_free(ump, M_EXT2MNT); 741 mp->mnt_data = (qaddr_t)0; 742 } 743 return (error); 744} 745 746/* 747 * unmount system call 748 */ 749static int 750ext2_unmount(mp, mntflags, td) 751 struct mount *mp; 752 int mntflags; 753 struct thread *td; 754{ 755 struct ext2mount *ump; 756 struct ext2_sb_info *fs; 757 int error, flags, ronly, i; 758 759 flags = 0; 760 if (mntflags & MNT_FORCE) { 761 if (mp->mnt_flag & MNT_ROOTFS) 762 return (EINVAL); 763 flags |= FORCECLOSE; 764 } 765 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 766 return (error); 767 ump = VFSTOEXT2(mp); 768 fs = ump->um_e2fs; 769 ronly = fs->s_rd_only; 770 if (ronly == 0) { 771 if (fs->s_wasvalid) 772 fs->s_es->s_state |= EXT2_VALID_FS; 773 ext2_sbupdate(ump, MNT_WAIT); 774 } 775 776 /* release buffers containing group descriptors */ 777 for(i = 0; i < fs->s_db_per_group; i++) 778 ULCK_BUF(fs->s_group_desc[i]) 779 bsd_free(fs->s_group_desc, M_EXT2MNT); 780 781 /* release cached inode/block bitmaps */ 782 for (i = 0; i < 
EXT2_MAX_GROUP_LOADED; i++) 783 if (fs->s_inode_bitmap[i]) 784 ULCK_BUF(fs->s_inode_bitmap[i]) 785 786 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 787 if (fs->s_block_bitmap[i]) 788 ULCK_BUF(fs->s_block_bitmap[i]) 789 790 ump->um_devvp->v_rdev->si_mountpoint = NULL; 791 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, 792 NOCRED, td); 793 vrele(ump->um_devvp); 794 bsd_free(fs->s_es, M_EXT2MNT); 795 bsd_free(fs, M_EXT2MNT); 796 bsd_free(ump, M_EXT2MNT); 797 mp->mnt_data = (qaddr_t)0; 798 mp->mnt_flag &= ~MNT_LOCAL; 799 return (error); 800} 801 802/* 803 * Flush out all the files in a filesystem. 804 */ 805static int 806ext2_flushfiles(mp, flags, td) 807 struct mount *mp; 808 int flags; 809 struct thread *td; 810{ 811 int error; 812 813 error = vflush(mp, 0, flags); 814 return (error); 815} 816 817/* 818 * Get file system statistics. 819 * taken from ext2/super.c ext2_statfs 820 */ 821static int 822ext2_statfs(mp, sbp, td) 823 struct mount *mp; 824 struct statfs *sbp; 825 struct thread *td; 826{ 827 unsigned long overhead; 828 struct ext2mount *ump; 829 struct ext2_sb_info *fs; 830 struct ext2_super_block *es; 831 int i, nsb; 832 833 ump = VFSTOEXT2(mp); 834 fs = ump->um_e2fs; 835 es = fs->s_es; 836 837 if (es->s_magic != EXT2_SUPER_MAGIC) 838 panic("ext2_statfs - magic number spoiled"); 839 840 /* 841 * Compute the overhead (FS structures) 842 */ 843 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 844 nsb = 0; 845 for (i = 0 ; i < fs->s_groups_count; i++) 846 if (ext2_group_sparse(i)) 847 nsb++; 848 } else 849 nsb = fs->s_groups_count; 850 overhead = es->s_first_data_block + 851 /* Superblocks and block group descriptors: */ 852 nsb * (1 + fs->s_db_per_group) + 853 /* Inode bitmap, block bitmap, and inode table: */ 854 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 855 856 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 857 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 858 sbp->f_blocks = es->s_blocks_count - overhead; 859 sbp->f_bfree = 
es->s_free_blocks_count; 860 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 861 sbp->f_files = es->s_inodes_count; 862 sbp->f_ffree = es->s_free_inodes_count; 863 if (sbp != &mp->mnt_stat) { 864 sbp->f_type = mp->mnt_vfc->vfc_typenum; 865 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 866 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 867 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 868 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 869 } 870 return (0); 871} 872 873/* 874 * Go through the disk queues to initiate sandbagged IO; 875 * go through the inodes to write those that have been modified; 876 * initiate the writing of the super block if it has been modified. 877 * 878 * Note: we are always called with the filesystem marked `MPBUSY'. 879 */ 880static int 881ext2_sync(mp, waitfor, cred, td) 882 struct mount *mp; 883 int waitfor; 884 struct ucred *cred; 885 struct thread *td; 886{ 887 struct vnode *nvp, *vp; 888 struct inode *ip; 889 struct ext2mount *ump = VFSTOEXT2(mp); 890 struct ext2_sb_info *fs; 891 int error, allerror = 0; 892 893 fs = ump->um_e2fs; 894 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 895 printf("fs = %s\n", fs->fs_fsmnt); 896 panic("ext2_sync: rofs mod"); 897 } 898 /* 899 * Write back each (modified) inode. 900 */ 901 mtx_lock(&mntvnode_mtx); 902loop: 903 for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { 904 /* 905 * If the vnode that we are about to sync is no longer 906 * associated with this mount point, start over. 
907 */ 908 if (vp->v_mount != mp) 909 goto loop; 910 nvp = TAILQ_NEXT(vp, v_nmntvnodes); 911 mtx_unlock(&mntvnode_mtx); 912 VI_LOCK(vp); 913 ip = VTOI(vp); 914 if (vp->v_type == VNON || 915 ((ip->i_flag & 916 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 917 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { 918 VI_UNLOCK(vp); 919 mtx_lock(&mntvnode_mtx); 920 continue; 921 } 922 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 923 if (error) { 924 mtx_lock(&mntvnode_mtx); 925 if (error == ENOENT) 926 goto loop; 927 continue; 928 } 929 if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) 930 allerror = error; 931 VOP_UNLOCK(vp, 0, td); 932 vrele(vp); 933 mtx_lock(&mntvnode_mtx); 934 } 935 mtx_unlock(&mntvnode_mtx); 936 /* 937 * Force stale file system control information to be flushed. 938 */ 939 if (waitfor != MNT_LAZY) { 940 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 941 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0) 942 allerror = error; 943 VOP_UNLOCK(ump->um_devvp, 0, td); 944 } 945 /* 946 * Write back modified superblock. 947 */ 948 if (fs->s_dirt != 0) { 949 fs->s_dirt = 0; 950 fs->s_es->s_wtime = time_second; 951 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 952 allerror = error; 953 } 954 return (allerror); 955} 956 957/* 958 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 959 * in from disk. If it is in core, wait for the lock bit to clear, then 960 * return the inode locked. Detection and handling of mount points must be 961 * done by the calling routine. 
962 */ 963static int 964ext2_vget(mp, ino, flags, vpp) 965 struct mount *mp; 966 ino_t ino; 967 int flags; 968 struct vnode **vpp; 969{ 970 struct ext2_sb_info *fs; 971 struct inode *ip; 972 struct ext2mount *ump; 973 struct buf *bp; 974 struct vnode *vp; 975 dev_t dev; 976 int i, error; 977 int used_blocks; 978 979 ump = VFSTOEXT2(mp); 980 dev = ump->um_dev; 981restart: 982 if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0) 983 return (error); 984 if (*vpp != NULL) 985 return (0); 986 987 /* 988 * Lock out the creation of new entries in the FFS hash table in 989 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 990 * may occur! 991 */ 992 if (ext2fs_inode_hash_lock) { 993 while (ext2fs_inode_hash_lock) { 994 ext2fs_inode_hash_lock = -1; 995 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 996 } 997 goto restart; 998 } 999 ext2fs_inode_hash_lock = 1; 1000 1001 /* 1002 * If this MALLOC() is performed after the getnewvnode() 1003 * it might block, leaving a vnode with a NULL v_data to be 1004 * found by ext2_sync() if a sync happens to fire right then, 1005 * which will cause a panic because ext2_sync() blindly 1006 * dereferences vp->v_data (as well it should). 1007 */ 1008 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 1009 1010 /* Allocate a new vnode/inode. 
*/ 1011 if ((error = getnewvnode("ext2fs", mp, ext2_vnodeop_p, &vp)) != 0) { 1012 if (ext2fs_inode_hash_lock < 0) 1013 wakeup(&ext2fs_inode_hash_lock); 1014 ext2fs_inode_hash_lock = 0; 1015 *vpp = NULL; 1016 FREE(ip, M_EXT2NODE); 1017 return (error); 1018 } 1019 bzero((caddr_t)ip, sizeof(struct inode)); 1020 vp->v_data = ip; 1021 ip->i_vnode = vp; 1022 ip->i_e2fs = fs = ump->um_e2fs; 1023 ip->i_dev = dev; 1024 ip->i_number = ino; 1025 /* 1026 * Put it onto its hash chain and lock it so that other requests for 1027 * this inode will block if they arrive while we are sleeping waiting 1028 * for old data structures to be purged or for the contents of the 1029 * disk portion of this inode to be read. 1030 */ 1031 ext2_ihashins(ip); 1032 1033 if (ext2fs_inode_hash_lock < 0) 1034 wakeup(&ext2fs_inode_hash_lock); 1035 ext2fs_inode_hash_lock = 0; 1036 1037 /* Read in the disk contents for the inode, copy into the inode. */ 1038#if 0 1039printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1040#endif 1041 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1042 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1043 /* 1044 * The inode does not contain anything useful, so it would 1045 * be misleading to leave it on its hash chain. With mode 1046 * still zero, it will be unlinked and returned to the free 1047 * list by vput(). 
1048 */ 1049 vput(vp); 1050 brelse(bp); 1051 *vpp = NULL; 1052 return (error); 1053 } 1054 /* convert ext2 inode to dinode */ 1055 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1056 ino_to_fsbo(fs, ino)), ip); 1057 ip->i_block_group = ino_to_cg(fs, ino); 1058 ip->i_next_alloc_block = 0; 1059 ip->i_next_alloc_goal = 0; 1060 ip->i_prealloc_count = 0; 1061 ip->i_prealloc_block = 0; 1062 /* now we want to make sure that block pointers for unused 1063 blocks are zeroed out - ext2_balloc depends on this 1064 although for regular files and directories only 1065 */ 1066 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1067 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1068 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1069 ip->i_db[i] = 0; 1070 } 1071/* 1072 ext2_print_inode(ip); 1073*/ 1074 brelse(bp); 1075 1076 /* 1077 * Initialize the vnode from the inode, check for aliases. 1078 * Note that the underlying vnode may have changed. 1079 */ 1080 if ((error = ext2_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { 1081 vput(vp); 1082 *vpp = NULL; 1083 return (error); 1084 } 1085 /* 1086 * Finish inode initialization now that aliasing has been resolved. 1087 */ 1088 ip->i_devvp = ump->um_devvp; 1089 VREF(ip->i_devvp); 1090 /* 1091 * Set up a generation number for this inode if it does not 1092 * already have one. This should only happen on old filesystems. 1093 */ 1094 if (ip->i_gen == 0) { 1095 ip->i_gen = random() / 2 + 1; 1096 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1097 ip->i_flag |= IN_MODIFIED; 1098 } 1099 *vpp = vp; 1100 return (0); 1101} 1102 1103/* 1104 * File handle to vnode 1105 * 1106 * Have to be really careful about stale file handles: 1107 * - check that the inode number is valid 1108 * - call ext2_vget() to get the locked inode 1109 * - check for an unallocated inode (i_mode == 0) 1110 * - check that the given client host has export rights and return 1111 * those rights via. 
exflagsp and credanonp 1112 */ 1113static int 1114ext2_fhtovp(mp, fhp, vpp) 1115 struct mount *mp; 1116 struct fid *fhp; 1117 struct vnode **vpp; 1118{ 1119 struct inode *ip; 1120 struct ufid *ufhp; 1121 struct vnode *nvp; 1122 struct ext2_sb_info *fs; 1123 int error; 1124 1125 ufhp = (struct ufid *)fhp; 1126 fs = VFSTOEXT2(mp)->um_e2fs; 1127 if (ufhp->ufid_ino < ROOTINO || 1128 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1129 return (ESTALE); 1130 1131 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1132 if (error) { 1133 *vpp = NULLVP; 1134 return (error); 1135 } 1136 ip = VTOI(nvp); 1137 if (ip->i_mode == 0 || 1138 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1139 vput(nvp); 1140 *vpp = NULLVP; 1141 return (ESTALE); 1142 } 1143 *vpp = nvp; 1144 return (0); 1145} 1146 1147/* 1148 * Vnode pointer to File handle 1149 */ 1150/* ARGSUSED */ 1151static int 1152ext2_vptofh(vp, fhp) 1153 struct vnode *vp; 1154 struct fid *fhp; 1155{ 1156 struct inode *ip; 1157 struct ufid *ufhp; 1158 1159 ip = VTOI(vp); 1160 ufhp = (struct ufid *)fhp; 1161 ufhp->ufid_len = sizeof(struct ufid); 1162 ufhp->ufid_ino = ip->i_number; 1163 ufhp->ufid_gen = ip->i_gen; 1164 return (0); 1165} 1166 1167/* 1168 * Write a superblock and associated information back to disk. 1169 */ 1170static int 1171ext2_sbupdate(mp, waitfor) 1172 struct ext2mount *mp; 1173 int waitfor; 1174{ 1175 struct ext2_sb_info *fs = mp->um_e2fs; 1176 struct ext2_super_block *es = fs->s_es; 1177 struct buf *bp; 1178 int error = 0; 1179/* 1180printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); 1181*/ 1182 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1183 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1184 if (waitfor == MNT_WAIT) 1185 error = bwrite(bp); 1186 else 1187 bawrite(bp); 1188 1189 /* 1190 * The buffers for group descriptors, inode bitmaps and block bitmaps 1191 * are not busy at this point and are (hopefully) written by the 1192 * usual sync mechanism. No need to write them here 1193 */ 1194 1195 return (error); 1196} 1197 1198/* 1199 * Return the root of a filesystem. 1200 */ 1201static int 1202ext2_root(mp, vpp) 1203 struct mount *mp; 1204 struct vnode **vpp; 1205{ 1206 struct vnode *nvp; 1207 int error; 1208 1209 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1210 if (error) 1211 return (error); 1212 *vpp = nvp; 1213 return (0); 1214} 1215 1216static int 1217ext2_init(struct vfsconf *vfsp) 1218{ 1219 1220 ext2_ihashinit(); 1221 return (0); 1222} 1223 1224static int 1225ext2_uninit(struct vfsconf *vfsp) 1226{ 1227 1228 ext2_ihashuninit(); 1229 return (0); 1230} 1231