ext2_vfsops.c revision 135864
1/* 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 135864 2004-09-27 20:38:46Z phk $ 37 */ 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/namei.h> 42#include <sys/proc.h> 43#include <sys/kernel.h> 44#include <sys/vnode.h> 45#include <sys/mount.h> 46#include <sys/bio.h> 47#include <sys/buf.h> 48#include <sys/conf.h> 49#include <sys/fcntl.h> 50#include <sys/malloc.h> 51#include <sys/stat.h> 52#include <sys/mutex.h> 53 54#include <gnu/ext2fs/ext2_mount.h> 55#include <gnu/ext2fs/inode.h> 56 57#include <gnu/ext2fs/fs.h> 58#include <gnu/ext2fs/ext2_extern.h> 59#include <gnu/ext2fs/ext2_fs.h> 60#include <gnu/ext2fs/ext2_fs_sb.h> 61 62static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 63static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 64static int ext2_reload(struct mount *mp, struct ucred *cred, struct thread *td); 65static int ext2_sbupdate(struct ext2mount *, int); 66 67static vfs_unmount_t ext2_unmount; 68static vfs_root_t ext2_root; 69static vfs_statfs_t ext2_statfs; 70static vfs_sync_t ext2_sync; 71static vfs_vget_t ext2_vget; 72static vfs_fhtovp_t ext2_fhtovp; 73static vfs_vptofh_t ext2_vptofh; 74static vfs_init_t ext2_init; 75static vfs_uninit_t ext2_uninit; 76static vfs_mount_t ext2_mount; 77 78MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 79static MALLOC_DEFINE(M_EXT2MNT, "EXT2 mount", "EXT2 mount structure"); 80 81static struct vfsops ext2fs_vfsops = { 82 .vfs_fhtovp = ext2_fhtovp, 83 .vfs_init = ext2_init, 84 .vfs_mount = ext2_mount, 85 .vfs_root = ext2_root, /* root inode via vget */ 86 .vfs_statfs = ext2_statfs, 87 .vfs_sync = ext2_sync, 88 .vfs_uninit = ext2_uninit, 89 .vfs_unmount = ext2_unmount, 90 .vfs_vget = ext2_vget, 91 .vfs_vptofh = ext2_vptofh, 92}; 93 94VFS_SET(ext2fs_vfsops, ext2fs, 0); 95#define bsd_malloc malloc 96#define bsd_free free 97 98static int ext2fs_inode_hash_lock; 99 100static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 101 int ronly); 102static int compute_sb_data(struct vnode * devvp, 103 struct ext2_super_block * es, struct ext2_sb_info * fs); 104 105#ifdef notyet 106static int ext2_mountroot(void); 107 108/* 109 * Called by main() when ext2fs is going to be mounted as root. 110 * 111 * Name is updated by mount(8) after booting. 112 */ 113#define ROOTNAME "root_device" 114 115static int 116ext2_mountroot() 117{ 118 struct ext2_sb_info *fs; 119 struct mount *mp; 120 struct vnode *rootvp; 121 struct thread *td = curthread; 122 struct ext2mount *ump; 123 u_int size; 124 int error; 125 126 if ((error = bdevvp(rootdev, &rootvp))) { 127 printf("ext2_mountroot: can't find rootvp\n"); 128 return (error); 129 } 130 mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 131 bzero((char *)mp, (u_long)sizeof(struct mount)); 132 TAILQ_INIT(&mp->mnt_nvnodelist); 133 mp->mnt_op = &ext2fs_vfsops; 134 mp->mnt_flag = MNT_RDONLY; 135 if (error = ext2_mountfs(rootvp, mp, td)) { 136 bsd_free(mp, M_MOUNT); 137 return (error); 138 } 139 if (error = vfs_lock(mp)) { 140 (void)ext2_unmount(mp, 0, td); 141 bsd_free(mp, M_MOUNT); 142 return (error); 143 } 144 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 145 mp->mnt_flag |= MNT_ROOTFS; 146 mp->mnt_vnodecovered = NULLVP; 147 ump = VFSTOEXT2(mp); 148 fs = ump->um_e2fs; 149 bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); 150 fs->fs_fsmnt[0] = '/'; 151 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 152 MNAMELEN); 153 (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 154 &size); 155 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 156 (void)ext2_statfs(mp, &mp->mnt_stat, td); 157 vfs_unlock(mp); 158 inittodr(fs->s_es->s_wtime); /* this helps to set the time */ 159 return (0); 160} 161#endif 162 163/* 164 * VFS Operations. 165 * 166 * mount system call 167 */ 168static int 169ext2_mount(mp, td) 170 struct mount *mp; 171 struct thread *td; 172{ 173 struct export_args *export; 174 struct vfsoptlist *opts; 175 struct vnode *devvp; 176 struct ext2mount *ump = 0; 177 struct ext2_sb_info *fs; 178 char *path, *fspec; 179 size_t size; 180 int error, flags, len; 181 mode_t accessmode; 182 struct nameidata nd, *ndp = &nd; 183 184 opts = mp->mnt_optnew; 185 186 vfs_getopt(opts, "fspath", (void **)&path, NULL); 187 /* Double-check the length of path.. */ 188 if (strlen(path) >= MAXMNTLEN - 1) 189 return (ENAMETOOLONG); 190 191 fspec = NULL; 192 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 193 if (!error && fspec[len - 1] != '\0') 194 return (EINVAL); 195 196 /* 197 * If updating, check whether changing from read-only to 198 * read/write; if there is no device name, that's all we do. 199 */ 200 if (mp->mnt_flag & MNT_UPDATE) { 201 ump = VFSTOEXT2(mp); 202 fs = ump->um_e2fs; 203 error = 0; 204 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 205 flags = WRITECLOSE; 206 if (mp->mnt_flag & MNT_FORCE) 207 flags |= FORCECLOSE; 208 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 209 return (EBUSY); 210 error = ext2_flushfiles(mp, flags, td); 211 vfs_unbusy(mp, td); 212 if (!error && fs->s_wasvalid) { 213 fs->s_es->s_state |= EXT2_VALID_FS; 214 ext2_sbupdate(ump, MNT_WAIT); 215 } 216 fs->s_rd_only = 1; 217 } 218 if (!error && (mp->mnt_flag & MNT_RELOAD)) 219 error = ext2_reload(mp, td->td_ucred, td); 220 if (error) 221 return (error); 222 devvp = ump->um_devvp; 223 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 224 (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) 225 return (EPERM); 226 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 227 /* 228 * If upgrade to read-write by non-root, then verify 229 * that user has necessary permissions on the device. 230 */ 231 if (suser(td)) { 232 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 233 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 234 td->td_ucred, td)) != 0) { 235 VOP_UNLOCK(devvp, 0, td); 236 return (error); 237 } 238 VOP_UNLOCK(devvp, 0, td); 239 } 240 241 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 242 (fs->s_es->s_state & EXT2_ERROR_FS)) { 243 if (mp->mnt_flag & MNT_FORCE) { 244 printf( 245"WARNING: %s was not properly dismounted\n", 246 fs->fs_fsmnt); 247 } else { 248 printf( 249"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 250 fs->fs_fsmnt); 251 return (EPERM); 252 } 253 } 254 fs->s_es->s_state &= ~EXT2_VALID_FS; 255 ext2_sbupdate(ump, MNT_WAIT); 256 fs->s_rd_only = 0; 257 } 258 if (fspec == NULL) { 259 error = vfs_getopt(opts, "export", (void **)&export, 260 &len); 261 if (error || len != sizeof(struct export_args)) 262 return (EINVAL); 263 /* Process export requests. */ 264 return (vfs_export(mp, export)); 265 } 266 } 267 /* 268 * Not an update, or updating the name: look up the name 269 * and verify that it refers to a sensible disk device. 270 */ 271 if (fspec == NULL) 272 return (EINVAL); 273 NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); 274 if ((error = namei(ndp)) != 0) 275 return (error); 276 NDFREE(ndp, NDF_ONLY_PNBUF); 277 devvp = ndp->ni_vp; 278 279 if (!vn_isdisk(devvp, &error)) { 280 vrele(devvp); 281 return (error); 282 } 283 284 /* 285 * If mount by non-root, then verify that user has necessary 286 * permissions on the device. 287 */ 288 if (suser(td)) { 289 accessmode = VREAD; 290 if ((mp->mnt_flag & MNT_RDONLY) == 0) 291 accessmode |= VWRITE; 292 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 293 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 294 vput(devvp); 295 return (error); 296 } 297 VOP_UNLOCK(devvp, 0, td); 298 } 299 300 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 301 error = ext2_mountfs(devvp, mp, td); 302 } else { 303 if (devvp != ump->um_devvp) 304 error = EINVAL; /* needs translation */ 305 else 306 vrele(devvp); 307 } 308 if (error) { 309 vrele(devvp); 310 return (error); 311 } 312 ump = VFSTOEXT2(mp); 313 fs = ump->um_e2fs; 314 /* 315 * Note that this strncpy() is ok because of a check at the start 316 * of ext2_mount(). 317 */ 318 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 319 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 320 (void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 321 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 322 (void)ext2_statfs(mp, &mp->mnt_stat, td); 323 return (0); 324} 325 326/* 327 * checks that the data in the descriptor blocks make sense 328 * this is taken from ext2/super.c 329 */ 330static int ext2_check_descriptors (struct ext2_sb_info * sb) 331{ 332 int i; 333 int desc_block = 0; 334 unsigned long block = sb->s_es->s_first_data_block; 335 struct ext2_group_desc * gdp = NULL; 336 337 /* ext2_debug ("Checking group descriptors"); */ 338 339 for (i = 0; i < sb->s_groups_count; i++) 340 { 341 /* examine next descriptor block */ 342 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 343 gdp = (struct ext2_group_desc *) 344 sb->s_group_desc[desc_block++]->b_data; 345 if (gdp->bg_block_bitmap < block || 346 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 347 { 348 printf ("ext2_check_descriptors: " 349 "Block bitmap for group %d" 350 " not in group (block %lu)!\n", 351 i, (unsigned long) gdp->bg_block_bitmap); 352 return 0; 353 } 354 if (gdp->bg_inode_bitmap < block || 355 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 356 { 357 printf ("ext2_check_descriptors: " 358 "Inode bitmap for group %d" 359 " not in group (block %lu)!\n", 360 i, (unsigned long) gdp->bg_inode_bitmap); 361 return 0; 362 } 363 if (gdp->bg_inode_table < block || 364 gdp->bg_inode_table + sb->s_itb_per_group >= 365 block + EXT2_BLOCKS_PER_GROUP(sb)) 366 { 367 printf ("ext2_check_descriptors: " 368 "Inode table for group %d" 369 " not in group (block %lu)!\n", 370 i, (unsigned long) gdp->bg_inode_table); 371 return 0; 372 } 373 block += EXT2_BLOCKS_PER_GROUP(sb); 374 gdp++; 375 } 376 return 1; 377} 378 379static int 380ext2_check_sb_compat(es, dev, ronly) 381 struct ext2_super_block *es; 382 struct cdev *dev; 383 int ronly; 384{ 385 386 if (es->s_magic != EXT2_SUPER_MAGIC) { 387 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 388 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 389 return (1); 390 } 391 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 392 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 393 printf( 394"WARNING: mount of %s denied due to unsupported optional features\n", 395 devtoname(dev)); 396 return (1); 397 } 398 if (!ronly && 399 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 400 printf( 401"WARNING: R/W mount of %s denied due to unsupported optional features\n", 402 devtoname(dev)); 403 return (1); 404 } 405 } 406 return (0); 407} 408 409/* 410 * this computes the fields of the ext2_sb_info structure from the 411 * data in the ext2_super_block structure read in 412 */ 413static int compute_sb_data(devvp, es, fs) 414 struct vnode * devvp; 415 struct ext2_super_block * es; 416 struct ext2_sb_info * fs; 417{ 418 int db_count, error; 419 int i, j; 420 int logic_sb_block = 1; /* XXX for now */ 421 422#if 1 423#define V(v) 424#else 425#define V(v) printf(#v"= %d\n", fs->v); 426#endif 427 428 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 429 V(s_blocksize) 430 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 431 V(s_bshift) 432 fs->s_fsbtodb = es->s_log_block_size + 1; 433 V(s_fsbtodb) 434 fs->s_qbmask = fs->s_blocksize - 1; 435 V(s_bmask) 436 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 437 V(s_blocksize_bits) 438 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 439 V(s_frag_size) 440 if (fs->s_frag_size) 441 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 442 V(s_frags_per_block) 443 fs->s_blocks_per_group = es->s_blocks_per_group; 444 V(s_blocks_per_group) 445 fs->s_frags_per_group = es->s_frags_per_group; 446 V(s_frags_per_group) 447 fs->s_inodes_per_group = es->s_inodes_per_group; 448 V(s_inodes_per_group) 449 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 450 V(s_inodes_per_block) 451 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 452 V(s_itb_per_group) 453 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 454 V(s_desc_per_block) 455 /* s_resuid / s_resgid ? */ 456 fs->s_groups_count = (es->s_blocks_count - 457 es->s_first_data_block + 458 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 459 EXT2_BLOCKS_PER_GROUP(fs); 460 V(s_groups_count) 461 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 462 EXT2_DESC_PER_BLOCK(fs); 463 fs->s_db_per_group = db_count; 464 V(s_db_per_group) 465 466 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 467 M_EXT2MNT, M_WAITOK); 468 469 /* adjust logic_sb_block */ 470 if(fs->s_blocksize > SBSIZE) 471 /* Godmar thinks: if the blocksize is greater than 1024, then 472 the superblock is logically part of block zero. 473 */ 474 logic_sb_block = 0; 475 476 for (i = 0; i < db_count; i++) { 477 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 478 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 479 if(error) { 480 for (j = 0; j < i; j++) 481 brelse(fs->s_group_desc[j]); 482 bsd_free(fs->s_group_desc, M_EXT2MNT); 483 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 484 return EIO; 485 } 486 LCK_BUF(fs->s_group_desc[i]) 487 } 488 if(!ext2_check_descriptors(fs)) { 489 for (j = 0; j < db_count; j++) 490 ULCK_BUF(fs->s_group_desc[j]) 491 bsd_free(fs->s_group_desc, M_EXT2MNT); 492 printf("EXT2-fs: (ext2_check_descriptors failure) " 493 "unable to read group descriptors\n"); 494 return EIO; 495 } 496 497 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 498 fs->s_inode_bitmap_number[i] = 0; 499 fs->s_inode_bitmap[i] = NULL; 500 fs->s_block_bitmap_number[i] = 0; 501 fs->s_block_bitmap[i] = NULL; 502 } 503 fs->s_loaded_inode_bitmaps = 0; 504 fs->s_loaded_block_bitmaps = 0; 505 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 506 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 507 fs->fs_maxfilesize = 0x7fffffff; 508 else 509 fs->fs_maxfilesize = 0x7fffffffffffffff; 510 return 0; 511} 512 513/* 514 * Reload all incore data for a filesystem (used after running fsck on 515 * the root filesystem and finding things to fix). The filesystem must 516 * be mounted read-only. 517 * 518 * Things to do to update the mount: 519 * 1) invalidate all cached meta-data. 520 * 2) re-read superblock from disk. 521 * 3) re-read summary information from disk. 522 * 4) invalidate all inactive vnodes. 523 * 5) invalidate all cached file data. 524 * 6) re-read inode data for all active vnodes. 525 */ 526static int 527ext2_reload(mp, cred, td) 528 struct mount *mp; 529 struct ucred *cred; 530 struct thread *td; 531{ 532 struct vnode *vp, *nvp, *devvp; 533 struct inode *ip; 534 struct buf *bp; 535 struct ext2_super_block * es; 536 struct ext2_sb_info *fs; 537 int error; 538 539 if ((mp->mnt_flag & MNT_RDONLY) == 0) 540 return (EINVAL); 541 /* 542 * Step 1: invalidate all cached meta-data. 543 */ 544 devvp = VFSTOEXT2(mp)->um_devvp; 545 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 546 if (vinvalbuf(devvp, 0, cred, td, 0, 0) != 0) 547 panic("ext2_reload: dirty1"); 548 VOP_UNLOCK(devvp, 0, td); 549 550 /* 551 * Step 2: re-read superblock from disk. 552 * constants have been adjusted for ext2 553 */ 554 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 555 return (error); 556 es = (struct ext2_super_block *)bp->b_data; 557 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 558 brelse(bp); 559 return (EIO); /* XXX needs translation */ 560 } 561 fs = VFSTOEXT2(mp)->um_e2fs; 562 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 563 564 if((error = compute_sb_data(devvp, es, fs)) != 0) { 565 brelse(bp); 566 return error; 567 } 568#ifdef UNKLAR 569 if (fs->fs_sbsize < SBSIZE) 570 bp->b_flags |= B_INVAL; 571#endif 572 brelse(bp); 573 574loop: 575 MNT_ILOCK(mp); 576 MNT_VNODE_FOREACH(vp, mp, nvp) { 577 VI_LOCK(vp); 578 if (vp->v_iflag & VI_XLOCK) { 579 VI_UNLOCK(vp); 580 continue; 581 } 582 MNT_IUNLOCK(mp); 583 /* 584 * Step 4: invalidate all inactive vnodes. 585 */ 586 if (vp->v_usecount == 0) { 587 vgonel(vp, td); 588 goto loop; 589 } 590 /* 591 * Step 5: invalidate all cached file data. 592 */ 593 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 594 goto loop; 595 } 596 if (vinvalbuf(vp, 0, cred, td, 0, 0)) 597 panic("ext2_reload: dirty2"); 598 /* 599 * Step 6: re-read inode data for all active vnodes. 600 */ 601 ip = VTOI(vp); 602 error = 603 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 604 (int)fs->s_blocksize, NOCRED, &bp); 605 if (error) { 606 VOP_UNLOCK(vp, 0, td); 607 vrele(vp); 608 return (error); 609 } 610 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 611 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 612 brelse(bp); 613 VOP_UNLOCK(vp, 0, td); 614 vrele(vp); 615 MNT_ILOCK(mp); 616 } 617 MNT_IUNLOCK(mp); 618 return (0); 619} 620 621/* 622 * Common code for mount and mountroot 623 */ 624static int 625ext2_mountfs(devvp, mp, td) 626 struct vnode *devvp; 627 struct mount *mp; 628 struct thread *td; 629{ 630 struct ext2mount *ump; 631 struct buf *bp; 632 struct ext2_sb_info *fs; 633 struct ext2_super_block * es; 634 struct cdev *dev = devvp->v_rdev; 635 int error; 636 int ronly; 637 638 /* 639 * Disallow multiple mounts of the same device. 640 * Disallow mounting of a device that is currently in use 641 * (except for root, which might share swap device for miniroot). 642 * Flush out any old buffers remaining from a previous use. 643 */ 644 if ((error = vfs_mountedon(devvp)) != 0) 645 return (error); 646 if (vcount(devvp) > 1) 647 return (EBUSY); 648 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 649 error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0); 650 if (error) { 651 VOP_UNLOCK(devvp, 0, td); 652 return (error); 653 } 654 655 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 656 /* 657 * XXX: open the device with read and write access even if only 658 * read access is needed now. Write access is needed if the 659 * filesystem is ever mounted read/write, and we don't change the 660 * access mode for remounts. 661 */ 662#ifdef notyet 663 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1); 664#else 665 error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1); 666#endif 667 VOP_UNLOCK(devvp, 0, td); 668 if (error) 669 return (error); 670 if (devvp->v_rdev->si_iosize_max != 0) 671 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 672 if (mp->mnt_iosize_max > MAXPHYS) 673 mp->mnt_iosize_max = MAXPHYS; 674 675 bp = NULL; 676 ump = NULL; 677 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 678 goto out; 679 es = (struct ext2_super_block *)bp->b_data; 680 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 681 error = EINVAL; /* XXX needs translation */ 682 goto out; 683 } 684 if ((es->s_state & EXT2_VALID_FS) == 0 || 685 (es->s_state & EXT2_ERROR_FS)) { 686 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 687 printf( 688"WARNING: Filesystem was not properly dismounted\n"); 689 } else { 690 printf( 691"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 692 error = EPERM; 693 goto out; 694 } 695 } 696 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 697 bzero((caddr_t)ump, sizeof *ump); 698 /* I don't know whether this is the right strategy. Note that 699 we dynamically allocate both an ext2_sb_info and an ext2_super_block 700 while Linux keeps the super block in a locked buffer 701 */ 702 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 703 M_EXT2MNT, M_WAITOK); 704 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 705 M_EXT2MNT, M_WAITOK); 706 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 707 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 708 goto out; 709 /* 710 * We don't free the group descriptors allocated by compute_sb_data() 711 * until ext2_unmount(). This is OK since the mount will succeed. 712 */ 713 brelse(bp); 714 bp = NULL; 715 fs = ump->um_e2fs; 716 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 717 /* if the fs is not mounted read-only, make sure the super block is 718 always written back on a sync() 719 */ 720 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 721 if (ronly == 0) { 722 fs->s_dirt = 1; /* mark it modified */ 723 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 724 } 725 mp->mnt_data = (qaddr_t)ump; 726 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 727 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 728 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 729 mp->mnt_flag |= MNT_LOCAL; 730 ump->um_mountp = mp; 731 ump->um_dev = dev; 732 ump->um_devvp = devvp; 733 /* setting those two parameters allowed us to use 734 ufs_bmap w/o changse ! 735 */ 736 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 737 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 738 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 739 devvp->v_rdev->si_mountpoint = mp; 740 if (ronly == 0) 741 ext2_sbupdate(ump, MNT_WAIT); 742 return (0); 743out: 744 if (bp) 745 brelse(bp); 746 /* XXX: see comment above VOP_OPEN. */ 747#ifdef notyet 748 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, td); 749#else 750 (void)VOP_CLOSE(devvp, FREAD | FWRITE, NOCRED, td); 751#endif 752 if (ump) { 753 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 754 bsd_free(ump->um_e2fs, M_EXT2MNT); 755 bsd_free(ump, M_EXT2MNT); 756 mp->mnt_data = (qaddr_t)0; 757 } 758 return (error); 759} 760 761/* 762 * unmount system call 763 */ 764static int 765ext2_unmount(mp, mntflags, td) 766 struct mount *mp; 767 int mntflags; 768 struct thread *td; 769{ 770 struct ext2mount *ump; 771 struct ext2_sb_info *fs; 772 int error, flags, ronly, i; 773 774 flags = 0; 775 if (mntflags & MNT_FORCE) { 776 if (mp->mnt_flag & MNT_ROOTFS) 777 return (EINVAL); 778 flags |= FORCECLOSE; 779 } 780 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 781 return (error); 782 ump = VFSTOEXT2(mp); 783 fs = ump->um_e2fs; 784 ronly = fs->s_rd_only; 785 if (ronly == 0) { 786 if (fs->s_wasvalid) 787 fs->s_es->s_state |= EXT2_VALID_FS; 788 ext2_sbupdate(ump, MNT_WAIT); 789 } 790 791 /* release buffers containing group descriptors */ 792 for(i = 0; i < fs->s_db_per_group; i++) 793 ULCK_BUF(fs->s_group_desc[i]) 794 bsd_free(fs->s_group_desc, M_EXT2MNT); 795 796 /* release cached inode/block bitmaps */ 797 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 798 if (fs->s_inode_bitmap[i]) 799 ULCK_BUF(fs->s_inode_bitmap[i]) 800 801 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 802 if (fs->s_block_bitmap[i]) 803 ULCK_BUF(fs->s_block_bitmap[i]) 804 805 ump->um_devvp->v_rdev->si_mountpoint = NULL; 806 /* XXX: see comment above VOP_OPEN. */ 807#ifdef notyet 808 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD | FWRITE, 809 NOCRED, td); 810#else 811 error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td); 812#endif 813 vrele(ump->um_devvp); 814 bsd_free(fs->s_es, M_EXT2MNT); 815 bsd_free(fs, M_EXT2MNT); 816 bsd_free(ump, M_EXT2MNT); 817 mp->mnt_data = (qaddr_t)0; 818 mp->mnt_flag &= ~MNT_LOCAL; 819 return (error); 820} 821 822/* 823 * Flush out all the files in a filesystem. 824 */ 825static int 826ext2_flushfiles(mp, flags, td) 827 struct mount *mp; 828 int flags; 829 struct thread *td; 830{ 831 int error; 832 833 error = vflush(mp, 0, flags, td); 834 return (error); 835} 836 837/* 838 * Get file system statistics. 839 * taken from ext2/super.c ext2_statfs 840 */ 841static int 842ext2_statfs(mp, sbp, td) 843 struct mount *mp; 844 struct statfs *sbp; 845 struct thread *td; 846{ 847 unsigned long overhead; 848 struct ext2mount *ump; 849 struct ext2_sb_info *fs; 850 struct ext2_super_block *es; 851 int i, nsb; 852 853 ump = VFSTOEXT2(mp); 854 fs = ump->um_e2fs; 855 es = fs->s_es; 856 857 if (es->s_magic != EXT2_SUPER_MAGIC) 858 panic("ext2_statfs - magic number spoiled"); 859 860 /* 861 * Compute the overhead (FS structures) 862 */ 863 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 864 nsb = 0; 865 for (i = 0 ; i < fs->s_groups_count; i++) 866 if (ext2_group_sparse(i)) 867 nsb++; 868 } else 869 nsb = fs->s_groups_count; 870 overhead = es->s_first_data_block + 871 /* Superblocks and block group descriptors: */ 872 nsb * (1 + fs->s_db_per_group) + 873 /* Inode bitmap, block bitmap, and inode table: */ 874 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 875 876 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 877 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 878 sbp->f_blocks = es->s_blocks_count - overhead; 879 sbp->f_bfree = es->s_free_blocks_count; 880 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 881 sbp->f_files = es->s_inodes_count; 882 sbp->f_ffree = es->s_free_inodes_count; 883 if (sbp != &mp->mnt_stat) { 884 sbp->f_type = mp->mnt_vfc->vfc_typenum; 885 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 886 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 887 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 888 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 889 } 890 return (0); 891} 892 893/* 894 * Go through the disk queues to initiate sandbagged IO; 895 * go through the inodes to write those that have been modified; 896 * initiate the writing of the super block if it has been modified. 897 * 898 * Note: we are always called with the filesystem marked `MPBUSY'. 899 */ 900static int 901ext2_sync(mp, waitfor, cred, td) 902 struct mount *mp; 903 int waitfor; 904 struct ucred *cred; 905 struct thread *td; 906{ 907 struct vnode *nvp, *vp; 908 struct inode *ip; 909 struct ext2mount *ump = VFSTOEXT2(mp); 910 struct ext2_sb_info *fs; 911 int error, allerror = 0; 912 913 fs = ump->um_e2fs; 914 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 915 printf("fs = %s\n", fs->fs_fsmnt); 916 panic("ext2_sync: rofs mod"); 917 } 918 /* 919 * Write back each (modified) inode. 920 */ 921 MNT_ILOCK(mp); 922loop: 923 MNT_VNODE_FOREACH(vp, mp, nvp) { 924 VI_LOCK(vp); 925 if (vp->v_iflag & VI_XLOCK) { 926 VI_UNLOCK(vp); 927 continue; 928 } 929 MNT_IUNLOCK(mp); 930 ip = VTOI(vp); 931 if (vp->v_type == VNON || 932 ((ip->i_flag & 933 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 934 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { 935 VI_UNLOCK(vp); 936 MNT_ILOCK(mp); 937 continue; 938 } 939 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 940 if (error) { 941 MNT_ILOCK(mp); 942 if (error == ENOENT) 943 goto loop; 944 continue; 945 } 946 if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) 947 allerror = error; 948 VOP_UNLOCK(vp, 0, td); 949 vrele(vp); 950 MNT_ILOCK(mp); 951 } 952 MNT_IUNLOCK(mp); 953 /* 954 * Force stale file system control information to be flushed. 955 */ 956 if (waitfor != MNT_LAZY) { 957 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 958 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0) 959 allerror = error; 960 VOP_UNLOCK(ump->um_devvp, 0, td); 961 } 962 /* 963 * Write back modified superblock. 964 */ 965 if (fs->s_dirt != 0) { 966 fs->s_dirt = 0; 967 fs->s_es->s_wtime = time_second; 968 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 969 allerror = error; 970 } 971 return (allerror); 972} 973 974/* 975 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 976 * in from disk. If it is in core, wait for the lock bit to clear, then 977 * return the inode locked. Detection and handling of mount points must be 978 * done by the calling routine. 979 */ 980static int 981ext2_vget(mp, ino, flags, vpp) 982 struct mount *mp; 983 ino_t ino; 984 int flags; 985 struct vnode **vpp; 986{ 987 struct ext2_sb_info *fs; 988 struct inode *ip; 989 struct ext2mount *ump; 990 struct buf *bp; 991 struct vnode *vp; 992 struct cdev *dev; 993 int i, error; 994 int used_blocks; 995 996 ump = VFSTOEXT2(mp); 997 dev = ump->um_dev; 998restart: 999 if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0) 1000 return (error); 1001 if (*vpp != NULL) 1002 return (0); 1003 1004 /* 1005 * Lock out the creation of new entries in the FFS hash table in 1006 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1007 * may occur! 1008 */ 1009 if (ext2fs_inode_hash_lock) { 1010 while (ext2fs_inode_hash_lock) { 1011 ext2fs_inode_hash_lock = -1; 1012 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 1013 } 1014 goto restart; 1015 } 1016 ext2fs_inode_hash_lock = 1; 1017 1018 /* 1019 * If this MALLOC() is performed after the getnewvnode() 1020 * it might block, leaving a vnode with a NULL v_data to be 1021 * found by ext2_sync() if a sync happens to fire right then, 1022 * which will cause a panic because ext2_sync() blindly 1023 * dereferences vp->v_data (as well it should). 1024 */ 1025 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 1026 1027 /* Allocate a new vnode/inode. */ 1028 if ((error = getnewvnode("ext2fs", mp, ext2_vnodeop_p, &vp)) != 0) { 1029 if (ext2fs_inode_hash_lock < 0) 1030 wakeup(&ext2fs_inode_hash_lock); 1031 ext2fs_inode_hash_lock = 0; 1032 *vpp = NULL; 1033 FREE(ip, M_EXT2NODE); 1034 return (error); 1035 } 1036 bzero((caddr_t)ip, sizeof(struct inode)); 1037 vp->v_data = ip; 1038 ip->i_vnode = vp; 1039 ip->i_e2fs = fs = ump->um_e2fs; 1040 ip->i_dev = dev; 1041 ip->i_number = ino; 1042 /* 1043 * Put it onto its hash chain and lock it so that other requests for 1044 * this inode will block if they arrive while we are sleeping waiting 1045 * for old data structures to be purged or for the contents of the 1046 * disk portion of this inode to be read. 1047 */ 1048 ext2_ihashins(ip); 1049 1050 if (ext2fs_inode_hash_lock < 0) 1051 wakeup(&ext2fs_inode_hash_lock); 1052 ext2fs_inode_hash_lock = 0; 1053 1054 /* Read in the disk contents for the inode, copy into the inode. */ 1055#if 0 1056printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1057#endif 1058 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1059 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1060 /* 1061 * The inode does not contain anything useful, so it would 1062 * be misleading to leave it on its hash chain. With mode 1063 * still zero, it will be unlinked and returned to the free 1064 * list by vput(). 1065 */ 1066 vput(vp); 1067 brelse(bp); 1068 *vpp = NULL; 1069 return (error); 1070 } 1071 /* convert ext2 inode to dinode */ 1072 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1073 ino_to_fsbo(fs, ino)), ip); 1074 ip->i_block_group = ino_to_cg(fs, ino); 1075 ip->i_next_alloc_block = 0; 1076 ip->i_next_alloc_goal = 0; 1077 ip->i_prealloc_count = 0; 1078 ip->i_prealloc_block = 0; 1079 /* now we want to make sure that block pointers for unused 1080 blocks are zeroed out - ext2_balloc depends on this 1081 although for regular files and directories only 1082 */ 1083 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1084 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1085 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1086 ip->i_db[i] = 0; 1087 } 1088/* 1089 ext2_print_inode(ip); 1090*/ 1091 brelse(bp); 1092 1093 /* 1094 * Initialize the vnode from the inode, check for aliases. 1095 * Note that the underlying vnode may have changed. 1096 */ 1097 if ((error = ext2_vinit(mp, ext2_fifoop_p, &vp)) != 0) { 1098 vput(vp); 1099 *vpp = NULL; 1100 return (error); 1101 } 1102 /* 1103 * Finish inode initialization now that aliasing has been resolved. 1104 */ 1105 ip->i_devvp = ump->um_devvp; 1106 VREF(ip->i_devvp); 1107 /* 1108 * Set up a generation number for this inode if it does not 1109 * already have one. This should only happen on old filesystems. 1110 */ 1111 if (ip->i_gen == 0) { 1112 ip->i_gen = random() / 2 + 1; 1113 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1114 ip->i_flag |= IN_MODIFIED; 1115 } 1116 *vpp = vp; 1117 return (0); 1118} 1119 1120/* 1121 * File handle to vnode 1122 * 1123 * Have to be really careful about stale file handles: 1124 * - check that the inode number is valid 1125 * - call ext2_vget() to get the locked inode 1126 * - check for an unallocated inode (i_mode == 0) 1127 * - check that the given client host has export rights and return 1128 * those rights via. exflagsp and credanonp 1129 */ 1130static int 1131ext2_fhtovp(mp, fhp, vpp) 1132 struct mount *mp; 1133 struct fid *fhp; 1134 struct vnode **vpp; 1135{ 1136 struct inode *ip; 1137 struct ufid *ufhp; 1138 struct vnode *nvp; 1139 struct ext2_sb_info *fs; 1140 int error; 1141 1142 ufhp = (struct ufid *)fhp; 1143 fs = VFSTOEXT2(mp)->um_e2fs; 1144 if (ufhp->ufid_ino < ROOTINO || 1145 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1146 return (ESTALE); 1147 1148 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1149 if (error) { 1150 *vpp = NULLVP; 1151 return (error); 1152 } 1153 ip = VTOI(nvp); 1154 if (ip->i_mode == 0 || 1155 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1156 vput(nvp); 1157 *vpp = NULLVP; 1158 return (ESTALE); 1159 } 1160 *vpp = nvp; 1161 return (0); 1162} 1163 1164/* 1165 * Vnode pointer to File handle 1166 */ 1167/* ARGSUSED */ 1168static int 1169ext2_vptofh(vp, fhp) 1170 struct vnode *vp; 1171 struct fid *fhp; 1172{ 1173 struct inode *ip; 1174 struct ufid *ufhp; 1175 1176 ip = VTOI(vp); 1177 ufhp = (struct ufid *)fhp; 1178 ufhp->ufid_len = sizeof(struct ufid); 1179 ufhp->ufid_ino = ip->i_number; 1180 ufhp->ufid_gen = ip->i_gen; 1181 return (0); 1182} 1183 1184/* 1185 * Write a superblock and associated information back to disk. 1186 */ 1187static int 1188ext2_sbupdate(mp, waitfor) 1189 struct ext2mount *mp; 1190 int waitfor; 1191{ 1192 struct ext2_sb_info *fs = mp->um_e2fs; 1193 struct ext2_super_block *es = fs->s_es; 1194 struct buf *bp; 1195 int error = 0; 1196/* 1197printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1198*/ 1199 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1200 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1201 if (waitfor == MNT_WAIT) 1202 error = bwrite(bp); 1203 else 1204 bawrite(bp); 1205 1206 /* 1207 * The buffers for group descriptors, inode bitmaps and block bitmaps 1208 * are not busy at this point and are (hopefully) written by the 1209 * usual sync mechanism. No need to write them here 1210 */ 1211 1212 return (error); 1213} 1214 1215/* 1216 * Return the root of a filesystem. 1217 */ 1218static int 1219ext2_root(mp, vpp, td) 1220 struct mount *mp; 1221 struct vnode **vpp; 1222 struct thread *td; 1223{ 1224 struct vnode *nvp; 1225 int error; 1226 1227 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1228 if (error) 1229 return (error); 1230 *vpp = nvp; 1231 return (0); 1232} 1233 1234static int 1235ext2_init(struct vfsconf *vfsp) 1236{ 1237 1238 ext2_ihashinit(); 1239 return (0); 1240} 1241 1242static int 1243ext2_uninit(struct vfsconf *vfsp) 1244{ 1245 1246 ext2_ihashuninit(); 1247 return (0); 1248} 1249