ext2_vfsops.c revision 139778
1/*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 139778 2005-01-06 18:27:30Z imp $ 37 */ 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/namei.h> 42#include <sys/proc.h> 43#include <sys/kernel.h> 44#include <sys/vnode.h> 45#include <sys/mount.h> 46#include <sys/bio.h> 47#include <sys/buf.h> 48#include <sys/conf.h> 49#include <sys/fcntl.h> 50#include <sys/malloc.h> 51#include <sys/stat.h> 52#include <sys/mutex.h> 53 54#include <geom/geom.h> 55#include <geom/geom_vfs.h> 56 57#include <gnu/ext2fs/ext2_mount.h> 58#include <gnu/ext2fs/inode.h> 59 60#include <gnu/ext2fs/fs.h> 61#include <gnu/ext2fs/ext2_extern.h> 62#include <gnu/ext2fs/ext2_fs.h> 63#include <gnu/ext2fs/ext2_fs_sb.h> 64 65static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 66static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 67static int ext2_reload(struct mount *mp, struct ucred *cred, struct thread *td); 68static int ext2_sbupdate(struct ext2mount *, int); 69 70static vfs_unmount_t ext2_unmount; 71static vfs_root_t ext2_root; 72static vfs_statfs_t ext2_statfs; 73static vfs_sync_t ext2_sync; 74static vfs_vget_t ext2_vget; 75static vfs_fhtovp_t ext2_fhtovp; 76static vfs_vptofh_t ext2_vptofh; 77static vfs_init_t ext2_init; 78static vfs_uninit_t ext2_uninit; 79static vfs_mount_t ext2_mount; 80 81MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 82static MALLOC_DEFINE(M_EXT2MNT, "EXT2 mount", "EXT2 mount structure"); 83 84static struct vfsops ext2fs_vfsops = { 85 .vfs_fhtovp = ext2_fhtovp, 86 .vfs_init = ext2_init, 87 .vfs_mount = ext2_mount, 88 .vfs_root = ext2_root, /* root inode via vget */ 89 .vfs_statfs = ext2_statfs, 90 .vfs_sync = ext2_sync, 91 .vfs_uninit = ext2_uninit, 92 .vfs_unmount = ext2_unmount, 93 .vfs_vget = ext2_vget, 94 .vfs_vptofh = ext2_vptofh, 95}; 96 97VFS_SET(ext2fs_vfsops, ext2fs, 0); 98 99#define bsd_malloc malloc 100#define bsd_free free 101 102static int ext2fs_inode_hash_lock; 103 104static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 105 int ronly); 106static int compute_sb_data(struct vnode * devvp, 107 struct ext2_super_block * es, struct ext2_sb_info * fs); 108 109static const char *ext2_opts[] = { "from", "export" }; 110/* 111 * VFS Operations. 112 * 113 * mount system call 114 */ 115static int 116ext2_mount(mp, td) 117 struct mount *mp; 118 struct thread *td; 119{ 120 struct export_args *export; 121 struct vfsoptlist *opts; 122 struct vnode *devvp; 123 struct ext2mount *ump = 0; 124 struct ext2_sb_info *fs; 125 char *path, *fspec; 126 int error, flags, len; 127 mode_t accessmode; 128 struct nameidata nd, *ndp = &nd; 129 130 opts = mp->mnt_optnew; 131 132 if (vfs_filteropt(opts, ext2_opts)) 133 return (EINVAL); 134 135 vfs_getopt(opts, "fspath", (void **)&path, NULL); 136 /* Double-check the length of path.. */ 137 if (strlen(path) >= MAXMNTLEN - 1) 138 return (ENAMETOOLONG); 139 140 fspec = NULL; 141 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 142 if (!error && fspec[len - 1] != '\0') 143 return (EINVAL); 144 145 /* 146 * If updating, check whether changing from read-only to 147 * read/write; if there is no device name, that's all we do. 148 */ 149 if (mp->mnt_flag & MNT_UPDATE) { 150 ump = VFSTOEXT2(mp); 151 fs = ump->um_e2fs; 152 error = 0; 153 if (fs->s_rd_only == 0 && 154 vfs_flagopt(opts, "ro", NULL, 0)) { 155 error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td); 156 if (error) 157 return (error); 158 flags = WRITECLOSE; 159 if (mp->mnt_flag & MNT_FORCE) 160 flags |= FORCECLOSE; 161 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 162 return (EBUSY); 163 error = ext2_flushfiles(mp, flags, td); 164 vfs_unbusy(mp, td); 165 if (!error && fs->s_wasvalid) { 166 fs->s_es->s_state |= EXT2_VALID_FS; 167 ext2_sbupdate(ump, MNT_WAIT); 168 } 169 fs->s_rd_only = 1; 170 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 171 DROP_GIANT(); 172 g_topology_lock(); 173 g_access(ump->um_cp, 0, -1, 0); 174 g_topology_unlock(); 175 PICKUP_GIANT(); 176 } 177 if (!error && (mp->mnt_flag & MNT_RELOAD)) 178 error = ext2_reload(mp, td->td_ucred, td); 179 if (error) 180 return (error); 181 devvp = ump->um_devvp; 182 if (fs->s_rd_only && !vfs_flagopt(opts, "ro", NULL, 0)) { 183 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 0)) 184 return (EPERM); 185 /* 186 * If upgrade to read-write by non-root, then verify 187 * that user has necessary permissions on the device. 188 */ 189 if (suser(td)) { 190 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 191 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 192 td->td_ucred, td)) != 0) { 193 VOP_UNLOCK(devvp, 0, td); 194 return (error); 195 } 196 VOP_UNLOCK(devvp, 0, td); 197 } 198 DROP_GIANT(); 199 g_topology_lock(); 200 error = g_access(ump->um_cp, 0, 1, 0); 201 g_topology_unlock(); 202 PICKUP_GIANT(); 203 if (error) 204 return (error); 205 206 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 207 (fs->s_es->s_state & EXT2_ERROR_FS)) { 208 if (mp->mnt_flag & MNT_FORCE) { 209 printf( 210"WARNING: %s was not properly dismounted\n", 211 fs->fs_fsmnt); 212 } else { 213 printf( 214"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 215 fs->fs_fsmnt); 216 return (EPERM); 217 } 218 } 219 fs->s_es->s_state &= ~EXT2_VALID_FS; 220 ext2_sbupdate(ump, MNT_WAIT); 221 fs->s_rd_only = 0; 222 mp->mnt_flag &= ~MNT_RDONLY; 223 } 224 if (fspec == NULL) { 225 error = vfs_getopt(opts, "export", (void **)&export, 226 &len); 227 if (error || len != sizeof(struct export_args)) 228 return (EINVAL); 229 /* Process export requests. */ 230 return (vfs_export(mp, export)); 231 } 232 } 233 /* 234 * Not an update, or updating the name: look up the name 235 * and verify that it refers to a sensible disk device. 236 */ 237 if (fspec == NULL) 238 return (EINVAL); 239 NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); 240 if ((error = namei(ndp)) != 0) 241 return (error); 242 NDFREE(ndp, NDF_ONLY_PNBUF); 243 devvp = ndp->ni_vp; 244 245 if (!vn_isdisk(devvp, &error)) { 246 vrele(devvp); 247 return (error); 248 } 249 250 /* 251 * If mount by non-root, then verify that user has necessary 252 * permissions on the device. 253 */ 254 if (suser(td)) { 255 accessmode = VREAD; 256 if ((mp->mnt_flag & MNT_RDONLY) == 0) 257 accessmode |= VWRITE; 258 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 259 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 260 vput(devvp); 261 return (error); 262 } 263 VOP_UNLOCK(devvp, 0, td); 264 } 265 266 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 267 error = ext2_mountfs(devvp, mp, td); 268 } else { 269 if (devvp != ump->um_devvp) 270 error = EINVAL; /* needs translation */ 271 else 272 vrele(devvp); 273 } 274 if (error) { 275 vrele(devvp); 276 return (error); 277 } 278 ump = VFSTOEXT2(mp); 279 fs = ump->um_e2fs; 280 /* 281 * Note that this strncpy() is ok because of a check at the start 282 * of ext2_mount(). 283 */ 284 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 285 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 286 vfs_mountedfrom(mp, fspec); 287 return (0); 288} 289 290/* 291 * checks that the data in the descriptor blocks make sense 292 * this is taken from ext2/super.c 293 */ 294static int ext2_check_descriptors (struct ext2_sb_info * sb) 295{ 296 int i; 297 int desc_block = 0; 298 unsigned long block = sb->s_es->s_first_data_block; 299 struct ext2_group_desc * gdp = NULL; 300 301 /* ext2_debug ("Checking group descriptors"); */ 302 303 for (i = 0; i < sb->s_groups_count; i++) 304 { 305 /* examine next descriptor block */ 306 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 307 gdp = (struct ext2_group_desc *) 308 sb->s_group_desc[desc_block++]->b_data; 309 if (gdp->bg_block_bitmap < block || 310 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 311 { 312 printf ("ext2_check_descriptors: " 313 "Block bitmap for group %d" 314 " not in group (block %lu)!\n", 315 i, (unsigned long) gdp->bg_block_bitmap); 316 return 0; 317 } 318 if (gdp->bg_inode_bitmap < block || 319 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 320 { 321 printf ("ext2_check_descriptors: " 322 "Inode bitmap for group %d" 323 " not in group (block %lu)!\n", 324 i, (unsigned long) gdp->bg_inode_bitmap); 325 return 0; 326 } 327 if (gdp->bg_inode_table < block || 328 gdp->bg_inode_table + sb->s_itb_per_group >= 329 block + EXT2_BLOCKS_PER_GROUP(sb)) 330 { 331 printf ("ext2_check_descriptors: " 332 "Inode table for group %d" 333 " not in group (block %lu)!\n", 334 i, (unsigned long) gdp->bg_inode_table); 335 return 0; 336 } 337 block += EXT2_BLOCKS_PER_GROUP(sb); 338 gdp++; 339 } 340 return 1; 341} 342 343static int 344ext2_check_sb_compat(es, dev, ronly) 345 struct ext2_super_block *es; 346 struct cdev *dev; 347 int ronly; 348{ 349 350 if (es->s_magic != EXT2_SUPER_MAGIC) { 351 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 352 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 353 return (1); 354 } 355 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 356 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 357 printf( 358"WARNING: mount of %s denied due to unsupported optional features\n", 359 devtoname(dev)); 360 return (1); 361 } 362 if (!ronly && 363 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 364 printf( 365"WARNING: R/W mount of %s denied due to unsupported optional features\n", 366 devtoname(dev)); 367 return (1); 368 } 369 } 370 return (0); 371} 372 373/* 374 * this computes the fields of the ext2_sb_info structure from the 375 * data in the ext2_super_block structure read in 376 */ 377static int compute_sb_data(devvp, es, fs) 378 struct vnode * devvp; 379 struct ext2_super_block * es; 380 struct ext2_sb_info * fs; 381{ 382 int db_count, error; 383 int i, j; 384 int logic_sb_block = 1; /* XXX for now */ 385 386#if 1 387#define V(v) 388#else 389#define V(v) printf(#v"= %d\n", fs->v); 390#endif 391 392 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 393 V(s_blocksize) 394 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 395 V(s_bshift) 396 fs->s_fsbtodb = es->s_log_block_size + 1; 397 V(s_fsbtodb) 398 fs->s_qbmask = fs->s_blocksize - 1; 399 V(s_bmask) 400 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 401 V(s_blocksize_bits) 402 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 403 V(s_frag_size) 404 if (fs->s_frag_size) 405 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 406 V(s_frags_per_block) 407 fs->s_blocks_per_group = es->s_blocks_per_group; 408 V(s_blocks_per_group) 409 fs->s_frags_per_group = es->s_frags_per_group; 410 V(s_frags_per_group) 411 fs->s_inodes_per_group = es->s_inodes_per_group; 412 V(s_inodes_per_group) 413 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 414 V(s_inodes_per_block) 415 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 416 V(s_itb_per_group) 417 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 418 V(s_desc_per_block) 419 /* s_resuid / s_resgid ? */ 420 fs->s_groups_count = (es->s_blocks_count - 421 es->s_first_data_block + 422 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 423 EXT2_BLOCKS_PER_GROUP(fs); 424 V(s_groups_count) 425 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 426 EXT2_DESC_PER_BLOCK(fs); 427 fs->s_db_per_group = db_count; 428 V(s_db_per_group) 429 430 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 431 M_EXT2MNT, M_WAITOK); 432 433 /* adjust logic_sb_block */ 434 if(fs->s_blocksize > SBSIZE) 435 /* Godmar thinks: if the blocksize is greater than 1024, then 436 the superblock is logically part of block zero. 437 */ 438 logic_sb_block = 0; 439 440 for (i = 0; i < db_count; i++) { 441 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 442 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 443 if(error) { 444 for (j = 0; j < i; j++) 445 brelse(fs->s_group_desc[j]); 446 bsd_free(fs->s_group_desc, M_EXT2MNT); 447 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 448 return EIO; 449 } 450 LCK_BUF(fs->s_group_desc[i]) 451 } 452 if(!ext2_check_descriptors(fs)) { 453 for (j = 0; j < db_count; j++) 454 ULCK_BUF(fs->s_group_desc[j]) 455 bsd_free(fs->s_group_desc, M_EXT2MNT); 456 printf("EXT2-fs: (ext2_check_descriptors failure) " 457 "unable to read group descriptors\n"); 458 return EIO; 459 } 460 461 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 462 fs->s_inode_bitmap_number[i] = 0; 463 fs->s_inode_bitmap[i] = NULL; 464 fs->s_block_bitmap_number[i] = 0; 465 fs->s_block_bitmap[i] = NULL; 466 } 467 fs->s_loaded_inode_bitmaps = 0; 468 fs->s_loaded_block_bitmaps = 0; 469 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 470 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 471 fs->fs_maxfilesize = 0x7fffffff; 472 else 473 fs->fs_maxfilesize = 0x7fffffffffffffff; 474 return 0; 475} 476 477/* 478 * Reload all incore data for a filesystem (used after running fsck on 479 * the root filesystem and finding things to fix). The filesystem must 480 * be mounted read-only. 481 * 482 * Things to do to update the mount: 483 * 1) invalidate all cached meta-data. 484 * 2) re-read superblock from disk. 485 * 3) re-read summary information from disk. 486 * 4) invalidate all inactive vnodes. 487 * 5) invalidate all cached file data. 488 * 6) re-read inode data for all active vnodes. 489 */ 490static int 491ext2_reload(mp, cred, td) 492 struct mount *mp; 493 struct ucred *cred; 494 struct thread *td; 495{ 496 struct vnode *vp, *nvp, *devvp; 497 struct inode *ip; 498 struct buf *bp; 499 struct ext2_super_block * es; 500 struct ext2_sb_info *fs; 501 int error; 502 503 if ((mp->mnt_flag & MNT_RDONLY) == 0) 504 return (EINVAL); 505 /* 506 * Step 1: invalidate all cached meta-data. 507 */ 508 devvp = VFSTOEXT2(mp)->um_devvp; 509 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 510 if (vinvalbuf(devvp, 0, cred, td, 0, 0) != 0) 511 panic("ext2_reload: dirty1"); 512 VOP_UNLOCK(devvp, 0, td); 513 514 /* 515 * Step 2: re-read superblock from disk. 516 * constants have been adjusted for ext2 517 */ 518 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 519 return (error); 520 es = (struct ext2_super_block *)bp->b_data; 521 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 522 brelse(bp); 523 return (EIO); /* XXX needs translation */ 524 } 525 fs = VFSTOEXT2(mp)->um_e2fs; 526 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 527 528 if((error = compute_sb_data(devvp, es, fs)) != 0) { 529 brelse(bp); 530 return error; 531 } 532#ifdef UNKLAR 533 if (fs->fs_sbsize < SBSIZE) 534 bp->b_flags |= B_INVAL; 535#endif 536 brelse(bp); 537 538loop: 539 MNT_ILOCK(mp); 540 MNT_VNODE_FOREACH(vp, mp, nvp) { 541 VI_LOCK(vp); 542 if (vp->v_iflag & VI_XLOCK) { 543 VI_UNLOCK(vp); 544 continue; 545 } 546 MNT_IUNLOCK(mp); 547 /* 548 * Step 4: invalidate all inactive vnodes. 549 */ 550 if (vp->v_usecount == 0) { 551 vgonel(vp, td); 552 goto loop; 553 } 554 /* 555 * Step 5: invalidate all cached file data. 556 */ 557 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 558 goto loop; 559 } 560 if (vinvalbuf(vp, 0, cred, td, 0, 0)) 561 panic("ext2_reload: dirty2"); 562 /* 563 * Step 6: re-read inode data for all active vnodes. 564 */ 565 ip = VTOI(vp); 566 error = 567 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 568 (int)fs->s_blocksize, NOCRED, &bp); 569 if (error) { 570 VOP_UNLOCK(vp, 0, td); 571 vrele(vp); 572 return (error); 573 } 574 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 575 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 576 brelse(bp); 577 VOP_UNLOCK(vp, 0, td); 578 vrele(vp); 579 MNT_ILOCK(mp); 580 } 581 MNT_IUNLOCK(mp); 582 return (0); 583} 584 585/* 586 * Common code for mount and mountroot 587 */ 588static int 589ext2_mountfs(devvp, mp, td) 590 struct vnode *devvp; 591 struct mount *mp; 592 struct thread *td; 593{ 594 struct ext2mount *ump; 595 struct buf *bp; 596 struct ext2_sb_info *fs; 597 struct ext2_super_block * es; 598 struct cdev *dev = devvp->v_rdev; 599 struct g_consumer *cp; 600 struct bufobj *bo; 601 int error; 602 int ronly; 603 604 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 605 /* XXX: use VOP_ACESS to check FS perms */ 606 DROP_GIANT(); 607 g_topology_lock(); 608 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 609 g_topology_unlock(); 610 PICKUP_GIANT(); 611 VOP_UNLOCK(devvp, 0, td); 612 if (error) 613 return (error); 614 bo = &devvp->v_bufobj; 615 bo->bo_private = cp; 616 bo->bo_ops = g_vfs_bufops; 617 if (devvp->v_rdev->si_iosize_max != 0) 618 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 619 if (mp->mnt_iosize_max > MAXPHYS) 620 mp->mnt_iosize_max = MAXPHYS; 621 622 bp = NULL; 623 ump = NULL; 624 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 625 goto out; 626 es = (struct ext2_super_block *)bp->b_data; 627 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 628 error = EINVAL; /* XXX needs translation */ 629 goto out; 630 } 631 if ((es->s_state & EXT2_VALID_FS) == 0 || 632 (es->s_state & EXT2_ERROR_FS)) { 633 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 634 printf( 635"WARNING: Filesystem was not properly dismounted\n"); 636 } else { 637 printf( 638"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 639 error = EPERM; 640 goto out; 641 } 642 } 643 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 644 bzero((caddr_t)ump, sizeof *ump); 645 /* I don't know whether this is the right strategy. Note that 646 we dynamically allocate both an ext2_sb_info and an ext2_super_block 647 while Linux keeps the super block in a locked buffer 648 */ 649 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 650 M_EXT2MNT, M_WAITOK); 651 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 652 M_EXT2MNT, M_WAITOK); 653 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 654 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 655 goto out; 656 /* 657 * We don't free the group descriptors allocated by compute_sb_data() 658 * until ext2_unmount(). This is OK since the mount will succeed. 659 */ 660 brelse(bp); 661 bp = NULL; 662 fs = ump->um_e2fs; 663 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 664 /* if the fs is not mounted read-only, make sure the super block is 665 always written back on a sync() 666 */ 667 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 668 if (ronly == 0) { 669 fs->s_dirt = 1; /* mark it modified */ 670 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 671 } 672 mp->mnt_data = (qaddr_t)ump; 673 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 674 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 675 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 676 mp->mnt_flag |= MNT_LOCAL; 677 ump->um_mountp = mp; 678 ump->um_dev = dev; 679 ump->um_devvp = devvp; 680 ump->um_bo = &devvp->v_bufobj; 681 ump->um_cp = cp; 682 /* setting those two parameters allowed us to use 683 ufs_bmap w/o changse ! 684 */ 685 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 686 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 687 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 688 if (ronly == 0) 689 ext2_sbupdate(ump, MNT_WAIT); 690 return (0); 691out: 692 if (bp) 693 brelse(bp); 694 if (cp != NULL) { 695 DROP_GIANT(); 696 g_topology_lock(); 697 g_wither_geom_close(cp->geom, ENXIO); 698 g_topology_unlock(); 699 PICKUP_GIANT(); 700 } 701 if (ump) { 702 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 703 bsd_free(ump->um_e2fs, M_EXT2MNT); 704 bsd_free(ump, M_EXT2MNT); 705 mp->mnt_data = (qaddr_t)0; 706 } 707 return (error); 708} 709 710/* 711 * unmount system call 712 */ 713static int 714ext2_unmount(mp, mntflags, td) 715 struct mount *mp; 716 int mntflags; 717 struct thread *td; 718{ 719 struct ext2mount *ump; 720 struct ext2_sb_info *fs; 721 int error, flags, ronly, i; 722 723 flags = 0; 724 if (mntflags & MNT_FORCE) { 725 if (mp->mnt_flag & MNT_ROOTFS) 726 return (EINVAL); 727 flags |= FORCECLOSE; 728 } 729 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 730 return (error); 731 ump = VFSTOEXT2(mp); 732 fs = ump->um_e2fs; 733 ronly = fs->s_rd_only; 734 if (ronly == 0) { 735 if (fs->s_wasvalid) 736 fs->s_es->s_state |= EXT2_VALID_FS; 737 ext2_sbupdate(ump, MNT_WAIT); 738 } 739 740 /* release buffers containing group descriptors */ 741 for(i = 0; i < fs->s_db_per_group; i++) 742 ULCK_BUF(fs->s_group_desc[i]) 743 bsd_free(fs->s_group_desc, M_EXT2MNT); 744 745 /* release cached inode/block bitmaps */ 746 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 747 if (fs->s_inode_bitmap[i]) 748 ULCK_BUF(fs->s_inode_bitmap[i]) 749 750 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 751 if (fs->s_block_bitmap[i]) 752 ULCK_BUF(fs->s_block_bitmap[i]) 753 754 DROP_GIANT(); 755 g_topology_lock(); 756 g_wither_geom_close(ump->um_cp->geom, ENXIO); 757 g_topology_unlock(); 758 PICKUP_GIANT(); 759 vrele(ump->um_devvp); 760 bsd_free(fs->s_es, M_EXT2MNT); 761 bsd_free(fs, M_EXT2MNT); 762 bsd_free(ump, M_EXT2MNT); 763 mp->mnt_data = (qaddr_t)0; 764 mp->mnt_flag &= ~MNT_LOCAL; 765 return (error); 766} 767 768/* 769 * Flush out all the files in a filesystem. 770 */ 771static int 772ext2_flushfiles(mp, flags, td) 773 struct mount *mp; 774 int flags; 775 struct thread *td; 776{ 777 int error; 778 779 error = vflush(mp, 0, flags, td); 780 return (error); 781} 782 783/* 784 * Get file system statistics. 785 * taken from ext2/super.c ext2_statfs 786 */ 787static int 788ext2_statfs(mp, sbp, td) 789 struct mount *mp; 790 struct statfs *sbp; 791 struct thread *td; 792{ 793 unsigned long overhead; 794 struct ext2mount *ump; 795 struct ext2_sb_info *fs; 796 struct ext2_super_block *es; 797 int i, nsb; 798 799 ump = VFSTOEXT2(mp); 800 fs = ump->um_e2fs; 801 es = fs->s_es; 802 803 if (es->s_magic != EXT2_SUPER_MAGIC) 804 panic("ext2_statfs - magic number spoiled"); 805 806 /* 807 * Compute the overhead (FS structures) 808 */ 809 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 810 nsb = 0; 811 for (i = 0 ; i < fs->s_groups_count; i++) 812 if (ext2_group_sparse(i)) 813 nsb++; 814 } else 815 nsb = fs->s_groups_count; 816 overhead = es->s_first_data_block + 817 /* Superblocks and block group descriptors: */ 818 nsb * (1 + fs->s_db_per_group) + 819 /* Inode bitmap, block bitmap, and inode table: */ 820 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 821 822 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 823 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 824 sbp->f_blocks = es->s_blocks_count - overhead; 825 sbp->f_bfree = es->s_free_blocks_count; 826 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 827 sbp->f_files = es->s_inodes_count; 828 sbp->f_ffree = es->s_free_inodes_count; 829 return (0); 830} 831 832/* 833 * Go through the disk queues to initiate sandbagged IO; 834 * go through the inodes to write those that have been modified; 835 * initiate the writing of the super block if it has been modified. 836 * 837 * Note: we are always called with the filesystem marked `MPBUSY'. 838 */ 839static int 840ext2_sync(mp, waitfor, cred, td) 841 struct mount *mp; 842 int waitfor; 843 struct ucred *cred; 844 struct thread *td; 845{ 846 struct vnode *nvp, *vp; 847 struct inode *ip; 848 struct ext2mount *ump = VFSTOEXT2(mp); 849 struct ext2_sb_info *fs; 850 int error, allerror = 0; 851 852 fs = ump->um_e2fs; 853 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 854 printf("fs = %s\n", fs->fs_fsmnt); 855 panic("ext2_sync: rofs mod"); 856 } 857 /* 858 * Write back each (modified) inode. 859 */ 860 MNT_ILOCK(mp); 861loop: 862 MNT_VNODE_FOREACH(vp, mp, nvp) { 863 VI_LOCK(vp); 864 if (vp->v_type == VNON || (vp->v_iflag & VI_XLOCK)) { 865 VI_UNLOCK(vp); 866 continue; 867 } 868 MNT_IUNLOCK(mp); 869 ip = VTOI(vp); 870 if ((ip->i_flag & 871 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 872 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 873 waitfor == MNT_LAZY)) { 874 VI_UNLOCK(vp); 875 MNT_ILOCK(mp); 876 continue; 877 } 878 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 879 if (error) { 880 MNT_ILOCK(mp); 881 if (error == ENOENT) 882 goto loop; 883 continue; 884 } 885 if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) 886 allerror = error; 887 VOP_UNLOCK(vp, 0, td); 888 vrele(vp); 889 MNT_ILOCK(mp); 890 } 891 MNT_IUNLOCK(mp); 892 /* 893 * Force stale file system control information to be flushed. 894 */ 895 if (waitfor != MNT_LAZY) { 896 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 897 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0) 898 allerror = error; 899 VOP_UNLOCK(ump->um_devvp, 0, td); 900 } 901 /* 902 * Write back modified superblock. 903 */ 904 if (fs->s_dirt != 0) { 905 fs->s_dirt = 0; 906 fs->s_es->s_wtime = time_second; 907 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 908 allerror = error; 909 } 910 return (allerror); 911} 912 913/* 914 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 915 * in from disk. If it is in core, wait for the lock bit to clear, then 916 * return the inode locked. Detection and handling of mount points must be 917 * done by the calling routine. 918 */ 919static int 920ext2_vget(mp, ino, flags, vpp) 921 struct mount *mp; 922 ino_t ino; 923 int flags; 924 struct vnode **vpp; 925{ 926 struct ext2_sb_info *fs; 927 struct inode *ip; 928 struct ext2mount *ump; 929 struct buf *bp; 930 struct vnode *vp; 931 struct cdev *dev; 932 int i, error; 933 int used_blocks; 934 935 ump = VFSTOEXT2(mp); 936 dev = ump->um_dev; 937restart: 938 if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0) 939 return (error); 940 if (*vpp != NULL) 941 return (0); 942 943 /* 944 * Lock out the creation of new entries in the FFS hash table in 945 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 946 * may occur! 947 */ 948 if (ext2fs_inode_hash_lock) { 949 while (ext2fs_inode_hash_lock) { 950 ext2fs_inode_hash_lock = -1; 951 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 952 } 953 goto restart; 954 } 955 ext2fs_inode_hash_lock = 1; 956 957 /* 958 * If this MALLOC() is performed after the getnewvnode() 959 * it might block, leaving a vnode with a NULL v_data to be 960 * found by ext2_sync() if a sync happens to fire right then, 961 * which will cause a panic because ext2_sync() blindly 962 * dereferences vp->v_data (as well it should). 963 */ 964 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 965 966 /* Allocate a new vnode/inode. */ 967 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 968 if (ext2fs_inode_hash_lock < 0) 969 wakeup(&ext2fs_inode_hash_lock); 970 ext2fs_inode_hash_lock = 0; 971 *vpp = NULL; 972 FREE(ip, M_EXT2NODE); 973 return (error); 974 } 975 bzero((caddr_t)ip, sizeof(struct inode)); 976 vp->v_data = ip; 977 ip->i_vnode = vp; 978 ip->i_e2fs = fs = ump->um_e2fs; 979 ip->i_dev = dev; 980 ip->i_number = ino; 981 /* 982 * Put it onto its hash chain and lock it so that other requests for 983 * this inode will block if they arrive while we are sleeping waiting 984 * for old data structures to be purged or for the contents of the 985 * disk portion of this inode to be read. 986 */ 987 ext2_ihashins(ip); 988 989 if (ext2fs_inode_hash_lock < 0) 990 wakeup(&ext2fs_inode_hash_lock); 991 ext2fs_inode_hash_lock = 0; 992 993 /* Read in the disk contents for the inode, copy into the inode. */ 994#if 0 995printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 996#endif 997 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 998 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 999 /* 1000 * The inode does not contain anything useful, so it would 1001 * be misleading to leave it on its hash chain. With mode 1002 * still zero, it will be unlinked and returned to the free 1003 * list by vput(). 1004 */ 1005 vput(vp); 1006 brelse(bp); 1007 *vpp = NULL; 1008 return (error); 1009 } 1010 /* convert ext2 inode to dinode */ 1011 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1012 ino_to_fsbo(fs, ino)), ip); 1013 ip->i_block_group = ino_to_cg(fs, ino); 1014 ip->i_next_alloc_block = 0; 1015 ip->i_next_alloc_goal = 0; 1016 ip->i_prealloc_count = 0; 1017 ip->i_prealloc_block = 0; 1018 /* now we want to make sure that block pointers for unused 1019 blocks are zeroed out - ext2_balloc depends on this 1020 although for regular files and directories only 1021 */ 1022 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1023 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1024 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1025 ip->i_db[i] = 0; 1026 } 1027/* 1028 ext2_print_inode(ip); 1029*/ 1030 brelse(bp); 1031 1032 /* 1033 * Initialize the vnode from the inode, check for aliases. 1034 * Note that the underlying vnode may have changed. 1035 */ 1036 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1037 vput(vp); 1038 *vpp = NULL; 1039 return (error); 1040 } 1041 /* 1042 * Finish inode initialization now that aliasing has been resolved. 1043 */ 1044 ip->i_devvp = ump->um_devvp; 1045 VREF(ip->i_devvp); 1046 /* 1047 * Set up a generation number for this inode if it does not 1048 * already have one. This should only happen on old filesystems. 1049 */ 1050 if (ip->i_gen == 0) { 1051 ip->i_gen = random() / 2 + 1; 1052 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1053 ip->i_flag |= IN_MODIFIED; 1054 } 1055 *vpp = vp; 1056 return (0); 1057} 1058 1059/* 1060 * File handle to vnode 1061 * 1062 * Have to be really careful about stale file handles: 1063 * - check that the inode number is valid 1064 * - call ext2_vget() to get the locked inode 1065 * - check for an unallocated inode (i_mode == 0) 1066 * - check that the given client host has export rights and return 1067 * those rights via. exflagsp and credanonp 1068 */ 1069static int 1070ext2_fhtovp(mp, fhp, vpp) 1071 struct mount *mp; 1072 struct fid *fhp; 1073 struct vnode **vpp; 1074{ 1075 struct inode *ip; 1076 struct ufid *ufhp; 1077 struct vnode *nvp; 1078 struct ext2_sb_info *fs; 1079 int error; 1080 1081 ufhp = (struct ufid *)fhp; 1082 fs = VFSTOEXT2(mp)->um_e2fs; 1083 if (ufhp->ufid_ino < ROOTINO || 1084 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1085 return (ESTALE); 1086 1087 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1088 if (error) { 1089 *vpp = NULLVP; 1090 return (error); 1091 } 1092 ip = VTOI(nvp); 1093 if (ip->i_mode == 0 || 1094 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1095 vput(nvp); 1096 *vpp = NULLVP; 1097 return (ESTALE); 1098 } 1099 *vpp = nvp; 1100 return (0); 1101} 1102 1103/* 1104 * Vnode pointer to File handle 1105 */ 1106/* ARGSUSED */ 1107static int 1108ext2_vptofh(vp, fhp) 1109 struct vnode *vp; 1110 struct fid *fhp; 1111{ 1112 struct inode *ip; 1113 struct ufid *ufhp; 1114 1115 ip = VTOI(vp); 1116 ufhp = (struct ufid *)fhp; 1117 ufhp->ufid_len = sizeof(struct ufid); 1118 ufhp->ufid_ino = ip->i_number; 1119 ufhp->ufid_gen = ip->i_gen; 1120 return (0); 1121} 1122 1123/* 1124 * Write a superblock and associated information back to disk. 1125 */ 1126static int 1127ext2_sbupdate(mp, waitfor) 1128 struct ext2mount *mp; 1129 int waitfor; 1130{ 1131 struct ext2_sb_info *fs = mp->um_e2fs; 1132 struct ext2_super_block *es = fs->s_es; 1133 struct buf *bp; 1134 int error = 0; 1135/* 1136printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1137*/ 1138 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1139 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1140 if (waitfor == MNT_WAIT) 1141 error = bwrite(bp); 1142 else 1143 bawrite(bp); 1144 1145 /* 1146 * The buffers for group descriptors, inode bitmaps and block bitmaps 1147 * are not busy at this point and are (hopefully) written by the 1148 * usual sync mechanism. No need to write them here 1149 */ 1150 1151 return (error); 1152} 1153 1154/* 1155 * Return the root of a filesystem. 1156 */ 1157static int 1158ext2_root(mp, vpp, td) 1159 struct mount *mp; 1160 struct vnode **vpp; 1161 struct thread *td; 1162{ 1163 struct vnode *nvp; 1164 int error; 1165 1166 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1167 if (error) 1168 return (error); 1169 *vpp = nvp; 1170 return (0); 1171} 1172 1173static int 1174ext2_init(struct vfsconf *vfsp) 1175{ 1176 1177 ext2_ihashinit(); 1178 return (0); 1179} 1180 1181static int 1182ext2_uninit(struct vfsconf *vfsp) 1183{ 1184 1185 ext2_ihashuninit(); 1186 return (0); 1187} 1188