ext2_vfsops.c revision 138368
1/* 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 138368 2004-12-04 09:58:20Z phk $ 37 */ 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/namei.h> 42#include <sys/proc.h> 43#include <sys/kernel.h> 44#include <sys/vnode.h> 45#include <sys/mount.h> 46#include <sys/bio.h> 47#include <sys/buf.h> 48#include <sys/conf.h> 49#include <sys/fcntl.h> 50#include <sys/malloc.h> 51#include <sys/stat.h> 52#include <sys/mutex.h> 53 54#include <geom/geom.h> 55#include <geom/geom_vfs.h> 56 57#include <gnu/ext2fs/ext2_mount.h> 58#include <gnu/ext2fs/inode.h> 59 60#include <gnu/ext2fs/fs.h> 61#include <gnu/ext2fs/ext2_extern.h> 62#include <gnu/ext2fs/ext2_fs.h> 63#include <gnu/ext2fs/ext2_fs_sb.h> 64 65static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 66static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 67static int ext2_reload(struct mount *mp, struct ucred *cred, struct thread *td); 68static int ext2_sbupdate(struct ext2mount *, int); 69 70static vfs_unmount_t ext2_unmount; 71static vfs_root_t ext2_root; 72static vfs_statfs_t ext2_statfs; 73static vfs_sync_t ext2_sync; 74static vfs_vget_t ext2_vget; 75static vfs_fhtovp_t ext2_fhtovp; 76static vfs_vptofh_t ext2_vptofh; 77static vfs_init_t ext2_init; 78static vfs_uninit_t ext2_uninit; 79static vfs_mount_t ext2_mount; 80 81MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 82static MALLOC_DEFINE(M_EXT2MNT, "EXT2 mount", "EXT2 mount structure"); 83 84static struct vfsops ext2fs_vfsops = { 85 .vfs_fhtovp = ext2_fhtovp, 86 .vfs_init = ext2_init, 87 .vfs_mount = ext2_mount, 88 .vfs_root = ext2_root, /* root inode via vget */ 89 .vfs_statfs = ext2_statfs, 90 .vfs_sync = ext2_sync, 91 .vfs_uninit = ext2_uninit, 92 .vfs_unmount = ext2_unmount, 93 .vfs_vget = ext2_vget, 94 .vfs_vptofh = ext2_vptofh, 95}; 96 97VFS_SET(ext2fs_vfsops, ext2fs, 0); 98 99#define bsd_malloc malloc 100#define bsd_free free 101 102static int ext2fs_inode_hash_lock; 103 104static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 105 int ronly); 106static int compute_sb_data(struct vnode * devvp, 107 struct ext2_super_block * es, struct ext2_sb_info * fs); 108 109/* 110 * VFS Operations. 111 * 112 * mount system call 113 */ 114static int 115ext2_mount(mp, td) 116 struct mount *mp; 117 struct thread *td; 118{ 119 struct export_args *export; 120 struct vfsoptlist *opts; 121 struct vnode *devvp; 122 struct ext2mount *ump = 0; 123 struct ext2_sb_info *fs; 124 char *path, *fspec; 125 size_t size; 126 int error, flags, len; 127 mode_t accessmode; 128 struct nameidata nd, *ndp = &nd; 129 130 opts = mp->mnt_optnew; 131 132 vfs_getopt(opts, "fspath", (void **)&path, NULL); 133 /* Double-check the length of path.. */ 134 if (strlen(path) >= MAXMNTLEN - 1) 135 return (ENAMETOOLONG); 136 137 fspec = NULL; 138 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 139 if (!error && fspec[len - 1] != '\0') 140 return (EINVAL); 141 142 /* 143 * If updating, check whether changing from read-only to 144 * read/write; if there is no device name, that's all we do. 145 */ 146 if (mp->mnt_flag & MNT_UPDATE) { 147 ump = VFSTOEXT2(mp); 148 fs = ump->um_e2fs; 149 error = 0; 150 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 151 error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td); 152 if (error) 153 return (error); 154 flags = WRITECLOSE; 155 if (mp->mnt_flag & MNT_FORCE) 156 flags |= FORCECLOSE; 157 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 158 return (EBUSY); 159 error = ext2_flushfiles(mp, flags, td); 160 vfs_unbusy(mp, td); 161 if (!error && fs->s_wasvalid) { 162 fs->s_es->s_state |= EXT2_VALID_FS; 163 ext2_sbupdate(ump, MNT_WAIT); 164 } 165 fs->s_rd_only = 1; 166 DROP_GIANT(); 167 g_topology_lock(); 168 g_access(ump->um_cp, 0, -1, 0); 169 g_topology_unlock(); 170 PICKUP_GIANT(); 171 } 172 if (!error && (mp->mnt_flag & MNT_RELOAD)) 173 error = ext2_reload(mp, td->td_ucred, td); 174 if (error) 175 return (error); 176 devvp = ump->um_devvp; 177 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 178 (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) 179 return (EPERM); 180 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 181 /* 182 * If upgrade to read-write by non-root, then verify 183 * that user has necessary permissions on the device. 184 */ 185 if (suser(td)) { 186 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 187 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 188 td->td_ucred, td)) != 0) { 189 VOP_UNLOCK(devvp, 0, td); 190 return (error); 191 } 192 VOP_UNLOCK(devvp, 0, td); 193 } 194 DROP_GIANT(); 195 g_topology_lock(); 196 error = g_access(ump->um_cp, 0, 1, 0); 197 g_topology_unlock(); 198 PICKUP_GIANT(); 199 if (error) 200 return (error); 201 202 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 203 (fs->s_es->s_state & EXT2_ERROR_FS)) { 204 if (mp->mnt_flag & MNT_FORCE) { 205 printf( 206"WARNING: %s was not properly dismounted\n", 207 fs->fs_fsmnt); 208 } else { 209 printf( 210"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 211 fs->fs_fsmnt); 212 return (EPERM); 213 } 214 } 215 fs->s_es->s_state &= ~EXT2_VALID_FS; 216 ext2_sbupdate(ump, MNT_WAIT); 217 fs->s_rd_only = 0; 218 } 219 if (fspec == NULL) { 220 error = vfs_getopt(opts, "export", (void **)&export, 221 &len); 222 if (error || len != sizeof(struct export_args)) 223 return (EINVAL); 224 /* Process export requests. */ 225 return (vfs_export(mp, export)); 226 } 227 } 228 /* 229 * Not an update, or updating the name: look up the name 230 * and verify that it refers to a sensible disk device. 231 */ 232 if (fspec == NULL) 233 return (EINVAL); 234 NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); 235 if ((error = namei(ndp)) != 0) 236 return (error); 237 NDFREE(ndp, NDF_ONLY_PNBUF); 238 devvp = ndp->ni_vp; 239 240 if (!vn_isdisk(devvp, &error)) { 241 vrele(devvp); 242 return (error); 243 } 244 245 /* 246 * If mount by non-root, then verify that user has necessary 247 * permissions on the device. 248 */ 249 if (suser(td)) { 250 accessmode = VREAD; 251 if ((mp->mnt_flag & MNT_RDONLY) == 0) 252 accessmode |= VWRITE; 253 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 254 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 255 vput(devvp); 256 return (error); 257 } 258 VOP_UNLOCK(devvp, 0, td); 259 } 260 261 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 262 error = ext2_mountfs(devvp, mp, td); 263 } else { 264 if (devvp != ump->um_devvp) 265 error = EINVAL; /* needs translation */ 266 else 267 vrele(devvp); 268 } 269 if (error) { 270 vrele(devvp); 271 return (error); 272 } 273 ump = VFSTOEXT2(mp); 274 fs = ump->um_e2fs; 275 /* 276 * Note that this strncpy() is ok because of a check at the start 277 * of ext2_mount(). 278 */ 279 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 280 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 281 (void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 282 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 283 (void)ext2_statfs(mp, &mp->mnt_stat, td); 284 return (0); 285} 286 287/* 288 * checks that the data in the descriptor blocks make sense 289 * this is taken from ext2/super.c 290 */ 291static int ext2_check_descriptors (struct ext2_sb_info * sb) 292{ 293 int i; 294 int desc_block = 0; 295 unsigned long block = sb->s_es->s_first_data_block; 296 struct ext2_group_desc * gdp = NULL; 297 298 /* ext2_debug ("Checking group descriptors"); */ 299 300 for (i = 0; i < sb->s_groups_count; i++) 301 { 302 /* examine next descriptor block */ 303 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 304 gdp = (struct ext2_group_desc *) 305 sb->s_group_desc[desc_block++]->b_data; 306 if (gdp->bg_block_bitmap < block || 307 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 308 { 309 printf ("ext2_check_descriptors: " 310 "Block bitmap for group %d" 311 " not in group (block %lu)!\n", 312 i, (unsigned long) gdp->bg_block_bitmap); 313 return 0; 314 } 315 if (gdp->bg_inode_bitmap < block || 316 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 317 { 318 printf ("ext2_check_descriptors: " 319 "Inode bitmap for group %d" 320 " not in group (block %lu)!\n", 321 i, (unsigned long) gdp->bg_inode_bitmap); 322 return 0; 323 } 324 if (gdp->bg_inode_table < block || 325 gdp->bg_inode_table + sb->s_itb_per_group >= 326 block + EXT2_BLOCKS_PER_GROUP(sb)) 327 { 328 printf ("ext2_check_descriptors: " 329 "Inode table for group %d" 330 " not in group (block %lu)!\n", 331 i, (unsigned long) gdp->bg_inode_table); 332 return 0; 333 } 334 block += EXT2_BLOCKS_PER_GROUP(sb); 335 gdp++; 336 } 337 return 1; 338} 339 340static int 341ext2_check_sb_compat(es, dev, ronly) 342 struct ext2_super_block *es; 343 struct cdev *dev; 344 int ronly; 345{ 346 347 if (es->s_magic != EXT2_SUPER_MAGIC) { 348 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 349 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 350 return (1); 351 } 352 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 353 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 354 printf( 355"WARNING: mount of %s denied due to unsupported optional features\n", 356 devtoname(dev)); 357 return (1); 358 } 359 if (!ronly && 360 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 361 printf( 362"WARNING: R/W mount of %s denied due to unsupported optional features\n", 363 devtoname(dev)); 364 return (1); 365 } 366 } 367 return (0); 368} 369 370/* 371 * this computes the fields of the ext2_sb_info structure from the 372 * data in the ext2_super_block structure read in 373 */ 374static int compute_sb_data(devvp, es, fs) 375 struct vnode * devvp; 376 struct ext2_super_block * es; 377 struct ext2_sb_info * fs; 378{ 379 int db_count, error; 380 int i, j; 381 int logic_sb_block = 1; /* XXX for now */ 382 383#if 1 384#define V(v) 385#else 386#define V(v) printf(#v"= %d\n", fs->v); 387#endif 388 389 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 390 V(s_blocksize) 391 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 392 V(s_bshift) 393 fs->s_fsbtodb = es->s_log_block_size + 1; 394 V(s_fsbtodb) 395 fs->s_qbmask = fs->s_blocksize - 1; 396 V(s_bmask) 397 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 398 V(s_blocksize_bits) 399 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 400 V(s_frag_size) 401 if (fs->s_frag_size) 402 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 403 V(s_frags_per_block) 404 fs->s_blocks_per_group = es->s_blocks_per_group; 405 V(s_blocks_per_group) 406 fs->s_frags_per_group = es->s_frags_per_group; 407 V(s_frags_per_group) 408 fs->s_inodes_per_group = es->s_inodes_per_group; 409 V(s_inodes_per_group) 410 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 411 V(s_inodes_per_block) 412 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 413 V(s_itb_per_group) 414 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 415 V(s_desc_per_block) 416 /* s_resuid / s_resgid ? */ 417 fs->s_groups_count = (es->s_blocks_count - 418 es->s_first_data_block + 419 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 420 EXT2_BLOCKS_PER_GROUP(fs); 421 V(s_groups_count) 422 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 423 EXT2_DESC_PER_BLOCK(fs); 424 fs->s_db_per_group = db_count; 425 V(s_db_per_group) 426 427 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 428 M_EXT2MNT, M_WAITOK); 429 430 /* adjust logic_sb_block */ 431 if(fs->s_blocksize > SBSIZE) 432 /* Godmar thinks: if the blocksize is greater than 1024, then 433 the superblock is logically part of block zero. 434 */ 435 logic_sb_block = 0; 436 437 for (i = 0; i < db_count; i++) { 438 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 439 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 440 if(error) { 441 for (j = 0; j < i; j++) 442 brelse(fs->s_group_desc[j]); 443 bsd_free(fs->s_group_desc, M_EXT2MNT); 444 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 445 return EIO; 446 } 447 LCK_BUF(fs->s_group_desc[i]) 448 } 449 if(!ext2_check_descriptors(fs)) { 450 for (j = 0; j < db_count; j++) 451 ULCK_BUF(fs->s_group_desc[j]) 452 bsd_free(fs->s_group_desc, M_EXT2MNT); 453 printf("EXT2-fs: (ext2_check_descriptors failure) " 454 "unable to read group descriptors\n"); 455 return EIO; 456 } 457 458 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 459 fs->s_inode_bitmap_number[i] = 0; 460 fs->s_inode_bitmap[i] = NULL; 461 fs->s_block_bitmap_number[i] = 0; 462 fs->s_block_bitmap[i] = NULL; 463 } 464 fs->s_loaded_inode_bitmaps = 0; 465 fs->s_loaded_block_bitmaps = 0; 466 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 467 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 468 fs->fs_maxfilesize = 0x7fffffff; 469 else 470 fs->fs_maxfilesize = 0x7fffffffffffffff; 471 return 0; 472} 473 474/* 475 * Reload all incore data for a filesystem (used after running fsck on 476 * the root filesystem and finding things to fix). The filesystem must 477 * be mounted read-only. 478 * 479 * Things to do to update the mount: 480 * 1) invalidate all cached meta-data. 481 * 2) re-read superblock from disk. 482 * 3) re-read summary information from disk. 483 * 4) invalidate all inactive vnodes. 484 * 5) invalidate all cached file data. 485 * 6) re-read inode data for all active vnodes. 486 */ 487static int 488ext2_reload(mp, cred, td) 489 struct mount *mp; 490 struct ucred *cred; 491 struct thread *td; 492{ 493 struct vnode *vp, *nvp, *devvp; 494 struct inode *ip; 495 struct buf *bp; 496 struct ext2_super_block * es; 497 struct ext2_sb_info *fs; 498 int error; 499 500 if ((mp->mnt_flag & MNT_RDONLY) == 0) 501 return (EINVAL); 502 /* 503 * Step 1: invalidate all cached meta-data. 504 */ 505 devvp = VFSTOEXT2(mp)->um_devvp; 506 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 507 if (vinvalbuf(devvp, 0, cred, td, 0, 0) != 0) 508 panic("ext2_reload: dirty1"); 509 VOP_UNLOCK(devvp, 0, td); 510 511 /* 512 * Step 2: re-read superblock from disk. 513 * constants have been adjusted for ext2 514 */ 515 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 516 return (error); 517 es = (struct ext2_super_block *)bp->b_data; 518 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 519 brelse(bp); 520 return (EIO); /* XXX needs translation */ 521 } 522 fs = VFSTOEXT2(mp)->um_e2fs; 523 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 524 525 if((error = compute_sb_data(devvp, es, fs)) != 0) { 526 brelse(bp); 527 return error; 528 } 529#ifdef UNKLAR 530 if (fs->fs_sbsize < SBSIZE) 531 bp->b_flags |= B_INVAL; 532#endif 533 brelse(bp); 534 535loop: 536 MNT_ILOCK(mp); 537 MNT_VNODE_FOREACH(vp, mp, nvp) { 538 VI_LOCK(vp); 539 if (vp->v_iflag & VI_XLOCK) { 540 VI_UNLOCK(vp); 541 continue; 542 } 543 MNT_IUNLOCK(mp); 544 /* 545 * Step 4: invalidate all inactive vnodes. 546 */ 547 if (vp->v_usecount == 0) { 548 vgonel(vp, td); 549 goto loop; 550 } 551 /* 552 * Step 5: invalidate all cached file data. 553 */ 554 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 555 goto loop; 556 } 557 if (vinvalbuf(vp, 0, cred, td, 0, 0)) 558 panic("ext2_reload: dirty2"); 559 /* 560 * Step 6: re-read inode data for all active vnodes. 561 */ 562 ip = VTOI(vp); 563 error = 564 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 565 (int)fs->s_blocksize, NOCRED, &bp); 566 if (error) { 567 VOP_UNLOCK(vp, 0, td); 568 vrele(vp); 569 return (error); 570 } 571 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 572 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 573 brelse(bp); 574 VOP_UNLOCK(vp, 0, td); 575 vrele(vp); 576 MNT_ILOCK(mp); 577 } 578 MNT_IUNLOCK(mp); 579 return (0); 580} 581 582/* 583 * Common code for mount and mountroot 584 */ 585static int 586ext2_mountfs(devvp, mp, td) 587 struct vnode *devvp; 588 struct mount *mp; 589 struct thread *td; 590{ 591 struct ext2mount *ump; 592 struct buf *bp; 593 struct ext2_sb_info *fs; 594 struct ext2_super_block * es; 595 struct cdev *dev = devvp->v_rdev; 596 struct g_consumer *cp; 597 struct bufobj *bo; 598 int error; 599 int ronly; 600 601 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 602 /* XXX: use VOP_ACESS to check FS perms */ 603 DROP_GIANT(); 604 g_topology_lock(); 605 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 606 g_topology_unlock(); 607 PICKUP_GIANT(); 608 VOP_UNLOCK(devvp, 0, td); 609 if (error) 610 return (error); 611 bo = &devvp->v_bufobj; 612 bo->bo_private = cp; 613 bo->bo_ops = g_vfs_bufops; 614 if (devvp->v_rdev->si_iosize_max != 0) 615 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 616 if (mp->mnt_iosize_max > MAXPHYS) 617 mp->mnt_iosize_max = MAXPHYS; 618 619 bp = NULL; 620 ump = NULL; 621 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 622 goto out; 623 es = (struct ext2_super_block *)bp->b_data; 624 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 625 error = EINVAL; /* XXX needs translation */ 626 goto out; 627 } 628 if ((es->s_state & EXT2_VALID_FS) == 0 || 629 (es->s_state & EXT2_ERROR_FS)) { 630 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 631 printf( 632"WARNING: Filesystem was not properly dismounted\n"); 633 } else { 634 printf( 635"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 636 error = EPERM; 637 goto out; 638 } 639 } 640 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 641 bzero((caddr_t)ump, sizeof *ump); 642 /* I don't know whether this is the right strategy. Note that 643 we dynamically allocate both an ext2_sb_info and an ext2_super_block 644 while Linux keeps the super block in a locked buffer 645 */ 646 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 647 M_EXT2MNT, M_WAITOK); 648 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 649 M_EXT2MNT, M_WAITOK); 650 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 651 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 652 goto out; 653 /* 654 * We don't free the group descriptors allocated by compute_sb_data() 655 * until ext2_unmount(). This is OK since the mount will succeed. 656 */ 657 brelse(bp); 658 bp = NULL; 659 fs = ump->um_e2fs; 660 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 661 /* if the fs is not mounted read-only, make sure the super block is 662 always written back on a sync() 663 */ 664 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 665 if (ronly == 0) { 666 fs->s_dirt = 1; /* mark it modified */ 667 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 668 } 669 mp->mnt_data = (qaddr_t)ump; 670 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 671 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 672 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 673 mp->mnt_flag |= MNT_LOCAL; 674 ump->um_mountp = mp; 675 ump->um_dev = dev; 676 ump->um_devvp = devvp; 677 ump->um_bo = &devvp->v_bufobj; 678 ump->um_cp = cp; 679 /* setting those two parameters allowed us to use 680 ufs_bmap w/o changse ! 681 */ 682 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 683 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 684 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 685 if (ronly == 0) 686 ext2_sbupdate(ump, MNT_WAIT); 687 return (0); 688out: 689 if (bp) 690 brelse(bp); 691 if (cp != NULL) { 692 DROP_GIANT(); 693 g_topology_lock(); 694 g_wither_geom_close(cp->geom, ENXIO); 695 g_topology_unlock(); 696 PICKUP_GIANT(); 697 } 698 if (ump) { 699 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 700 bsd_free(ump->um_e2fs, M_EXT2MNT); 701 bsd_free(ump, M_EXT2MNT); 702 mp->mnt_data = (qaddr_t)0; 703 } 704 return (error); 705} 706 707/* 708 * unmount system call 709 */ 710static int 711ext2_unmount(mp, mntflags, td) 712 struct mount *mp; 713 int mntflags; 714 struct thread *td; 715{ 716 struct ext2mount *ump; 717 struct ext2_sb_info *fs; 718 int error, flags, ronly, i; 719 720 flags = 0; 721 if (mntflags & MNT_FORCE) { 722 if (mp->mnt_flag & MNT_ROOTFS) 723 return (EINVAL); 724 flags |= FORCECLOSE; 725 } 726 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 727 return (error); 728 ump = VFSTOEXT2(mp); 729 fs = ump->um_e2fs; 730 ronly = fs->s_rd_only; 731 if (ronly == 0) { 732 if (fs->s_wasvalid) 733 fs->s_es->s_state |= EXT2_VALID_FS; 734 ext2_sbupdate(ump, MNT_WAIT); 735 } 736 737 /* release buffers containing group descriptors */ 738 for(i = 0; i < fs->s_db_per_group; i++) 739 ULCK_BUF(fs->s_group_desc[i]) 740 bsd_free(fs->s_group_desc, M_EXT2MNT); 741 742 /* release cached inode/block bitmaps */ 743 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 744 if (fs->s_inode_bitmap[i]) 745 ULCK_BUF(fs->s_inode_bitmap[i]) 746 747 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 748 if (fs->s_block_bitmap[i]) 749 ULCK_BUF(fs->s_block_bitmap[i]) 750 751 DROP_GIANT(); 752 g_topology_lock(); 753 g_wither_geom_close(ump->um_cp->geom, ENXIO); 754 g_topology_unlock(); 755 PICKUP_GIANT(); 756 vrele(ump->um_devvp); 757 bsd_free(fs->s_es, M_EXT2MNT); 758 bsd_free(fs, M_EXT2MNT); 759 bsd_free(ump, M_EXT2MNT); 760 mp->mnt_data = (qaddr_t)0; 761 mp->mnt_flag &= ~MNT_LOCAL; 762 return (error); 763} 764 765/* 766 * Flush out all the files in a filesystem. 767 */ 768static int 769ext2_flushfiles(mp, flags, td) 770 struct mount *mp; 771 int flags; 772 struct thread *td; 773{ 774 int error; 775 776 error = vflush(mp, 0, flags, td); 777 return (error); 778} 779 780/* 781 * Get file system statistics. 782 * taken from ext2/super.c ext2_statfs 783 */ 784static int 785ext2_statfs(mp, sbp, td) 786 struct mount *mp; 787 struct statfs *sbp; 788 struct thread *td; 789{ 790 unsigned long overhead; 791 struct ext2mount *ump; 792 struct ext2_sb_info *fs; 793 struct ext2_super_block *es; 794 int i, nsb; 795 796 ump = VFSTOEXT2(mp); 797 fs = ump->um_e2fs; 798 es = fs->s_es; 799 800 if (es->s_magic != EXT2_SUPER_MAGIC) 801 panic("ext2_statfs - magic number spoiled"); 802 803 /* 804 * Compute the overhead (FS structures) 805 */ 806 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 807 nsb = 0; 808 for (i = 0 ; i < fs->s_groups_count; i++) 809 if (ext2_group_sparse(i)) 810 nsb++; 811 } else 812 nsb = fs->s_groups_count; 813 overhead = es->s_first_data_block + 814 /* Superblocks and block group descriptors: */ 815 nsb * (1 + fs->s_db_per_group) + 816 /* Inode bitmap, block bitmap, and inode table: */ 817 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 818 819 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 820 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 821 sbp->f_blocks = es->s_blocks_count - overhead; 822 sbp->f_bfree = es->s_free_blocks_count; 823 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 824 sbp->f_files = es->s_inodes_count; 825 sbp->f_ffree = es->s_free_inodes_count; 826 if (sbp != &mp->mnt_stat) { 827 sbp->f_type = mp->mnt_vfc->vfc_typenum; 828 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 829 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 830 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 831 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 832 } 833 return (0); 834} 835 836/* 837 * Go through the disk queues to initiate sandbagged IO; 838 * go through the inodes to write those that have been modified; 839 * initiate the writing of the super block if it has been modified. 840 * 841 * Note: we are always called with the filesystem marked `MPBUSY'. 842 */ 843static int 844ext2_sync(mp, waitfor, cred, td) 845 struct mount *mp; 846 int waitfor; 847 struct ucred *cred; 848 struct thread *td; 849{ 850 struct vnode *nvp, *vp; 851 struct inode *ip; 852 struct ext2mount *ump = VFSTOEXT2(mp); 853 struct ext2_sb_info *fs; 854 int error, allerror = 0; 855 856 fs = ump->um_e2fs; 857 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 858 printf("fs = %s\n", fs->fs_fsmnt); 859 panic("ext2_sync: rofs mod"); 860 } 861 /* 862 * Write back each (modified) inode. 863 */ 864 MNT_ILOCK(mp); 865loop: 866 MNT_VNODE_FOREACH(vp, mp, nvp) { 867 VI_LOCK(vp); 868 if (vp->v_type == VNON || (vp->v_iflag & VI_XLOCK)) { 869 VI_UNLOCK(vp); 870 continue; 871 } 872 MNT_IUNLOCK(mp); 873 ip = VTOI(vp); 874 if ((ip->i_flag & 875 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 876 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 877 waitfor == MNT_LAZY)) { 878 VI_UNLOCK(vp); 879 MNT_ILOCK(mp); 880 continue; 881 } 882 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 883 if (error) { 884 MNT_ILOCK(mp); 885 if (error == ENOENT) 886 goto loop; 887 continue; 888 } 889 if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) 890 allerror = error; 891 VOP_UNLOCK(vp, 0, td); 892 vrele(vp); 893 MNT_ILOCK(mp); 894 } 895 MNT_IUNLOCK(mp); 896 /* 897 * Force stale file system control information to be flushed. 898 */ 899 if (waitfor != MNT_LAZY) { 900 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 901 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0) 902 allerror = error; 903 VOP_UNLOCK(ump->um_devvp, 0, td); 904 } 905 /* 906 * Write back modified superblock. 907 */ 908 if (fs->s_dirt != 0) { 909 fs->s_dirt = 0; 910 fs->s_es->s_wtime = time_second; 911 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 912 allerror = error; 913 } 914 return (allerror); 915} 916 917/* 918 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 919 * in from disk. If it is in core, wait for the lock bit to clear, then 920 * return the inode locked. Detection and handling of mount points must be 921 * done by the calling routine. 922 */ 923static int 924ext2_vget(mp, ino, flags, vpp) 925 struct mount *mp; 926 ino_t ino; 927 int flags; 928 struct vnode **vpp; 929{ 930 struct ext2_sb_info *fs; 931 struct inode *ip; 932 struct ext2mount *ump; 933 struct buf *bp; 934 struct vnode *vp; 935 struct cdev *dev; 936 int i, error; 937 int used_blocks; 938 939 ump = VFSTOEXT2(mp); 940 dev = ump->um_dev; 941restart: 942 if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0) 943 return (error); 944 if (*vpp != NULL) 945 return (0); 946 947 /* 948 * Lock out the creation of new entries in the FFS hash table in 949 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 950 * may occur! 951 */ 952 if (ext2fs_inode_hash_lock) { 953 while (ext2fs_inode_hash_lock) { 954 ext2fs_inode_hash_lock = -1; 955 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 956 } 957 goto restart; 958 } 959 ext2fs_inode_hash_lock = 1; 960 961 /* 962 * If this MALLOC() is performed after the getnewvnode() 963 * it might block, leaving a vnode with a NULL v_data to be 964 * found by ext2_sync() if a sync happens to fire right then, 965 * which will cause a panic because ext2_sync() blindly 966 * dereferences vp->v_data (as well it should). 967 */ 968 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 969 970 /* Allocate a new vnode/inode. */ 971 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 972 if (ext2fs_inode_hash_lock < 0) 973 wakeup(&ext2fs_inode_hash_lock); 974 ext2fs_inode_hash_lock = 0; 975 *vpp = NULL; 976 FREE(ip, M_EXT2NODE); 977 return (error); 978 } 979 bzero((caddr_t)ip, sizeof(struct inode)); 980 vp->v_data = ip; 981 ip->i_vnode = vp; 982 ip->i_e2fs = fs = ump->um_e2fs; 983 ip->i_dev = dev; 984 ip->i_number = ino; 985 /* 986 * Put it onto its hash chain and lock it so that other requests for 987 * this inode will block if they arrive while we are sleeping waiting 988 * for old data structures to be purged or for the contents of the 989 * disk portion of this inode to be read. 990 */ 991 ext2_ihashins(ip); 992 993 if (ext2fs_inode_hash_lock < 0) 994 wakeup(&ext2fs_inode_hash_lock); 995 ext2fs_inode_hash_lock = 0; 996 997 /* Read in the disk contents for the inode, copy into the inode. */ 998#if 0 999printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1000#endif 1001 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1002 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1003 /* 1004 * The inode does not contain anything useful, so it would 1005 * be misleading to leave it on its hash chain. With mode 1006 * still zero, it will be unlinked and returned to the free 1007 * list by vput(). 1008 */ 1009 vput(vp); 1010 brelse(bp); 1011 *vpp = NULL; 1012 return (error); 1013 } 1014 /* convert ext2 inode to dinode */ 1015 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1016 ino_to_fsbo(fs, ino)), ip); 1017 ip->i_block_group = ino_to_cg(fs, ino); 1018 ip->i_next_alloc_block = 0; 1019 ip->i_next_alloc_goal = 0; 1020 ip->i_prealloc_count = 0; 1021 ip->i_prealloc_block = 0; 1022 /* now we want to make sure that block pointers for unused 1023 blocks are zeroed out - ext2_balloc depends on this 1024 although for regular files and directories only 1025 */ 1026 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1027 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1028 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1029 ip->i_db[i] = 0; 1030 } 1031/* 1032 ext2_print_inode(ip); 1033*/ 1034 brelse(bp); 1035 1036 /* 1037 * Initialize the vnode from the inode, check for aliases. 1038 * Note that the underlying vnode may have changed. 1039 */ 1040 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1041 vput(vp); 1042 *vpp = NULL; 1043 return (error); 1044 } 1045 /* 1046 * Finish inode initialization now that aliasing has been resolved. 1047 */ 1048 ip->i_devvp = ump->um_devvp; 1049 VREF(ip->i_devvp); 1050 /* 1051 * Set up a generation number for this inode if it does not 1052 * already have one. This should only happen on old filesystems. 1053 */ 1054 if (ip->i_gen == 0) { 1055 ip->i_gen = random() / 2 + 1; 1056 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1057 ip->i_flag |= IN_MODIFIED; 1058 } 1059 *vpp = vp; 1060 return (0); 1061} 1062 1063/* 1064 * File handle to vnode 1065 * 1066 * Have to be really careful about stale file handles: 1067 * - check that the inode number is valid 1068 * - call ext2_vget() to get the locked inode 1069 * - check for an unallocated inode (i_mode == 0) 1070 * - check that the given client host has export rights and return 1071 * those rights via. exflagsp and credanonp 1072 */ 1073static int 1074ext2_fhtovp(mp, fhp, vpp) 1075 struct mount *mp; 1076 struct fid *fhp; 1077 struct vnode **vpp; 1078{ 1079 struct inode *ip; 1080 struct ufid *ufhp; 1081 struct vnode *nvp; 1082 struct ext2_sb_info *fs; 1083 int error; 1084 1085 ufhp = (struct ufid *)fhp; 1086 fs = VFSTOEXT2(mp)->um_e2fs; 1087 if (ufhp->ufid_ino < ROOTINO || 1088 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1089 return (ESTALE); 1090 1091 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1092 if (error) { 1093 *vpp = NULLVP; 1094 return (error); 1095 } 1096 ip = VTOI(nvp); 1097 if (ip->i_mode == 0 || 1098 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1099 vput(nvp); 1100 *vpp = NULLVP; 1101 return (ESTALE); 1102 } 1103 *vpp = nvp; 1104 return (0); 1105} 1106 1107/* 1108 * Vnode pointer to File handle 1109 */ 1110/* ARGSUSED */ 1111static int 1112ext2_vptofh(vp, fhp) 1113 struct vnode *vp; 1114 struct fid *fhp; 1115{ 1116 struct inode *ip; 1117 struct ufid *ufhp; 1118 1119 ip = VTOI(vp); 1120 ufhp = (struct ufid *)fhp; 1121 ufhp->ufid_len = sizeof(struct ufid); 1122 ufhp->ufid_ino = ip->i_number; 1123 ufhp->ufid_gen = ip->i_gen; 1124 return (0); 1125} 1126 1127/* 1128 * Write a superblock and associated information back to disk. 1129 */ 1130static int 1131ext2_sbupdate(mp, waitfor) 1132 struct ext2mount *mp; 1133 int waitfor; 1134{ 1135 struct ext2_sb_info *fs = mp->um_e2fs; 1136 struct ext2_super_block *es = fs->s_es; 1137 struct buf *bp; 1138 int error = 0; 1139/* 1140printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1141*/ 1142 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1143 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1144 if (waitfor == MNT_WAIT) 1145 error = bwrite(bp); 1146 else 1147 bawrite(bp); 1148 1149 /* 1150 * The buffers for group descriptors, inode bitmaps and block bitmaps 1151 * are not busy at this point and are (hopefully) written by the 1152 * usual sync mechanism. No need to write them here 1153 */ 1154 1155 return (error); 1156} 1157 1158/* 1159 * Return the root of a filesystem. 1160 */ 1161static int 1162ext2_root(mp, vpp, td) 1163 struct mount *mp; 1164 struct vnode **vpp; 1165 struct thread *td; 1166{ 1167 struct vnode *nvp; 1168 int error; 1169 1170 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1171 if (error) 1172 return (error); 1173 *vpp = nvp; 1174 return (0); 1175} 1176 1177static int 1178ext2_init(struct vfsconf *vfsp) 1179{ 1180 1181 ext2_ihashinit(); 1182 return (0); 1183} 1184 1185static int 1186ext2_uninit(struct vfsconf *vfsp) 1187{ 1188 1189 ext2_ihashuninit(); 1190 return (0); 1191} 1192