ext2_vfsops.c revision 120783
/*
 *  modified for EXT2FS support in Lites 1.1
 *
 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
 *  University of Utah, Department of Computer Science
 */
/*
 * Copyright (c) 1989, 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 40 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 120783 2003-10-05 06:43:03Z jeff $ 41 */ 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/namei.h> 46#include <sys/proc.h> 47#include <sys/kernel.h> 48#include <sys/vnode.h> 49#include <sys/mount.h> 50#include <sys/bio.h> 51#include <sys/buf.h> 52#include <sys/conf.h> 53#include <sys/fcntl.h> 54#include <sys/malloc.h> 55#include <sys/stat.h> 56#include <sys/mutex.h> 57 58#include <gnu/ext2fs/ext2_mount.h> 59#include <gnu/ext2fs/inode.h> 60 61#include <gnu/ext2fs/fs.h> 62#include <gnu/ext2fs/ext2_extern.h> 63#include <gnu/ext2fs/ext2_fs.h> 64#include <gnu/ext2fs/ext2_fs_sb.h> 65 66static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 67static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 68static int ext2_reload(struct mount *mountp, struct ucred *cred, 69 struct thread *td); 70static int ext2_sbupdate(struct ext2mount *, int); 71 72static vfs_unmount_t ext2_unmount; 73static vfs_root_t ext2_root; 74static vfs_statfs_t ext2_statfs; 75static vfs_sync_t ext2_sync; 76static vfs_vget_t ext2_vget; 77static vfs_fhtovp_t ext2_fhtovp; 78static vfs_vptofh_t ext2_vptofh; 79static vfs_init_t ext2_init; 80static vfs_uninit_t ext2_uninit; 81static vfs_nmount_t ext2_mount; 82 83MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 84static MALLOC_DEFINE(M_EXT2MNT, "EXT2 
mount", "EXT2 mount structure"); 85 86static struct vfsops ext2fs_vfsops = { 87 .vfs_fhtovp = ext2_fhtovp, 88 .vfs_init = ext2_init, 89 .vfs_nmount = ext2_mount, 90 .vfs_root = ext2_root, /* root inode via vget */ 91 .vfs_statfs = ext2_statfs, 92 .vfs_sync = ext2_sync, 93 .vfs_uninit = ext2_uninit, 94 .vfs_unmount = ext2_unmount, 95 .vfs_vget = ext2_vget, 96 .vfs_vptofh = ext2_vptofh, 97}; 98 99VFS_SET(ext2fs_vfsops, ext2fs, 0); 100#define bsd_malloc malloc 101#define bsd_free free 102 103static int ext2fs_inode_hash_lock; 104 105static int ext2_check_sb_compat(struct ext2_super_block *es, dev_t dev, 106 int ronly); 107static int compute_sb_data(struct vnode * devvp, 108 struct ext2_super_block * es, struct ext2_sb_info * fs); 109 110#ifdef notyet 111static int ext2_mountroot(void); 112 113/* 114 * Called by main() when ext2fs is going to be mounted as root. 115 * 116 * Name is updated by mount(8) after booting. 117 */ 118#define ROOTNAME "root_device" 119 120static int 121ext2_mountroot() 122{ 123 struct ext2_sb_info *fs; 124 struct mount *mp; 125 struct thread *td = curthread; 126 struct ext2mount *ump; 127 u_int size; 128 int error; 129 130 if ((error = bdevvp(rootdev, &rootvp))) { 131 printf("ext2_mountroot: can't find rootvp\n"); 132 return (error); 133 } 134 mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 135 bzero((char *)mp, (u_long)sizeof(struct mount)); 136 TAILQ_INIT(&mp->mnt_nvnodelist); 137 TAILQ_INIT(&mp->mnt_reservedvnlist); 138 mp->mnt_op = &ext2fs_vfsops; 139 mp->mnt_flag = MNT_RDONLY; 140 if (error = ext2_mountfs(rootvp, mp, td)) { 141 bsd_free(mp, M_MOUNT); 142 return (error); 143 } 144 if (error = vfs_lock(mp)) { 145 (void)ext2_unmount(mp, 0, td); 146 bsd_free(mp, M_MOUNT); 147 return (error); 148 } 149 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 150 mp->mnt_flag |= MNT_ROOTFS; 151 mp->mnt_vnodecovered = NULLVP; 152 ump = VFSTOEXT2(mp); 153 fs = ump->um_e2fs; 154 bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); 155 fs->fs_fsmnt[0] = 
'/'; 156 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 157 MNAMELEN); 158 (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 159 &size); 160 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 161 (void)ext2_statfs(mp, &mp->mnt_stat, td); 162 vfs_unlock(mp); 163 inittodr(fs->s_es->s_wtime); /* this helps to set the time */ 164 return (0); 165} 166#endif 167 168/* 169 * VFS Operations. 170 * 171 * mount system call 172 */ 173static int 174ext2_mount(mp, ndp, td) 175 struct mount *mp; 176 struct nameidata *ndp; 177 struct thread *td; 178{ 179 struct export_args *export; 180 struct vfsoptlist *opts; 181 struct vnode *devvp; 182 struct ext2mount *ump = 0; 183 struct ext2_sb_info *fs; 184 char *path, *fspec; 185 size_t size; 186 int error, flags, len; 187 mode_t accessmode; 188 189 opts = mp->mnt_optnew; 190 191 vfs_getopt(opts, "fspath", (void **)&path, NULL); 192 /* Double-check the length of path.. */ 193 if (strlen(path) >= MAXMNTLEN - 1) 194 return (ENAMETOOLONG); 195 196 fspec = NULL; 197 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 198 if (!error && fspec[len - 1] != '\0') 199 return (EINVAL); 200 201 /* 202 * If updating, check whether changing from read-only to 203 * read/write; if there is no device name, that's all we do. 
204 */ 205 if (mp->mnt_flag & MNT_UPDATE) { 206 ump = VFSTOEXT2(mp); 207 fs = ump->um_e2fs; 208 error = 0; 209 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 210 flags = WRITECLOSE; 211 if (mp->mnt_flag & MNT_FORCE) 212 flags |= FORCECLOSE; 213 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 214 return (EBUSY); 215 error = ext2_flushfiles(mp, flags, td); 216 vfs_unbusy(mp, td); 217 if (!error && fs->s_wasvalid) { 218 fs->s_es->s_state |= EXT2_VALID_FS; 219 ext2_sbupdate(ump, MNT_WAIT); 220 } 221 fs->s_rd_only = 1; 222 } 223 if (!error && (mp->mnt_flag & MNT_RELOAD)) 224 error = ext2_reload(mp, ndp->ni_cnd.cn_cred, td); 225 if (error) 226 return (error); 227 devvp = ump->um_devvp; 228 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 229 (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) 230 return (EPERM); 231 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 232 /* 233 * If upgrade to read-write by non-root, then verify 234 * that user has necessary permissions on the device. 235 */ 236 if (suser(td)) { 237 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 238 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 239 td->td_ucred, td)) != 0) { 240 VOP_UNLOCK(devvp, 0, td); 241 return (error); 242 } 243 VOP_UNLOCK(devvp, 0, td); 244 } 245 246 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 247 (fs->s_es->s_state & EXT2_ERROR_FS)) { 248 if (mp->mnt_flag & MNT_FORCE) { 249 printf( 250"WARNING: %s was not properly dismounted\n", 251 fs->fs_fsmnt); 252 } else { 253 printf( 254"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 255 fs->fs_fsmnt); 256 return (EPERM); 257 } 258 } 259 fs->s_es->s_state &= ~EXT2_VALID_FS; 260 ext2_sbupdate(ump, MNT_WAIT); 261 fs->s_rd_only = 0; 262 } 263 if (fspec == NULL) { 264 error = vfs_getopt(opts, "export", (void **)&export, 265 &len); 266 if (error || len != sizeof(struct export_args)) 267 return (EINVAL); 268 /* Process export requests. 
*/ 269 return (vfs_export(mp, export)); 270 } 271 } 272 /* 273 * Not an update, or updating the name: look up the name 274 * and verify that it refers to a sensible block device. 275 */ 276 if (fspec == NULL) 277 return (EINVAL); 278 NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); 279 if ((error = namei(ndp)) != 0) 280 return (error); 281 NDFREE(ndp, NDF_ONLY_PNBUF); 282 devvp = ndp->ni_vp; 283 284 if (!vn_isdisk(devvp, &error)) { 285 vrele(devvp); 286 return (error); 287 } 288 289 /* 290 * If mount by non-root, then verify that user has necessary 291 * permissions on the device. 292 */ 293 if (suser(td)) { 294 accessmode = VREAD; 295 if ((mp->mnt_flag & MNT_RDONLY) == 0) 296 accessmode |= VWRITE; 297 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 298 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 299 vput(devvp); 300 return (error); 301 } 302 VOP_UNLOCK(devvp, 0, td); 303 } 304 305 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 306 error = ext2_mountfs(devvp, mp, td); 307 } else { 308 if (devvp != ump->um_devvp) 309 error = EINVAL; /* needs translation */ 310 else 311 vrele(devvp); 312 } 313 if (error) { 314 vrele(devvp); 315 return (error); 316 } 317 ump = VFSTOEXT2(mp); 318 fs = ump->um_e2fs; 319 /* 320 * Note that this strncpy() is ok because of a check at the start 321 * of ext2_mount(). 
322 */ 323 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 324 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 325 (void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 326 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 327 (void)ext2_statfs(mp, &mp->mnt_stat, td); 328 return (0); 329} 330 331/* 332 * checks that the data in the descriptor blocks make sense 333 * this is taken from ext2/super.c 334 */ 335static int ext2_check_descriptors (struct ext2_sb_info * sb) 336{ 337 int i; 338 int desc_block = 0; 339 unsigned long block = sb->s_es->s_first_data_block; 340 struct ext2_group_desc * gdp = NULL; 341 342 /* ext2_debug ("Checking group descriptors"); */ 343 344 for (i = 0; i < sb->s_groups_count; i++) 345 { 346 /* examine next descriptor block */ 347 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 348 gdp = (struct ext2_group_desc *) 349 sb->s_group_desc[desc_block++]->b_data; 350 if (gdp->bg_block_bitmap < block || 351 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 352 { 353 printf ("ext2_check_descriptors: " 354 "Block bitmap for group %d" 355 " not in group (block %lu)!\n", 356 i, (unsigned long) gdp->bg_block_bitmap); 357 return 0; 358 } 359 if (gdp->bg_inode_bitmap < block || 360 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 361 { 362 printf ("ext2_check_descriptors: " 363 "Inode bitmap for group %d" 364 " not in group (block %lu)!\n", 365 i, (unsigned long) gdp->bg_inode_bitmap); 366 return 0; 367 } 368 if (gdp->bg_inode_table < block || 369 gdp->bg_inode_table + sb->s_itb_per_group >= 370 block + EXT2_BLOCKS_PER_GROUP(sb)) 371 { 372 printf ("ext2_check_descriptors: " 373 "Inode table for group %d" 374 " not in group (block %lu)!\n", 375 i, (unsigned long) gdp->bg_inode_table); 376 return 0; 377 } 378 block += EXT2_BLOCKS_PER_GROUP(sb); 379 gdp++; 380 } 381 return 1; 382} 383 384static int 385ext2_check_sb_compat(es, dev, ronly) 386 struct ext2_super_block *es; 387 dev_t dev; 388 int ronly; 389{ 390 391 if (es->s_magic != 
EXT2_SUPER_MAGIC) { 392 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 393 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 394 return (1); 395 } 396 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 397 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 398 printf( 399"WARNING: mount of %s denied due to unsupported optional features\n", 400 devtoname(dev)); 401 return (1); 402 } 403 if (!ronly && 404 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 405 printf( 406"WARNING: R/W mount of %s denied due to unsupported optional features\n", 407 devtoname(dev)); 408 return (1); 409 } 410 } 411 return (0); 412} 413 414/* 415 * this computes the fields of the ext2_sb_info structure from the 416 * data in the ext2_super_block structure read in 417 */ 418static int compute_sb_data(devvp, es, fs) 419 struct vnode * devvp; 420 struct ext2_super_block * es; 421 struct ext2_sb_info * fs; 422{ 423 int db_count, error; 424 int i, j; 425 int logic_sb_block = 1; /* XXX for now */ 426 427#if 1 428#define V(v) 429#else 430#define V(v) printf(#v"= %d\n", fs->v); 431#endif 432 433 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 434 V(s_blocksize) 435 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 436 V(s_bshift) 437 fs->s_fsbtodb = es->s_log_block_size + 1; 438 V(s_fsbtodb) 439 fs->s_qbmask = fs->s_blocksize - 1; 440 V(s_bmask) 441 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 442 V(s_blocksize_bits) 443 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 444 V(s_frag_size) 445 if (fs->s_frag_size) 446 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 447 V(s_frags_per_block) 448 fs->s_blocks_per_group = es->s_blocks_per_group; 449 V(s_blocks_per_group) 450 fs->s_frags_per_group = es->s_frags_per_group; 451 V(s_frags_per_group) 452 fs->s_inodes_per_group = es->s_inodes_per_group; 453 V(s_inodes_per_group) 454 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 455 V(s_inodes_per_block) 456 
fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 457 V(s_itb_per_group) 458 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 459 V(s_desc_per_block) 460 /* s_resuid / s_resgid ? */ 461 fs->s_groups_count = (es->s_blocks_count - 462 es->s_first_data_block + 463 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 464 EXT2_BLOCKS_PER_GROUP(fs); 465 V(s_groups_count) 466 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 467 EXT2_DESC_PER_BLOCK(fs); 468 fs->s_db_per_group = db_count; 469 V(s_db_per_group) 470 471 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 472 M_EXT2MNT, M_WAITOK); 473 474 /* adjust logic_sb_block */ 475 if(fs->s_blocksize > SBSIZE) 476 /* Godmar thinks: if the blocksize is greater than 1024, then 477 the superblock is logically part of block zero. 478 */ 479 logic_sb_block = 0; 480 481 for (i = 0; i < db_count; i++) { 482 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 483 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 484 if(error) { 485 for (j = 0; j < i; j++) 486 brelse(fs->s_group_desc[j]); 487 bsd_free(fs->s_group_desc, M_EXT2MNT); 488 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 489 return EIO; 490 } 491 LCK_BUF(fs->s_group_desc[i]) 492 } 493 if(!ext2_check_descriptors(fs)) { 494 for (j = 0; j < db_count; j++) 495 ULCK_BUF(fs->s_group_desc[j]) 496 bsd_free(fs->s_group_desc, M_EXT2MNT); 497 printf("EXT2-fs: (ext2_check_descriptors failure) " 498 "unable to read group descriptors\n"); 499 return EIO; 500 } 501 502 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 503 fs->s_inode_bitmap_number[i] = 0; 504 fs->s_inode_bitmap[i] = NULL; 505 fs->s_block_bitmap_number[i] = 0; 506 fs->s_block_bitmap[i] = NULL; 507 } 508 fs->s_loaded_inode_bitmaps = 0; 509 fs->s_loaded_block_bitmaps = 0; 510 return 0; 511} 512 513/* 514 * Reload all incore data for a filesystem (used after running fsck on 515 * the root filesystem and finding things to fix). 
The filesystem must 516 * be mounted read-only. 517 * 518 * Things to do to update the mount: 519 * 1) invalidate all cached meta-data. 520 * 2) re-read superblock from disk. 521 * 3) re-read summary information from disk. 522 * 4) invalidate all inactive vnodes. 523 * 5) invalidate all cached file data. 524 * 6) re-read inode data for all active vnodes. 525 */ 526static int 527ext2_reload(mountp, cred, td) 528 struct mount *mountp; 529 struct ucred *cred; 530 struct thread *td; 531{ 532 struct vnode *vp, *nvp, *devvp; 533 struct inode *ip; 534 struct buf *bp; 535 struct ext2_super_block * es; 536 struct ext2_sb_info *fs; 537 int error; 538 539 if ((mountp->mnt_flag & MNT_RDONLY) == 0) 540 return (EINVAL); 541 /* 542 * Step 1: invalidate all cached meta-data. 543 */ 544 devvp = VFSTOEXT2(mountp)->um_devvp; 545 if (vinvalbuf(devvp, 0, cred, td, 0, 0)) 546 panic("ext2_reload: dirty1"); 547 /* 548 * Step 2: re-read superblock from disk. 549 * constants have been adjusted for ext2 550 */ 551 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 552 return (error); 553 es = (struct ext2_super_block *)bp->b_data; 554 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 555 brelse(bp); 556 return (EIO); /* XXX needs translation */ 557 } 558 fs = VFSTOEXT2(mountp)->um_e2fs; 559 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 560 561 if((error = compute_sb_data(devvp, es, fs)) != 0) { 562 brelse(bp); 563 return error; 564 } 565#ifdef UNKLAR 566 if (fs->fs_sbsize < SBSIZE) 567 bp->b_flags |= B_INVAL; 568#endif 569 brelse(bp); 570 571loop: 572 mtx_lock(&mntvnode_mtx); 573 for (vp = TAILQ_FIRST(&mountp->mnt_nvnodelist); vp != NULL; vp = nvp) { 574 if (vp->v_mount != mountp) { 575 mtx_unlock(&mntvnode_mtx); 576 goto loop; 577 } 578 nvp = TAILQ_NEXT(vp, v_nmntvnodes); 579 VI_LOCK(vp); 580 if (vp->v_iflag & VI_XLOCK) { 581 VI_UNLOCK(vp); 582 continue; 583 } 584 mtx_unlock(&mntvnode_mtx); 585 /* 586 * Step 4: invalidate all inactive vnodes. 
587 */ 588 if (vp->v_usecount == 0) { 589 vgonel(vp, td); 590 goto loop; 591 } 592 /* 593 * Step 5: invalidate all cached file data. 594 */ 595 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 596 goto loop; 597 } 598 if (vinvalbuf(vp, 0, cred, td, 0, 0)) 599 panic("ext2_reload: dirty2"); 600 /* 601 * Step 6: re-read inode data for all active vnodes. 602 */ 603 ip = VTOI(vp); 604 error = 605 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 606 (int)fs->s_blocksize, NOCRED, &bp); 607 if (error) { 608 vput(vp); 609 return (error); 610 } 611 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 612 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 613 brelse(bp); 614 vput(vp); 615 mtx_lock(&mntvnode_mtx); 616 } 617 mtx_unlock(&mntvnode_mtx); 618 return (0); 619} 620 621/* 622 * Common code for mount and mountroot 623 */ 624static int 625ext2_mountfs(devvp, mp, td) 626 struct vnode *devvp; 627 struct mount *mp; 628 struct thread *td; 629{ 630 struct ext2mount *ump; 631 struct buf *bp; 632 struct ext2_sb_info *fs; 633 struct ext2_super_block * es; 634 dev_t dev = devvp->v_rdev; 635 int error; 636 int ronly; 637 638 /* 639 * Disallow multiple mounts of the same device. 640 * Disallow mounting of a device that is currently in use 641 * (except for root, which might share swap device for miniroot). 642 * Flush out any old buffers remaining from a previous use. 643 */ 644 if ((error = vfs_mountedon(devvp)) != 0) 645 return (error); 646 if (vcount(devvp) > 1 && devvp != rootvp) 647 return (EBUSY); 648 if ((error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) 649 return (error); 650#ifdef READONLY 651/* turn on this to force it to be read-only */ 652 mp->mnt_flag |= MNT_RDONLY; 653#endif 654 655 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 656 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 657 error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, td, -1); 658 VOP_UNLOCK(devvp, 0, td); 659 if (error) 660 return (error); 661 if (devvp->v_rdev->si_iosize_max != 0) 662 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 663 if (mp->mnt_iosize_max > MAXPHYS) 664 mp->mnt_iosize_max = MAXPHYS; 665 666 bp = NULL; 667 ump = NULL; 668 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 669 goto out; 670 es = (struct ext2_super_block *)bp->b_data; 671 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 672 error = EINVAL; /* XXX needs translation */ 673 goto out; 674 } 675 if ((es->s_state & EXT2_VALID_FS) == 0 || 676 (es->s_state & EXT2_ERROR_FS)) { 677 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 678 printf( 679"WARNING: Filesystem was not properly dismounted\n"); 680 } else { 681 printf( 682"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 683 error = EPERM; 684 goto out; 685 } 686 } 687 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 688 bzero((caddr_t)ump, sizeof *ump); 689 /* I don't know whether this is the right strategy. Note that 690 we dynamically allocate both an ext2_sb_info and an ext2_super_block 691 while Linux keeps the super block in a locked buffer 692 */ 693 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 694 M_EXT2MNT, M_WAITOK); 695 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 696 M_EXT2MNT, M_WAITOK); 697 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 698 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 699 goto out; 700 /* 701 * We don't free the group descriptors allocated by compute_sb_data() 702 * until ext2_unmount(). This is OK since the mount will succeed. 703 */ 704 brelse(bp); 705 bp = NULL; 706 fs = ump->um_e2fs; 707 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 708 /* if the fs is not mounted read-only, make sure the super block is 709 always written back on a sync() 710 */ 711 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 
1 : 0; 712 if (ronly == 0) { 713 fs->s_dirt = 1; /* mark it modified */ 714 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 715 } 716 mp->mnt_data = (qaddr_t)ump; 717 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 718 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 719 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 720 mp->mnt_flag |= MNT_LOCAL; 721 ump->um_mountp = mp; 722 ump->um_dev = dev; 723 ump->um_devvp = devvp; 724 /* setting those two parameters allowed us to use 725 ufs_bmap w/o changse ! 726 */ 727 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 728 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 729 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 730 devvp->v_rdev->si_mountpoint = mp; 731 if (ronly == 0) 732 ext2_sbupdate(ump, MNT_WAIT); 733 return (0); 734out: 735 if (bp) 736 brelse(bp); 737 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, td); 738 if (ump) { 739 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 740 bsd_free(ump->um_e2fs, M_EXT2MNT); 741 bsd_free(ump, M_EXT2MNT); 742 mp->mnt_data = (qaddr_t)0; 743 } 744 return (error); 745} 746 747/* 748 * unmount system call 749 */ 750static int 751ext2_unmount(mp, mntflags, td) 752 struct mount *mp; 753 int mntflags; 754 struct thread *td; 755{ 756 struct ext2mount *ump; 757 struct ext2_sb_info *fs; 758 int error, flags, ronly, i; 759 760 flags = 0; 761 if (mntflags & MNT_FORCE) { 762 if (mp->mnt_flag & MNT_ROOTFS) 763 return (EINVAL); 764 flags |= FORCECLOSE; 765 } 766 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 767 return (error); 768 ump = VFSTOEXT2(mp); 769 fs = ump->um_e2fs; 770 ronly = fs->s_rd_only; 771 if (ronly == 0) { 772 if (fs->s_wasvalid) 773 fs->s_es->s_state |= EXT2_VALID_FS; 774 ext2_sbupdate(ump, MNT_WAIT); 775 } 776 777 /* release buffers containing group descriptors */ 778 for(i = 0; i < fs->s_db_per_group; i++) 779 ULCK_BUF(fs->s_group_desc[i]) 780 bsd_free(fs->s_group_desc, M_EXT2MNT); 781 782 /* release cached inode/block bitmaps */ 783 for (i = 0; i < 
EXT2_MAX_GROUP_LOADED; i++) 784 if (fs->s_inode_bitmap[i]) 785 ULCK_BUF(fs->s_inode_bitmap[i]) 786 787 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 788 if (fs->s_block_bitmap[i]) 789 ULCK_BUF(fs->s_block_bitmap[i]) 790 791 ump->um_devvp->v_rdev->si_mountpoint = NULL; 792 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, 793 NOCRED, td); 794 vrele(ump->um_devvp); 795 bsd_free(fs->s_es, M_EXT2MNT); 796 bsd_free(fs, M_EXT2MNT); 797 bsd_free(ump, M_EXT2MNT); 798 mp->mnt_data = (qaddr_t)0; 799 mp->mnt_flag &= ~MNT_LOCAL; 800 return (error); 801} 802 803/* 804 * Flush out all the files in a filesystem. 805 */ 806static int 807ext2_flushfiles(mp, flags, td) 808 struct mount *mp; 809 int flags; 810 struct thread *td; 811{ 812 int error; 813 814 error = vflush(mp, 0, flags); 815 return (error); 816} 817 818/* 819 * Get file system statistics. 820 * taken from ext2/super.c ext2_statfs 821 */ 822static int 823ext2_statfs(mp, sbp, td) 824 struct mount *mp; 825 struct statfs *sbp; 826 struct thread *td; 827{ 828 unsigned long overhead; 829 struct ext2mount *ump; 830 struct ext2_sb_info *fs; 831 struct ext2_super_block *es; 832 int i, nsb; 833 834 ump = VFSTOEXT2(mp); 835 fs = ump->um_e2fs; 836 es = fs->s_es; 837 838 if (es->s_magic != EXT2_SUPER_MAGIC) 839 panic("ext2_statfs - magic number spoiled"); 840 841 /* 842 * Compute the overhead (FS structures) 843 */ 844 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 845 nsb = 0; 846 for (i = 0 ; i < fs->s_groups_count; i++) 847 if (ext2_group_sparse(i)) 848 nsb++; 849 } else 850 nsb = fs->s_groups_count; 851 overhead = es->s_first_data_block + 852 /* Superblocks and block group descriptors: */ 853 nsb * (1 + fs->s_db_per_group) + 854 /* Inode bitmap, block bitmap, and inode table: */ 855 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 856 857 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 858 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 859 sbp->f_blocks = es->s_blocks_count - overhead; 860 sbp->f_bfree = 
es->s_free_blocks_count; 861 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 862 sbp->f_files = es->s_inodes_count; 863 sbp->f_ffree = es->s_free_inodes_count; 864 if (sbp != &mp->mnt_stat) { 865 sbp->f_type = mp->mnt_vfc->vfc_typenum; 866 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 867 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 868 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 869 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 870 } 871 return (0); 872} 873 874/* 875 * Go through the disk queues to initiate sandbagged IO; 876 * go through the inodes to write those that have been modified; 877 * initiate the writing of the super block if it has been modified. 878 * 879 * Note: we are always called with the filesystem marked `MPBUSY'. 880 */ 881static int 882ext2_sync(mp, waitfor, cred, td) 883 struct mount *mp; 884 int waitfor; 885 struct ucred *cred; 886 struct thread *td; 887{ 888 struct vnode *nvp, *vp; 889 struct inode *ip; 890 struct ext2mount *ump = VFSTOEXT2(mp); 891 struct ext2_sb_info *fs; 892 int error, allerror = 0; 893 894 fs = ump->um_e2fs; 895 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 896 printf("fs = %s\n", fs->fs_fsmnt); 897 panic("ext2_sync: rofs mod"); 898 } 899 /* 900 * Write back each (modified) inode. 901 */ 902 mtx_lock(&mntvnode_mtx); 903loop: 904 for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { 905 /* 906 * If the vnode that we are about to sync is no longer 907 * associated with this mount point, start over. 
908 */ 909 if (vp->v_mount != mp) 910 goto loop; 911 nvp = TAILQ_NEXT(vp, v_nmntvnodes); 912 VI_LOCK(vp); 913 if (vp->v_iflag & VI_XLOCK) { 914 VI_UNLOCK(vp); 915 continue; 916 } 917 mtx_unlock(&mntvnode_mtx); 918 ip = VTOI(vp); 919 if (vp->v_type == VNON || 920 ((ip->i_flag & 921 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 922 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { 923 VI_UNLOCK(vp); 924 mtx_lock(&mntvnode_mtx); 925 continue; 926 } 927 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 928 if (error) { 929 mtx_lock(&mntvnode_mtx); 930 if (error == ENOENT) 931 goto loop; 932 continue; 933 } 934 if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) 935 allerror = error; 936 VOP_UNLOCK(vp, 0, td); 937 vrele(vp); 938 mtx_lock(&mntvnode_mtx); 939 } 940 mtx_unlock(&mntvnode_mtx); 941 /* 942 * Force stale file system control information to be flushed. 943 */ 944 if (waitfor != MNT_LAZY) { 945 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 946 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0) 947 allerror = error; 948 VOP_UNLOCK(ump->um_devvp, 0, td); 949 } 950 /* 951 * Write back modified superblock. 952 */ 953 if (fs->s_dirt != 0) { 954 fs->s_dirt = 0; 955 fs->s_es->s_wtime = time_second; 956 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 957 allerror = error; 958 } 959 return (allerror); 960} 961 962/* 963 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 964 * in from disk. If it is in core, wait for the lock bit to clear, then 965 * return the inode locked. Detection and handling of mount points must be 966 * done by the calling routine. 
967 */ 968static int 969ext2_vget(mp, ino, flags, vpp) 970 struct mount *mp; 971 ino_t ino; 972 int flags; 973 struct vnode **vpp; 974{ 975 struct ext2_sb_info *fs; 976 struct inode *ip; 977 struct ext2mount *ump; 978 struct buf *bp; 979 struct vnode *vp; 980 dev_t dev; 981 int i, error; 982 int used_blocks; 983 984 ump = VFSTOEXT2(mp); 985 dev = ump->um_dev; 986restart: 987 if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0) 988 return (error); 989 if (*vpp != NULL) 990 return (0); 991 992 /* 993 * Lock out the creation of new entries in the FFS hash table in 994 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 995 * may occur! 996 */ 997 if (ext2fs_inode_hash_lock) { 998 while (ext2fs_inode_hash_lock) { 999 ext2fs_inode_hash_lock = -1; 1000 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 1001 } 1002 goto restart; 1003 } 1004 ext2fs_inode_hash_lock = 1; 1005 1006 /* 1007 * If this MALLOC() is performed after the getnewvnode() 1008 * it might block, leaving a vnode with a NULL v_data to be 1009 * found by ext2_sync() if a sync happens to fire right then, 1010 * which will cause a panic because ext2_sync() blindly 1011 * dereferences vp->v_data (as well it should). 1012 */ 1013 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 1014 1015 /* Allocate a new vnode/inode. 
*/ 1016 if ((error = getnewvnode("ext2fs", mp, ext2_vnodeop_p, &vp)) != 0) { 1017 if (ext2fs_inode_hash_lock < 0) 1018 wakeup(&ext2fs_inode_hash_lock); 1019 ext2fs_inode_hash_lock = 0; 1020 *vpp = NULL; 1021 FREE(ip, M_EXT2NODE); 1022 return (error); 1023 } 1024 bzero((caddr_t)ip, sizeof(struct inode)); 1025 vp->v_data = ip; 1026 ip->i_vnode = vp; 1027 ip->i_e2fs = fs = ump->um_e2fs; 1028 ip->i_dev = dev; 1029 ip->i_number = ino; 1030 /* 1031 * Put it onto its hash chain and lock it so that other requests for 1032 * this inode will block if they arrive while we are sleeping waiting 1033 * for old data structures to be purged or for the contents of the 1034 * disk portion of this inode to be read. 1035 */ 1036 ext2_ihashins(ip); 1037 1038 if (ext2fs_inode_hash_lock < 0) 1039 wakeup(&ext2fs_inode_hash_lock); 1040 ext2fs_inode_hash_lock = 0; 1041 1042 /* Read in the disk contents for the inode, copy into the inode. */ 1043#if 0 1044printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1045#endif 1046 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1047 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1048 /* 1049 * The inode does not contain anything useful, so it would 1050 * be misleading to leave it on its hash chain. With mode 1051 * still zero, it will be unlinked and returned to the free 1052 * list by vput(). 
1053 */ 1054 vput(vp); 1055 brelse(bp); 1056 *vpp = NULL; 1057 return (error); 1058 } 1059 /* convert ext2 inode to dinode */ 1060 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1061 ino_to_fsbo(fs, ino)), ip); 1062 ip->i_block_group = ino_to_cg(fs, ino); 1063 ip->i_next_alloc_block = 0; 1064 ip->i_next_alloc_goal = 0; 1065 ip->i_prealloc_count = 0; 1066 ip->i_prealloc_block = 0; 1067 /* now we want to make sure that block pointers for unused 1068 blocks are zeroed out - ext2_balloc depends on this 1069 although for regular files and directories only 1070 */ 1071 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1072 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1073 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1074 ip->i_db[i] = 0; 1075 } 1076/* 1077 ext2_print_inode(ip); 1078*/ 1079 brelse(bp); 1080 1081 /* 1082 * Initialize the vnode from the inode, check for aliases. 1083 * Note that the underlying vnode may have changed. 1084 */ 1085 if ((error = ext2_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { 1086 vput(vp); 1087 *vpp = NULL; 1088 return (error); 1089 } 1090 /* 1091 * Finish inode initialization now that aliasing has been resolved. 1092 */ 1093 ip->i_devvp = ump->um_devvp; 1094 VREF(ip->i_devvp); 1095 /* 1096 * Set up a generation number for this inode if it does not 1097 * already have one. This should only happen on old filesystems. 1098 */ 1099 if (ip->i_gen == 0) { 1100 ip->i_gen = random() / 2 + 1; 1101 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1102 ip->i_flag |= IN_MODIFIED; 1103 } 1104 *vpp = vp; 1105 return (0); 1106} 1107 1108/* 1109 * File handle to vnode 1110 * 1111 * Have to be really careful about stale file handles: 1112 * - check that the inode number is valid 1113 * - call ext2_vget() to get the locked inode 1114 * - check for an unallocated inode (i_mode == 0) 1115 * - check that the given client host has export rights and return 1116 * those rights via. 
exflagsp and credanonp 1117 */ 1118static int 1119ext2_fhtovp(mp, fhp, vpp) 1120 struct mount *mp; 1121 struct fid *fhp; 1122 struct vnode **vpp; 1123{ 1124 struct inode *ip; 1125 struct ufid *ufhp; 1126 struct vnode *nvp; 1127 struct ext2_sb_info *fs; 1128 int error; 1129 1130 ufhp = (struct ufid *)fhp; 1131 fs = VFSTOEXT2(mp)->um_e2fs; 1132 if (ufhp->ufid_ino < ROOTINO || 1133 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1134 return (ESTALE); 1135 1136 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1137 if (error) { 1138 *vpp = NULLVP; 1139 return (error); 1140 } 1141 ip = VTOI(nvp); 1142 if (ip->i_mode == 0 || 1143 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1144 vput(nvp); 1145 *vpp = NULLVP; 1146 return (ESTALE); 1147 } 1148 *vpp = nvp; 1149 return (0); 1150} 1151 1152/* 1153 * Vnode pointer to File handle 1154 */ 1155/* ARGSUSED */ 1156static int 1157ext2_vptofh(vp, fhp) 1158 struct vnode *vp; 1159 struct fid *fhp; 1160{ 1161 struct inode *ip; 1162 struct ufid *ufhp; 1163 1164 ip = VTOI(vp); 1165 ufhp = (struct ufid *)fhp; 1166 ufhp->ufid_len = sizeof(struct ufid); 1167 ufhp->ufid_ino = ip->i_number; 1168 ufhp->ufid_gen = ip->i_gen; 1169 return (0); 1170} 1171 1172/* 1173 * Write a superblock and associated information back to disk. 1174 */ 1175static int 1176ext2_sbupdate(mp, waitfor) 1177 struct ext2mount *mp; 1178 int waitfor; 1179{ 1180 struct ext2_sb_info *fs = mp->um_e2fs; 1181 struct ext2_super_block *es = fs->s_es; 1182 struct buf *bp; 1183 int error = 0; 1184/* 1185printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); 1186*/ 1187 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1188 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1189 if (waitfor == MNT_WAIT) 1190 error = bwrite(bp); 1191 else 1192 bawrite(bp); 1193 1194 /* 1195 * The buffers for group descriptors, inode bitmaps and block bitmaps 1196 * are not busy at this point and are (hopefully) written by the 1197 * usual sync mechanism. No need to write them here 1198 */ 1199 1200 return (error); 1201} 1202 1203/* 1204 * Return the root of a filesystem. 1205 */ 1206static int 1207ext2_root(mp, vpp) 1208 struct mount *mp; 1209 struct vnode **vpp; 1210{ 1211 struct vnode *nvp; 1212 int error; 1213 1214 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1215 if (error) 1216 return (error); 1217 *vpp = nvp; 1218 return (0); 1219} 1220 1221static int 1222ext2_init(struct vfsconf *vfsp) 1223{ 1224 1225 ext2_ihashinit(); 1226 return (0); 1227} 1228 1229static int 1230ext2_uninit(struct vfsconf *vfsp) 1231{ 1232 1233 ext2_ihashuninit(); 1234 return (0); 1235} 1236