ext2_vfsops.c revision 122114
1/* 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 40 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 122114 2003-11-05 11:56:58Z bde $ 41 */ 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/namei.h> 46#include <sys/proc.h> 47#include <sys/kernel.h> 48#include <sys/vnode.h> 49#include <sys/mount.h> 50#include <sys/bio.h> 51#include <sys/buf.h> 52#include <sys/conf.h> 53#include <sys/fcntl.h> 54#include <sys/malloc.h> 55#include <sys/stat.h> 56#include <sys/mutex.h> 57 58#include <gnu/ext2fs/ext2_mount.h> 59#include <gnu/ext2fs/inode.h> 60 61#include <gnu/ext2fs/fs.h> 62#include <gnu/ext2fs/ext2_extern.h> 63#include <gnu/ext2fs/ext2_fs.h> 64#include <gnu/ext2fs/ext2_fs_sb.h> 65 66static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 67static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 68static int ext2_reload(struct mount *mp, struct ucred *cred, struct thread *td); 69static int ext2_sbupdate(struct ext2mount *, int); 70 71static vfs_unmount_t ext2_unmount; 72static vfs_root_t ext2_root; 73static vfs_statfs_t ext2_statfs; 74static vfs_sync_t ext2_sync; 75static vfs_vget_t ext2_vget; 76static vfs_fhtovp_t ext2_fhtovp; 77static vfs_vptofh_t ext2_vptofh; 78static vfs_init_t ext2_init; 79static vfs_uninit_t ext2_uninit; 80static vfs_nmount_t ext2_mount; 81 82MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 83static MALLOC_DEFINE(M_EXT2MNT, "EXT2 mount", "EXT2 mount structure"); 84 85static struct vfsops ext2fs_vfsops = { 86 .vfs_fhtovp = ext2_fhtovp, 87 .vfs_init = ext2_init, 88 .vfs_nmount = ext2_mount, 89 .vfs_root = ext2_root, /* root inode via vget */ 90 .vfs_statfs = ext2_statfs, 91 .vfs_sync = ext2_sync, 92 .vfs_uninit = ext2_uninit, 93 .vfs_unmount = ext2_unmount, 94 .vfs_vget = ext2_vget, 95 .vfs_vptofh = ext2_vptofh, 96}; 97 98VFS_SET(ext2fs_vfsops, ext2fs, 0); 99#define bsd_malloc malloc 100#define bsd_free free 101 102static int ext2fs_inode_hash_lock; 103 104static int ext2_check_sb_compat(struct ext2_super_block *es, dev_t dev, 105 int ronly); 106static int compute_sb_data(struct vnode * devvp, 107 struct ext2_super_block * es, struct ext2_sb_info * fs); 108 109#ifdef notyet 110static int ext2_mountroot(void); 111 112/* 113 * Called by main() when ext2fs is going to be mounted as root. 114 * 115 * Name is updated by mount(8) after booting. 116 */ 117#define ROOTNAME "root_device" 118 119static int 120ext2_mountroot() 121{ 122 struct ext2_sb_info *fs; 123 struct mount *mp; 124 struct thread *td = curthread; 125 struct ext2mount *ump; 126 u_int size; 127 int error; 128 129 if ((error = bdevvp(rootdev, &rootvp))) { 130 printf("ext2_mountroot: can't find rootvp\n"); 131 return (error); 132 } 133 mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 134 bzero((char *)mp, (u_long)sizeof(struct mount)); 135 TAILQ_INIT(&mp->mnt_nvnodelist); 136 TAILQ_INIT(&mp->mnt_reservedvnlist); 137 mp->mnt_op = &ext2fs_vfsops; 138 mp->mnt_flag = MNT_RDONLY; 139 if (error = ext2_mountfs(rootvp, mp, td)) { 140 bsd_free(mp, M_MOUNT); 141 return (error); 142 } 143 if (error = vfs_lock(mp)) { 144 (void)ext2_unmount(mp, 0, td); 145 bsd_free(mp, M_MOUNT); 146 return (error); 147 } 148 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 149 mp->mnt_flag |= MNT_ROOTFS; 150 mp->mnt_vnodecovered = NULLVP; 151 ump = VFSTOEXT2(mp); 152 fs = ump->um_e2fs; 153 bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); 154 fs->fs_fsmnt[0] = '/'; 155 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 156 MNAMELEN); 157 (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 158 &size); 159 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 160 (void)ext2_statfs(mp, &mp->mnt_stat, td); 161 vfs_unlock(mp); 162 inittodr(fs->s_es->s_wtime); /* this helps to set the time */ 163 return (0); 164} 165#endif 166 167/* 168 * VFS Operations. 169 * 170 * mount system call 171 */ 172static int 173ext2_mount(mp, ndp, td) 174 struct mount *mp; 175 struct nameidata *ndp; 176 struct thread *td; 177{ 178 struct export_args *export; 179 struct vfsoptlist *opts; 180 struct vnode *devvp; 181 struct ext2mount *ump = 0; 182 struct ext2_sb_info *fs; 183 char *path, *fspec; 184 size_t size; 185 int error, flags, len; 186 mode_t accessmode; 187 188 opts = mp->mnt_optnew; 189 190 vfs_getopt(opts, "fspath", (void **)&path, NULL); 191 /* Double-check the length of path.. */ 192 if (strlen(path) >= MAXMNTLEN - 1) 193 return (ENAMETOOLONG); 194 195 fspec = NULL; 196 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 197 if (!error && fspec[len - 1] != '\0') 198 return (EINVAL); 199 200 /* 201 * If updating, check whether changing from read-only to 202 * read/write; if there is no device name, that's all we do. 203 */ 204 if (mp->mnt_flag & MNT_UPDATE) { 205 ump = VFSTOEXT2(mp); 206 fs = ump->um_e2fs; 207 error = 0; 208 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 209 flags = WRITECLOSE; 210 if (mp->mnt_flag & MNT_FORCE) 211 flags |= FORCECLOSE; 212 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 213 return (EBUSY); 214 error = ext2_flushfiles(mp, flags, td); 215 vfs_unbusy(mp, td); 216 if (!error && fs->s_wasvalid) { 217 fs->s_es->s_state |= EXT2_VALID_FS; 218 ext2_sbupdate(ump, MNT_WAIT); 219 } 220 fs->s_rd_only = 1; 221 } 222 if (!error && (mp->mnt_flag & MNT_RELOAD)) 223 error = ext2_reload(mp, ndp->ni_cnd.cn_cred, td); 224 if (error) 225 return (error); 226 devvp = ump->um_devvp; 227 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 228 (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) 229 return (EPERM); 230 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 231 /* 232 * If upgrade to read-write by non-root, then verify 233 * that user has necessary permissions on the device. 234 */ 235 if (suser(td)) { 236 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 237 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 238 td->td_ucred, td)) != 0) { 239 VOP_UNLOCK(devvp, 0, td); 240 return (error); 241 } 242 VOP_UNLOCK(devvp, 0, td); 243 } 244 245 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 246 (fs->s_es->s_state & EXT2_ERROR_FS)) { 247 if (mp->mnt_flag & MNT_FORCE) { 248 printf( 249"WARNING: %s was not properly dismounted\n", 250 fs->fs_fsmnt); 251 } else { 252 printf( 253"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 254 fs->fs_fsmnt); 255 return (EPERM); 256 } 257 } 258 fs->s_es->s_state &= ~EXT2_VALID_FS; 259 ext2_sbupdate(ump, MNT_WAIT); 260 fs->s_rd_only = 0; 261 } 262 if (fspec == NULL) { 263 error = vfs_getopt(opts, "export", (void **)&export, 264 &len); 265 if (error || len != sizeof(struct export_args)) 266 return (EINVAL); 267 /* Process export requests. */ 268 return (vfs_export(mp, export)); 269 } 270 } 271 /* 272 * Not an update, or updating the name: look up the name 273 * and verify that it refers to a sensible block device. 274 */ 275 if (fspec == NULL) 276 return (EINVAL); 277 NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); 278 if ((error = namei(ndp)) != 0) 279 return (error); 280 NDFREE(ndp, NDF_ONLY_PNBUF); 281 devvp = ndp->ni_vp; 282 283 if (!vn_isdisk(devvp, &error)) { 284 vrele(devvp); 285 return (error); 286 } 287 288 /* 289 * If mount by non-root, then verify that user has necessary 290 * permissions on the device. 291 */ 292 if (suser(td)) { 293 accessmode = VREAD; 294 if ((mp->mnt_flag & MNT_RDONLY) == 0) 295 accessmode |= VWRITE; 296 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 297 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 298 vput(devvp); 299 return (error); 300 } 301 VOP_UNLOCK(devvp, 0, td); 302 } 303 304 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 305 error = ext2_mountfs(devvp, mp, td); 306 } else { 307 if (devvp != ump->um_devvp) 308 error = EINVAL; /* needs translation */ 309 else 310 vrele(devvp); 311 } 312 if (error) { 313 vrele(devvp); 314 return (error); 315 } 316 ump = VFSTOEXT2(mp); 317 fs = ump->um_e2fs; 318 /* 319 * Note that this strncpy() is ok because of a check at the start 320 * of ext2_mount(). 321 */ 322 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 323 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 324 (void)copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 325 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 326 (void)ext2_statfs(mp, &mp->mnt_stat, td); 327 return (0); 328} 329 330/* 331 * checks that the data in the descriptor blocks make sense 332 * this is taken from ext2/super.c 333 */ 334static int ext2_check_descriptors (struct ext2_sb_info * sb) 335{ 336 int i; 337 int desc_block = 0; 338 unsigned long block = sb->s_es->s_first_data_block; 339 struct ext2_group_desc * gdp = NULL; 340 341 /* ext2_debug ("Checking group descriptors"); */ 342 343 for (i = 0; i < sb->s_groups_count; i++) 344 { 345 /* examine next descriptor block */ 346 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 347 gdp = (struct ext2_group_desc *) 348 sb->s_group_desc[desc_block++]->b_data; 349 if (gdp->bg_block_bitmap < block || 350 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 351 { 352 printf ("ext2_check_descriptors: " 353 "Block bitmap for group %d" 354 " not in group (block %lu)!\n", 355 i, (unsigned long) gdp->bg_block_bitmap); 356 return 0; 357 } 358 if (gdp->bg_inode_bitmap < block || 359 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 360 { 361 printf ("ext2_check_descriptors: " 362 "Inode bitmap for group %d" 363 " not in group (block %lu)!\n", 364 i, (unsigned long) gdp->bg_inode_bitmap); 365 return 0; 366 } 367 if (gdp->bg_inode_table < block || 368 gdp->bg_inode_table + sb->s_itb_per_group >= 369 block + EXT2_BLOCKS_PER_GROUP(sb)) 370 { 371 printf ("ext2_check_descriptors: " 372 "Inode table for group %d" 373 " not in group (block %lu)!\n", 374 i, (unsigned long) gdp->bg_inode_table); 375 return 0; 376 } 377 block += EXT2_BLOCKS_PER_GROUP(sb); 378 gdp++; 379 } 380 return 1; 381} 382 383static int 384ext2_check_sb_compat(es, dev, ronly) 385 struct ext2_super_block *es; 386 dev_t dev; 387 int ronly; 388{ 389 390 if (es->s_magic != EXT2_SUPER_MAGIC) { 391 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 392 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 393 return (1); 394 } 395 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 396 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 397 printf( 398"WARNING: mount of %s denied due to unsupported optional features\n", 399 devtoname(dev)); 400 return (1); 401 } 402 if (!ronly && 403 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 404 printf( 405"WARNING: R/W mount of %s denied due to unsupported optional features\n", 406 devtoname(dev)); 407 return (1); 408 } 409 } 410 return (0); 411} 412 413/* 414 * this computes the fields of the ext2_sb_info structure from the 415 * data in the ext2_super_block structure read in 416 */ 417static int compute_sb_data(devvp, es, fs) 418 struct vnode * devvp; 419 struct ext2_super_block * es; 420 struct ext2_sb_info * fs; 421{ 422 int db_count, error; 423 int i, j; 424 int logic_sb_block = 1; /* XXX for now */ 425 426#if 1 427#define V(v) 428#else 429#define V(v) printf(#v"= %d\n", fs->v); 430#endif 431 432 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 433 V(s_blocksize) 434 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 435 V(s_bshift) 436 fs->s_fsbtodb = es->s_log_block_size + 1; 437 V(s_fsbtodb) 438 fs->s_qbmask = fs->s_blocksize - 1; 439 V(s_bmask) 440 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 441 V(s_blocksize_bits) 442 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 443 V(s_frag_size) 444 if (fs->s_frag_size) 445 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 446 V(s_frags_per_block) 447 fs->s_blocks_per_group = es->s_blocks_per_group; 448 V(s_blocks_per_group) 449 fs->s_frags_per_group = es->s_frags_per_group; 450 V(s_frags_per_group) 451 fs->s_inodes_per_group = es->s_inodes_per_group; 452 V(s_inodes_per_group) 453 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 454 V(s_inodes_per_block) 455 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 456 V(s_itb_per_group) 457 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 458 V(s_desc_per_block) 459 /* s_resuid / s_resgid ? */ 460 fs->s_groups_count = (es->s_blocks_count - 461 es->s_first_data_block + 462 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 463 EXT2_BLOCKS_PER_GROUP(fs); 464 V(s_groups_count) 465 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 466 EXT2_DESC_PER_BLOCK(fs); 467 fs->s_db_per_group = db_count; 468 V(s_db_per_group) 469 470 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 471 M_EXT2MNT, M_WAITOK); 472 473 /* adjust logic_sb_block */ 474 if(fs->s_blocksize > SBSIZE) 475 /* Godmar thinks: if the blocksize is greater than 1024, then 476 the superblock is logically part of block zero. 477 */ 478 logic_sb_block = 0; 479 480 for (i = 0; i < db_count; i++) { 481 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 482 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 483 if(error) { 484 for (j = 0; j < i; j++) 485 brelse(fs->s_group_desc[j]); 486 bsd_free(fs->s_group_desc, M_EXT2MNT); 487 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 488 return EIO; 489 } 490 LCK_BUF(fs->s_group_desc[i]) 491 } 492 if(!ext2_check_descriptors(fs)) { 493 for (j = 0; j < db_count; j++) 494 ULCK_BUF(fs->s_group_desc[j]) 495 bsd_free(fs->s_group_desc, M_EXT2MNT); 496 printf("EXT2-fs: (ext2_check_descriptors failure) " 497 "unable to read group descriptors\n"); 498 return EIO; 499 } 500 501 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 502 fs->s_inode_bitmap_number[i] = 0; 503 fs->s_inode_bitmap[i] = NULL; 504 fs->s_block_bitmap_number[i] = 0; 505 fs->s_block_bitmap[i] = NULL; 506 } 507 fs->s_loaded_inode_bitmaps = 0; 508 fs->s_loaded_block_bitmaps = 0; 509 return 0; 510} 511 512/* 513 * Reload all incore data for a filesystem (used after running fsck on 514 * the root filesystem and finding things to fix). The filesystem must 515 * be mounted read-only. 516 * 517 * Things to do to update the mount: 518 * 1) invalidate all cached meta-data. 519 * 2) re-read superblock from disk. 520 * 3) re-read summary information from disk. 521 * 4) invalidate all inactive vnodes. 522 * 5) invalidate all cached file data. 523 * 6) re-read inode data for all active vnodes. 524 */ 525static int 526ext2_reload(mp, cred, td) 527 struct mount *mp; 528 struct ucred *cred; 529 struct thread *td; 530{ 531 struct vnode *vp, *nvp, *devvp; 532 struct inode *ip; 533 struct buf *bp; 534 struct ext2_super_block * es; 535 struct ext2_sb_info *fs; 536 int error; 537 538 if ((mp->mnt_flag & MNT_RDONLY) == 0) 539 return (EINVAL); 540 /* 541 * Step 1: invalidate all cached meta-data. 542 */ 543 devvp = VFSTOEXT2(mp)->um_devvp; 544 if (vinvalbuf(devvp, 0, cred, td, 0, 0)) 545 panic("ext2_reload: dirty1"); 546 /* 547 * Step 2: re-read superblock from disk. 548 * constants have been adjusted for ext2 549 */ 550 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 551 return (error); 552 es = (struct ext2_super_block *)bp->b_data; 553 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 554 brelse(bp); 555 return (EIO); /* XXX needs translation */ 556 } 557 fs = VFSTOEXT2(mp)->um_e2fs; 558 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 559 560 if((error = compute_sb_data(devvp, es, fs)) != 0) { 561 brelse(bp); 562 return error; 563 } 564#ifdef UNKLAR 565 if (fs->fs_sbsize < SBSIZE) 566 bp->b_flags |= B_INVAL; 567#endif 568 brelse(bp); 569 570loop: 571 MNT_ILOCK(mp); 572 for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { 573 if (vp->v_mount != mp) { 574 MNT_IUNLOCK(mp); 575 goto loop; 576 } 577 nvp = TAILQ_NEXT(vp, v_nmntvnodes); 578 VI_LOCK(vp); 579 if (vp->v_iflag & VI_XLOCK) { 580 VI_UNLOCK(vp); 581 continue; 582 } 583 MNT_IUNLOCK(mp); 584 /* 585 * Step 4: invalidate all inactive vnodes. 586 */ 587 if (vp->v_usecount == 0) { 588 vgonel(vp, td); 589 goto loop; 590 } 591 /* 592 * Step 5: invalidate all cached file data. 593 */ 594 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 595 goto loop; 596 } 597 if (vinvalbuf(vp, 0, cred, td, 0, 0)) 598 panic("ext2_reload: dirty2"); 599 /* 600 * Step 6: re-read inode data for all active vnodes. 601 */ 602 ip = VTOI(vp); 603 error = 604 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 605 (int)fs->s_blocksize, NOCRED, &bp); 606 if (error) { 607 VOP_UNLOCK(vp, 0, td); 608 vrele(vp); 609 return (error); 610 } 611 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 612 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 613 brelse(bp); 614 VOP_UNLOCK(vp, 0, td); 615 vrele(vp); 616 MNT_ILOCK(mp); 617 } 618 MNT_IUNLOCK(mp); 619 return (0); 620} 621 622/* 623 * Common code for mount and mountroot 624 */ 625static int 626ext2_mountfs(devvp, mp, td) 627 struct vnode *devvp; 628 struct mount *mp; 629 struct thread *td; 630{ 631 struct ext2mount *ump; 632 struct buf *bp; 633 struct ext2_sb_info *fs; 634 struct ext2_super_block * es; 635 dev_t dev = devvp->v_rdev; 636 int error; 637 int ronly; 638 639 /* 640 * Disallow multiple mounts of the same device. 641 * Disallow mounting of a device that is currently in use 642 * (except for root, which might share swap device for miniroot). 643 * Flush out any old buffers remaining from a previous use. 644 */ 645 if ((error = vfs_mountedon(devvp)) != 0) 646 return (error); 647 if (vcount(devvp) > 1 && devvp != rootvp) 648 return (EBUSY); 649 if ((error = vinvalbuf(devvp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) 650 return (error); 651#ifdef READONLY 652/* turn on this to force it to be read-only */ 653 mp->mnt_flag |= MNT_RDONLY; 654#endif 655 656 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 657 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 658 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td, -1); 659 VOP_UNLOCK(devvp, 0, td); 660 if (error) 661 return (error); 662 if (devvp->v_rdev->si_iosize_max != 0) 663 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 664 if (mp->mnt_iosize_max > MAXPHYS) 665 mp->mnt_iosize_max = MAXPHYS; 666 667 bp = NULL; 668 ump = NULL; 669 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 670 goto out; 671 es = (struct ext2_super_block *)bp->b_data; 672 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 673 error = EINVAL; /* XXX needs translation */ 674 goto out; 675 } 676 if ((es->s_state & EXT2_VALID_FS) == 0 || 677 (es->s_state & EXT2_ERROR_FS)) { 678 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 679 printf( 680"WARNING: Filesystem was not properly dismounted\n"); 681 } else { 682 printf( 683"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 684 error = EPERM; 685 goto out; 686 } 687 } 688 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 689 bzero((caddr_t)ump, sizeof *ump); 690 /* I don't know whether this is the right strategy. Note that 691 we dynamically allocate both an ext2_sb_info and an ext2_super_block 692 while Linux keeps the super block in a locked buffer 693 */ 694 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 695 M_EXT2MNT, M_WAITOK); 696 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 697 M_EXT2MNT, M_WAITOK); 698 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 699 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 700 goto out; 701 /* 702 * We don't free the group descriptors allocated by compute_sb_data() 703 * until ext2_unmount(). This is OK since the mount will succeed. 704 */ 705 brelse(bp); 706 bp = NULL; 707 fs = ump->um_e2fs; 708 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 709 /* if the fs is not mounted read-only, make sure the super block is 710 always written back on a sync() 711 */ 712 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 713 if (ronly == 0) { 714 fs->s_dirt = 1; /* mark it modified */ 715 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 716 } 717 mp->mnt_data = (qaddr_t)ump; 718 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 719 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 720 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 721 mp->mnt_flag |= MNT_LOCAL; 722 ump->um_mountp = mp; 723 ump->um_dev = dev; 724 ump->um_devvp = devvp; 725 /* setting those two parameters allowed us to use 726 ufs_bmap w/o changse ! 727 */ 728 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 729 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 730 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 731 devvp->v_rdev->si_mountpoint = mp; 732 if (ronly == 0) 733 ext2_sbupdate(ump, MNT_WAIT); 734 return (0); 735out: 736 if (bp) 737 brelse(bp); 738 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, td); 739 if (ump) { 740 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 741 bsd_free(ump->um_e2fs, M_EXT2MNT); 742 bsd_free(ump, M_EXT2MNT); 743 mp->mnt_data = (qaddr_t)0; 744 } 745 return (error); 746} 747 748/* 749 * unmount system call 750 */ 751static int 752ext2_unmount(mp, mntflags, td) 753 struct mount *mp; 754 int mntflags; 755 struct thread *td; 756{ 757 struct ext2mount *ump; 758 struct ext2_sb_info *fs; 759 int error, flags, ronly, i; 760 761 flags = 0; 762 if (mntflags & MNT_FORCE) { 763 if (mp->mnt_flag & MNT_ROOTFS) 764 return (EINVAL); 765 flags |= FORCECLOSE; 766 } 767 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 768 return (error); 769 ump = VFSTOEXT2(mp); 770 fs = ump->um_e2fs; 771 ronly = fs->s_rd_only; 772 if (ronly == 0) { 773 if (fs->s_wasvalid) 774 fs->s_es->s_state |= EXT2_VALID_FS; 775 ext2_sbupdate(ump, MNT_WAIT); 776 } 777 778 /* release buffers containing group descriptors */ 779 for(i = 0; i < fs->s_db_per_group; i++) 780 ULCK_BUF(fs->s_group_desc[i]) 781 bsd_free(fs->s_group_desc, M_EXT2MNT); 782 783 /* release cached inode/block bitmaps */ 784 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 785 if (fs->s_inode_bitmap[i]) 786 ULCK_BUF(fs->s_inode_bitmap[i]) 787 788 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 789 if (fs->s_block_bitmap[i]) 790 ULCK_BUF(fs->s_block_bitmap[i]) 791 792 ump->um_devvp->v_rdev->si_mountpoint = NULL; 793 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, 794 NOCRED, td); 795 vrele(ump->um_devvp); 796 bsd_free(fs->s_es, M_EXT2MNT); 797 bsd_free(fs, M_EXT2MNT); 798 bsd_free(ump, M_EXT2MNT); 799 mp->mnt_data = (qaddr_t)0; 800 mp->mnt_flag &= ~MNT_LOCAL; 801 return (error); 802} 803 804/* 805 * Flush out all the files in a filesystem. 806 */ 807static int 808ext2_flushfiles(mp, flags, td) 809 struct mount *mp; 810 int flags; 811 struct thread *td; 812{ 813 int error; 814 815 error = vflush(mp, 0, flags); 816 return (error); 817} 818 819/* 820 * Get file system statistics. 821 * taken from ext2/super.c ext2_statfs 822 */ 823static int 824ext2_statfs(mp, sbp, td) 825 struct mount *mp; 826 struct statfs *sbp; 827 struct thread *td; 828{ 829 unsigned long overhead; 830 struct ext2mount *ump; 831 struct ext2_sb_info *fs; 832 struct ext2_super_block *es; 833 int i, nsb; 834 835 ump = VFSTOEXT2(mp); 836 fs = ump->um_e2fs; 837 es = fs->s_es; 838 839 if (es->s_magic != EXT2_SUPER_MAGIC) 840 panic("ext2_statfs - magic number spoiled"); 841 842 /* 843 * Compute the overhead (FS structures) 844 */ 845 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 846 nsb = 0; 847 for (i = 0 ; i < fs->s_groups_count; i++) 848 if (ext2_group_sparse(i)) 849 nsb++; 850 } else 851 nsb = fs->s_groups_count; 852 overhead = es->s_first_data_block + 853 /* Superblocks and block group descriptors: */ 854 nsb * (1 + fs->s_db_per_group) + 855 /* Inode bitmap, block bitmap, and inode table: */ 856 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 857 858 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 859 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 860 sbp->f_blocks = es->s_blocks_count - overhead; 861 sbp->f_bfree = es->s_free_blocks_count; 862 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 863 sbp->f_files = es->s_inodes_count; 864 sbp->f_ffree = es->s_free_inodes_count; 865 if (sbp != &mp->mnt_stat) { 866 sbp->f_type = mp->mnt_vfc->vfc_typenum; 867 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 868 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 869 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 870 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 871 } 872 return (0); 873} 874 875/* 876 * Go through the disk queues to initiate sandbagged IO; 877 * go through the inodes to write those that have been modified; 878 * initiate the writing of the super block if it has been modified. 879 * 880 * Note: we are always called with the filesystem marked `MPBUSY'. 881 */ 882static int 883ext2_sync(mp, waitfor, cred, td) 884 struct mount *mp; 885 int waitfor; 886 struct ucred *cred; 887 struct thread *td; 888{ 889 struct vnode *nvp, *vp; 890 struct inode *ip; 891 struct ext2mount *ump = VFSTOEXT2(mp); 892 struct ext2_sb_info *fs; 893 int error, allerror = 0; 894 895 fs = ump->um_e2fs; 896 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 897 printf("fs = %s\n", fs->fs_fsmnt); 898 panic("ext2_sync: rofs mod"); 899 } 900 /* 901 * Write back each (modified) inode. 902 */ 903 MNT_ILOCK(mp); 904loop: 905 for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { 906 /* 907 * If the vnode that we are about to sync is no longer 908 * associated with this mount point, start over. 909 */ 910 if (vp->v_mount != mp) 911 goto loop; 912 nvp = TAILQ_NEXT(vp, v_nmntvnodes); 913 VI_LOCK(vp); 914 if (vp->v_iflag & VI_XLOCK) { 915 VI_UNLOCK(vp); 916 continue; 917 } 918 MNT_IUNLOCK(mp); 919 ip = VTOI(vp); 920 if (vp->v_type == VNON || 921 ((ip->i_flag & 922 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 923 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { 924 VI_UNLOCK(vp); 925 MNT_ILOCK(mp); 926 continue; 927 } 928 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 929 if (error) { 930 MNT_ILOCK(mp); 931 if (error == ENOENT) 932 goto loop; 933 continue; 934 } 935 if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) 936 allerror = error; 937 VOP_UNLOCK(vp, 0, td); 938 vrele(vp); 939 MNT_ILOCK(mp); 940 } 941 MNT_IUNLOCK(mp); 942 /* 943 * Force stale file system control information to be flushed. 944 */ 945 if (waitfor != MNT_LAZY) { 946 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 947 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, td)) != 0) 948 allerror = error; 949 VOP_UNLOCK(ump->um_devvp, 0, td); 950 } 951 /* 952 * Write back modified superblock. 953 */ 954 if (fs->s_dirt != 0) { 955 fs->s_dirt = 0; 956 fs->s_es->s_wtime = time_second; 957 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 958 allerror = error; 959 } 960 return (allerror); 961} 962 963/* 964 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 965 * in from disk. If it is in core, wait for the lock bit to clear, then 966 * return the inode locked. Detection and handling of mount points must be 967 * done by the calling routine. 968 */ 969static int 970ext2_vget(mp, ino, flags, vpp) 971 struct mount *mp; 972 ino_t ino; 973 int flags; 974 struct vnode **vpp; 975{ 976 struct ext2_sb_info *fs; 977 struct inode *ip; 978 struct ext2mount *ump; 979 struct buf *bp; 980 struct vnode *vp; 981 dev_t dev; 982 int i, error; 983 int used_blocks; 984 985 ump = VFSTOEXT2(mp); 986 dev = ump->um_dev; 987restart: 988 if ((error = ext2_ihashget(dev, ino, flags, vpp)) != 0) 989 return (error); 990 if (*vpp != NULL) 991 return (0); 992 993 /* 994 * Lock out the creation of new entries in the FFS hash table in 995 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 996 * may occur! 997 */ 998 if (ext2fs_inode_hash_lock) { 999 while (ext2fs_inode_hash_lock) { 1000 ext2fs_inode_hash_lock = -1; 1001 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 1002 } 1003 goto restart; 1004 } 1005 ext2fs_inode_hash_lock = 1; 1006 1007 /* 1008 * If this MALLOC() is performed after the getnewvnode() 1009 * it might block, leaving a vnode with a NULL v_data to be 1010 * found by ext2_sync() if a sync happens to fire right then, 1011 * which will cause a panic because ext2_sync() blindly 1012 * dereferences vp->v_data (as well it should). 1013 */ 1014 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 1015 1016 /* Allocate a new vnode/inode. */ 1017 if ((error = getnewvnode("ext2fs", mp, ext2_vnodeop_p, &vp)) != 0) { 1018 if (ext2fs_inode_hash_lock < 0) 1019 wakeup(&ext2fs_inode_hash_lock); 1020 ext2fs_inode_hash_lock = 0; 1021 *vpp = NULL; 1022 FREE(ip, M_EXT2NODE); 1023 return (error); 1024 } 1025 bzero((caddr_t)ip, sizeof(struct inode)); 1026 vp->v_data = ip; 1027 ip->i_vnode = vp; 1028 ip->i_e2fs = fs = ump->um_e2fs; 1029 ip->i_dev = dev; 1030 ip->i_number = ino; 1031 /* 1032 * Put it onto its hash chain and lock it so that other requests for 1033 * this inode will block if they arrive while we are sleeping waiting 1034 * for old data structures to be purged or for the contents of the 1035 * disk portion of this inode to be read. 1036 */ 1037 ext2_ihashins(ip); 1038 1039 if (ext2fs_inode_hash_lock < 0) 1040 wakeup(&ext2fs_inode_hash_lock); 1041 ext2fs_inode_hash_lock = 0; 1042 1043 /* Read in the disk contents for the inode, copy into the inode. */ 1044#if 0 1045printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1046#endif 1047 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1048 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1049 /* 1050 * The inode does not contain anything useful, so it would 1051 * be misleading to leave it on its hash chain. With mode 1052 * still zero, it will be unlinked and returned to the free 1053 * list by vput(). 1054 */ 1055 vput(vp); 1056 brelse(bp); 1057 *vpp = NULL; 1058 return (error); 1059 } 1060 /* convert ext2 inode to dinode */ 1061 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1062 ino_to_fsbo(fs, ino)), ip); 1063 ip->i_block_group = ino_to_cg(fs, ino); 1064 ip->i_next_alloc_block = 0; 1065 ip->i_next_alloc_goal = 0; 1066 ip->i_prealloc_count = 0; 1067 ip->i_prealloc_block = 0; 1068 /* now we want to make sure that block pointers for unused 1069 blocks are zeroed out - ext2_balloc depends on this 1070 although for regular files and directories only 1071 */ 1072 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1073 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1074 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1075 ip->i_db[i] = 0; 1076 } 1077/* 1078 ext2_print_inode(ip); 1079*/ 1080 brelse(bp); 1081 1082 /* 1083 * Initialize the vnode from the inode, check for aliases. 1084 * Note that the underlying vnode may have changed. 1085 */ 1086 if ((error = ext2_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { 1087 vput(vp); 1088 *vpp = NULL; 1089 return (error); 1090 } 1091 /* 1092 * Finish inode initialization now that aliasing has been resolved. 1093 */ 1094 ip->i_devvp = ump->um_devvp; 1095 VREF(ip->i_devvp); 1096 /* 1097 * Set up a generation number for this inode if it does not 1098 * already have one. This should only happen on old filesystems. 1099 */ 1100 if (ip->i_gen == 0) { 1101 ip->i_gen = random() / 2 + 1; 1102 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1103 ip->i_flag |= IN_MODIFIED; 1104 } 1105 *vpp = vp; 1106 return (0); 1107} 1108 1109/* 1110 * File handle to vnode 1111 * 1112 * Have to be really careful about stale file handles: 1113 * - check that the inode number is valid 1114 * - call ext2_vget() to get the locked inode 1115 * - check for an unallocated inode (i_mode == 0) 1116 * - check that the given client host has export rights and return 1117 * those rights via. exflagsp and credanonp 1118 */ 1119static int 1120ext2_fhtovp(mp, fhp, vpp) 1121 struct mount *mp; 1122 struct fid *fhp; 1123 struct vnode **vpp; 1124{ 1125 struct inode *ip; 1126 struct ufid *ufhp; 1127 struct vnode *nvp; 1128 struct ext2_sb_info *fs; 1129 int error; 1130 1131 ufhp = (struct ufid *)fhp; 1132 fs = VFSTOEXT2(mp)->um_e2fs; 1133 if (ufhp->ufid_ino < ROOTINO || 1134 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1135 return (ESTALE); 1136 1137 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1138 if (error) { 1139 *vpp = NULLVP; 1140 return (error); 1141 } 1142 ip = VTOI(nvp); 1143 if (ip->i_mode == 0 || 1144 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1145 vput(nvp); 1146 *vpp = NULLVP; 1147 return (ESTALE); 1148 } 1149 *vpp = nvp; 1150 return (0); 1151} 1152 1153/* 1154 * Vnode pointer to File handle 1155 */ 1156/* ARGSUSED */ 1157static int 1158ext2_vptofh(vp, fhp) 1159 struct vnode *vp; 1160 struct fid *fhp; 1161{ 1162 struct inode *ip; 1163 struct ufid *ufhp; 1164 1165 ip = VTOI(vp); 1166 ufhp = (struct ufid *)fhp; 1167 ufhp->ufid_len = sizeof(struct ufid); 1168 ufhp->ufid_ino = ip->i_number; 1169 ufhp->ufid_gen = ip->i_gen; 1170 return (0); 1171} 1172 1173/* 1174 * Write a superblock and associated information back to disk. 1175 */ 1176static int 1177ext2_sbupdate(mp, waitfor) 1178 struct ext2mount *mp; 1179 int waitfor; 1180{ 1181 struct ext2_sb_info *fs = mp->um_e2fs; 1182 struct ext2_super_block *es = fs->s_es; 1183 struct buf *bp; 1184 int error = 0; 1185/* 1186printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1187*/ 1188 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1189 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1190 if (waitfor == MNT_WAIT) 1191 error = bwrite(bp); 1192 else 1193 bawrite(bp); 1194 1195 /* 1196 * The buffers for group descriptors, inode bitmaps and block bitmaps 1197 * are not busy at this point and are (hopefully) written by the 1198 * usual sync mechanism. No need to write them here 1199 */ 1200 1201 return (error); 1202} 1203 1204/* 1205 * Return the root of a filesystem. 1206 */ 1207static int 1208ext2_root(mp, vpp) 1209 struct mount *mp; 1210 struct vnode **vpp; 1211{ 1212 struct vnode *nvp; 1213 int error; 1214 1215 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1216 if (error) 1217 return (error); 1218 *vpp = nvp; 1219 return (0); 1220} 1221 1222static int 1223ext2_init(struct vfsconf *vfsp) 1224{ 1225 1226 ext2_ihashinit(); 1227 return (0); 1228} 1229 1230static int 1231ext2_uninit(struct vfsconf *vfsp) 1232{ 1233 1234 ext2_ihashuninit(); 1235 return (0); 1236} 1237