/* ext2_vfsops.c revision 151897 */
1/*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 151897 2005-10-31 15:41:29Z rwatson $ 37 */ 38 39/*- 40 * COPYRIGHT.INFO says this has some GPL'd code from ext2_super.c in it 41 * 42 * This program is free software; you can redistribute it and/or modify 43 * it under the terms of the GNU General Public License as published by 44 * the Free Software Foundation; either version 2 of the License. 45 * 46 * This program is distributed in the hope that it will be useful, 47 * but WITHOUT ANY WARRANTY; without even the implied warranty of 48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 49 * GNU General Public License for more details. 50 * 51 * You should have received a copy of the GNU General Public License 52 * along with this program; if not, write to the Free Software 53 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 54 * 55 */ 56 57#include <sys/param.h> 58#include <sys/systm.h> 59#include <sys/namei.h> 60#include <sys/proc.h> 61#include <sys/kernel.h> 62#include <sys/vnode.h> 63#include <sys/mount.h> 64#include <sys/bio.h> 65#include <sys/buf.h> 66#include <sys/conf.h> 67#include <sys/fcntl.h> 68#include <sys/malloc.h> 69#include <sys/stat.h> 70#include <sys/mutex.h> 71 72#include <geom/geom.h> 73#include <geom/geom_vfs.h> 74 75#include <gnu/fs/ext2fs/ext2_mount.h> 76#include <gnu/fs/ext2fs/inode.h> 77 78#include <gnu/fs/ext2fs/fs.h> 79#include <gnu/fs/ext2fs/ext2_extern.h> 80#include <gnu/fs/ext2fs/ext2_fs.h> 81#include <gnu/fs/ext2fs/ext2_fs_sb.h> 82 83static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 84static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 85static int ext2_reload(struct mount *mp, struct thread *td); 86static int ext2_sbupdate(struct ext2mount *, int); 87 88static vfs_unmount_t ext2_unmount; 89static vfs_root_t ext2_root; 90static vfs_statfs_t ext2_statfs; 91static vfs_sync_t ext2_sync; 92static vfs_vget_t ext2_vget; 
93static vfs_fhtovp_t ext2_fhtovp; 94static vfs_vptofh_t ext2_vptofh; 95static vfs_mount_t ext2_mount; 96 97MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 98static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 99 100static struct vfsops ext2fs_vfsops = { 101 .vfs_fhtovp = ext2_fhtovp, 102 .vfs_mount = ext2_mount, 103 .vfs_root = ext2_root, /* root inode via vget */ 104 .vfs_statfs = ext2_statfs, 105 .vfs_sync = ext2_sync, 106 .vfs_unmount = ext2_unmount, 107 .vfs_vget = ext2_vget, 108 .vfs_vptofh = ext2_vptofh, 109}; 110 111VFS_SET(ext2fs_vfsops, ext2fs, 0); 112 113#define bsd_malloc malloc 114#define bsd_free free 115 116static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 117 int ronly); 118static int compute_sb_data(struct vnode * devvp, 119 struct ext2_super_block * es, struct ext2_sb_info * fs); 120 121static const char *ext2_opts[] = { "from", "export" }; 122/* 123 * VFS Operations. 124 * 125 * mount system call 126 */ 127static int 128ext2_mount(mp, td) 129 struct mount *mp; 130 struct thread *td; 131{ 132 struct export_args *export; 133 struct vfsoptlist *opts; 134 struct vnode *devvp; 135 struct ext2mount *ump = 0; 136 struct ext2_sb_info *fs; 137 char *path, *fspec; 138 int error, flags, len; 139 mode_t accessmode; 140 struct nameidata nd, *ndp = &nd; 141 142 opts = mp->mnt_optnew; 143 144 if (vfs_filteropt(opts, ext2_opts)) 145 return (EINVAL); 146 147 vfs_getopt(opts, "fspath", (void **)&path, NULL); 148 /* Double-check the length of path.. */ 149 if (strlen(path) >= MAXMNTLEN - 1) 150 return (ENAMETOOLONG); 151 152 fspec = NULL; 153 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 154 if (!error && fspec[len - 1] != '\0') 155 return (EINVAL); 156 157 /* 158 * If updating, check whether changing from read-only to 159 * read/write; if there is no device name, that's all we do. 
160 */ 161 if (mp->mnt_flag & MNT_UPDATE) { 162 ump = VFSTOEXT2(mp); 163 fs = ump->um_e2fs; 164 error = 0; 165 if (fs->s_rd_only == 0 && 166 vfs_flagopt(opts, "ro", NULL, 0)) { 167 error = VFS_SYNC(mp, MNT_WAIT, td); 168 if (error) 169 return (error); 170 flags = WRITECLOSE; 171 if (mp->mnt_flag & MNT_FORCE) 172 flags |= FORCECLOSE; 173 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 174 return (EBUSY); 175 error = ext2_flushfiles(mp, flags, td); 176 vfs_unbusy(mp, td); 177 if (!error && fs->s_wasvalid) { 178 fs->s_es->s_state |= EXT2_VALID_FS; 179 ext2_sbupdate(ump, MNT_WAIT); 180 } 181 fs->s_rd_only = 1; 182 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 183 DROP_GIANT(); 184 g_topology_lock(); 185 g_access(ump->um_cp, 0, -1, 0); 186 g_topology_unlock(); 187 PICKUP_GIANT(); 188 } 189 if (!error && (mp->mnt_flag & MNT_RELOAD)) 190 error = ext2_reload(mp, td); 191 if (error) 192 return (error); 193 devvp = ump->um_devvp; 194 if (fs->s_rd_only && !vfs_flagopt(opts, "ro", NULL, 0)) { 195 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 0)) 196 return (EPERM); 197 /* 198 * If upgrade to read-write by non-root, then verify 199 * that user has necessary permissions on the device. 200 */ 201 if (suser(td)) { 202 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 203 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 204 td->td_ucred, td)) != 0) { 205 VOP_UNLOCK(devvp, 0, td); 206 return (error); 207 } 208 VOP_UNLOCK(devvp, 0, td); 209 } 210 DROP_GIANT(); 211 g_topology_lock(); 212 error = g_access(ump->um_cp, 0, 1, 0); 213 g_topology_unlock(); 214 PICKUP_GIANT(); 215 if (error) 216 return (error); 217 218 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 219 (fs->s_es->s_state & EXT2_ERROR_FS)) { 220 if (mp->mnt_flag & MNT_FORCE) { 221 printf( 222"WARNING: %s was not properly dismounted\n", 223 fs->fs_fsmnt); 224 } else { 225 printf( 226"WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", 227 fs->fs_fsmnt); 228 return (EPERM); 229 } 230 } 231 fs->s_es->s_state &= ~EXT2_VALID_FS; 232 ext2_sbupdate(ump, MNT_WAIT); 233 fs->s_rd_only = 0; 234 mp->mnt_flag &= ~MNT_RDONLY; 235 } 236 if (fspec == NULL) { 237 error = vfs_getopt(opts, "export", (void **)&export, 238 &len); 239 if (error || len != sizeof(struct export_args)) 240 return (EINVAL); 241 /* Process export requests. */ 242 return (vfs_export(mp, export)); 243 } 244 } 245 /* 246 * Not an update, or updating the name: look up the name 247 * and verify that it refers to a sensible disk device. 248 */ 249 if (fspec == NULL) 250 return (EINVAL); 251 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); 252 if ((error = namei(ndp)) != 0) 253 return (error); 254 NDFREE(ndp, NDF_ONLY_PNBUF); 255 devvp = ndp->ni_vp; 256 257 if (!vn_isdisk(devvp, &error)) { 258 vput(devvp); 259 return (error); 260 } 261 262 /* 263 * If mount by non-root, then verify that user has necessary 264 * permissions on the device. 265 */ 266 if (suser(td)) { 267 accessmode = VREAD; 268 if ((mp->mnt_flag & MNT_RDONLY) == 0) 269 accessmode |= VWRITE; 270 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 271 vput(devvp); 272 return (error); 273 } 274 } 275 276 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 277 error = ext2_mountfs(devvp, mp, td); 278 } else { 279 if (devvp != ump->um_devvp) { 280 vput(devvp); 281 return (EINVAL); /* needs translation */ 282 } else 283 vput(devvp); 284 } 285 if (error) { 286 vrele(devvp); 287 return (error); 288 } 289 ump = VFSTOEXT2(mp); 290 fs = ump->um_e2fs; 291 /* 292 * Note that this strncpy() is ok because of a check at the start 293 * of ext2_mount(). 
294 */ 295 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 296 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 297 vfs_mountedfrom(mp, fspec); 298 return (0); 299} 300 301/* 302 * checks that the data in the descriptor blocks make sense 303 * this is taken from ext2/super.c 304 */ 305static int ext2_check_descriptors (struct ext2_sb_info * sb) 306{ 307 int i; 308 int desc_block = 0; 309 unsigned long block = sb->s_es->s_first_data_block; 310 struct ext2_group_desc * gdp = NULL; 311 312 /* ext2_debug ("Checking group descriptors"); */ 313 314 for (i = 0; i < sb->s_groups_count; i++) 315 { 316 /* examine next descriptor block */ 317 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 318 gdp = (struct ext2_group_desc *) 319 sb->s_group_desc[desc_block++]->b_data; 320 if (gdp->bg_block_bitmap < block || 321 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 322 { 323 printf ("ext2_check_descriptors: " 324 "Block bitmap for group %d" 325 " not in group (block %lu)!\n", 326 i, (unsigned long) gdp->bg_block_bitmap); 327 return 0; 328 } 329 if (gdp->bg_inode_bitmap < block || 330 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 331 { 332 printf ("ext2_check_descriptors: " 333 "Inode bitmap for group %d" 334 " not in group (block %lu)!\n", 335 i, (unsigned long) gdp->bg_inode_bitmap); 336 return 0; 337 } 338 if (gdp->bg_inode_table < block || 339 gdp->bg_inode_table + sb->s_itb_per_group >= 340 block + EXT2_BLOCKS_PER_GROUP(sb)) 341 { 342 printf ("ext2_check_descriptors: " 343 "Inode table for group %d" 344 " not in group (block %lu)!\n", 345 i, (unsigned long) gdp->bg_inode_table); 346 return 0; 347 } 348 block += EXT2_BLOCKS_PER_GROUP(sb); 349 gdp++; 350 } 351 return 1; 352} 353 354static int 355ext2_check_sb_compat(es, dev, ronly) 356 struct ext2_super_block *es; 357 struct cdev *dev; 358 int ronly; 359{ 360 361 if (es->s_magic != EXT2_SUPER_MAGIC) { 362 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 363 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 364 return (1); 
365 } 366 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 367 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 368 printf( 369"WARNING: mount of %s denied due to unsupported optional features\n", 370 devtoname(dev)); 371 return (1); 372 } 373 if (!ronly && 374 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 375 printf( 376"WARNING: R/W mount of %s denied due to unsupported optional features\n", 377 devtoname(dev)); 378 return (1); 379 } 380 } 381 return (0); 382} 383 384/* 385 * this computes the fields of the ext2_sb_info structure from the 386 * data in the ext2_super_block structure read in 387 */ 388static int compute_sb_data(devvp, es, fs) 389 struct vnode * devvp; 390 struct ext2_super_block * es; 391 struct ext2_sb_info * fs; 392{ 393 int db_count, error; 394 int i, j; 395 int logic_sb_block = 1; /* XXX for now */ 396 397#if 1 398#define V(v) 399#else 400#define V(v) printf(#v"= %d\n", fs->v); 401#endif 402 403 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 404 V(s_blocksize) 405 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 406 V(s_bshift) 407 fs->s_fsbtodb = es->s_log_block_size + 1; 408 V(s_fsbtodb) 409 fs->s_qbmask = fs->s_blocksize - 1; 410 V(s_bmask) 411 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 412 V(s_blocksize_bits) 413 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 414 V(s_frag_size) 415 if (fs->s_frag_size) 416 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 417 V(s_frags_per_block) 418 fs->s_blocks_per_group = es->s_blocks_per_group; 419 V(s_blocks_per_group) 420 fs->s_frags_per_group = es->s_frags_per_group; 421 V(s_frags_per_group) 422 fs->s_inodes_per_group = es->s_inodes_per_group; 423 V(s_inodes_per_group) 424 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 425 V(s_inodes_per_block) 426 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 427 V(s_itb_per_group) 428 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct 
ext2_group_desc); 429 V(s_desc_per_block) 430 /* s_resuid / s_resgid ? */ 431 fs->s_groups_count = (es->s_blocks_count - 432 es->s_first_data_block + 433 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 434 EXT2_BLOCKS_PER_GROUP(fs); 435 V(s_groups_count) 436 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 437 EXT2_DESC_PER_BLOCK(fs); 438 fs->s_db_per_group = db_count; 439 V(s_db_per_group) 440 441 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 442 M_EXT2MNT, M_WAITOK); 443 444 /* adjust logic_sb_block */ 445 if(fs->s_blocksize > SBSIZE) 446 /* Godmar thinks: if the blocksize is greater than 1024, then 447 the superblock is logically part of block zero. 448 */ 449 logic_sb_block = 0; 450 451 for (i = 0; i < db_count; i++) { 452 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 453 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 454 if(error) { 455 for (j = 0; j < i; j++) 456 brelse(fs->s_group_desc[j]); 457 bsd_free(fs->s_group_desc, M_EXT2MNT); 458 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 459 return EIO; 460 } 461 LCK_BUF(fs->s_group_desc[i]) 462 } 463 if(!ext2_check_descriptors(fs)) { 464 for (j = 0; j < db_count; j++) 465 ULCK_BUF(fs->s_group_desc[j]) 466 bsd_free(fs->s_group_desc, M_EXT2MNT); 467 printf("EXT2-fs: (ext2_check_descriptors failure) " 468 "unable to read group descriptors\n"); 469 return EIO; 470 } 471 472 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 473 fs->s_inode_bitmap_number[i] = 0; 474 fs->s_inode_bitmap[i] = NULL; 475 fs->s_block_bitmap_number[i] = 0; 476 fs->s_block_bitmap[i] = NULL; 477 } 478 fs->s_loaded_inode_bitmaps = 0; 479 fs->s_loaded_block_bitmaps = 0; 480 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 481 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 482 fs->fs_maxfilesize = 0x7fffffff; 483 else 484 fs->fs_maxfilesize = 0x7fffffffffffffff; 485 return 0; 486} 487 488/* 489 * Reload all incore data for a filesystem (used after running fsck on 490 * the root 
filesystem and finding things to fix). The filesystem must 491 * be mounted read-only. 492 * 493 * Things to do to update the mount: 494 * 1) invalidate all cached meta-data. 495 * 2) re-read superblock from disk. 496 * 3) re-read summary information from disk. 497 * 4) invalidate all inactive vnodes. 498 * 5) invalidate all cached file data. 499 * 6) re-read inode data for all active vnodes. 500 */ 501static int 502ext2_reload(struct mount *mp, struct thread *td) 503{ 504 struct vnode *vp, *nvp, *devvp; 505 struct inode *ip; 506 struct buf *bp; 507 struct ext2_super_block * es; 508 struct ext2_sb_info *fs; 509 int error; 510 511 if ((mp->mnt_flag & MNT_RDONLY) == 0) 512 return (EINVAL); 513 /* 514 * Step 1: invalidate all cached meta-data. 515 */ 516 devvp = VFSTOEXT2(mp)->um_devvp; 517 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 518 if (vinvalbuf(devvp, 0, td, 0, 0) != 0) 519 panic("ext2_reload: dirty1"); 520 VOP_UNLOCK(devvp, 0, td); 521 522 /* 523 * Step 2: re-read superblock from disk. 524 * constants have been adjusted for ext2 525 */ 526 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 527 return (error); 528 es = (struct ext2_super_block *)bp->b_data; 529 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 530 brelse(bp); 531 return (EIO); /* XXX needs translation */ 532 } 533 fs = VFSTOEXT2(mp)->um_e2fs; 534 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 535 536 if((error = compute_sb_data(devvp, es, fs)) != 0) { 537 brelse(bp); 538 return error; 539 } 540#ifdef UNKLAR 541 if (fs->fs_sbsize < SBSIZE) 542 bp->b_flags |= B_INVAL; 543#endif 544 brelse(bp); 545 546loop: 547 MNT_ILOCK(mp); 548 MNT_VNODE_FOREACH(vp, mp, nvp) { 549 VI_LOCK(vp); 550 if (vp->v_iflag & VI_DOOMED) { 551 VI_UNLOCK(vp); 552 continue; 553 } 554 MNT_IUNLOCK(mp); 555 /* 556 * Step 4: invalidate all cached file data. 
557 */ 558 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 559 goto loop; 560 } 561 if (vinvalbuf(vp, 0, td, 0, 0)) 562 panic("ext2_reload: dirty2"); 563 /* 564 * Step 5: re-read inode data for all active vnodes. 565 */ 566 ip = VTOI(vp); 567 error = 568 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 569 (int)fs->s_blocksize, NOCRED, &bp); 570 if (error) { 571 VOP_UNLOCK(vp, 0, td); 572 vrele(vp); 573 return (error); 574 } 575 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 576 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 577 brelse(bp); 578 VOP_UNLOCK(vp, 0, td); 579 vrele(vp); 580 MNT_ILOCK(mp); 581 } 582 MNT_IUNLOCK(mp); 583 return (0); 584} 585 586/* 587 * Common code for mount and mountroot 588 */ 589static int 590ext2_mountfs(devvp, mp, td) 591 struct vnode *devvp; 592 struct mount *mp; 593 struct thread *td; 594{ 595 struct ext2mount *ump; 596 struct buf *bp; 597 struct ext2_sb_info *fs; 598 struct ext2_super_block * es; 599 struct cdev *dev = devvp->v_rdev; 600 struct g_consumer *cp; 601 struct bufobj *bo; 602 int error; 603 int ronly; 604 605 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 606 /* XXX: use VOP_ACESS to check FS perms */ 607 DROP_GIANT(); 608 g_topology_lock(); 609 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 610 g_topology_unlock(); 611 PICKUP_GIANT(); 612 VOP_UNLOCK(devvp, 0, td); 613 if (error) 614 return (error); 615 616 /* XXX: should we check for some sectorsize or 512 instead? 
*/ 617 if (((SBSIZE % cp->provider->sectorsize) != 0) || 618 (SBSIZE < cp->provider->sectorsize)) { 619 DROP_GIANT(); 620 g_topology_lock(); 621 g_vfs_close(cp, td); 622 g_topology_unlock(); 623 PICKUP_GIANT(); 624 return (EINVAL); 625 } 626 627 bo = &devvp->v_bufobj; 628 bo->bo_private = cp; 629 bo->bo_ops = g_vfs_bufops; 630 if (devvp->v_rdev->si_iosize_max != 0) 631 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 632 if (mp->mnt_iosize_max > MAXPHYS) 633 mp->mnt_iosize_max = MAXPHYS; 634 635 bp = NULL; 636 ump = NULL; 637 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 638 goto out; 639 es = (struct ext2_super_block *)bp->b_data; 640 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 641 error = EINVAL; /* XXX needs translation */ 642 goto out; 643 } 644 if ((es->s_state & EXT2_VALID_FS) == 0 || 645 (es->s_state & EXT2_ERROR_FS)) { 646 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 647 printf( 648"WARNING: Filesystem was not properly dismounted\n"); 649 } else { 650 printf( 651"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 652 error = EPERM; 653 goto out; 654 } 655 } 656 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 657 bzero((caddr_t)ump, sizeof *ump); 658 /* I don't know whether this is the right strategy. Note that 659 we dynamically allocate both an ext2_sb_info and an ext2_super_block 660 while Linux keeps the super block in a locked buffer 661 */ 662 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 663 M_EXT2MNT, M_WAITOK); 664 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 665 M_EXT2MNT, M_WAITOK); 666 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 667 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 668 goto out; 669 /* 670 * We don't free the group descriptors allocated by compute_sb_data() 671 * until ext2_unmount(). This is OK since the mount will succeed. 
672 */ 673 brelse(bp); 674 bp = NULL; 675 fs = ump->um_e2fs; 676 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 677 /* if the fs is not mounted read-only, make sure the super block is 678 always written back on a sync() 679 */ 680 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 681 if (ronly == 0) { 682 fs->s_dirt = 1; /* mark it modified */ 683 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 684 } 685 mp->mnt_data = (qaddr_t)ump; 686 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 687 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 688 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 689 mp->mnt_flag |= MNT_LOCAL; 690 ump->um_mountp = mp; 691 ump->um_dev = dev; 692 ump->um_devvp = devvp; 693 ump->um_bo = &devvp->v_bufobj; 694 ump->um_cp = cp; 695 /* setting those two parameters allowed us to use 696 ufs_bmap w/o changse ! 697 */ 698 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 699 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 700 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 701 if (ronly == 0) 702 ext2_sbupdate(ump, MNT_WAIT); 703 return (0); 704out: 705 if (bp) 706 brelse(bp); 707 if (cp != NULL) { 708 DROP_GIANT(); 709 g_topology_lock(); 710 g_vfs_close(cp, td); 711 g_topology_unlock(); 712 PICKUP_GIANT(); 713 } 714 if (ump) { 715 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 716 bsd_free(ump->um_e2fs, M_EXT2MNT); 717 bsd_free(ump, M_EXT2MNT); 718 mp->mnt_data = (qaddr_t)0; 719 } 720 return (error); 721} 722 723/* 724 * unmount system call 725 */ 726static int 727ext2_unmount(mp, mntflags, td) 728 struct mount *mp; 729 int mntflags; 730 struct thread *td; 731{ 732 struct ext2mount *ump; 733 struct ext2_sb_info *fs; 734 int error, flags, ronly, i; 735 736 flags = 0; 737 if (mntflags & MNT_FORCE) { 738 if (mp->mnt_flag & MNT_ROOTFS) 739 return (EINVAL); 740 flags |= FORCECLOSE; 741 } 742 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 743 return (error); 744 ump = VFSTOEXT2(mp); 745 fs = ump->um_e2fs; 746 ronly = fs->s_rd_only; 
747 if (ronly == 0) { 748 if (fs->s_wasvalid) 749 fs->s_es->s_state |= EXT2_VALID_FS; 750 ext2_sbupdate(ump, MNT_WAIT); 751 } 752 753 /* release buffers containing group descriptors */ 754 for(i = 0; i < fs->s_db_per_group; i++) 755 ULCK_BUF(fs->s_group_desc[i]) 756 bsd_free(fs->s_group_desc, M_EXT2MNT); 757 758 /* release cached inode/block bitmaps */ 759 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 760 if (fs->s_inode_bitmap[i]) 761 ULCK_BUF(fs->s_inode_bitmap[i]) 762 763 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 764 if (fs->s_block_bitmap[i]) 765 ULCK_BUF(fs->s_block_bitmap[i]) 766 767 DROP_GIANT(); 768 g_topology_lock(); 769 g_vfs_close(ump->um_cp, td); 770 g_topology_unlock(); 771 PICKUP_GIANT(); 772 vrele(ump->um_devvp); 773 bsd_free(fs->s_es, M_EXT2MNT); 774 bsd_free(fs, M_EXT2MNT); 775 bsd_free(ump, M_EXT2MNT); 776 mp->mnt_data = (qaddr_t)0; 777 mp->mnt_flag &= ~MNT_LOCAL; 778 return (error); 779} 780 781/* 782 * Flush out all the files in a filesystem. 783 */ 784static int 785ext2_flushfiles(mp, flags, td) 786 struct mount *mp; 787 int flags; 788 struct thread *td; 789{ 790 int error; 791 792 error = vflush(mp, 0, flags, td); 793 return (error); 794} 795 796/* 797 * Get file system statistics. 
798 * taken from ext2/super.c ext2_statfs 799 */ 800static int 801ext2_statfs(mp, sbp, td) 802 struct mount *mp; 803 struct statfs *sbp; 804 struct thread *td; 805{ 806 unsigned long overhead; 807 struct ext2mount *ump; 808 struct ext2_sb_info *fs; 809 struct ext2_super_block *es; 810 int i, nsb; 811 812 ump = VFSTOEXT2(mp); 813 fs = ump->um_e2fs; 814 es = fs->s_es; 815 816 if (es->s_magic != EXT2_SUPER_MAGIC) 817 panic("ext2_statfs - magic number spoiled"); 818 819 /* 820 * Compute the overhead (FS structures) 821 */ 822 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 823 nsb = 0; 824 for (i = 0 ; i < fs->s_groups_count; i++) 825 if (ext2_group_sparse(i)) 826 nsb++; 827 } else 828 nsb = fs->s_groups_count; 829 overhead = es->s_first_data_block + 830 /* Superblocks and block group descriptors: */ 831 nsb * (1 + fs->s_db_per_group) + 832 /* Inode bitmap, block bitmap, and inode table: */ 833 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 834 835 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 836 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 837 sbp->f_blocks = es->s_blocks_count - overhead; 838 sbp->f_bfree = es->s_free_blocks_count; 839 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 840 sbp->f_files = es->s_inodes_count; 841 sbp->f_ffree = es->s_free_inodes_count; 842 return (0); 843} 844 845/* 846 * Go through the disk queues to initiate sandbagged IO; 847 * go through the inodes to write those that have been modified; 848 * initiate the writing of the super block if it has been modified. 849 * 850 * Note: we are always called with the filesystem marked `MPBUSY'. 
851 */ 852static int 853ext2_sync(mp, waitfor, td) 854 struct mount *mp; 855 int waitfor; 856 struct thread *td; 857{ 858 struct vnode *nvp, *vp; 859 struct inode *ip; 860 struct ext2mount *ump = VFSTOEXT2(mp); 861 struct ext2_sb_info *fs; 862 int error, allerror = 0; 863 864 fs = ump->um_e2fs; 865 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 866 printf("fs = %s\n", fs->fs_fsmnt); 867 panic("ext2_sync: rofs mod"); 868 } 869 /* 870 * Write back each (modified) inode. 871 */ 872 MNT_ILOCK(mp); 873loop: 874 MNT_VNODE_FOREACH(vp, mp, nvp) { 875 VI_LOCK(vp); 876 if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) { 877 VI_UNLOCK(vp); 878 continue; 879 } 880 MNT_IUNLOCK(mp); 881 ip = VTOI(vp); 882 if ((ip->i_flag & 883 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 884 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 885 waitfor == MNT_LAZY)) { 886 VI_UNLOCK(vp); 887 MNT_ILOCK(mp); 888 continue; 889 } 890 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 891 if (error) { 892 MNT_ILOCK(mp); 893 if (error == ENOENT) 894 goto loop; 895 continue; 896 } 897 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 898 allerror = error; 899 VOP_UNLOCK(vp, 0, td); 900 vrele(vp); 901 MNT_ILOCK(mp); 902 } 903 MNT_IUNLOCK(mp); 904 /* 905 * Force stale file system control information to be flushed. 906 */ 907 if (waitfor != MNT_LAZY) { 908 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 909 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 910 allerror = error; 911 VOP_UNLOCK(ump->um_devvp, 0, td); 912 } 913 /* 914 * Write back modified superblock. 915 */ 916 if (fs->s_dirt != 0) { 917 fs->s_dirt = 0; 918 fs->s_es->s_wtime = time_second; 919 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 920 allerror = error; 921 } 922 return (allerror); 923} 924 925/* 926 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 927 * in from disk. If it is in core, wait for the lock bit to clear, then 928 * return the inode locked. 
Detection and handling of mount points must be 929 * done by the calling routine. 930 */ 931static int 932ext2_vget(mp, ino, flags, vpp) 933 struct mount *mp; 934 ino_t ino; 935 int flags; 936 struct vnode **vpp; 937{ 938 struct ext2_sb_info *fs; 939 struct inode *ip; 940 struct ext2mount *ump; 941 struct buf *bp; 942 struct vnode *vp; 943 struct cdev *dev; 944 int i, error; 945 int used_blocks; 946 947 error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL); 948 if (error || *vpp != NULL) 949 return (error); 950 951 ump = VFSTOEXT2(mp); 952 dev = ump->um_dev; 953 954 /* 955 * If this MALLOC() is performed after the getnewvnode() 956 * it might block, leaving a vnode with a NULL v_data to be 957 * found by ext2_sync() if a sync happens to fire right then, 958 * which will cause a panic because ext2_sync() blindly 959 * dereferences vp->v_data (as well it should). 960 */ 961 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 962 963 /* Allocate a new vnode/inode. */ 964 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 965 *vpp = NULL; 966 free(ip, M_EXT2NODE); 967 return (error); 968 } 969 vp->v_data = ip; 970 ip->i_vnode = vp; 971 ip->i_e2fs = fs = ump->um_e2fs; 972 ip->i_number = ino; 973 974 error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL); 975 if (error || *vpp != NULL) 976 return (error); 977 978 /* Read in the disk contents for the inode, copy into the inode. */ 979#if 0 980printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 981#endif 982 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 983 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 984 /* 985 * The inode does not contain anything useful, so it would 986 * be misleading to leave it on its hash chain. With mode 987 * still zero, it will be unlinked and returned to the free 988 * list by vput(). 
989 */ 990 vput(vp); 991 brelse(bp); 992 *vpp = NULL; 993 return (error); 994 } 995 /* convert ext2 inode to dinode */ 996 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 997 ino_to_fsbo(fs, ino)), ip); 998 ip->i_block_group = ino_to_cg(fs, ino); 999 ip->i_next_alloc_block = 0; 1000 ip->i_next_alloc_goal = 0; 1001 ip->i_prealloc_count = 0; 1002 ip->i_prealloc_block = 0; 1003 /* now we want to make sure that block pointers for unused 1004 blocks are zeroed out - ext2_balloc depends on this 1005 although for regular files and directories only 1006 */ 1007 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1008 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1009 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1010 ip->i_db[i] = 0; 1011 } 1012/* 1013 ext2_print_inode(ip); 1014*/ 1015 brelse(bp); 1016 1017 /* 1018 * Initialize the vnode from the inode, check for aliases. 1019 * Note that the underlying vnode may have changed. 1020 */ 1021 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1022 vput(vp); 1023 *vpp = NULL; 1024 return (error); 1025 } 1026 /* 1027 * Finish inode initialization now that aliasing has been resolved. 1028 */ 1029 ip->i_devvp = ump->um_devvp; 1030 /* 1031 * Set up a generation number for this inode if it does not 1032 * already have one. This should only happen on old filesystems. 1033 */ 1034 if (ip->i_gen == 0) { 1035 ip->i_gen = random() / 2 + 1; 1036 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1037 ip->i_flag |= IN_MODIFIED; 1038 } 1039 *vpp = vp; 1040 return (0); 1041} 1042 1043/* 1044 * File handle to vnode 1045 * 1046 * Have to be really careful about stale file handles: 1047 * - check that the inode number is valid 1048 * - call ext2_vget() to get the locked inode 1049 * - check for an unallocated inode (i_mode == 0) 1050 * - check that the given client host has export rights and return 1051 * those rights via. 
exflagsp and credanonp 1052 */ 1053static int 1054ext2_fhtovp(mp, fhp, vpp) 1055 struct mount *mp; 1056 struct fid *fhp; 1057 struct vnode **vpp; 1058{ 1059 struct inode *ip; 1060 struct ufid *ufhp; 1061 struct vnode *nvp; 1062 struct ext2_sb_info *fs; 1063 int error; 1064 1065 ufhp = (struct ufid *)fhp; 1066 fs = VFSTOEXT2(mp)->um_e2fs; 1067 if (ufhp->ufid_ino < ROOTINO || 1068 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1069 return (ESTALE); 1070 1071 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1072 if (error) { 1073 *vpp = NULLVP; 1074 return (error); 1075 } 1076 ip = VTOI(nvp); 1077 if (ip->i_mode == 0 || 1078 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1079 vput(nvp); 1080 *vpp = NULLVP; 1081 return (ESTALE); 1082 } 1083 *vpp = nvp; 1084 vnode_create_vobject(*vpp, 0, curthread); 1085 return (0); 1086} 1087 1088/* 1089 * Vnode pointer to File handle 1090 */ 1091/* ARGSUSED */ 1092static int 1093ext2_vptofh(vp, fhp) 1094 struct vnode *vp; 1095 struct fid *fhp; 1096{ 1097 struct inode *ip; 1098 struct ufid *ufhp; 1099 1100 ip = VTOI(vp); 1101 ufhp = (struct ufid *)fhp; 1102 ufhp->ufid_len = sizeof(struct ufid); 1103 ufhp->ufid_ino = ip->i_number; 1104 ufhp->ufid_gen = ip->i_gen; 1105 return (0); 1106} 1107 1108/* 1109 * Write a superblock and associated information back to disk. 1110 */ 1111static int 1112ext2_sbupdate(mp, waitfor) 1113 struct ext2mount *mp; 1114 int waitfor; 1115{ 1116 struct ext2_sb_info *fs = mp->um_e2fs; 1117 struct ext2_super_block *es = fs->s_es; 1118 struct buf *bp; 1119 int error = 0; 1120/* 1121printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); 1122*/ 1123 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1124 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1125 if (waitfor == MNT_WAIT) 1126 error = bwrite(bp); 1127 else 1128 bawrite(bp); 1129 1130 /* 1131 * The buffers for group descriptors, inode bitmaps and block bitmaps 1132 * are not busy at this point and are (hopefully) written by the 1133 * usual sync mechanism. No need to write them here 1134 */ 1135 1136 return (error); 1137} 1138 1139/* 1140 * Return the root of a filesystem. 1141 */ 1142static int 1143ext2_root(mp, flags, vpp, td) 1144 struct mount *mp; 1145 int flags; 1146 struct vnode **vpp; 1147 struct thread *td; 1148{ 1149 struct vnode *nvp; 1150 int error; 1151 1152 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1153 if (error) 1154 return (error); 1155 *vpp = nvp; 1156 return (0); 1157} 1158