ext2_vfsops.c revision 187397
1/*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 187397 2009-01-18 15:10:46Z stas $ 37 */ 38 39/*- 40 * COPYRIGHT.INFO says this has some GPL'd code from ext2_super.c in it 41 * 42 * This program is free software; you can redistribute it and/or modify 43 * it under the terms of the GNU General Public License as published by 44 * the Free Software Foundation; either version 2 of the License. 45 * 46 * This program is distributed in the hope that it will be useful, 47 * but WITHOUT ANY WARRANTY; without even the implied warranty of 48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 49 * GNU General Public License for more details. 50 * 51 * You should have received a copy of the GNU General Public License 52 * along with this program; if not, write to the Free Software 53 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 54 * 55 */ 56 57#include <sys/param.h> 58#include <sys/systm.h> 59#include <sys/namei.h> 60#include <sys/priv.h> 61#include <sys/proc.h> 62#include <sys/kernel.h> 63#include <sys/vnode.h> 64#include <sys/mount.h> 65#include <sys/bio.h> 66#include <sys/buf.h> 67#include <sys/conf.h> 68#include <sys/fcntl.h> 69#include <sys/malloc.h> 70#include <sys/stat.h> 71#include <sys/mutex.h> 72 73#include <geom/geom.h> 74#include <geom/geom_vfs.h> 75 76#include <gnu/fs/ext2fs/ext2_mount.h> 77#include <gnu/fs/ext2fs/inode.h> 78 79#include <gnu/fs/ext2fs/fs.h> 80#include <gnu/fs/ext2fs/ext2_extern.h> 81#include <gnu/fs/ext2fs/ext2_fs.h> 82#include <gnu/fs/ext2fs/ext2_fs_sb.h> 83 84static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 85static int ext2_mountfs(struct vnode *, struct mount *); 86static int ext2_reload(struct mount *mp, struct thread *td); 87static int ext2_sbupdate(struct ext2mount *, int); 88 89static vfs_unmount_t ext2_unmount; 90static vfs_root_t ext2_root; 91static vfs_statfs_t ext2_statfs; 92static vfs_sync_t ext2_sync; 93static vfs_vget_t 
ext2_vget; 94static vfs_fhtovp_t ext2_fhtovp; 95static vfs_mount_t ext2_mount; 96 97MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 98static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 99 100static struct vfsops ext2fs_vfsops = { 101 .vfs_fhtovp = ext2_fhtovp, 102 .vfs_mount = ext2_mount, 103 .vfs_root = ext2_root, /* root inode via vget */ 104 .vfs_statfs = ext2_statfs, 105 .vfs_sync = ext2_sync, 106 .vfs_unmount = ext2_unmount, 107 .vfs_vget = ext2_vget, 108}; 109 110VFS_SET(ext2fs_vfsops, ext2fs, 0); 111 112#define bsd_malloc malloc 113#define bsd_free free 114 115static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 116 int ronly); 117static int compute_sb_data(struct vnode * devvp, 118 struct ext2_super_block * es, struct ext2_sb_info * fs); 119 120static const char *ext2_opts[] = { "from", "export", "acls", "noexec", 121 "noatime", "union", "suiddir", "multilabel", "nosymfollow", 122 "noclusterr", "noclusterw", "force", NULL }; 123 124/* 125 * VFS Operations. 126 * 127 * mount system call 128 */ 129static int 130ext2_mount(mp, td) 131 struct mount *mp; 132 struct thread *td; 133{ 134 struct vfsoptlist *opts; 135 struct vnode *devvp; 136 struct ext2mount *ump = 0; 137 struct ext2_sb_info *fs; 138 char *path, *fspec; 139 int error, flags, len; 140 accmode_t accmode; 141 struct nameidata nd, *ndp = &nd; 142 143 opts = mp->mnt_optnew; 144 145 if (vfs_filteropt(opts, ext2_opts)) 146 return (EINVAL); 147 148 vfs_getopt(opts, "fspath", (void **)&path, NULL); 149 /* Double-check the length of path.. */ 150 if (strlen(path) >= MAXMNTLEN - 1) 151 return (ENAMETOOLONG); 152 153 fspec = NULL; 154 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 155 if (!error && fspec[len - 1] != '\0') 156 return (EINVAL); 157 158 /* 159 * If updating, check whether changing from read-only to 160 * read/write; if there is no device name, that's all we do. 
161 */ 162 if (mp->mnt_flag & MNT_UPDATE) { 163 ump = VFSTOEXT2(mp); 164 fs = ump->um_e2fs; 165 error = 0; 166 if (fs->s_rd_only == 0 && 167 vfs_flagopt(opts, "ro", NULL, 0)) { 168 error = VFS_SYNC(mp, MNT_WAIT, td); 169 if (error) 170 return (error); 171 flags = WRITECLOSE; 172 if (mp->mnt_flag & MNT_FORCE) 173 flags |= FORCECLOSE; 174 if (vfs_busy(mp, MBF_NOWAIT)) 175 return (EBUSY); 176 error = ext2_flushfiles(mp, flags, td); 177 vfs_unbusy(mp); 178 if (!error && fs->s_wasvalid) { 179 fs->s_es->s_state |= EXT2_VALID_FS; 180 ext2_sbupdate(ump, MNT_WAIT); 181 } 182 fs->s_rd_only = 1; 183 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 184 DROP_GIANT(); 185 g_topology_lock(); 186 g_access(ump->um_cp, 0, -1, 0); 187 g_topology_unlock(); 188 PICKUP_GIANT(); 189 } 190 if (!error && (mp->mnt_flag & MNT_RELOAD)) 191 error = ext2_reload(mp, td); 192 if (error) 193 return (error); 194 devvp = ump->um_devvp; 195 if (fs->s_rd_only && !vfs_flagopt(opts, "ro", NULL, 0)) { 196 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 0)) 197 return (EPERM); 198 /* 199 * If upgrade to read-write by non-root, then verify 200 * that user has necessary permissions on the device. 201 */ 202 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 203 error = VOP_ACCESS(devvp, VREAD | VWRITE, 204 td->td_ucred, td); 205 if (error) 206 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 207 if (error) { 208 VOP_UNLOCK(devvp, 0); 209 return (error); 210 } 211 VOP_UNLOCK(devvp, 0); 212 DROP_GIANT(); 213 g_topology_lock(); 214 error = g_access(ump->um_cp, 0, 1, 0); 215 g_topology_unlock(); 216 PICKUP_GIANT(); 217 if (error) 218 return (error); 219 220 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 221 (fs->s_es->s_state & EXT2_ERROR_FS)) { 222 if (mp->mnt_flag & MNT_FORCE) { 223 printf( 224"WARNING: %s was not properly dismounted\n", 225 fs->fs_fsmnt); 226 } else { 227 printf( 228"WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", 229 fs->fs_fsmnt); 230 return (EPERM); 231 } 232 } 233 fs->s_es->s_state &= ~EXT2_VALID_FS; 234 ext2_sbupdate(ump, MNT_WAIT); 235 fs->s_rd_only = 0; 236 MNT_ILOCK(mp); 237 mp->mnt_flag &= ~MNT_RDONLY; 238 MNT_IUNLOCK(mp); 239 } 240 if (vfs_flagopt(opts, "export", NULL, 0)) { 241 /* Process export requests in vfs_mount.c. */ 242 return (error); 243 } 244 } 245 /* 246 * Not an update, or updating the name: look up the name 247 * and verify that it refers to a sensible disk device. 248 */ 249 if (fspec == NULL) 250 return (EINVAL); 251 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); 252 if ((error = namei(ndp)) != 0) 253 return (error); 254 NDFREE(ndp, NDF_ONLY_PNBUF); 255 devvp = ndp->ni_vp; 256 257 if (!vn_isdisk(devvp, &error)) { 258 vput(devvp); 259 return (error); 260 } 261 262 /* 263 * If mount by non-root, then verify that user has necessary 264 * permissions on the device. 265 * 266 * XXXRW: VOP_ACCESS() enough? 267 */ 268 accmode = VREAD; 269 if ((mp->mnt_flag & MNT_RDONLY) == 0) 270 accmode |= VWRITE; 271 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 272 if (error) 273 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 274 if (error) { 275 vput(devvp); 276 return (error); 277 } 278 279 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 280 error = ext2_mountfs(devvp, mp); 281 } else { 282 if (devvp != ump->um_devvp) { 283 vput(devvp); 284 return (EINVAL); /* needs translation */ 285 } else 286 vput(devvp); 287 } 288 if (error) { 289 vrele(devvp); 290 return (error); 291 } 292 ump = VFSTOEXT2(mp); 293 fs = ump->um_e2fs; 294 /* 295 * Note that this strncpy() is ok because of a check at the start 296 * of ext2_mount(). 
297 */ 298 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 299 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 300 vfs_mountedfrom(mp, fspec); 301 return (0); 302} 303 304/* 305 * checks that the data in the descriptor blocks make sense 306 * this is taken from ext2/super.c 307 */ 308static int ext2_check_descriptors (struct ext2_sb_info * sb) 309{ 310 int i; 311 int desc_block = 0; 312 unsigned long block = sb->s_es->s_first_data_block; 313 struct ext2_group_desc * gdp = NULL; 314 315 /* ext2_debug ("Checking group descriptors"); */ 316 317 for (i = 0; i < sb->s_groups_count; i++) 318 { 319 /* examine next descriptor block */ 320 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 321 gdp = (struct ext2_group_desc *) 322 sb->s_group_desc[desc_block++]->b_data; 323 if (gdp->bg_block_bitmap < block || 324 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 325 { 326 printf ("ext2_check_descriptors: " 327 "Block bitmap for group %d" 328 " not in group (block %lu)!\n", 329 i, (unsigned long) gdp->bg_block_bitmap); 330 return 0; 331 } 332 if (gdp->bg_inode_bitmap < block || 333 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 334 { 335 printf ("ext2_check_descriptors: " 336 "Inode bitmap for group %d" 337 " not in group (block %lu)!\n", 338 i, (unsigned long) gdp->bg_inode_bitmap); 339 return 0; 340 } 341 if (gdp->bg_inode_table < block || 342 gdp->bg_inode_table + sb->s_itb_per_group >= 343 block + EXT2_BLOCKS_PER_GROUP(sb)) 344 { 345 printf ("ext2_check_descriptors: " 346 "Inode table for group %d" 347 " not in group (block %lu)!\n", 348 i, (unsigned long) gdp->bg_inode_table); 349 return 0; 350 } 351 block += EXT2_BLOCKS_PER_GROUP(sb); 352 gdp++; 353 } 354 return 1; 355} 356 357static int 358ext2_check_sb_compat(es, dev, ronly) 359 struct ext2_super_block *es; 360 struct cdev *dev; 361 int ronly; 362{ 363 364 if (es->s_magic != EXT2_SUPER_MAGIC) { 365 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 366 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 367 return (1); 
368 } 369 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 370 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 371 printf( 372"WARNING: mount of %s denied due to unsupported optional features\n", 373 devtoname(dev)); 374 return (1); 375 } 376 if (!ronly && 377 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 378 printf( 379"WARNING: R/W mount of %s denied due to unsupported optional features\n", 380 devtoname(dev)); 381 return (1); 382 } 383 } 384 return (0); 385} 386 387/* 388 * this computes the fields of the ext2_sb_info structure from the 389 * data in the ext2_super_block structure read in 390 */ 391static int compute_sb_data(devvp, es, fs) 392 struct vnode * devvp; 393 struct ext2_super_block * es; 394 struct ext2_sb_info * fs; 395{ 396 int db_count, error; 397 int i, j; 398 int logic_sb_block = 1; /* XXX for now */ 399 400#if 1 401#define V(v) 402#else 403#define V(v) printf(#v"= %lu\n", (unsigned long)fs->v); 404#endif 405 406 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 407 V(s_blocksize) 408 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 409 V(s_bshift) 410 fs->s_fsbtodb = es->s_log_block_size + 1; 411 V(s_fsbtodb) 412 fs->s_qbmask = fs->s_blocksize - 1; 413 V(s_qbmask) 414 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 415 V(s_blocksize_bits) 416 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 417 V(s_frag_size) 418 if (fs->s_frag_size) 419 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 420 V(s_frags_per_block) 421 fs->s_blocks_per_group = es->s_blocks_per_group; 422 V(s_blocks_per_group) 423 fs->s_frags_per_group = es->s_frags_per_group; 424 V(s_frags_per_group) 425 fs->s_inodes_per_group = es->s_inodes_per_group; 426 V(s_inodes_per_group) 427 fs->s_inode_size = es->s_inode_size; 428 V(s_inode_size) 429 fs->s_first_inode = es->s_first_ino; 430 V(s_first_inode); 431 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE(fs); 432 V(s_inodes_per_block) 433 fs->s_itb_per_group 
= fs->s_inodes_per_group /fs->s_inodes_per_block; 434 V(s_itb_per_group) 435 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 436 V(s_desc_per_block) 437 /* s_resuid / s_resgid ? */ 438 fs->s_groups_count = (es->s_blocks_count - 439 es->s_first_data_block + 440 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 441 EXT2_BLOCKS_PER_GROUP(fs); 442 V(s_groups_count) 443 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 444 EXT2_DESC_PER_BLOCK(fs); 445 fs->s_db_per_group = db_count; 446 V(s_db_per_group) 447 448 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 449 M_EXT2MNT, M_WAITOK); 450 451 /* adjust logic_sb_block */ 452 if(fs->s_blocksize > SBSIZE) 453 /* Godmar thinks: if the blocksize is greater than 1024, then 454 the superblock is logically part of block zero. 455 */ 456 logic_sb_block = 0; 457 458 for (i = 0; i < db_count; i++) { 459 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 460 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 461 if(error) { 462 for (j = 0; j < i; j++) 463 brelse(fs->s_group_desc[j]); 464 bsd_free(fs->s_group_desc, M_EXT2MNT); 465 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 466 return EIO; 467 } 468 LCK_BUF(fs->s_group_desc[i]) 469 } 470 if(!ext2_check_descriptors(fs)) { 471 for (j = 0; j < db_count; j++) 472 ULCK_BUF(fs->s_group_desc[j]) 473 bsd_free(fs->s_group_desc, M_EXT2MNT); 474 printf("EXT2-fs: (ext2_check_descriptors failure) " 475 "unable to read group descriptors\n"); 476 return EIO; 477 } 478 479 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 480 fs->s_inode_bitmap_number[i] = 0; 481 fs->s_inode_bitmap[i] = NULL; 482 fs->s_block_bitmap_number[i] = 0; 483 fs->s_block_bitmap[i] = NULL; 484 } 485 fs->s_loaded_inode_bitmaps = 0; 486 fs->s_loaded_block_bitmaps = 0; 487 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 488 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 489 fs->fs_maxfilesize = 0x7fffffff; 490 else 491 fs->fs_maxfilesize = 
0x7fffffffffffffff; 492 return 0; 493} 494 495/* 496 * Reload all incore data for a filesystem (used after running fsck on 497 * the root filesystem and finding things to fix). The filesystem must 498 * be mounted read-only. 499 * 500 * Things to do to update the mount: 501 * 1) invalidate all cached meta-data. 502 * 2) re-read superblock from disk. 503 * 3) re-read summary information from disk. 504 * 4) invalidate all inactive vnodes. 505 * 5) invalidate all cached file data. 506 * 6) re-read inode data for all active vnodes. 507 */ 508static int 509ext2_reload(struct mount *mp, struct thread *td) 510{ 511 struct vnode *vp, *mvp, *devvp; 512 struct inode *ip; 513 struct buf *bp; 514 struct ext2_super_block * es; 515 struct ext2_sb_info *fs; 516 int error; 517 518 if ((mp->mnt_flag & MNT_RDONLY) == 0) 519 return (EINVAL); 520 /* 521 * Step 1: invalidate all cached meta-data. 522 */ 523 devvp = VFSTOEXT2(mp)->um_devvp; 524 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 525 if (vinvalbuf(devvp, 0, 0, 0) != 0) 526 panic("ext2_reload: dirty1"); 527 VOP_UNLOCK(devvp, 0); 528 529 /* 530 * Step 2: re-read superblock from disk. 531 * constants have been adjusted for ext2 532 */ 533 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 534 return (error); 535 es = (struct ext2_super_block *)bp->b_data; 536 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 537 brelse(bp); 538 return (EIO); /* XXX needs translation */ 539 } 540 fs = VFSTOEXT2(mp)->um_e2fs; 541 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 542 543 if((error = compute_sb_data(devvp, es, fs)) != 0) { 544 brelse(bp); 545 return error; 546 } 547#ifdef UNKLAR 548 if (fs->fs_sbsize < SBSIZE) 549 bp->b_flags |= B_INVAL; 550#endif 551 brelse(bp); 552 553loop: 554 MNT_ILOCK(mp); 555 MNT_VNODE_FOREACH(vp, mp, mvp) { 556 VI_LOCK(vp); 557 if (vp->v_iflag & VI_DOOMED) { 558 VI_UNLOCK(vp); 559 continue; 560 } 561 MNT_IUNLOCK(mp); 562 /* 563 * Step 4: invalidate all cached file data. 
564 */ 565 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 566 MNT_VNODE_FOREACH_ABORT(mp, mvp); 567 goto loop; 568 } 569 if (vinvalbuf(vp, 0, 0, 0)) 570 panic("ext2_reload: dirty2"); 571 /* 572 * Step 5: re-read inode data for all active vnodes. 573 */ 574 ip = VTOI(vp); 575 error = 576 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 577 (int)fs->s_blocksize, NOCRED, &bp); 578 if (error) { 579 VOP_UNLOCK(vp, 0); 580 vrele(vp); 581 MNT_VNODE_FOREACH_ABORT(mp, mvp); 582 return (error); 583 } 584 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 585 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); 586 brelse(bp); 587 VOP_UNLOCK(vp, 0); 588 vrele(vp); 589 MNT_ILOCK(mp); 590 } 591 MNT_IUNLOCK(mp); 592 return (0); 593} 594 595/* 596 * Common code for mount and mountroot 597 */ 598static int 599ext2_mountfs(devvp, mp) 600 struct vnode *devvp; 601 struct mount *mp; 602{ 603 struct ext2mount *ump; 604 struct buf *bp; 605 struct ext2_sb_info *fs; 606 struct ext2_super_block * es; 607 struct cdev *dev = devvp->v_rdev; 608 struct g_consumer *cp; 609 struct bufobj *bo; 610 int error; 611 int ronly; 612 613 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 614 /* XXX: use VOP_ACESS to check FS perms */ 615 DROP_GIANT(); 616 g_topology_lock(); 617 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 618 g_topology_unlock(); 619 PICKUP_GIANT(); 620 VOP_UNLOCK(devvp, 0); 621 if (error) 622 return (error); 623 624 /* XXX: should we check for some sectorsize or 512 instead? 
*/ 625 if (((SBSIZE % cp->provider->sectorsize) != 0) || 626 (SBSIZE < cp->provider->sectorsize)) { 627 DROP_GIANT(); 628 g_topology_lock(); 629 g_vfs_close(cp); 630 g_topology_unlock(); 631 PICKUP_GIANT(); 632 return (EINVAL); 633 } 634 635 bo = &devvp->v_bufobj; 636 bo->bo_private = cp; 637 bo->bo_ops = g_vfs_bufops; 638 if (devvp->v_rdev->si_iosize_max != 0) 639 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 640 if (mp->mnt_iosize_max > MAXPHYS) 641 mp->mnt_iosize_max = MAXPHYS; 642 643 bp = NULL; 644 ump = NULL; 645 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 646 goto out; 647 es = (struct ext2_super_block *)bp->b_data; 648 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 649 error = EINVAL; /* XXX needs translation */ 650 goto out; 651 } 652 if ((es->s_state & EXT2_VALID_FS) == 0 || 653 (es->s_state & EXT2_ERROR_FS)) { 654 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 655 printf( 656"WARNING: Filesystem was not properly dismounted\n"); 657 } else { 658 printf( 659"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 660 error = EPERM; 661 goto out; 662 } 663 } 664 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 665 bzero((caddr_t)ump, sizeof *ump); 666 /* I don't know whether this is the right strategy. Note that 667 we dynamically allocate both an ext2_sb_info and an ext2_super_block 668 while Linux keeps the super block in a locked buffer 669 */ 670 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 671 M_EXT2MNT, M_WAITOK); 672 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 673 M_EXT2MNT, M_WAITOK); 674 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 675 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 676 goto out; 677 /* 678 * We don't free the group descriptors allocated by compute_sb_data() 679 * until ext2_unmount(). This is OK since the mount will succeed. 
680 */ 681 brelse(bp); 682 bp = NULL; 683 fs = ump->um_e2fs; 684 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 685 /* if the fs is not mounted read-only, make sure the super block is 686 always written back on a sync() 687 */ 688 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 689 if (ronly == 0) { 690 fs->s_dirt = 1; /* mark it modified */ 691 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 692 } 693 mp->mnt_data = ump; 694 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 695 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 696 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 697 MNT_ILOCK(mp); 698 mp->mnt_flag |= MNT_LOCAL; 699 MNT_IUNLOCK(mp); 700 ump->um_mountp = mp; 701 ump->um_dev = dev; 702 ump->um_devvp = devvp; 703 ump->um_bo = &devvp->v_bufobj; 704 ump->um_cp = cp; 705 /* setting those two parameters allowed us to use 706 ufs_bmap w/o changse ! 707 */ 708 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 709 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 710 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 711 if (ronly == 0) 712 ext2_sbupdate(ump, MNT_WAIT); 713 return (0); 714out: 715 if (bp) 716 brelse(bp); 717 if (cp != NULL) { 718 DROP_GIANT(); 719 g_topology_lock(); 720 g_vfs_close(cp); 721 g_topology_unlock(); 722 PICKUP_GIANT(); 723 } 724 if (ump) { 725 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 726 bsd_free(ump->um_e2fs, M_EXT2MNT); 727 bsd_free(ump, M_EXT2MNT); 728 mp->mnt_data = NULL; 729 } 730 return (error); 731} 732 733/* 734 * unmount system call 735 */ 736static int 737ext2_unmount(mp, mntflags, td) 738 struct mount *mp; 739 int mntflags; 740 struct thread *td; 741{ 742 struct ext2mount *ump; 743 struct ext2_sb_info *fs; 744 int error, flags, ronly, i; 745 746 flags = 0; 747 if (mntflags & MNT_FORCE) { 748 if (mp->mnt_flag & MNT_ROOTFS) 749 return (EINVAL); 750 flags |= FORCECLOSE; 751 } 752 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 753 return (error); 754 ump = VFSTOEXT2(mp); 755 fs = ump->um_e2fs; 756 
ronly = fs->s_rd_only; 757 if (ronly == 0) { 758 if (fs->s_wasvalid) 759 fs->s_es->s_state |= EXT2_VALID_FS; 760 ext2_sbupdate(ump, MNT_WAIT); 761 } 762 763 /* release buffers containing group descriptors */ 764 for(i = 0; i < fs->s_db_per_group; i++) 765 ULCK_BUF(fs->s_group_desc[i]) 766 bsd_free(fs->s_group_desc, M_EXT2MNT); 767 768 /* release cached inode/block bitmaps */ 769 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 770 if (fs->s_inode_bitmap[i]) 771 ULCK_BUF(fs->s_inode_bitmap[i]) 772 773 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 774 if (fs->s_block_bitmap[i]) 775 ULCK_BUF(fs->s_block_bitmap[i]) 776 777 DROP_GIANT(); 778 g_topology_lock(); 779 g_vfs_close(ump->um_cp); 780 g_topology_unlock(); 781 PICKUP_GIANT(); 782 vrele(ump->um_devvp); 783 bsd_free(fs->s_es, M_EXT2MNT); 784 bsd_free(fs, M_EXT2MNT); 785 bsd_free(ump, M_EXT2MNT); 786 mp->mnt_data = NULL; 787 MNT_ILOCK(mp); 788 mp->mnt_flag &= ~MNT_LOCAL; 789 MNT_IUNLOCK(mp); 790 return (error); 791} 792 793/* 794 * Flush out all the files in a filesystem. 795 */ 796static int 797ext2_flushfiles(mp, flags, td) 798 struct mount *mp; 799 int flags; 800 struct thread *td; 801{ 802 int error; 803 804 error = vflush(mp, 0, flags, td); 805 return (error); 806} 807 808/* 809 * Get file system statistics. 
810 * taken from ext2/super.c ext2_statfs 811 */ 812static int 813ext2_statfs(mp, sbp, td) 814 struct mount *mp; 815 struct statfs *sbp; 816 struct thread *td; 817{ 818 unsigned long overhead; 819 struct ext2mount *ump; 820 struct ext2_sb_info *fs; 821 struct ext2_super_block *es; 822 int i, nsb; 823 824 ump = VFSTOEXT2(mp); 825 fs = ump->um_e2fs; 826 es = fs->s_es; 827 828 if (es->s_magic != EXT2_SUPER_MAGIC) 829 panic("ext2_statfs - magic number spoiled"); 830 831 /* 832 * Compute the overhead (FS structures) 833 */ 834 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 835 nsb = 0; 836 for (i = 0 ; i < fs->s_groups_count; i++) 837 if (ext2_group_sparse(i)) 838 nsb++; 839 } else 840 nsb = fs->s_groups_count; 841 overhead = es->s_first_data_block + 842 /* Superblocks and block group descriptors: */ 843 nsb * (1 + fs->s_db_per_group) + 844 /* Inode bitmap, block bitmap, and inode table: */ 845 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 846 847 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 848 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 849 sbp->f_blocks = es->s_blocks_count - overhead; 850 sbp->f_bfree = es->s_free_blocks_count; 851 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 852 sbp->f_files = es->s_inodes_count; 853 sbp->f_ffree = es->s_free_inodes_count; 854 return (0); 855} 856 857/* 858 * Go through the disk queues to initiate sandbagged IO; 859 * go through the inodes to write those that have been modified; 860 * initiate the writing of the super block if it has been modified. 861 * 862 * Note: we are always called with the filesystem marked `MPBUSY'. 
 */
static int
ext2_sync(mp, waitfor, td)
	struct mount *mp;
	int waitfor;
	struct thread *td;
{
	struct vnode *mvp, *vp;
	struct inode *ip;
	struct ext2mount *ump = VFSTOEXT2(mp);
	struct ext2_sb_info *fs;
	int error, allerror = 0;

	fs = ump->um_e2fs;
	/* A dirty superblock on a read-only filesystem is a fatal bug. */
	if (fs->s_dirt != 0 && fs->s_rd_only != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ext2_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) {
			VI_UNLOCK(vp);
			continue;
		}
		/* Vnode interlock held; the mount interlock can go. */
		MNT_IUNLOCK(mp);
		ip = VTOI(vp);
		/*
		 * Skip vnodes with no pending inode flag changes that
		 * also have no dirty buffers (or when syncing lazily).
		 */
		if ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
		    waitfor == MNT_LAZY)) {
			VI_UNLOCK(vp);
			MNT_ILOCK(mp);
			continue;
		}
		/* LK_INTERLOCK hands the held vnode interlock to vget(). */
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
		if (error) {
			MNT_ILOCK(mp);
			/* On ENOENT restart the scan; otherwise skip it. */
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		/* Remember the most recent failure but keep going. */
		if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0);
	}
	/*
	 * Write back modified superblock.
	 */
	if (fs->s_dirt != 0) {
		fs->s_dirt = 0;
		fs->s_es->s_wtime = time_second;
		if ((error = ext2_sbupdate(ump, waitfor)) != 0)
			allerror = error;
	}
	/* 0 if everything succeeded, else the last error encountered. */
	return (allerror);
}

/*
 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
 * in from disk.
If it is in core, wait for the lock bit to clear, then 942 * return the inode locked. Detection and handling of mount points must be 943 * done by the calling routine. 944 */ 945static int 946ext2_vget(mp, ino, flags, vpp) 947 struct mount *mp; 948 ino_t ino; 949 int flags; 950 struct vnode **vpp; 951{ 952 struct ext2_sb_info *fs; 953 struct inode *ip; 954 struct ext2mount *ump; 955 struct buf *bp; 956 struct vnode *vp; 957 struct cdev *dev; 958 int i, error; 959 int used_blocks; 960 struct thread *td; 961 962 td = curthread; 963 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 964 if (error || *vpp != NULL) 965 return (error); 966 967 ump = VFSTOEXT2(mp); 968 dev = ump->um_dev; 969 970 /* 971 * If this malloc() is performed after the getnewvnode() 972 * it might block, leaving a vnode with a NULL v_data to be 973 * found by ext2_sync() if a sync happens to fire right then, 974 * which will cause a panic because ext2_sync() blindly 975 * dereferences vp->v_data (as well it should). 976 */ 977 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 978 979 /* Allocate a new vnode/inode. */ 980 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 981 *vpp = NULL; 982 free(ip, M_EXT2NODE); 983 return (error); 984 } 985 vp->v_data = ip; 986 ip->i_vnode = vp; 987 ip->i_e2fs = fs = ump->um_e2fs; 988 ip->i_number = ino; 989 990 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 991 error = insmntque(vp, mp); 992 if (error != 0) { 993 free(ip, M_EXT2NODE); 994 *vpp = NULL; 995 return (error); 996 } 997 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 998 if (error || *vpp != NULL) 999 return (error); 1000 1001 /* Read in the disk contents for the inode, copy into the inode. 
*/ 1002#if 0 1003printf("ext2_vget(%d) dbn= %lu ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1004#endif 1005 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1006 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1007 /* 1008 * The inode does not contain anything useful, so it would 1009 * be misleading to leave it on its hash chain. With mode 1010 * still zero, it will be unlinked and returned to the free 1011 * list by vput(). 1012 */ 1013 vput(vp); 1014 brelse(bp); 1015 *vpp = NULL; 1016 return (error); 1017 } 1018 /* convert ext2 inode to dinode */ 1019 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * 1020 ino_to_fsbo(fs, ino)), ip); 1021 ip->i_block_group = ino_to_cg(fs, ino); 1022 ip->i_next_alloc_block = 0; 1023 ip->i_next_alloc_goal = 0; 1024 ip->i_prealloc_count = 0; 1025 ip->i_prealloc_block = 0; 1026 /* now we want to make sure that block pointers for unused 1027 blocks are zeroed out - ext2_balloc depends on this 1028 although for regular files and directories only 1029 */ 1030 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1031 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1032 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1033 ip->i_db[i] = 0; 1034 } 1035/* 1036 ext2_print_inode(ip); 1037*/ 1038 brelse(bp); 1039 1040 /* 1041 * Initialize the vnode from the inode, check for aliases. 1042 * Note that the underlying vnode may have changed. 1043 */ 1044 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1045 vput(vp); 1046 *vpp = NULL; 1047 return (error); 1048 } 1049 /* 1050 * Finish inode initialization now that aliasing has been resolved. 1051 */ 1052 ip->i_devvp = ump->um_devvp; 1053 /* 1054 * Set up a generation number for this inode if it does not 1055 * already have one. This should only happen on old filesystems. 
1056 */ 1057 if (ip->i_gen == 0) { 1058 ip->i_gen = random() / 2 + 1; 1059 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1060 ip->i_flag |= IN_MODIFIED; 1061 } 1062 *vpp = vp; 1063 return (0); 1064} 1065 1066/* 1067 * File handle to vnode 1068 * 1069 * Have to be really careful about stale file handles: 1070 * - check that the inode number is valid 1071 * - call ext2_vget() to get the locked inode 1072 * - check for an unallocated inode (i_mode == 0) 1073 * - check that the given client host has export rights and return 1074 * those rights via. exflagsp and credanonp 1075 */ 1076static int 1077ext2_fhtovp(mp, fhp, vpp) 1078 struct mount *mp; 1079 struct fid *fhp; 1080 struct vnode **vpp; 1081{ 1082 struct inode *ip; 1083 struct ufid *ufhp; 1084 struct vnode *nvp; 1085 struct ext2_sb_info *fs; 1086 int error; 1087 1088 ufhp = (struct ufid *)fhp; 1089 fs = VFSTOEXT2(mp)->um_e2fs; 1090 if (ufhp->ufid_ino < ROOTINO || 1091 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1092 return (ESTALE); 1093 1094 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1095 if (error) { 1096 *vpp = NULLVP; 1097 return (error); 1098 } 1099 ip = VTOI(nvp); 1100 if (ip->i_mode == 0 || 1101 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1102 vput(nvp); 1103 *vpp = NULLVP; 1104 return (ESTALE); 1105 } 1106 *vpp = nvp; 1107 vnode_create_vobject(*vpp, 0, curthread); 1108 return (0); 1109} 1110 1111/* 1112 * Write a superblock and associated information back to disk. 1113 */ 1114static int 1115ext2_sbupdate(mp, waitfor) 1116 struct ext2mount *mp; 1117 int waitfor; 1118{ 1119 struct ext2_sb_info *fs = mp->um_e2fs; 1120 struct ext2_super_block *es = fs->s_es; 1121 struct buf *bp; 1122 int error = 0; 1123/* 1124printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); 1125*/ 1126 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1127 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1128 if (waitfor == MNT_WAIT) 1129 error = bwrite(bp); 1130 else 1131 bawrite(bp); 1132 1133 /* 1134 * The buffers for group descriptors, inode bitmaps and block bitmaps 1135 * are not busy at this point and are (hopefully) written by the 1136 * usual sync mechanism. No need to write them here 1137 */ 1138 1139 return (error); 1140} 1141 1142/* 1143 * Return the root of a filesystem. 1144 */ 1145static int 1146ext2_root(mp, flags, vpp, td) 1147 struct mount *mp; 1148 int flags; 1149 struct vnode **vpp; 1150 struct thread *td; 1151{ 1152 struct vnode *nvp; 1153 int error; 1154 1155 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1156 if (error) 1157 return (error); 1158 *vpp = nvp; 1159 return (0); 1160} 1161