/* ext2_vfsops.c revision 184554 */
1/*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 184554 2008-11-02 10:15:42Z attilio $ 37 */ 38 39/*- 40 * COPYRIGHT.INFO says this has some GPL'd code from ext2_super.c in it 41 * 42 * This program is free software; you can redistribute it and/or modify 43 * it under the terms of the GNU General Public License as published by 44 * the Free Software Foundation; either version 2 of the License. 45 * 46 * This program is distributed in the hope that it will be useful, 47 * but WITHOUT ANY WARRANTY; without even the implied warranty of 48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 49 * GNU General Public License for more details. 50 * 51 * You should have received a copy of the GNU General Public License 52 * along with this program; if not, write to the Free Software 53 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 54 * 55 */ 56 57#include <sys/param.h> 58#include <sys/systm.h> 59#include <sys/namei.h> 60#include <sys/priv.h> 61#include <sys/proc.h> 62#include <sys/kernel.h> 63#include <sys/vnode.h> 64#include <sys/mount.h> 65#include <sys/bio.h> 66#include <sys/buf.h> 67#include <sys/conf.h> 68#include <sys/fcntl.h> 69#include <sys/malloc.h> 70#include <sys/stat.h> 71#include <sys/mutex.h> 72 73#include <geom/geom.h> 74#include <geom/geom_vfs.h> 75 76#include <gnu/fs/ext2fs/ext2_mount.h> 77#include <gnu/fs/ext2fs/inode.h> 78 79#include <gnu/fs/ext2fs/fs.h> 80#include <gnu/fs/ext2fs/ext2_extern.h> 81#include <gnu/fs/ext2fs/ext2_fs.h> 82#include <gnu/fs/ext2fs/ext2_fs_sb.h> 83 84static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 85static int ext2_mountfs(struct vnode *, struct mount *); 86static int ext2_reload(struct mount *mp, struct thread *td); 87static int ext2_sbupdate(struct ext2mount *, int); 88 89static vfs_unmount_t ext2_unmount; 90static vfs_root_t ext2_root; 91static vfs_statfs_t ext2_statfs; 92static vfs_sync_t ext2_sync; 93static vfs_vget_t 
ext2_vget; 94static vfs_fhtovp_t ext2_fhtovp; 95static vfs_mount_t ext2_mount; 96 97MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 98static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 99 100static struct vfsops ext2fs_vfsops = { 101 .vfs_fhtovp = ext2_fhtovp, 102 .vfs_mount = ext2_mount, 103 .vfs_root = ext2_root, /* root inode via vget */ 104 .vfs_statfs = ext2_statfs, 105 .vfs_sync = ext2_sync, 106 .vfs_unmount = ext2_unmount, 107 .vfs_vget = ext2_vget, 108}; 109 110VFS_SET(ext2fs_vfsops, ext2fs, 0); 111 112#define bsd_malloc malloc 113#define bsd_free free 114 115static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 116 int ronly); 117static int compute_sb_data(struct vnode * devvp, 118 struct ext2_super_block * es, struct ext2_sb_info * fs); 119 120static const char *ext2_opts[] = { "from", "export", "acls", "noexec", 121 "noatime", "union", "suiddir", "multilabel", "nosymfollow", 122 "noclusterr", "noclusterw", "force", NULL }; 123 124/* 125 * VFS Operations. 126 * 127 * mount system call 128 */ 129static int 130ext2_mount(mp, td) 131 struct mount *mp; 132 struct thread *td; 133{ 134 struct vfsoptlist *opts; 135 struct vnode *devvp; 136 struct ext2mount *ump = 0; 137 struct ext2_sb_info *fs; 138 char *path, *fspec; 139 int error, flags, len; 140 accmode_t accmode; 141 struct nameidata nd, *ndp = &nd; 142 143 opts = mp->mnt_optnew; 144 145 if (vfs_filteropt(opts, ext2_opts)) 146 return (EINVAL); 147 148 vfs_getopt(opts, "fspath", (void **)&path, NULL); 149 /* Double-check the length of path.. */ 150 if (strlen(path) >= MAXMNTLEN - 1) 151 return (ENAMETOOLONG); 152 153 fspec = NULL; 154 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 155 if (!error && fspec[len - 1] != '\0') 156 return (EINVAL); 157 158 /* 159 * If updating, check whether changing from read-only to 160 * read/write; if there is no device name, that's all we do. 
161 */ 162 if (mp->mnt_flag & MNT_UPDATE) { 163 ump = VFSTOEXT2(mp); 164 fs = ump->um_e2fs; 165 error = 0; 166 if (fs->s_rd_only == 0 && 167 vfs_flagopt(opts, "ro", NULL, 0)) { 168 error = VFS_SYNC(mp, MNT_WAIT, td); 169 if (error) 170 return (error); 171 flags = WRITECLOSE; 172 if (mp->mnt_flag & MNT_FORCE) 173 flags |= FORCECLOSE; 174 if (vfs_busy(mp, MBF_NOWAIT)) 175 return (EBUSY); 176 error = ext2_flushfiles(mp, flags, td); 177 vfs_unbusy(mp); 178 if (!error && fs->s_wasvalid) { 179 fs->s_es->s_state |= EXT2_VALID_FS; 180 ext2_sbupdate(ump, MNT_WAIT); 181 } 182 fs->s_rd_only = 1; 183 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 184 DROP_GIANT(); 185 g_topology_lock(); 186 g_access(ump->um_cp, 0, -1, 0); 187 g_topology_unlock(); 188 PICKUP_GIANT(); 189 } 190 if (!error && (mp->mnt_flag & MNT_RELOAD)) 191 error = ext2_reload(mp, td); 192 if (error) 193 return (error); 194 devvp = ump->um_devvp; 195 if (fs->s_rd_only && !vfs_flagopt(opts, "ro", NULL, 0)) { 196 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 0)) 197 return (EPERM); 198 /* 199 * If upgrade to read-write by non-root, then verify 200 * that user has necessary permissions on the device. 201 */ 202 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 203 error = VOP_ACCESS(devvp, VREAD | VWRITE, 204 td->td_ucred, td); 205 if (error) 206 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 207 if (error) { 208 VOP_UNLOCK(devvp, 0); 209 return (error); 210 } 211 VOP_UNLOCK(devvp, 0); 212 DROP_GIANT(); 213 g_topology_lock(); 214 error = g_access(ump->um_cp, 0, 1, 0); 215 g_topology_unlock(); 216 PICKUP_GIANT(); 217 if (error) 218 return (error); 219 220 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 221 (fs->s_es->s_state & EXT2_ERROR_FS)) { 222 if (mp->mnt_flag & MNT_FORCE) { 223 printf( 224"WARNING: %s was not properly dismounted\n", 225 fs->fs_fsmnt); 226 } else { 227 printf( 228"WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", 229 fs->fs_fsmnt); 230 return (EPERM); 231 } 232 } 233 fs->s_es->s_state &= ~EXT2_VALID_FS; 234 ext2_sbupdate(ump, MNT_WAIT); 235 fs->s_rd_only = 0; 236 MNT_ILOCK(mp); 237 mp->mnt_flag &= ~MNT_RDONLY; 238 MNT_IUNLOCK(mp); 239 } 240 if (vfs_flagopt(opts, "export", NULL, 0)) { 241 /* Process export requests in vfs_mount.c. */ 242 return (error); 243 } 244 } 245 /* 246 * Not an update, or updating the name: look up the name 247 * and verify that it refers to a sensible disk device. 248 */ 249 if (fspec == NULL) 250 return (EINVAL); 251 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); 252 if ((error = namei(ndp)) != 0) 253 return (error); 254 NDFREE(ndp, NDF_ONLY_PNBUF); 255 devvp = ndp->ni_vp; 256 257 if (!vn_isdisk(devvp, &error)) { 258 vput(devvp); 259 return (error); 260 } 261 262 /* 263 * If mount by non-root, then verify that user has necessary 264 * permissions on the device. 265 * 266 * XXXRW: VOP_ACCESS() enough? 267 */ 268 accmode = VREAD; 269 if ((mp->mnt_flag & MNT_RDONLY) == 0) 270 accmode |= VWRITE; 271 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); 272 if (error) 273 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 274 if (error) { 275 vput(devvp); 276 return (error); 277 } 278 279 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 280 error = ext2_mountfs(devvp, mp); 281 } else { 282 if (devvp != ump->um_devvp) { 283 vput(devvp); 284 return (EINVAL); /* needs translation */ 285 } else 286 vput(devvp); 287 } 288 if (error) { 289 vrele(devvp); 290 return (error); 291 } 292 ump = VFSTOEXT2(mp); 293 fs = ump->um_e2fs; 294 /* 295 * Note that this strncpy() is ok because of a check at the start 296 * of ext2_mount(). 
297 */ 298 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 299 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 300 vfs_mountedfrom(mp, fspec); 301 return (0); 302} 303 304/* 305 * checks that the data in the descriptor blocks make sense 306 * this is taken from ext2/super.c 307 */ 308static int ext2_check_descriptors (struct ext2_sb_info * sb) 309{ 310 int i; 311 int desc_block = 0; 312 unsigned long block = sb->s_es->s_first_data_block; 313 struct ext2_group_desc * gdp = NULL; 314 315 /* ext2_debug ("Checking group descriptors"); */ 316 317 for (i = 0; i < sb->s_groups_count; i++) 318 { 319 /* examine next descriptor block */ 320 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 321 gdp = (struct ext2_group_desc *) 322 sb->s_group_desc[desc_block++]->b_data; 323 if (gdp->bg_block_bitmap < block || 324 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 325 { 326 printf ("ext2_check_descriptors: " 327 "Block bitmap for group %d" 328 " not in group (block %lu)!\n", 329 i, (unsigned long) gdp->bg_block_bitmap); 330 return 0; 331 } 332 if (gdp->bg_inode_bitmap < block || 333 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 334 { 335 printf ("ext2_check_descriptors: " 336 "Inode bitmap for group %d" 337 " not in group (block %lu)!\n", 338 i, (unsigned long) gdp->bg_inode_bitmap); 339 return 0; 340 } 341 if (gdp->bg_inode_table < block || 342 gdp->bg_inode_table + sb->s_itb_per_group >= 343 block + EXT2_BLOCKS_PER_GROUP(sb)) 344 { 345 printf ("ext2_check_descriptors: " 346 "Inode table for group %d" 347 " not in group (block %lu)!\n", 348 i, (unsigned long) gdp->bg_inode_table); 349 return 0; 350 } 351 block += EXT2_BLOCKS_PER_GROUP(sb); 352 gdp++; 353 } 354 return 1; 355} 356 357static int 358ext2_check_sb_compat(es, dev, ronly) 359 struct ext2_super_block *es; 360 struct cdev *dev; 361 int ronly; 362{ 363 364 if (es->s_magic != EXT2_SUPER_MAGIC) { 365 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 366 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 367 return (1); 
368 } 369 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 370 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 371 printf( 372"WARNING: mount of %s denied due to unsupported optional features\n", 373 devtoname(dev)); 374 return (1); 375 } 376 if (!ronly && 377 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 378 printf( 379"WARNING: R/W mount of %s denied due to unsupported optional features\n", 380 devtoname(dev)); 381 return (1); 382 } 383 } 384 return (0); 385} 386 387/* 388 * this computes the fields of the ext2_sb_info structure from the 389 * data in the ext2_super_block structure read in 390 */ 391static int compute_sb_data(devvp, es, fs) 392 struct vnode * devvp; 393 struct ext2_super_block * es; 394 struct ext2_sb_info * fs; 395{ 396 int db_count, error; 397 int i, j; 398 int logic_sb_block = 1; /* XXX for now */ 399 400#if 1 401#define V(v) 402#else 403#define V(v) printf(#v"= %d\n", fs->v); 404#endif 405 406 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 407 V(s_blocksize) 408 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 409 V(s_bshift) 410 fs->s_fsbtodb = es->s_log_block_size + 1; 411 V(s_fsbtodb) 412 fs->s_qbmask = fs->s_blocksize - 1; 413 V(s_bmask) 414 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 415 V(s_blocksize_bits) 416 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 417 V(s_frag_size) 418 if (fs->s_frag_size) 419 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 420 V(s_frags_per_block) 421 fs->s_blocks_per_group = es->s_blocks_per_group; 422 V(s_blocks_per_group) 423 fs->s_frags_per_group = es->s_frags_per_group; 424 V(s_frags_per_group) 425 fs->s_inodes_per_group = es->s_inodes_per_group; 426 V(s_inodes_per_group) 427 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 428 V(s_inodes_per_block) 429 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 430 V(s_itb_per_group) 431 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct 
ext2_group_desc); 432 V(s_desc_per_block) 433 /* s_resuid / s_resgid ? */ 434 fs->s_groups_count = (es->s_blocks_count - 435 es->s_first_data_block + 436 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 437 EXT2_BLOCKS_PER_GROUP(fs); 438 V(s_groups_count) 439 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 440 EXT2_DESC_PER_BLOCK(fs); 441 fs->s_db_per_group = db_count; 442 V(s_db_per_group) 443 444 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 445 M_EXT2MNT, M_WAITOK); 446 447 /* adjust logic_sb_block */ 448 if(fs->s_blocksize > SBSIZE) 449 /* Godmar thinks: if the blocksize is greater than 1024, then 450 the superblock is logically part of block zero. 451 */ 452 logic_sb_block = 0; 453 454 for (i = 0; i < db_count; i++) { 455 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 456 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 457 if(error) { 458 for (j = 0; j < i; j++) 459 brelse(fs->s_group_desc[j]); 460 bsd_free(fs->s_group_desc, M_EXT2MNT); 461 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 462 return EIO; 463 } 464 LCK_BUF(fs->s_group_desc[i]) 465 } 466 if(!ext2_check_descriptors(fs)) { 467 for (j = 0; j < db_count; j++) 468 ULCK_BUF(fs->s_group_desc[j]) 469 bsd_free(fs->s_group_desc, M_EXT2MNT); 470 printf("EXT2-fs: (ext2_check_descriptors failure) " 471 "unable to read group descriptors\n"); 472 return EIO; 473 } 474 475 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 476 fs->s_inode_bitmap_number[i] = 0; 477 fs->s_inode_bitmap[i] = NULL; 478 fs->s_block_bitmap_number[i] = 0; 479 fs->s_block_bitmap[i] = NULL; 480 } 481 fs->s_loaded_inode_bitmaps = 0; 482 fs->s_loaded_block_bitmaps = 0; 483 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 484 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 485 fs->fs_maxfilesize = 0x7fffffff; 486 else 487 fs->fs_maxfilesize = 0x7fffffffffffffff; 488 return 0; 489} 490 491/* 492 * Reload all incore data for a filesystem (used after running fsck on 493 * the root 
filesystem and finding things to fix). The filesystem must 494 * be mounted read-only. 495 * 496 * Things to do to update the mount: 497 * 1) invalidate all cached meta-data. 498 * 2) re-read superblock from disk. 499 * 3) re-read summary information from disk. 500 * 4) invalidate all inactive vnodes. 501 * 5) invalidate all cached file data. 502 * 6) re-read inode data for all active vnodes. 503 */ 504static int 505ext2_reload(struct mount *mp, struct thread *td) 506{ 507 struct vnode *vp, *mvp, *devvp; 508 struct inode *ip; 509 struct buf *bp; 510 struct ext2_super_block * es; 511 struct ext2_sb_info *fs; 512 int error; 513 514 if ((mp->mnt_flag & MNT_RDONLY) == 0) 515 return (EINVAL); 516 /* 517 * Step 1: invalidate all cached meta-data. 518 */ 519 devvp = VFSTOEXT2(mp)->um_devvp; 520 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 521 if (vinvalbuf(devvp, 0, 0, 0) != 0) 522 panic("ext2_reload: dirty1"); 523 VOP_UNLOCK(devvp, 0); 524 525 /* 526 * Step 2: re-read superblock from disk. 527 * constants have been adjusted for ext2 528 */ 529 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 530 return (error); 531 es = (struct ext2_super_block *)bp->b_data; 532 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 533 brelse(bp); 534 return (EIO); /* XXX needs translation */ 535 } 536 fs = VFSTOEXT2(mp)->um_e2fs; 537 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 538 539 if((error = compute_sb_data(devvp, es, fs)) != 0) { 540 brelse(bp); 541 return error; 542 } 543#ifdef UNKLAR 544 if (fs->fs_sbsize < SBSIZE) 545 bp->b_flags |= B_INVAL; 546#endif 547 brelse(bp); 548 549loop: 550 MNT_ILOCK(mp); 551 MNT_VNODE_FOREACH(vp, mp, mvp) { 552 VI_LOCK(vp); 553 if (vp->v_iflag & VI_DOOMED) { 554 VI_UNLOCK(vp); 555 continue; 556 } 557 MNT_IUNLOCK(mp); 558 /* 559 * Step 4: invalidate all cached file data. 
560 */ 561 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 562 MNT_VNODE_FOREACH_ABORT(mp, mvp); 563 goto loop; 564 } 565 if (vinvalbuf(vp, 0, 0, 0)) 566 panic("ext2_reload: dirty2"); 567 /* 568 * Step 5: re-read inode data for all active vnodes. 569 */ 570 ip = VTOI(vp); 571 error = 572 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 573 (int)fs->s_blocksize, NOCRED, &bp); 574 if (error) { 575 VOP_UNLOCK(vp, 0); 576 vrele(vp); 577 MNT_VNODE_FOREACH_ABORT(mp, mvp); 578 return (error); 579 } 580 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 581 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 582 brelse(bp); 583 VOP_UNLOCK(vp, 0); 584 vrele(vp); 585 MNT_ILOCK(mp); 586 } 587 MNT_IUNLOCK(mp); 588 return (0); 589} 590 591/* 592 * Common code for mount and mountroot 593 */ 594static int 595ext2_mountfs(devvp, mp) 596 struct vnode *devvp; 597 struct mount *mp; 598{ 599 struct ext2mount *ump; 600 struct buf *bp; 601 struct ext2_sb_info *fs; 602 struct ext2_super_block * es; 603 struct cdev *dev = devvp->v_rdev; 604 struct g_consumer *cp; 605 struct bufobj *bo; 606 int error; 607 int ronly; 608 609 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 610 /* XXX: use VOP_ACESS to check FS perms */ 611 DROP_GIANT(); 612 g_topology_lock(); 613 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 614 g_topology_unlock(); 615 PICKUP_GIANT(); 616 VOP_UNLOCK(devvp, 0); 617 if (error) 618 return (error); 619 620 /* XXX: should we check for some sectorsize or 512 instead? 
*/ 621 if (((SBSIZE % cp->provider->sectorsize) != 0) || 622 (SBSIZE < cp->provider->sectorsize)) { 623 DROP_GIANT(); 624 g_topology_lock(); 625 g_vfs_close(cp); 626 g_topology_unlock(); 627 PICKUP_GIANT(); 628 return (EINVAL); 629 } 630 631 bo = &devvp->v_bufobj; 632 bo->bo_private = cp; 633 bo->bo_ops = g_vfs_bufops; 634 if (devvp->v_rdev->si_iosize_max != 0) 635 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 636 if (mp->mnt_iosize_max > MAXPHYS) 637 mp->mnt_iosize_max = MAXPHYS; 638 639 bp = NULL; 640 ump = NULL; 641 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 642 goto out; 643 es = (struct ext2_super_block *)bp->b_data; 644 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 645 error = EINVAL; /* XXX needs translation */ 646 goto out; 647 } 648 if ((es->s_state & EXT2_VALID_FS) == 0 || 649 (es->s_state & EXT2_ERROR_FS)) { 650 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 651 printf( 652"WARNING: Filesystem was not properly dismounted\n"); 653 } else { 654 printf( 655"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 656 error = EPERM; 657 goto out; 658 } 659 } 660 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 661 bzero((caddr_t)ump, sizeof *ump); 662 /* I don't know whether this is the right strategy. Note that 663 we dynamically allocate both an ext2_sb_info and an ext2_super_block 664 while Linux keeps the super block in a locked buffer 665 */ 666 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 667 M_EXT2MNT, M_WAITOK); 668 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 669 M_EXT2MNT, M_WAITOK); 670 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 671 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 672 goto out; 673 /* 674 * We don't free the group descriptors allocated by compute_sb_data() 675 * until ext2_unmount(). This is OK since the mount will succeed. 
676 */ 677 brelse(bp); 678 bp = NULL; 679 fs = ump->um_e2fs; 680 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 681 /* if the fs is not mounted read-only, make sure the super block is 682 always written back on a sync() 683 */ 684 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 685 if (ronly == 0) { 686 fs->s_dirt = 1; /* mark it modified */ 687 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 688 } 689 mp->mnt_data = ump; 690 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 691 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 692 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 693 MNT_ILOCK(mp); 694 mp->mnt_flag |= MNT_LOCAL; 695 MNT_IUNLOCK(mp); 696 ump->um_mountp = mp; 697 ump->um_dev = dev; 698 ump->um_devvp = devvp; 699 ump->um_bo = &devvp->v_bufobj; 700 ump->um_cp = cp; 701 /* setting those two parameters allowed us to use 702 ufs_bmap w/o changse ! 703 */ 704 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 705 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 706 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 707 if (ronly == 0) 708 ext2_sbupdate(ump, MNT_WAIT); 709 return (0); 710out: 711 if (bp) 712 brelse(bp); 713 if (cp != NULL) { 714 DROP_GIANT(); 715 g_topology_lock(); 716 g_vfs_close(cp); 717 g_topology_unlock(); 718 PICKUP_GIANT(); 719 } 720 if (ump) { 721 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 722 bsd_free(ump->um_e2fs, M_EXT2MNT); 723 bsd_free(ump, M_EXT2MNT); 724 mp->mnt_data = NULL; 725 } 726 return (error); 727} 728 729/* 730 * unmount system call 731 */ 732static int 733ext2_unmount(mp, mntflags, td) 734 struct mount *mp; 735 int mntflags; 736 struct thread *td; 737{ 738 struct ext2mount *ump; 739 struct ext2_sb_info *fs; 740 int error, flags, ronly, i; 741 742 flags = 0; 743 if (mntflags & MNT_FORCE) { 744 if (mp->mnt_flag & MNT_ROOTFS) 745 return (EINVAL); 746 flags |= FORCECLOSE; 747 } 748 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 749 return (error); 750 ump = VFSTOEXT2(mp); 751 fs = ump->um_e2fs; 752 
ronly = fs->s_rd_only; 753 if (ronly == 0) { 754 if (fs->s_wasvalid) 755 fs->s_es->s_state |= EXT2_VALID_FS; 756 ext2_sbupdate(ump, MNT_WAIT); 757 } 758 759 /* release buffers containing group descriptors */ 760 for(i = 0; i < fs->s_db_per_group; i++) 761 ULCK_BUF(fs->s_group_desc[i]) 762 bsd_free(fs->s_group_desc, M_EXT2MNT); 763 764 /* release cached inode/block bitmaps */ 765 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 766 if (fs->s_inode_bitmap[i]) 767 ULCK_BUF(fs->s_inode_bitmap[i]) 768 769 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 770 if (fs->s_block_bitmap[i]) 771 ULCK_BUF(fs->s_block_bitmap[i]) 772 773 DROP_GIANT(); 774 g_topology_lock(); 775 g_vfs_close(ump->um_cp); 776 g_topology_unlock(); 777 PICKUP_GIANT(); 778 vrele(ump->um_devvp); 779 bsd_free(fs->s_es, M_EXT2MNT); 780 bsd_free(fs, M_EXT2MNT); 781 bsd_free(ump, M_EXT2MNT); 782 mp->mnt_data = NULL; 783 MNT_ILOCK(mp); 784 mp->mnt_flag &= ~MNT_LOCAL; 785 MNT_IUNLOCK(mp); 786 return (error); 787} 788 789/* 790 * Flush out all the files in a filesystem. 791 */ 792static int 793ext2_flushfiles(mp, flags, td) 794 struct mount *mp; 795 int flags; 796 struct thread *td; 797{ 798 int error; 799 800 error = vflush(mp, 0, flags, td); 801 return (error); 802} 803 804/* 805 * Get file system statistics. 
806 * taken from ext2/super.c ext2_statfs 807 */ 808static int 809ext2_statfs(mp, sbp, td) 810 struct mount *mp; 811 struct statfs *sbp; 812 struct thread *td; 813{ 814 unsigned long overhead; 815 struct ext2mount *ump; 816 struct ext2_sb_info *fs; 817 struct ext2_super_block *es; 818 int i, nsb; 819 820 ump = VFSTOEXT2(mp); 821 fs = ump->um_e2fs; 822 es = fs->s_es; 823 824 if (es->s_magic != EXT2_SUPER_MAGIC) 825 panic("ext2_statfs - magic number spoiled"); 826 827 /* 828 * Compute the overhead (FS structures) 829 */ 830 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 831 nsb = 0; 832 for (i = 0 ; i < fs->s_groups_count; i++) 833 if (ext2_group_sparse(i)) 834 nsb++; 835 } else 836 nsb = fs->s_groups_count; 837 overhead = es->s_first_data_block + 838 /* Superblocks and block group descriptors: */ 839 nsb * (1 + fs->s_db_per_group) + 840 /* Inode bitmap, block bitmap, and inode table: */ 841 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 842 843 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 844 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 845 sbp->f_blocks = es->s_blocks_count - overhead; 846 sbp->f_bfree = es->s_free_blocks_count; 847 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 848 sbp->f_files = es->s_inodes_count; 849 sbp->f_ffree = es->s_free_inodes_count; 850 return (0); 851} 852 853/* 854 * Go through the disk queues to initiate sandbagged IO; 855 * go through the inodes to write those that have been modified; 856 * initiate the writing of the super block if it has been modified. 857 * 858 * Note: we are always called with the filesystem marked `MPBUSY'. 
859 */ 860static int 861ext2_sync(mp, waitfor, td) 862 struct mount *mp; 863 int waitfor; 864 struct thread *td; 865{ 866 struct vnode *mvp, *vp; 867 struct inode *ip; 868 struct ext2mount *ump = VFSTOEXT2(mp); 869 struct ext2_sb_info *fs; 870 int error, allerror = 0; 871 872 fs = ump->um_e2fs; 873 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 874 printf("fs = %s\n", fs->fs_fsmnt); 875 panic("ext2_sync: rofs mod"); 876 } 877 /* 878 * Write back each (modified) inode. 879 */ 880 MNT_ILOCK(mp); 881loop: 882 MNT_VNODE_FOREACH(vp, mp, mvp) { 883 VI_LOCK(vp); 884 if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) { 885 VI_UNLOCK(vp); 886 continue; 887 } 888 MNT_IUNLOCK(mp); 889 ip = VTOI(vp); 890 if ((ip->i_flag & 891 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 892 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 893 waitfor == MNT_LAZY)) { 894 VI_UNLOCK(vp); 895 MNT_ILOCK(mp); 896 continue; 897 } 898 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 899 if (error) { 900 MNT_ILOCK(mp); 901 if (error == ENOENT) { 902 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 903 goto loop; 904 } 905 continue; 906 } 907 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 908 allerror = error; 909 VOP_UNLOCK(vp, 0); 910 vrele(vp); 911 MNT_ILOCK(mp); 912 } 913 MNT_IUNLOCK(mp); 914 /* 915 * Force stale file system control information to be flushed. 916 */ 917 if (waitfor != MNT_LAZY) { 918 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 919 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 920 allerror = error; 921 VOP_UNLOCK(ump->um_devvp, 0); 922 } 923 /* 924 * Write back modified superblock. 925 */ 926 if (fs->s_dirt != 0) { 927 fs->s_dirt = 0; 928 fs->s_es->s_wtime = time_second; 929 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 930 allerror = error; 931 } 932 return (allerror); 933} 934 935/* 936 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 937 * in from disk. 
If it is in core, wait for the lock bit to clear, then 938 * return the inode locked. Detection and handling of mount points must be 939 * done by the calling routine. 940 */ 941static int 942ext2_vget(mp, ino, flags, vpp) 943 struct mount *mp; 944 ino_t ino; 945 int flags; 946 struct vnode **vpp; 947{ 948 struct ext2_sb_info *fs; 949 struct inode *ip; 950 struct ext2mount *ump; 951 struct buf *bp; 952 struct vnode *vp; 953 struct cdev *dev; 954 int i, error; 955 int used_blocks; 956 struct thread *td; 957 958 td = curthread; 959 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 960 if (error || *vpp != NULL) 961 return (error); 962 963 ump = VFSTOEXT2(mp); 964 dev = ump->um_dev; 965 966 /* 967 * If this malloc() is performed after the getnewvnode() 968 * it might block, leaving a vnode with a NULL v_data to be 969 * found by ext2_sync() if a sync happens to fire right then, 970 * which will cause a panic because ext2_sync() blindly 971 * dereferences vp->v_data (as well it should). 972 */ 973 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 974 975 /* Allocate a new vnode/inode. */ 976 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 977 *vpp = NULL; 978 free(ip, M_EXT2NODE); 979 return (error); 980 } 981 vp->v_data = ip; 982 ip->i_vnode = vp; 983 ip->i_e2fs = fs = ump->um_e2fs; 984 ip->i_number = ino; 985 986 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 987 error = insmntque(vp, mp); 988 if (error != 0) { 989 free(ip, M_EXT2NODE); 990 *vpp = NULL; 991 return (error); 992 } 993 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 994 if (error || *vpp != NULL) 995 return (error); 996 997 /* Read in the disk contents for the inode, copy into the inode. 
*/ 998#if 0 999printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1000#endif 1001 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1002 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1003 /* 1004 * The inode does not contain anything useful, so it would 1005 * be misleading to leave it on its hash chain. With mode 1006 * still zero, it will be unlinked and returned to the free 1007 * list by vput(). 1008 */ 1009 vput(vp); 1010 brelse(bp); 1011 *vpp = NULL; 1012 return (error); 1013 } 1014 /* convert ext2 inode to dinode */ 1015 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1016 ino_to_fsbo(fs, ino)), ip); 1017 ip->i_block_group = ino_to_cg(fs, ino); 1018 ip->i_next_alloc_block = 0; 1019 ip->i_next_alloc_goal = 0; 1020 ip->i_prealloc_count = 0; 1021 ip->i_prealloc_block = 0; 1022 /* now we want to make sure that block pointers for unused 1023 blocks are zeroed out - ext2_balloc depends on this 1024 although for regular files and directories only 1025 */ 1026 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1027 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1028 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1029 ip->i_db[i] = 0; 1030 } 1031/* 1032 ext2_print_inode(ip); 1033*/ 1034 brelse(bp); 1035 1036 /* 1037 * Initialize the vnode from the inode, check for aliases. 1038 * Note that the underlying vnode may have changed. 1039 */ 1040 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1041 vput(vp); 1042 *vpp = NULL; 1043 return (error); 1044 } 1045 /* 1046 * Finish inode initialization now that aliasing has been resolved. 1047 */ 1048 ip->i_devvp = ump->um_devvp; 1049 /* 1050 * Set up a generation number for this inode if it does not 1051 * already have one. This should only happen on old filesystems. 
1052 */ 1053 if (ip->i_gen == 0) { 1054 ip->i_gen = random() / 2 + 1; 1055 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1056 ip->i_flag |= IN_MODIFIED; 1057 } 1058 *vpp = vp; 1059 return (0); 1060} 1061 1062/* 1063 * File handle to vnode 1064 * 1065 * Have to be really careful about stale file handles: 1066 * - check that the inode number is valid 1067 * - call ext2_vget() to get the locked inode 1068 * - check for an unallocated inode (i_mode == 0) 1069 * - check that the given client host has export rights and return 1070 * those rights via. exflagsp and credanonp 1071 */ 1072static int 1073ext2_fhtovp(mp, fhp, vpp) 1074 struct mount *mp; 1075 struct fid *fhp; 1076 struct vnode **vpp; 1077{ 1078 struct inode *ip; 1079 struct ufid *ufhp; 1080 struct vnode *nvp; 1081 struct ext2_sb_info *fs; 1082 int error; 1083 1084 ufhp = (struct ufid *)fhp; 1085 fs = VFSTOEXT2(mp)->um_e2fs; 1086 if (ufhp->ufid_ino < ROOTINO || 1087 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1088 return (ESTALE); 1089 1090 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1091 if (error) { 1092 *vpp = NULLVP; 1093 return (error); 1094 } 1095 ip = VTOI(nvp); 1096 if (ip->i_mode == 0 || 1097 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1098 vput(nvp); 1099 *vpp = NULLVP; 1100 return (ESTALE); 1101 } 1102 *vpp = nvp; 1103 vnode_create_vobject(*vpp, 0, curthread); 1104 return (0); 1105} 1106 1107/* 1108 * Write a superblock and associated information back to disk. 1109 */ 1110static int 1111ext2_sbupdate(mp, waitfor) 1112 struct ext2mount *mp; 1113 int waitfor; 1114{ 1115 struct ext2_sb_info *fs = mp->um_e2fs; 1116 struct ext2_super_block *es = fs->s_es; 1117 struct buf *bp; 1118 int error = 0; 1119/* 1120printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); 1121*/ 1122 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1123 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1124 if (waitfor == MNT_WAIT) 1125 error = bwrite(bp); 1126 else 1127 bawrite(bp); 1128 1129 /* 1130 * The buffers for group descriptors, inode bitmaps and block bitmaps 1131 * are not busy at this point and are (hopefully) written by the 1132 * usual sync mechanism. No need to write them here 1133 */ 1134 1135 return (error); 1136} 1137 1138/* 1139 * Return the root of a filesystem. 1140 */ 1141static int 1142ext2_root(mp, flags, vpp, td) 1143 struct mount *mp; 1144 int flags; 1145 struct vnode **vpp; 1146 struct thread *td; 1147{ 1148 struct vnode *nvp; 1149 int error; 1150 1151 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1152 if (error) 1153 return (error); 1154 *vpp = nvp; 1155 return (0); 1156} 1157