ext2_vfsops.c revision 167580
1/*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 167580 2007-03-15 00:09:50Z rodrigc $ 37 */ 38 39/*- 40 * COPYRIGHT.INFO says this has some GPL'd code from ext2_super.c in it 41 * 42 * This program is free software; you can redistribute it and/or modify 43 * it under the terms of the GNU General Public License as published by 44 * the Free Software Foundation; either version 2 of the License. 45 * 46 * This program is distributed in the hope that it will be useful, 47 * but WITHOUT ANY WARRANTY; without even the implied warranty of 48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 49 * GNU General Public License for more details. 50 * 51 * You should have received a copy of the GNU General Public License 52 * along with this program; if not, write to the Free Software 53 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 54 * 55 */ 56 57#include <sys/param.h> 58#include <sys/systm.h> 59#include <sys/namei.h> 60#include <sys/priv.h> 61#include <sys/proc.h> 62#include <sys/kernel.h> 63#include <sys/vnode.h> 64#include <sys/mount.h> 65#include <sys/bio.h> 66#include <sys/buf.h> 67#include <sys/conf.h> 68#include <sys/fcntl.h> 69#include <sys/malloc.h> 70#include <sys/stat.h> 71#include <sys/mutex.h> 72 73#include <geom/geom.h> 74#include <geom/geom_vfs.h> 75 76#include <gnu/fs/ext2fs/ext2_mount.h> 77#include <gnu/fs/ext2fs/inode.h> 78 79#include <gnu/fs/ext2fs/fs.h> 80#include <gnu/fs/ext2fs/ext2_extern.h> 81#include <gnu/fs/ext2fs/ext2_fs.h> 82#include <gnu/fs/ext2fs/ext2_fs_sb.h> 83 84static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 85static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 86static int ext2_reload(struct mount *mp, struct thread *td); 87static int ext2_sbupdate(struct ext2mount *, int); 88 89static vfs_unmount_t ext2_unmount; 90static vfs_root_t ext2_root; 91static vfs_statfs_t ext2_statfs; 92static vfs_sync_t ext2_sync; 93static vfs_vget_t ext2_vget; 94static vfs_fhtovp_t ext2_fhtovp; 95static vfs_mount_t ext2_mount; 96 97MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 98static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 99 100static struct vfsops ext2fs_vfsops = { 101 .vfs_fhtovp = ext2_fhtovp, 102 .vfs_mount = ext2_mount, 103 .vfs_root = ext2_root, /* root inode via vget */ 104 .vfs_statfs = ext2_statfs, 105 .vfs_sync = ext2_sync, 106 .vfs_unmount = ext2_unmount, 107 .vfs_vget = ext2_vget, 108}; 109 110VFS_SET(ext2fs_vfsops, ext2fs, 0); 111 112#define bsd_malloc malloc 113#define bsd_free free 114 115static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 116 int ronly); 117static int compute_sb_data(struct vnode * devvp, 118 struct ext2_super_block * es, struct ext2_sb_info * fs); 119 120static const char *ext2_opts[] = { "from", "export", "union", "acls", "exec", 121 "atime", "union", "suiddir", "multilabel", "symfollow", "clusterr", 122 "clusterw", "force" }; 123 124/* 125 * VFS Operations. 126 * 127 * mount system call 128 */ 129static int 130ext2_mount(mp, td) 131 struct mount *mp; 132 struct thread *td; 133{ 134 struct vfsoptlist *opts; 135 struct vnode *devvp; 136 struct ext2mount *ump = 0; 137 struct ext2_sb_info *fs; 138 char *path, *fspec; 139 int error, flags, len; 140 mode_t accessmode; 141 struct nameidata nd, *ndp = &nd; 142 143 opts = mp->mnt_optnew; 144 145 if (vfs_filteropt(opts, ext2_opts)) 146 return (EINVAL); 147 148 vfs_getopt(opts, "fspath", (void **)&path, NULL); 149 /* Double-check the length of path.. */ 150 if (strlen(path) >= MAXMNTLEN - 1) 151 return (ENAMETOOLONG); 152 153 fspec = NULL; 154 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 155 if (!error && fspec[len - 1] != '\0') 156 return (EINVAL); 157 158 /* 159 * If updating, check whether changing from read-only to 160 * read/write; if there is no device name, that's all we do. 161 */ 162 if (mp->mnt_flag & MNT_UPDATE) { 163 ump = VFSTOEXT2(mp); 164 fs = ump->um_e2fs; 165 error = 0; 166 if (fs->s_rd_only == 0 && 167 vfs_flagopt(opts, "ro", NULL, 0)) { 168 error = VFS_SYNC(mp, MNT_WAIT, td); 169 if (error) 170 return (error); 171 flags = WRITECLOSE; 172 if (mp->mnt_flag & MNT_FORCE) 173 flags |= FORCECLOSE; 174 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 175 return (EBUSY); 176 error = ext2_flushfiles(mp, flags, td); 177 vfs_unbusy(mp, td); 178 if (!error && fs->s_wasvalid) { 179 fs->s_es->s_state |= EXT2_VALID_FS; 180 ext2_sbupdate(ump, MNT_WAIT); 181 } 182 fs->s_rd_only = 1; 183 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 184 DROP_GIANT(); 185 g_topology_lock(); 186 g_access(ump->um_cp, 0, -1, 0); 187 g_topology_unlock(); 188 PICKUP_GIANT(); 189 } 190 if (!error && (mp->mnt_flag & MNT_RELOAD)) 191 error = ext2_reload(mp, td); 192 if (error) 193 return (error); 194 devvp = ump->um_devvp; 195 if (fs->s_rd_only && !vfs_flagopt(opts, "ro", NULL, 0)) { 196 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 0)) 197 return (EPERM); 198 /* 199 * If upgrade to read-write by non-root, then verify 200 * that user has necessary permissions on the device. 201 */ 202 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 203 error = VOP_ACCESS(devvp, VREAD | VWRITE, 204 td->td_ucred, td); 205 if (error) 206 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 207 if (error) { 208 VOP_UNLOCK(devvp, 0, td); 209 return (error); 210 } 211 VOP_UNLOCK(devvp, 0, td); 212 DROP_GIANT(); 213 g_topology_lock(); 214 error = g_access(ump->um_cp, 0, 1, 0); 215 g_topology_unlock(); 216 PICKUP_GIANT(); 217 if (error) 218 return (error); 219 220 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 221 (fs->s_es->s_state & EXT2_ERROR_FS)) { 222 if (mp->mnt_flag & MNT_FORCE) { 223 printf( 224"WARNING: %s was not properly dismounted\n", 225 fs->fs_fsmnt); 226 } else { 227 printf( 228"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 229 fs->fs_fsmnt); 230 return (EPERM); 231 } 232 } 233 fs->s_es->s_state &= ~EXT2_VALID_FS; 234 ext2_sbupdate(ump, MNT_WAIT); 235 fs->s_rd_only = 0; 236 MNT_ILOCK(mp); 237 mp->mnt_flag &= ~MNT_RDONLY; 238 MNT_IUNLOCK(mp); 239 } 240 if (vfs_flagopt(opts, "export", NULL, 0)) { 241 /* Process export requests in vfs_mount.c. */ 242 return (error); 243 } 244 } 245 /* 246 * Not an update, or updating the name: look up the name 247 * and verify that it refers to a sensible disk device. 248 */ 249 if (fspec == NULL) 250 return (EINVAL); 251 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); 252 if ((error = namei(ndp)) != 0) 253 return (error); 254 NDFREE(ndp, NDF_ONLY_PNBUF); 255 devvp = ndp->ni_vp; 256 257 if (!vn_isdisk(devvp, &error)) { 258 vput(devvp); 259 return (error); 260 } 261 262 /* 263 * If mount by non-root, then verify that user has necessary 264 * permissions on the device. 265 * 266 * XXXRW: VOP_ACCESS() enough? 267 */ 268 accessmode = VREAD; 269 if ((mp->mnt_flag & MNT_RDONLY) == 0) 270 accessmode |= VWRITE; 271 error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td); 272 if (error) 273 error = priv_check(td, PRIV_VFS_MOUNT_PERM); 274 if (error) { 275 vput(devvp); 276 return (error); 277 } 278 279 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 280 error = ext2_mountfs(devvp, mp, td); 281 } else { 282 if (devvp != ump->um_devvp) { 283 vput(devvp); 284 return (EINVAL); /* needs translation */ 285 } else 286 vput(devvp); 287 } 288 if (error) { 289 vrele(devvp); 290 return (error); 291 } 292 ump = VFSTOEXT2(mp); 293 fs = ump->um_e2fs; 294 /* 295 * Note that this strncpy() is ok because of a check at the start 296 * of ext2_mount(). 297 */ 298 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 299 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 300 vfs_mountedfrom(mp, fspec); 301 return (0); 302} 303 304/* 305 * checks that the data in the descriptor blocks make sense 306 * this is taken from ext2/super.c 307 */ 308static int ext2_check_descriptors (struct ext2_sb_info * sb) 309{ 310 int i; 311 int desc_block = 0; 312 unsigned long block = sb->s_es->s_first_data_block; 313 struct ext2_group_desc * gdp = NULL; 314 315 /* ext2_debug ("Checking group descriptors"); */ 316 317 for (i = 0; i < sb->s_groups_count; i++) 318 { 319 /* examine next descriptor block */ 320 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 321 gdp = (struct ext2_group_desc *) 322 sb->s_group_desc[desc_block++]->b_data; 323 if (gdp->bg_block_bitmap < block || 324 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 325 { 326 printf ("ext2_check_descriptors: " 327 "Block bitmap for group %d" 328 " not in group (block %lu)!\n", 329 i, (unsigned long) gdp->bg_block_bitmap); 330 return 0; 331 } 332 if (gdp->bg_inode_bitmap < block || 333 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 334 { 335 printf ("ext2_check_descriptors: " 336 "Inode bitmap for group %d" 337 " not in group (block %lu)!\n", 338 i, (unsigned long) gdp->bg_inode_bitmap); 339 return 0; 340 } 341 if (gdp->bg_inode_table < block || 342 gdp->bg_inode_table + sb->s_itb_per_group >= 343 block + EXT2_BLOCKS_PER_GROUP(sb)) 344 { 345 printf ("ext2_check_descriptors: " 346 "Inode table for group %d" 347 " not in group (block %lu)!\n", 348 i, (unsigned long) gdp->bg_inode_table); 349 return 0; 350 } 351 block += EXT2_BLOCKS_PER_GROUP(sb); 352 gdp++; 353 } 354 return 1; 355} 356 357static int 358ext2_check_sb_compat(es, dev, ronly) 359 struct ext2_super_block *es; 360 struct cdev *dev; 361 int ronly; 362{ 363 364 if (es->s_magic != EXT2_SUPER_MAGIC) { 365 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 366 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 367 return (1); 368 } 369 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 370 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 371 printf( 372"WARNING: mount of %s denied due to unsupported optional features\n", 373 devtoname(dev)); 374 return (1); 375 } 376 if (!ronly && 377 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 378 printf( 379"WARNING: R/W mount of %s denied due to unsupported optional features\n", 380 devtoname(dev)); 381 return (1); 382 } 383 } 384 return (0); 385} 386 387/* 388 * this computes the fields of the ext2_sb_info structure from the 389 * data in the ext2_super_block structure read in 390 */ 391static int compute_sb_data(devvp, es, fs) 392 struct vnode * devvp; 393 struct ext2_super_block * es; 394 struct ext2_sb_info * fs; 395{ 396 int db_count, error; 397 int i, j; 398 int logic_sb_block = 1; /* XXX for now */ 399 400#if 1 401#define V(v) 402#else 403#define V(v) printf(#v"= %d\n", fs->v); 404#endif 405 406 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 407 V(s_blocksize) 408 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 409 V(s_bshift) 410 fs->s_fsbtodb = es->s_log_block_size + 1; 411 V(s_fsbtodb) 412 fs->s_qbmask = fs->s_blocksize - 1; 413 V(s_bmask) 414 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 415 V(s_blocksize_bits) 416 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 417 V(s_frag_size) 418 if (fs->s_frag_size) 419 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 420 V(s_frags_per_block) 421 fs->s_blocks_per_group = es->s_blocks_per_group; 422 V(s_blocks_per_group) 423 fs->s_frags_per_group = es->s_frags_per_group; 424 V(s_frags_per_group) 425 fs->s_inodes_per_group = es->s_inodes_per_group; 426 V(s_inodes_per_group) 427 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 428 V(s_inodes_per_block) 429 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 430 V(s_itb_per_group) 431 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 432 V(s_desc_per_block) 433 /* s_resuid / s_resgid ? */ 434 fs->s_groups_count = (es->s_blocks_count - 435 es->s_first_data_block + 436 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 437 EXT2_BLOCKS_PER_GROUP(fs); 438 V(s_groups_count) 439 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 440 EXT2_DESC_PER_BLOCK(fs); 441 fs->s_db_per_group = db_count; 442 V(s_db_per_group) 443 444 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 445 M_EXT2MNT, M_WAITOK); 446 447 /* adjust logic_sb_block */ 448 if(fs->s_blocksize > SBSIZE) 449 /* Godmar thinks: if the blocksize is greater than 1024, then 450 the superblock is logically part of block zero. 451 */ 452 logic_sb_block = 0; 453 454 for (i = 0; i < db_count; i++) { 455 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 456 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 457 if(error) { 458 for (j = 0; j < i; j++) 459 brelse(fs->s_group_desc[j]); 460 bsd_free(fs->s_group_desc, M_EXT2MNT); 461 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 462 return EIO; 463 } 464 LCK_BUF(fs->s_group_desc[i]) 465 } 466 if(!ext2_check_descriptors(fs)) { 467 for (j = 0; j < db_count; j++) 468 ULCK_BUF(fs->s_group_desc[j]) 469 bsd_free(fs->s_group_desc, M_EXT2MNT); 470 printf("EXT2-fs: (ext2_check_descriptors failure) " 471 "unable to read group descriptors\n"); 472 return EIO; 473 } 474 475 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 476 fs->s_inode_bitmap_number[i] = 0; 477 fs->s_inode_bitmap[i] = NULL; 478 fs->s_block_bitmap_number[i] = 0; 479 fs->s_block_bitmap[i] = NULL; 480 } 481 fs->s_loaded_inode_bitmaps = 0; 482 fs->s_loaded_block_bitmaps = 0; 483 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 484 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 485 fs->fs_maxfilesize = 0x7fffffff; 486 else 487 fs->fs_maxfilesize = 0x7fffffffffffffff; 488 return 0; 489} 490 491/* 492 * Reload all incore data for a filesystem (used after running fsck on 493 * the root filesystem and finding things to fix). The filesystem must 494 * be mounted read-only. 495 * 496 * Things to do to update the mount: 497 * 1) invalidate all cached meta-data. 498 * 2) re-read superblock from disk. 499 * 3) re-read summary information from disk. 500 * 4) invalidate all inactive vnodes. 501 * 5) invalidate all cached file data. 502 * 6) re-read inode data for all active vnodes. 503 */ 504static int 505ext2_reload(struct mount *mp, struct thread *td) 506{ 507 struct vnode *vp, *mvp, *devvp; 508 struct inode *ip; 509 struct buf *bp; 510 struct ext2_super_block * es; 511 struct ext2_sb_info *fs; 512 int error; 513 514 if ((mp->mnt_flag & MNT_RDONLY) == 0) 515 return (EINVAL); 516 /* 517 * Step 1: invalidate all cached meta-data. 518 */ 519 devvp = VFSTOEXT2(mp)->um_devvp; 520 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 521 if (vinvalbuf(devvp, 0, td, 0, 0) != 0) 522 panic("ext2_reload: dirty1"); 523 VOP_UNLOCK(devvp, 0, td); 524 525 /* 526 * Step 2: re-read superblock from disk. 527 * constants have been adjusted for ext2 528 */ 529 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 530 return (error); 531 es = (struct ext2_super_block *)bp->b_data; 532 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 533 brelse(bp); 534 return (EIO); /* XXX needs translation */ 535 } 536 fs = VFSTOEXT2(mp)->um_e2fs; 537 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 538 539 if((error = compute_sb_data(devvp, es, fs)) != 0) { 540 brelse(bp); 541 return error; 542 } 543#ifdef UNKLAR 544 if (fs->fs_sbsize < SBSIZE) 545 bp->b_flags |= B_INVAL; 546#endif 547 brelse(bp); 548 549loop: 550 MNT_ILOCK(mp); 551 MNT_VNODE_FOREACH(vp, mp, mvp) { 552 VI_LOCK(vp); 553 if (vp->v_iflag & VI_DOOMED) { 554 VI_UNLOCK(vp); 555 continue; 556 } 557 MNT_IUNLOCK(mp); 558 /* 559 * Step 4: invalidate all cached file data. 560 */ 561 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 562 MNT_VNODE_FOREACH_ABORT(mp, mvp); 563 goto loop; 564 } 565 if (vinvalbuf(vp, 0, td, 0, 0)) 566 panic("ext2_reload: dirty2"); 567 /* 568 * Step 5: re-read inode data for all active vnodes. 569 */ 570 ip = VTOI(vp); 571 error = 572 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 573 (int)fs->s_blocksize, NOCRED, &bp); 574 if (error) { 575 VOP_UNLOCK(vp, 0, td); 576 vrele(vp); 577 MNT_VNODE_FOREACH_ABORT(mp, mvp); 578 return (error); 579 } 580 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 581 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 582 brelse(bp); 583 VOP_UNLOCK(vp, 0, td); 584 vrele(vp); 585 MNT_ILOCK(mp); 586 } 587 MNT_IUNLOCK(mp); 588 return (0); 589} 590 591/* 592 * Common code for mount and mountroot 593 */ 594static int 595ext2_mountfs(devvp, mp, td) 596 struct vnode *devvp; 597 struct mount *mp; 598 struct thread *td; 599{ 600 struct ext2mount *ump; 601 struct buf *bp; 602 struct ext2_sb_info *fs; 603 struct ext2_super_block * es; 604 struct cdev *dev = devvp->v_rdev; 605 struct g_consumer *cp; 606 struct bufobj *bo; 607 int error; 608 int ronly; 609 610 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 611 /* XXX: use VOP_ACESS to check FS perms */ 612 DROP_GIANT(); 613 g_topology_lock(); 614 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 615 g_topology_unlock(); 616 PICKUP_GIANT(); 617 VOP_UNLOCK(devvp, 0, td); 618 if (error) 619 return (error); 620 621 /* XXX: should we check for some sectorsize or 512 instead? */ 622 if (((SBSIZE % cp->provider->sectorsize) != 0) || 623 (SBSIZE < cp->provider->sectorsize)) { 624 DROP_GIANT(); 625 g_topology_lock(); 626 g_vfs_close(cp, td); 627 g_topology_unlock(); 628 PICKUP_GIANT(); 629 return (EINVAL); 630 } 631 632 bo = &devvp->v_bufobj; 633 bo->bo_private = cp; 634 bo->bo_ops = g_vfs_bufops; 635 if (devvp->v_rdev->si_iosize_max != 0) 636 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 637 if (mp->mnt_iosize_max > MAXPHYS) 638 mp->mnt_iosize_max = MAXPHYS; 639 640 bp = NULL; 641 ump = NULL; 642 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 643 goto out; 644 es = (struct ext2_super_block *)bp->b_data; 645 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 646 error = EINVAL; /* XXX needs translation */ 647 goto out; 648 } 649 if ((es->s_state & EXT2_VALID_FS) == 0 || 650 (es->s_state & EXT2_ERROR_FS)) { 651 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 652 printf( 653"WARNING: Filesystem was not properly dismounted\n"); 654 } else { 655 printf( 656"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 657 error = EPERM; 658 goto out; 659 } 660 } 661 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 662 bzero((caddr_t)ump, sizeof *ump); 663 /* I don't know whether this is the right strategy. Note that 664 we dynamically allocate both an ext2_sb_info and an ext2_super_block 665 while Linux keeps the super block in a locked buffer 666 */ 667 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 668 M_EXT2MNT, M_WAITOK); 669 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 670 M_EXT2MNT, M_WAITOK); 671 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 672 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 673 goto out; 674 /* 675 * We don't free the group descriptors allocated by compute_sb_data() 676 * until ext2_unmount(). This is OK since the mount will succeed. 677 */ 678 brelse(bp); 679 bp = NULL; 680 fs = ump->um_e2fs; 681 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 682 /* if the fs is not mounted read-only, make sure the super block is 683 always written back on a sync() 684 */ 685 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 686 if (ronly == 0) { 687 fs->s_dirt = 1; /* mark it modified */ 688 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 689 } 690 mp->mnt_data = (qaddr_t)ump; 691 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 692 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 693 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 694 MNT_ILOCK(mp); 695 mp->mnt_flag |= MNT_LOCAL; 696 MNT_IUNLOCK(mp); 697 ump->um_mountp = mp; 698 ump->um_dev = dev; 699 ump->um_devvp = devvp; 700 ump->um_bo = &devvp->v_bufobj; 701 ump->um_cp = cp; 702 /* setting those two parameters allowed us to use 703 ufs_bmap w/o changse ! 704 */ 705 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 706 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 707 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 708 if (ronly == 0) 709 ext2_sbupdate(ump, MNT_WAIT); 710 return (0); 711out: 712 if (bp) 713 brelse(bp); 714 if (cp != NULL) { 715 DROP_GIANT(); 716 g_topology_lock(); 717 g_vfs_close(cp, td); 718 g_topology_unlock(); 719 PICKUP_GIANT(); 720 } 721 if (ump) { 722 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 723 bsd_free(ump->um_e2fs, M_EXT2MNT); 724 bsd_free(ump, M_EXT2MNT); 725 mp->mnt_data = (qaddr_t)0; 726 } 727 return (error); 728} 729 730/* 731 * unmount system call 732 */ 733static int 734ext2_unmount(mp, mntflags, td) 735 struct mount *mp; 736 int mntflags; 737 struct thread *td; 738{ 739 struct ext2mount *ump; 740 struct ext2_sb_info *fs; 741 int error, flags, ronly, i; 742 743 flags = 0; 744 if (mntflags & MNT_FORCE) { 745 if (mp->mnt_flag & MNT_ROOTFS) 746 return (EINVAL); 747 flags |= FORCECLOSE; 748 } 749 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 750 return (error); 751 ump = VFSTOEXT2(mp); 752 fs = ump->um_e2fs; 753 ronly = fs->s_rd_only; 754 if (ronly == 0) { 755 if (fs->s_wasvalid) 756 fs->s_es->s_state |= EXT2_VALID_FS; 757 ext2_sbupdate(ump, MNT_WAIT); 758 } 759 760 /* release buffers containing group descriptors */ 761 for(i = 0; i < fs->s_db_per_group; i++) 762 ULCK_BUF(fs->s_group_desc[i]) 763 bsd_free(fs->s_group_desc, M_EXT2MNT); 764 765 /* release cached inode/block bitmaps */ 766 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 767 if (fs->s_inode_bitmap[i]) 768 ULCK_BUF(fs->s_inode_bitmap[i]) 769 770 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 771 if (fs->s_block_bitmap[i]) 772 ULCK_BUF(fs->s_block_bitmap[i]) 773 774 DROP_GIANT(); 775 g_topology_lock(); 776 g_vfs_close(ump->um_cp, td); 777 g_topology_unlock(); 778 PICKUP_GIANT(); 779 vrele(ump->um_devvp); 780 bsd_free(fs->s_es, M_EXT2MNT); 781 bsd_free(fs, M_EXT2MNT); 782 bsd_free(ump, M_EXT2MNT); 783 mp->mnt_data = (qaddr_t)0; 784 MNT_ILOCK(mp); 785 mp->mnt_flag &= ~MNT_LOCAL; 786 MNT_IUNLOCK(mp); 787 return (error); 788} 789 790/* 791 * Flush out all the files in a filesystem. 792 */ 793static int 794ext2_flushfiles(mp, flags, td) 795 struct mount *mp; 796 int flags; 797 struct thread *td; 798{ 799 int error; 800 801 error = vflush(mp, 0, flags, td); 802 return (error); 803} 804 805/* 806 * Get file system statistics. 807 * taken from ext2/super.c ext2_statfs 808 */ 809static int 810ext2_statfs(mp, sbp, td) 811 struct mount *mp; 812 struct statfs *sbp; 813 struct thread *td; 814{ 815 unsigned long overhead; 816 struct ext2mount *ump; 817 struct ext2_sb_info *fs; 818 struct ext2_super_block *es; 819 int i, nsb; 820 821 ump = VFSTOEXT2(mp); 822 fs = ump->um_e2fs; 823 es = fs->s_es; 824 825 if (es->s_magic != EXT2_SUPER_MAGIC) 826 panic("ext2_statfs - magic number spoiled"); 827 828 /* 829 * Compute the overhead (FS structures) 830 */ 831 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 832 nsb = 0; 833 for (i = 0 ; i < fs->s_groups_count; i++) 834 if (ext2_group_sparse(i)) 835 nsb++; 836 } else 837 nsb = fs->s_groups_count; 838 overhead = es->s_first_data_block + 839 /* Superblocks and block group descriptors: */ 840 nsb * (1 + fs->s_db_per_group) + 841 /* Inode bitmap, block bitmap, and inode table: */ 842 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 843 844 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 845 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 846 sbp->f_blocks = es->s_blocks_count - overhead; 847 sbp->f_bfree = es->s_free_blocks_count; 848 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 849 sbp->f_files = es->s_inodes_count; 850 sbp->f_ffree = es->s_free_inodes_count; 851 return (0); 852} 853 854/* 855 * Go through the disk queues to initiate sandbagged IO; 856 * go through the inodes to write those that have been modified; 857 * initiate the writing of the super block if it has been modified. 858 * 859 * Note: we are always called with the filesystem marked `MPBUSY'. 860 */ 861static int 862ext2_sync(mp, waitfor, td) 863 struct mount *mp; 864 int waitfor; 865 struct thread *td; 866{ 867 struct vnode *mvp, *vp; 868 struct inode *ip; 869 struct ext2mount *ump = VFSTOEXT2(mp); 870 struct ext2_sb_info *fs; 871 int error, allerror = 0; 872 873 fs = ump->um_e2fs; 874 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 875 printf("fs = %s\n", fs->fs_fsmnt); 876 panic("ext2_sync: rofs mod"); 877 } 878 /* 879 * Write back each (modified) inode. 880 */ 881 MNT_ILOCK(mp); 882loop: 883 MNT_VNODE_FOREACH(vp, mp, mvp) { 884 VI_LOCK(vp); 885 if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) { 886 VI_UNLOCK(vp); 887 continue; 888 } 889 MNT_IUNLOCK(mp); 890 ip = VTOI(vp); 891 if ((ip->i_flag & 892 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 893 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 894 waitfor == MNT_LAZY)) { 895 VI_UNLOCK(vp); 896 MNT_ILOCK(mp); 897 continue; 898 } 899 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 900 if (error) { 901 MNT_ILOCK(mp); 902 if (error == ENOENT) { 903 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 904 goto loop; 905 } 906 continue; 907 } 908 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 909 allerror = error; 910 VOP_UNLOCK(vp, 0, td); 911 vrele(vp); 912 MNT_ILOCK(mp); 913 } 914 MNT_IUNLOCK(mp); 915 /* 916 * Force stale file system control information to be flushed. 917 */ 918 if (waitfor != MNT_LAZY) { 919 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 920 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 921 allerror = error; 922 VOP_UNLOCK(ump->um_devvp, 0, td); 923 } 924 /* 925 * Write back modified superblock. 926 */ 927 if (fs->s_dirt != 0) { 928 fs->s_dirt = 0; 929 fs->s_es->s_wtime = time_second; 930 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 931 allerror = error; 932 } 933 return (allerror); 934} 935 936/* 937 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 938 * in from disk. If it is in core, wait for the lock bit to clear, then 939 * return the inode locked. Detection and handling of mount points must be 940 * done by the calling routine. 941 */ 942static int 943ext2_vget(mp, ino, flags, vpp) 944 struct mount *mp; 945 ino_t ino; 946 int flags; 947 struct vnode **vpp; 948{ 949 struct ext2_sb_info *fs; 950 struct inode *ip; 951 struct ext2mount *ump; 952 struct buf *bp; 953 struct vnode *vp; 954 struct cdev *dev; 955 int i, error; 956 int used_blocks; 957 struct thread *td; 958 959 td = curthread; 960 error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); 961 if (error || *vpp != NULL) 962 return (error); 963 964 ump = VFSTOEXT2(mp); 965 dev = ump->um_dev; 966 967 /* 968 * If this MALLOC() is performed after the getnewvnode() 969 * it might block, leaving a vnode with a NULL v_data to be 970 * found by ext2_sync() if a sync happens to fire right then, 971 * which will cause a panic because ext2_sync() blindly 972 * dereferences vp->v_data (as well it should). 973 */ 974 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 975 976 /* Allocate a new vnode/inode. */ 977 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 978 *vpp = NULL; 979 free(ip, M_EXT2NODE); 980 return (error); 981 } 982 vp->v_data = ip; 983 ip->i_vnode = vp; 984 ip->i_e2fs = fs = ump->um_e2fs; 985 ip->i_number = ino; 986 987 lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL, td); 988 error = insmntque(vp, mp); 989 if (error != 0) { 990 free(ip, M_EXT2NODE); 991 *vpp = NULL; 992 return (error); 993 } 994 error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); 995 if (error || *vpp != NULL) 996 return (error); 997 998 /* Read in the disk contents for the inode, copy into the inode. */ 999#if 0 1000printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1001#endif 1002 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1003 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1004 /* 1005 * The inode does not contain anything useful, so it would 1006 * be misleading to leave it on its hash chain. With mode 1007 * still zero, it will be unlinked and returned to the free 1008 * list by vput(). 1009 */ 1010 vput(vp); 1011 brelse(bp); 1012 *vpp = NULL; 1013 return (error); 1014 } 1015 /* convert ext2 inode to dinode */ 1016 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1017 ino_to_fsbo(fs, ino)), ip); 1018 ip->i_block_group = ino_to_cg(fs, ino); 1019 ip->i_next_alloc_block = 0; 1020 ip->i_next_alloc_goal = 0; 1021 ip->i_prealloc_count = 0; 1022 ip->i_prealloc_block = 0; 1023 /* now we want to make sure that block pointers for unused 1024 blocks are zeroed out - ext2_balloc depends on this 1025 although for regular files and directories only 1026 */ 1027 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1028 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1029 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1030 ip->i_db[i] = 0; 1031 } 1032/* 1033 ext2_print_inode(ip); 1034*/ 1035 brelse(bp); 1036 1037 /* 1038 * Initialize the vnode from the inode, check for aliases. 1039 * Note that the underlying vnode may have changed. 1040 */ 1041 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1042 vput(vp); 1043 *vpp = NULL; 1044 return (error); 1045 } 1046 /* 1047 * Finish inode initialization now that aliasing has been resolved. 1048 */ 1049 ip->i_devvp = ump->um_devvp; 1050 /* 1051 * Set up a generation number for this inode if it does not 1052 * already have one. This should only happen on old filesystems. 1053 */ 1054 if (ip->i_gen == 0) { 1055 ip->i_gen = random() / 2 + 1; 1056 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1057 ip->i_flag |= IN_MODIFIED; 1058 } 1059 *vpp = vp; 1060 return (0); 1061} 1062 1063/* 1064 * File handle to vnode 1065 * 1066 * Have to be really careful about stale file handles: 1067 * - check that the inode number is valid 1068 * - call ext2_vget() to get the locked inode 1069 * - check for an unallocated inode (i_mode == 0) 1070 * - check that the given client host has export rights and return 1071 * those rights via. exflagsp and credanonp 1072 */ 1073static int 1074ext2_fhtovp(mp, fhp, vpp) 1075 struct mount *mp; 1076 struct fid *fhp; 1077 struct vnode **vpp; 1078{ 1079 struct inode *ip; 1080 struct ufid *ufhp; 1081 struct vnode *nvp; 1082 struct ext2_sb_info *fs; 1083 int error; 1084 1085 ufhp = (struct ufid *)fhp; 1086 fs = VFSTOEXT2(mp)->um_e2fs; 1087 if (ufhp->ufid_ino < ROOTINO || 1088 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1089 return (ESTALE); 1090 1091 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1092 if (error) { 1093 *vpp = NULLVP; 1094 return (error); 1095 } 1096 ip = VTOI(nvp); 1097 if (ip->i_mode == 0 || 1098 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1099 vput(nvp); 1100 *vpp = NULLVP; 1101 return (ESTALE); 1102 } 1103 *vpp = nvp; 1104 vnode_create_vobject(*vpp, 0, curthread); 1105 return (0); 1106} 1107 1108/* 1109 * Write a superblock and associated information back to disk. 1110 */ 1111static int 1112ext2_sbupdate(mp, waitfor) 1113 struct ext2mount *mp; 1114 int waitfor; 1115{ 1116 struct ext2_sb_info *fs = mp->um_e2fs; 1117 struct ext2_super_block *es = fs->s_es; 1118 struct buf *bp; 1119 int error = 0; 1120/* 1121printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1122*/ 1123 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1124 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1125 if (waitfor == MNT_WAIT) 1126 error = bwrite(bp); 1127 else 1128 bawrite(bp); 1129 1130 /* 1131 * The buffers for group descriptors, inode bitmaps and block bitmaps 1132 * are not busy at this point and are (hopefully) written by the 1133 * usual sync mechanism. No need to write them here 1134 */ 1135 1136 return (error); 1137} 1138 1139/* 1140 * Return the root of a filesystem. 1141 */ 1142static int 1143ext2_root(mp, flags, vpp, td) 1144 struct mount *mp; 1145 int flags; 1146 struct vnode **vpp; 1147 struct thread *td; 1148{ 1149 struct vnode *nvp; 1150 int error; 1151 1152 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1153 if (error) 1154 return (error); 1155 *vpp = nvp; 1156 return (0); 1157} 1158