/* ext2_vfsops.c revision 162647 */
1/*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/*- 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 36 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 162647 2006-09-26 04:12:49Z tegge $ 37 */ 38 39/*- 40 * COPYRIGHT.INFO says this has some GPL'd code from ext2_super.c in it 41 * 42 * This program is free software; you can redistribute it and/or modify 43 * it under the terms of the GNU General Public License as published by 44 * the Free Software Foundation; either version 2 of the License. 45 * 46 * This program is distributed in the hope that it will be useful, 47 * but WITHOUT ANY WARRANTY; without even the implied warranty of 48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 49 * GNU General Public License for more details. 50 * 51 * You should have received a copy of the GNU General Public License 52 * along with this program; if not, write to the Free Software 53 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 54 * 55 */ 56 57#include <sys/param.h> 58#include <sys/systm.h> 59#include <sys/namei.h> 60#include <sys/proc.h> 61#include <sys/kernel.h> 62#include <sys/vnode.h> 63#include <sys/mount.h> 64#include <sys/bio.h> 65#include <sys/buf.h> 66#include <sys/conf.h> 67#include <sys/fcntl.h> 68#include <sys/malloc.h> 69#include <sys/stat.h> 70#include <sys/mutex.h> 71 72#include <geom/geom.h> 73#include <geom/geom_vfs.h> 74 75#include <gnu/fs/ext2fs/ext2_mount.h> 76#include <gnu/fs/ext2fs/inode.h> 77 78#include <gnu/fs/ext2fs/fs.h> 79#include <gnu/fs/ext2fs/ext2_extern.h> 80#include <gnu/fs/ext2fs/ext2_fs.h> 81#include <gnu/fs/ext2fs/ext2_fs_sb.h> 82 83static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); 84static int ext2_mountfs(struct vnode *, struct mount *, struct thread *); 85static int ext2_reload(struct mount *mp, struct thread *td); 86static int ext2_sbupdate(struct ext2mount *, int); 87 88static vfs_unmount_t ext2_unmount; 89static vfs_root_t ext2_root; 90static vfs_statfs_t ext2_statfs; 91static vfs_sync_t ext2_sync; 92static vfs_vget_t ext2_vget; 
93static vfs_fhtovp_t ext2_fhtovp; 94static vfs_vptofh_t ext2_vptofh; 95static vfs_mount_t ext2_mount; 96 97MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 98static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 99 100static struct vfsops ext2fs_vfsops = { 101 .vfs_fhtovp = ext2_fhtovp, 102 .vfs_mount = ext2_mount, 103 .vfs_root = ext2_root, /* root inode via vget */ 104 .vfs_statfs = ext2_statfs, 105 .vfs_sync = ext2_sync, 106 .vfs_unmount = ext2_unmount, 107 .vfs_vget = ext2_vget, 108 .vfs_vptofh = ext2_vptofh, 109}; 110 111VFS_SET(ext2fs_vfsops, ext2fs, 0); 112 113#define bsd_malloc malloc 114#define bsd_free free 115 116static int ext2_check_sb_compat(struct ext2_super_block *es, struct cdev *dev, 117 int ronly); 118static int compute_sb_data(struct vnode * devvp, 119 struct ext2_super_block * es, struct ext2_sb_info * fs); 120 121static const char *ext2_opts[] = { "from", "export" }; 122/* 123 * VFS Operations. 124 * 125 * mount system call 126 */ 127static int 128ext2_mount(mp, td) 129 struct mount *mp; 130 struct thread *td; 131{ 132 struct vfsoptlist *opts; 133 struct vnode *devvp; 134 struct ext2mount *ump = 0; 135 struct ext2_sb_info *fs; 136 char *path, *fspec; 137 int error, flags, len; 138 mode_t accessmode; 139 struct nameidata nd, *ndp = &nd; 140 141 opts = mp->mnt_optnew; 142 143 if (vfs_filteropt(opts, ext2_opts)) 144 return (EINVAL); 145 146 vfs_getopt(opts, "fspath", (void **)&path, NULL); 147 /* Double-check the length of path.. */ 148 if (strlen(path) >= MAXMNTLEN - 1) 149 return (ENAMETOOLONG); 150 151 fspec = NULL; 152 error = vfs_getopt(opts, "from", (void **)&fspec, &len); 153 if (!error && fspec[len - 1] != '\0') 154 return (EINVAL); 155 156 /* 157 * If updating, check whether changing from read-only to 158 * read/write; if there is no device name, that's all we do. 
159 */ 160 if (mp->mnt_flag & MNT_UPDATE) { 161 ump = VFSTOEXT2(mp); 162 fs = ump->um_e2fs; 163 error = 0; 164 if (fs->s_rd_only == 0 && 165 vfs_flagopt(opts, "ro", NULL, 0)) { 166 error = VFS_SYNC(mp, MNT_WAIT, td); 167 if (error) 168 return (error); 169 flags = WRITECLOSE; 170 if (mp->mnt_flag & MNT_FORCE) 171 flags |= FORCECLOSE; 172 if (vfs_busy(mp, LK_NOWAIT, 0, td)) 173 return (EBUSY); 174 error = ext2_flushfiles(mp, flags, td); 175 vfs_unbusy(mp, td); 176 if (!error && fs->s_wasvalid) { 177 fs->s_es->s_state |= EXT2_VALID_FS; 178 ext2_sbupdate(ump, MNT_WAIT); 179 } 180 fs->s_rd_only = 1; 181 vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); 182 DROP_GIANT(); 183 g_topology_lock(); 184 g_access(ump->um_cp, 0, -1, 0); 185 g_topology_unlock(); 186 PICKUP_GIANT(); 187 } 188 if (!error && (mp->mnt_flag & MNT_RELOAD)) 189 error = ext2_reload(mp, td); 190 if (error) 191 return (error); 192 devvp = ump->um_devvp; 193 if (fs->s_rd_only && !vfs_flagopt(opts, "ro", NULL, 0)) { 194 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 0)) 195 return (EPERM); 196 /* 197 * If upgrade to read-write by non-root, then verify 198 * that user has necessary permissions on the device. 199 */ 200 if (suser(td)) { 201 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 202 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 203 td->td_ucred, td)) != 0) { 204 VOP_UNLOCK(devvp, 0, td); 205 return (error); 206 } 207 VOP_UNLOCK(devvp, 0, td); 208 } 209 DROP_GIANT(); 210 g_topology_lock(); 211 error = g_access(ump->um_cp, 0, 1, 0); 212 g_topology_unlock(); 213 PICKUP_GIANT(); 214 if (error) 215 return (error); 216 217 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 218 (fs->s_es->s_state & EXT2_ERROR_FS)) { 219 if (mp->mnt_flag & MNT_FORCE) { 220 printf( 221"WARNING: %s was not properly dismounted\n", 222 fs->fs_fsmnt); 223 } else { 224 printf( 225"WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", 226 fs->fs_fsmnt); 227 return (EPERM); 228 } 229 } 230 fs->s_es->s_state &= ~EXT2_VALID_FS; 231 ext2_sbupdate(ump, MNT_WAIT); 232 fs->s_rd_only = 0; 233 MNT_ILOCK(mp); 234 mp->mnt_flag &= ~MNT_RDONLY; 235 MNT_IUNLOCK(mp); 236 } 237 if (vfs_flagopt(opts, "export", NULL, 0)) { 238 /* Process export requests in vfs_mount.c. */ 239 return (error); 240 } 241 } 242 /* 243 * Not an update, or updating the name: look up the name 244 * and verify that it refers to a sensible disk device. 245 */ 246 if (fspec == NULL) 247 return (EINVAL); 248 NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); 249 if ((error = namei(ndp)) != 0) 250 return (error); 251 NDFREE(ndp, NDF_ONLY_PNBUF); 252 devvp = ndp->ni_vp; 253 254 if (!vn_isdisk(devvp, &error)) { 255 vput(devvp); 256 return (error); 257 } 258 259 /* 260 * If mount by non-root, then verify that user has necessary 261 * permissions on the device. 262 */ 263 if (suser(td)) { 264 accessmode = VREAD; 265 if ((mp->mnt_flag & MNT_RDONLY) == 0) 266 accessmode |= VWRITE; 267 if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td)) != 0) { 268 vput(devvp); 269 return (error); 270 } 271 } 272 273 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 274 error = ext2_mountfs(devvp, mp, td); 275 } else { 276 if (devvp != ump->um_devvp) { 277 vput(devvp); 278 return (EINVAL); /* needs translation */ 279 } else 280 vput(devvp); 281 } 282 if (error) { 283 vrele(devvp); 284 return (error); 285 } 286 ump = VFSTOEXT2(mp); 287 fs = ump->um_e2fs; 288 /* 289 * Note that this strncpy() is ok because of a check at the start 290 * of ext2_mount(). 
291 */ 292 strncpy(fs->fs_fsmnt, path, MAXMNTLEN); 293 fs->fs_fsmnt[MAXMNTLEN - 1] = '\0'; 294 vfs_mountedfrom(mp, fspec); 295 return (0); 296} 297 298/* 299 * checks that the data in the descriptor blocks make sense 300 * this is taken from ext2/super.c 301 */ 302static int ext2_check_descriptors (struct ext2_sb_info * sb) 303{ 304 int i; 305 int desc_block = 0; 306 unsigned long block = sb->s_es->s_first_data_block; 307 struct ext2_group_desc * gdp = NULL; 308 309 /* ext2_debug ("Checking group descriptors"); */ 310 311 for (i = 0; i < sb->s_groups_count; i++) 312 { 313 /* examine next descriptor block */ 314 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 315 gdp = (struct ext2_group_desc *) 316 sb->s_group_desc[desc_block++]->b_data; 317 if (gdp->bg_block_bitmap < block || 318 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 319 { 320 printf ("ext2_check_descriptors: " 321 "Block bitmap for group %d" 322 " not in group (block %lu)!\n", 323 i, (unsigned long) gdp->bg_block_bitmap); 324 return 0; 325 } 326 if (gdp->bg_inode_bitmap < block || 327 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 328 { 329 printf ("ext2_check_descriptors: " 330 "Inode bitmap for group %d" 331 " not in group (block %lu)!\n", 332 i, (unsigned long) gdp->bg_inode_bitmap); 333 return 0; 334 } 335 if (gdp->bg_inode_table < block || 336 gdp->bg_inode_table + sb->s_itb_per_group >= 337 block + EXT2_BLOCKS_PER_GROUP(sb)) 338 { 339 printf ("ext2_check_descriptors: " 340 "Inode table for group %d" 341 " not in group (block %lu)!\n", 342 i, (unsigned long) gdp->bg_inode_table); 343 return 0; 344 } 345 block += EXT2_BLOCKS_PER_GROUP(sb); 346 gdp++; 347 } 348 return 1; 349} 350 351static int 352ext2_check_sb_compat(es, dev, ronly) 353 struct ext2_super_block *es; 354 struct cdev *dev; 355 int ronly; 356{ 357 358 if (es->s_magic != EXT2_SUPER_MAGIC) { 359 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 360 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 361 return (1); 
362 } 363 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 364 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 365 printf( 366"WARNING: mount of %s denied due to unsupported optional features\n", 367 devtoname(dev)); 368 return (1); 369 } 370 if (!ronly && 371 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 372 printf( 373"WARNING: R/W mount of %s denied due to unsupported optional features\n", 374 devtoname(dev)); 375 return (1); 376 } 377 } 378 return (0); 379} 380 381/* 382 * this computes the fields of the ext2_sb_info structure from the 383 * data in the ext2_super_block structure read in 384 */ 385static int compute_sb_data(devvp, es, fs) 386 struct vnode * devvp; 387 struct ext2_super_block * es; 388 struct ext2_sb_info * fs; 389{ 390 int db_count, error; 391 int i, j; 392 int logic_sb_block = 1; /* XXX for now */ 393 394#if 1 395#define V(v) 396#else 397#define V(v) printf(#v"= %d\n", fs->v); 398#endif 399 400 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 401 V(s_blocksize) 402 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 403 V(s_bshift) 404 fs->s_fsbtodb = es->s_log_block_size + 1; 405 V(s_fsbtodb) 406 fs->s_qbmask = fs->s_blocksize - 1; 407 V(s_bmask) 408 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 409 V(s_blocksize_bits) 410 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 411 V(s_frag_size) 412 if (fs->s_frag_size) 413 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 414 V(s_frags_per_block) 415 fs->s_blocks_per_group = es->s_blocks_per_group; 416 V(s_blocks_per_group) 417 fs->s_frags_per_group = es->s_frags_per_group; 418 V(s_frags_per_group) 419 fs->s_inodes_per_group = es->s_inodes_per_group; 420 V(s_inodes_per_group) 421 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 422 V(s_inodes_per_block) 423 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 424 V(s_itb_per_group) 425 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct 
ext2_group_desc); 426 V(s_desc_per_block) 427 /* s_resuid / s_resgid ? */ 428 fs->s_groups_count = (es->s_blocks_count - 429 es->s_first_data_block + 430 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 431 EXT2_BLOCKS_PER_GROUP(fs); 432 V(s_groups_count) 433 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 434 EXT2_DESC_PER_BLOCK(fs); 435 fs->s_db_per_group = db_count; 436 V(s_db_per_group) 437 438 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 439 M_EXT2MNT, M_WAITOK); 440 441 /* adjust logic_sb_block */ 442 if(fs->s_blocksize > SBSIZE) 443 /* Godmar thinks: if the blocksize is greater than 1024, then 444 the superblock is logically part of block zero. 445 */ 446 logic_sb_block = 0; 447 448 for (i = 0; i < db_count; i++) { 449 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 450 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 451 if(error) { 452 for (j = 0; j < i; j++) 453 brelse(fs->s_group_desc[j]); 454 bsd_free(fs->s_group_desc, M_EXT2MNT); 455 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 456 return EIO; 457 } 458 LCK_BUF(fs->s_group_desc[i]) 459 } 460 if(!ext2_check_descriptors(fs)) { 461 for (j = 0; j < db_count; j++) 462 ULCK_BUF(fs->s_group_desc[j]) 463 bsd_free(fs->s_group_desc, M_EXT2MNT); 464 printf("EXT2-fs: (ext2_check_descriptors failure) " 465 "unable to read group descriptors\n"); 466 return EIO; 467 } 468 469 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 470 fs->s_inode_bitmap_number[i] = 0; 471 fs->s_inode_bitmap[i] = NULL; 472 fs->s_block_bitmap_number[i] = 0; 473 fs->s_block_bitmap[i] = NULL; 474 } 475 fs->s_loaded_inode_bitmaps = 0; 476 fs->s_loaded_block_bitmaps = 0; 477 if (es->s_rev_level == EXT2_GOOD_OLD_REV || (es->s_feature_ro_compat & 478 EXT2_FEATURE_RO_COMPAT_LARGE_FILE) == 0) 479 fs->fs_maxfilesize = 0x7fffffff; 480 else 481 fs->fs_maxfilesize = 0x7fffffffffffffff; 482 return 0; 483} 484 485/* 486 * Reload all incore data for a filesystem (used after running fsck on 487 * the root 
filesystem and finding things to fix). The filesystem must 488 * be mounted read-only. 489 * 490 * Things to do to update the mount: 491 * 1) invalidate all cached meta-data. 492 * 2) re-read superblock from disk. 493 * 3) re-read summary information from disk. 494 * 4) invalidate all inactive vnodes. 495 * 5) invalidate all cached file data. 496 * 6) re-read inode data for all active vnodes. 497 */ 498static int 499ext2_reload(struct mount *mp, struct thread *td) 500{ 501 struct vnode *vp, *mvp, *devvp; 502 struct inode *ip; 503 struct buf *bp; 504 struct ext2_super_block * es; 505 struct ext2_sb_info *fs; 506 int error; 507 508 if ((mp->mnt_flag & MNT_RDONLY) == 0) 509 return (EINVAL); 510 /* 511 * Step 1: invalidate all cached meta-data. 512 */ 513 devvp = VFSTOEXT2(mp)->um_devvp; 514 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); 515 if (vinvalbuf(devvp, 0, td, 0, 0) != 0) 516 panic("ext2_reload: dirty1"); 517 VOP_UNLOCK(devvp, 0, td); 518 519 /* 520 * Step 2: re-read superblock from disk. 521 * constants have been adjusted for ext2 522 */ 523 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 524 return (error); 525 es = (struct ext2_super_block *)bp->b_data; 526 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 527 brelse(bp); 528 return (EIO); /* XXX needs translation */ 529 } 530 fs = VFSTOEXT2(mp)->um_e2fs; 531 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 532 533 if((error = compute_sb_data(devvp, es, fs)) != 0) { 534 brelse(bp); 535 return error; 536 } 537#ifdef UNKLAR 538 if (fs->fs_sbsize < SBSIZE) 539 bp->b_flags |= B_INVAL; 540#endif 541 brelse(bp); 542 543loop: 544 MNT_ILOCK(mp); 545 MNT_VNODE_FOREACH(vp, mp, mvp) { 546 VI_LOCK(vp); 547 if (vp->v_iflag & VI_DOOMED) { 548 VI_UNLOCK(vp); 549 continue; 550 } 551 MNT_IUNLOCK(mp); 552 /* 553 * Step 4: invalidate all cached file data. 
554 */ 555 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { 556 MNT_VNODE_FOREACH_ABORT(mp, mvp); 557 goto loop; 558 } 559 if (vinvalbuf(vp, 0, td, 0, 0)) 560 panic("ext2_reload: dirty2"); 561 /* 562 * Step 5: re-read inode data for all active vnodes. 563 */ 564 ip = VTOI(vp); 565 error = 566 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 567 (int)fs->s_blocksize, NOCRED, &bp); 568 if (error) { 569 VOP_UNLOCK(vp, 0, td); 570 vrele(vp); 571 MNT_VNODE_FOREACH_ABORT(mp, mvp); 572 return (error); 573 } 574 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + 575 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), ip); 576 brelse(bp); 577 VOP_UNLOCK(vp, 0, td); 578 vrele(vp); 579 MNT_ILOCK(mp); 580 } 581 MNT_IUNLOCK(mp); 582 return (0); 583} 584 585/* 586 * Common code for mount and mountroot 587 */ 588static int 589ext2_mountfs(devvp, mp, td) 590 struct vnode *devvp; 591 struct mount *mp; 592 struct thread *td; 593{ 594 struct ext2mount *ump; 595 struct buf *bp; 596 struct ext2_sb_info *fs; 597 struct ext2_super_block * es; 598 struct cdev *dev = devvp->v_rdev; 599 struct g_consumer *cp; 600 struct bufobj *bo; 601 int error; 602 int ronly; 603 604 ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); 605 /* XXX: use VOP_ACESS to check FS perms */ 606 DROP_GIANT(); 607 g_topology_lock(); 608 error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); 609 g_topology_unlock(); 610 PICKUP_GIANT(); 611 VOP_UNLOCK(devvp, 0, td); 612 if (error) 613 return (error); 614 615 /* XXX: should we check for some sectorsize or 512 instead? 
*/ 616 if (((SBSIZE % cp->provider->sectorsize) != 0) || 617 (SBSIZE < cp->provider->sectorsize)) { 618 DROP_GIANT(); 619 g_topology_lock(); 620 g_vfs_close(cp, td); 621 g_topology_unlock(); 622 PICKUP_GIANT(); 623 return (EINVAL); 624 } 625 626 bo = &devvp->v_bufobj; 627 bo->bo_private = cp; 628 bo->bo_ops = g_vfs_bufops; 629 if (devvp->v_rdev->si_iosize_max != 0) 630 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 631 if (mp->mnt_iosize_max > MAXPHYS) 632 mp->mnt_iosize_max = MAXPHYS; 633 634 bp = NULL; 635 ump = NULL; 636 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 637 goto out; 638 es = (struct ext2_super_block *)bp->b_data; 639 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 640 error = EINVAL; /* XXX needs translation */ 641 goto out; 642 } 643 if ((es->s_state & EXT2_VALID_FS) == 0 || 644 (es->s_state & EXT2_ERROR_FS)) { 645 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 646 printf( 647"WARNING: Filesystem was not properly dismounted\n"); 648 } else { 649 printf( 650"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 651 error = EPERM; 652 goto out; 653 } 654 } 655 ump = bsd_malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); 656 bzero((caddr_t)ump, sizeof *ump); 657 /* I don't know whether this is the right strategy. Note that 658 we dynamically allocate both an ext2_sb_info and an ext2_super_block 659 while Linux keeps the super block in a locked buffer 660 */ 661 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 662 M_EXT2MNT, M_WAITOK); 663 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 664 M_EXT2MNT, M_WAITOK); 665 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 666 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 667 goto out; 668 /* 669 * We don't free the group descriptors allocated by compute_sb_data() 670 * until ext2_unmount(). This is OK since the mount will succeed. 
671 */ 672 brelse(bp); 673 bp = NULL; 674 fs = ump->um_e2fs; 675 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 676 /* if the fs is not mounted read-only, make sure the super block is 677 always written back on a sync() 678 */ 679 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 680 if (ronly == 0) { 681 fs->s_dirt = 1; /* mark it modified */ 682 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 683 } 684 mp->mnt_data = (qaddr_t)ump; 685 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 686 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 687 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 688 MNT_ILOCK(mp); 689 mp->mnt_flag |= MNT_LOCAL; 690 MNT_IUNLOCK(mp); 691 ump->um_mountp = mp; 692 ump->um_dev = dev; 693 ump->um_devvp = devvp; 694 ump->um_bo = &devvp->v_bufobj; 695 ump->um_cp = cp; 696 /* setting those two parameters allowed us to use 697 ufs_bmap w/o changse ! 698 */ 699 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 700 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 701 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 702 if (ronly == 0) 703 ext2_sbupdate(ump, MNT_WAIT); 704 return (0); 705out: 706 if (bp) 707 brelse(bp); 708 if (cp != NULL) { 709 DROP_GIANT(); 710 g_topology_lock(); 711 g_vfs_close(cp, td); 712 g_topology_unlock(); 713 PICKUP_GIANT(); 714 } 715 if (ump) { 716 bsd_free(ump->um_e2fs->s_es, M_EXT2MNT); 717 bsd_free(ump->um_e2fs, M_EXT2MNT); 718 bsd_free(ump, M_EXT2MNT); 719 mp->mnt_data = (qaddr_t)0; 720 } 721 return (error); 722} 723 724/* 725 * unmount system call 726 */ 727static int 728ext2_unmount(mp, mntflags, td) 729 struct mount *mp; 730 int mntflags; 731 struct thread *td; 732{ 733 struct ext2mount *ump; 734 struct ext2_sb_info *fs; 735 int error, flags, ronly, i; 736 737 flags = 0; 738 if (mntflags & MNT_FORCE) { 739 if (mp->mnt_flag & MNT_ROOTFS) 740 return (EINVAL); 741 flags |= FORCECLOSE; 742 } 743 if ((error = ext2_flushfiles(mp, flags, td)) != 0) 744 return (error); 745 ump = VFSTOEXT2(mp); 746 fs = 
ump->um_e2fs; 747 ronly = fs->s_rd_only; 748 if (ronly == 0) { 749 if (fs->s_wasvalid) 750 fs->s_es->s_state |= EXT2_VALID_FS; 751 ext2_sbupdate(ump, MNT_WAIT); 752 } 753 754 /* release buffers containing group descriptors */ 755 for(i = 0; i < fs->s_db_per_group; i++) 756 ULCK_BUF(fs->s_group_desc[i]) 757 bsd_free(fs->s_group_desc, M_EXT2MNT); 758 759 /* release cached inode/block bitmaps */ 760 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 761 if (fs->s_inode_bitmap[i]) 762 ULCK_BUF(fs->s_inode_bitmap[i]) 763 764 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 765 if (fs->s_block_bitmap[i]) 766 ULCK_BUF(fs->s_block_bitmap[i]) 767 768 DROP_GIANT(); 769 g_topology_lock(); 770 g_vfs_close(ump->um_cp, td); 771 g_topology_unlock(); 772 PICKUP_GIANT(); 773 vrele(ump->um_devvp); 774 bsd_free(fs->s_es, M_EXT2MNT); 775 bsd_free(fs, M_EXT2MNT); 776 bsd_free(ump, M_EXT2MNT); 777 mp->mnt_data = (qaddr_t)0; 778 MNT_ILOCK(mp); 779 mp->mnt_flag &= ~MNT_LOCAL; 780 MNT_IUNLOCK(mp); 781 return (error); 782} 783 784/* 785 * Flush out all the files in a filesystem. 786 */ 787static int 788ext2_flushfiles(mp, flags, td) 789 struct mount *mp; 790 int flags; 791 struct thread *td; 792{ 793 int error; 794 795 error = vflush(mp, 0, flags, td); 796 return (error); 797} 798 799/* 800 * Get file system statistics. 
801 * taken from ext2/super.c ext2_statfs 802 */ 803static int 804ext2_statfs(mp, sbp, td) 805 struct mount *mp; 806 struct statfs *sbp; 807 struct thread *td; 808{ 809 unsigned long overhead; 810 struct ext2mount *ump; 811 struct ext2_sb_info *fs; 812 struct ext2_super_block *es; 813 int i, nsb; 814 815 ump = VFSTOEXT2(mp); 816 fs = ump->um_e2fs; 817 es = fs->s_es; 818 819 if (es->s_magic != EXT2_SUPER_MAGIC) 820 panic("ext2_statfs - magic number spoiled"); 821 822 /* 823 * Compute the overhead (FS structures) 824 */ 825 if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { 826 nsb = 0; 827 for (i = 0 ; i < fs->s_groups_count; i++) 828 if (ext2_group_sparse(i)) 829 nsb++; 830 } else 831 nsb = fs->s_groups_count; 832 overhead = es->s_first_data_block + 833 /* Superblocks and block group descriptors: */ 834 nsb * (1 + fs->s_db_per_group) + 835 /* Inode bitmap, block bitmap, and inode table: */ 836 fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); 837 838 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 839 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 840 sbp->f_blocks = es->s_blocks_count - overhead; 841 sbp->f_bfree = es->s_free_blocks_count; 842 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 843 sbp->f_files = es->s_inodes_count; 844 sbp->f_ffree = es->s_free_inodes_count; 845 return (0); 846} 847 848/* 849 * Go through the disk queues to initiate sandbagged IO; 850 * go through the inodes to write those that have been modified; 851 * initiate the writing of the super block if it has been modified. 852 * 853 * Note: we are always called with the filesystem marked `MPBUSY'. 
854 */ 855static int 856ext2_sync(mp, waitfor, td) 857 struct mount *mp; 858 int waitfor; 859 struct thread *td; 860{ 861 struct vnode *mvp, *vp; 862 struct inode *ip; 863 struct ext2mount *ump = VFSTOEXT2(mp); 864 struct ext2_sb_info *fs; 865 int error, allerror = 0; 866 867 fs = ump->um_e2fs; 868 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 869 printf("fs = %s\n", fs->fs_fsmnt); 870 panic("ext2_sync: rofs mod"); 871 } 872 /* 873 * Write back each (modified) inode. 874 */ 875 MNT_ILOCK(mp); 876loop: 877 MNT_VNODE_FOREACH(vp, mp, mvp) { 878 VI_LOCK(vp); 879 if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) { 880 VI_UNLOCK(vp); 881 continue; 882 } 883 MNT_IUNLOCK(mp); 884 ip = VTOI(vp); 885 if ((ip->i_flag & 886 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 887 (vp->v_bufobj.bo_dirty.bv_cnt == 0 || 888 waitfor == MNT_LAZY)) { 889 VI_UNLOCK(vp); 890 MNT_ILOCK(mp); 891 continue; 892 } 893 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); 894 if (error) { 895 MNT_ILOCK(mp); 896 if (error == ENOENT) { 897 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); 898 goto loop; 899 } 900 continue; 901 } 902 if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) 903 allerror = error; 904 VOP_UNLOCK(vp, 0, td); 905 vrele(vp); 906 MNT_ILOCK(mp); 907 } 908 MNT_IUNLOCK(mp); 909 /* 910 * Force stale file system control information to be flushed. 911 */ 912 if (waitfor != MNT_LAZY) { 913 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); 914 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) 915 allerror = error; 916 VOP_UNLOCK(ump->um_devvp, 0, td); 917 } 918 /* 919 * Write back modified superblock. 920 */ 921 if (fs->s_dirt != 0) { 922 fs->s_dirt = 0; 923 fs->s_es->s_wtime = time_second; 924 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 925 allerror = error; 926 } 927 return (allerror); 928} 929 930/* 931 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 932 * in from disk. 
If it is in core, wait for the lock bit to clear, then 933 * return the inode locked. Detection and handling of mount points must be 934 * done by the calling routine. 935 */ 936static int 937ext2_vget(mp, ino, flags, vpp) 938 struct mount *mp; 939 ino_t ino; 940 int flags; 941 struct vnode **vpp; 942{ 943 struct ext2_sb_info *fs; 944 struct inode *ip; 945 struct ext2mount *ump; 946 struct buf *bp; 947 struct vnode *vp; 948 struct cdev *dev; 949 int i, error; 950 int used_blocks; 951 952 error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL); 953 if (error || *vpp != NULL) 954 return (error); 955 956 ump = VFSTOEXT2(mp); 957 dev = ump->um_dev; 958 959 /* 960 * If this MALLOC() is performed after the getnewvnode() 961 * it might block, leaving a vnode with a NULL v_data to be 962 * found by ext2_sync() if a sync happens to fire right then, 963 * which will cause a panic because ext2_sync() blindly 964 * dereferences vp->v_data (as well it should). 965 */ 966 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 967 968 /* Allocate a new vnode/inode. */ 969 if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { 970 *vpp = NULL; 971 free(ip, M_EXT2NODE); 972 return (error); 973 } 974 vp->v_data = ip; 975 ip->i_vnode = vp; 976 ip->i_e2fs = fs = ump->um_e2fs; 977 ip->i_number = ino; 978 979 error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL); 980 if (error || *vpp != NULL) 981 return (error); 982 983 /* Read in the disk contents for the inode, copy into the inode. */ 984#if 0 985printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 986#endif 987 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 988 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 989 /* 990 * The inode does not contain anything useful, so it would 991 * be misleading to leave it on its hash chain. With mode 992 * still zero, it will be unlinked and returned to the free 993 * list by vput(). 
994 */ 995 vput(vp); 996 brelse(bp); 997 *vpp = NULL; 998 return (error); 999 } 1000 /* convert ext2 inode to dinode */ 1001 ext2_ei2i((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1002 ino_to_fsbo(fs, ino)), ip); 1003 ip->i_block_group = ino_to_cg(fs, ino); 1004 ip->i_next_alloc_block = 0; 1005 ip->i_next_alloc_goal = 0; 1006 ip->i_prealloc_count = 0; 1007 ip->i_prealloc_block = 0; 1008 /* now we want to make sure that block pointers for unused 1009 blocks are zeroed out - ext2_balloc depends on this 1010 although for regular files and directories only 1011 */ 1012 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1013 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1014 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1015 ip->i_db[i] = 0; 1016 } 1017/* 1018 ext2_print_inode(ip); 1019*/ 1020 brelse(bp); 1021 1022 /* 1023 * Initialize the vnode from the inode, check for aliases. 1024 * Note that the underlying vnode may have changed. 1025 */ 1026 if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { 1027 vput(vp); 1028 *vpp = NULL; 1029 return (error); 1030 } 1031 /* 1032 * Finish inode initialization now that aliasing has been resolved. 1033 */ 1034 ip->i_devvp = ump->um_devvp; 1035 /* 1036 * Set up a generation number for this inode if it does not 1037 * already have one. This should only happen on old filesystems. 1038 */ 1039 if (ip->i_gen == 0) { 1040 ip->i_gen = random() / 2 + 1; 1041 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1042 ip->i_flag |= IN_MODIFIED; 1043 } 1044 *vpp = vp; 1045 return (0); 1046} 1047 1048/* 1049 * File handle to vnode 1050 * 1051 * Have to be really careful about stale file handles: 1052 * - check that the inode number is valid 1053 * - call ext2_vget() to get the locked inode 1054 * - check for an unallocated inode (i_mode == 0) 1055 * - check that the given client host has export rights and return 1056 * those rights via. 
exflagsp and credanonp 1057 */ 1058static int 1059ext2_fhtovp(mp, fhp, vpp) 1060 struct mount *mp; 1061 struct fid *fhp; 1062 struct vnode **vpp; 1063{ 1064 struct inode *ip; 1065 struct ufid *ufhp; 1066 struct vnode *nvp; 1067 struct ext2_sb_info *fs; 1068 int error; 1069 1070 ufhp = (struct ufid *)fhp; 1071 fs = VFSTOEXT2(mp)->um_e2fs; 1072 if (ufhp->ufid_ino < ROOTINO || 1073 ufhp->ufid_ino > fs->s_groups_count * fs->s_es->s_inodes_per_group) 1074 return (ESTALE); 1075 1076 error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); 1077 if (error) { 1078 *vpp = NULLVP; 1079 return (error); 1080 } 1081 ip = VTOI(nvp); 1082 if (ip->i_mode == 0 || 1083 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1084 vput(nvp); 1085 *vpp = NULLVP; 1086 return (ESTALE); 1087 } 1088 *vpp = nvp; 1089 vnode_create_vobject(*vpp, 0, curthread); 1090 return (0); 1091} 1092 1093/* 1094 * Vnode pointer to File handle 1095 */ 1096/* ARGSUSED */ 1097static int 1098ext2_vptofh(vp, fhp) 1099 struct vnode *vp; 1100 struct fid *fhp; 1101{ 1102 struct inode *ip; 1103 struct ufid *ufhp; 1104 1105 ip = VTOI(vp); 1106 ufhp = (struct ufid *)fhp; 1107 ufhp->ufid_len = sizeof(struct ufid); 1108 ufhp->ufid_ino = ip->i_number; 1109 ufhp->ufid_gen = ip->i_gen; 1110 return (0); 1111} 1112 1113/* 1114 * Write a superblock and associated information back to disk. 1115 */ 1116static int 1117ext2_sbupdate(mp, waitfor) 1118 struct ext2mount *mp; 1119 int waitfor; 1120{ 1121 struct ext2_sb_info *fs = mp->um_e2fs; 1122 struct ext2_super_block *es = fs->s_es; 1123 struct buf *bp; 1124 int error = 0; 1125/* 1126printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? 
"yes":"no"); 1127*/ 1128 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); 1129 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1130 if (waitfor == MNT_WAIT) 1131 error = bwrite(bp); 1132 else 1133 bawrite(bp); 1134 1135 /* 1136 * The buffers for group descriptors, inode bitmaps and block bitmaps 1137 * are not busy at this point and are (hopefully) written by the 1138 * usual sync mechanism. No need to write them here 1139 */ 1140 1141 return (error); 1142} 1143 1144/* 1145 * Return the root of a filesystem. 1146 */ 1147static int 1148ext2_root(mp, flags, vpp, td) 1149 struct mount *mp; 1150 int flags; 1151 struct vnode **vpp; 1152 struct thread *td; 1153{ 1154 struct vnode *nvp; 1155 int error; 1156 1157 error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); 1158 if (error) 1159 return (error); 1160 *vpp = nvp; 1161 return (0); 1162} 1163