ext2_vfsops.c revision 66886
1/* 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 40 * $FreeBSD: head/sys/gnu/fs/ext2fs/ext2_vfsops.c 66886 2000-10-09 17:31:39Z eivind $ 41 */ 42 43#include "opt_quota.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/namei.h> 48#include <sys/proc.h> 49#include <sys/kernel.h> 50#include <sys/vnode.h> 51#include <sys/mount.h> 52#include <sys/bio.h> 53#include <sys/buf.h> 54#include <sys/conf.h> 55#include <sys/fcntl.h> 56#include <sys/disklabel.h> 57#include <sys/malloc.h> 58#include <sys/stat.h> 59 60#include <machine/mutex.h> 61 62#include <ufs/ufs/extattr.h> 63#include <ufs/ufs/quota.h> 64#include <ufs/ufs/ufsmount.h> 65#include <ufs/ufs/inode.h> 66#include <ufs/ufs/ufs_extern.h> 67 68 69#include <gnu/ext2fs/fs.h> 70#include <gnu/ext2fs/ext2_extern.h> 71#include <gnu/ext2fs/ext2_fs.h> 72#include <gnu/ext2fs/ext2_fs_sb.h> 73 74static int ext2_fhtovp __P((struct mount *, struct fid *, struct vnode **)); 75static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); 76static int ext2_mount __P((struct mount *, 77 char *, caddr_t, struct nameidata *, struct proc *)); 78static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); 79static int ext2_reload __P((struct mount *mountp, struct ucred *cred, 80 struct proc *p)); 81static int ext2_sbupdate __P((struct ufsmount *, int)); 82static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); 83static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); 84static int ext2_unmount __P((struct mount *, int, struct proc *)); 85static int ext2_vget __P((struct mount *, ino_t, struct vnode **)); 86static int ext2_vptofh __P((struct vnode *, struct fid *)); 87 88static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 89 90static struct vfsops ext2fs_vfsops = { 91 ext2_mount, 92 ufs_start, /* empty function */ 93 ext2_unmount, 94 ufs_root, /* root inode via vget */ 95 ufs_quotactl, /* does operations associated with quotas */ 96 ext2_statfs, 97 ext2_sync, 98 ext2_vget, 99 ext2_fhtovp, 100 ufs_check_export, 101 ext2_vptofh, 102 ext2_init, 103 vfs_stduninit, 104 vfs_stdextattrctl, 105}; 106 107VFS_SET(ext2fs_vfsops, ext2fs, 0); 108#define bsd_malloc malloc 109#define bsd_free free 110 111static int ext2fs_inode_hash_lock; 112 113static int ext2_check_sb_compat __P((struct ext2_super_block *es, 114 dev_t dev, int ronly)); 115static int compute_sb_data __P((struct vnode * devvp, 116 struct ext2_super_block * es, 117 struct ext2_sb_info * fs)); 118 119#ifdef notyet 120static int ext2_mountroot __P((void)); 121 122/* 123 * Called by main() when ext2fs is going to be mounted as root. 124 * 125 * Name is updated by mount(8) after booting. 126 */ 127#define ROOTNAME "root_device" 128 129static int 130ext2_mountroot() 131{ 132 register struct ext2_sb_info *fs; 133 register struct mount *mp; 134 struct proc *p = curproc; 135 struct ufsmount *ump; 136 u_int size; 137 int error; 138 139 if ((error = bdevvp(rootdev, &rootvp))) { 140 printf("ext2_mountroot: can't find rootvp\n"); 141 return (error); 142 } 143 mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 144 bzero((char *)mp, (u_long)sizeof(struct mount)); 145 mp->mnt_op = &ext2fs_vfsops; 146 mp->mnt_flag = MNT_RDONLY; 147 if (error = ext2_mountfs(rootvp, mp, p)) { 148 bsd_free(mp, M_MOUNT); 149 return (error); 150 } 151 if (error = vfs_lock(mp)) { 152 (void)ext2_unmount(mp, 0, p); 153 bsd_free(mp, M_MOUNT); 154 return (error); 155 } 156 TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); 157 mp->mnt_flag |= MNT_ROOTFS; 158 mp->mnt_vnodecovered = NULLVP; 159 ump = VFSTOUFS(mp); 160 fs = ump->um_e2fs; 161 bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); 162 fs->fs_fsmnt[0] = '/'; 163 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 164 MNAMELEN); 165 (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 166 &size); 167 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 168 (void)ext2_statfs(mp, &mp->mnt_stat, p); 169 vfs_unlock(mp); 170 inittodr(fs->s_es->s_wtime); /* this helps to set the time */ 171 return (0); 172} 173#endif 174 175/* 176 * VFS Operations. 177 * 178 * mount system call 179 */ 180static int 181ext2_mount(mp, path, data, ndp, p) 182 register struct mount *mp; 183 char *path; 184 caddr_t data; /* this is actually a (struct ufs_args *) */ 185 struct nameidata *ndp; 186 struct proc *p; 187{ 188 struct vnode *devvp; 189 struct ufs_args args; 190 struct ufsmount *ump = 0; 191 register struct ext2_sb_info *fs; 192 u_int size; 193 int error, flags; 194 mode_t accessmode; 195 196 if ((error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) != 0) 197 return (error); 198 /* 199 * If updating, check whether changing from read-only to 200 * read/write; if there is no device name, that's all we do. 201 */ 202 if (mp->mnt_flag & MNT_UPDATE) { 203 ump = VFSTOUFS(mp); 204 fs = ump->um_e2fs; 205 error = 0; 206 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 207 flags = WRITECLOSE; 208 if (mp->mnt_flag & MNT_FORCE) 209 flags |= FORCECLOSE; 210 if (vfs_busy(mp, LK_NOWAIT, 0, p)) 211 return (EBUSY); 212 error = ext2_flushfiles(mp, flags, p); 213 vfs_unbusy(mp, p); 214 if (!error && fs->s_wasvalid) { 215 fs->s_es->s_state |= EXT2_VALID_FS; 216 ext2_sbupdate(ump, MNT_WAIT); 217 } 218 fs->s_rd_only = 1; 219 } 220 if (!error && (mp->mnt_flag & MNT_RELOAD)) 221 error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); 222 if (error) 223 return (error); 224 devvp = ump->um_devvp; 225 if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, 226 (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) 227 return (EPERM); 228 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 229 /* 230 * If upgrade to read-write by non-root, then verify 231 * that user has necessary permissions on the device. 232 */ 233 if (p->p_ucred->cr_uid != 0) { 234 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 235 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 236 p->p_ucred, p)) != 0) { 237 VOP_UNLOCK(devvp, 0, p); 238 return (error); 239 } 240 VOP_UNLOCK(devvp, 0, p); 241 } 242 243 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 244 (fs->s_es->s_state & EXT2_ERROR_FS)) { 245 if (mp->mnt_flag & MNT_FORCE) { 246 printf( 247"WARNING: %s was not properly dismounted\n", 248 fs->fs_fsmnt); 249 } else { 250 printf( 251"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 252 fs->fs_fsmnt); 253 return (EPERM); 254 } 255 } 256 fs->s_es->s_state &= ~EXT2_VALID_FS; 257 ext2_sbupdate(ump, MNT_WAIT); 258 fs->s_rd_only = 0; 259 } 260 if (args.fspec == 0) { 261 /* 262 * Process export requests. 263 */ 264 return (vfs_export(mp, &ump->um_export, &args.export)); 265 } 266 } 267 /* 268 * Not an update, or updating the name: look up the name 269 * and verify that it refers to a sensible block device. 270 */ 271 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); 272 if ((error = namei(ndp)) != 0) 273 return (error); 274 NDFREE(ndp, NDF_ONLY_PNBUF); 275 devvp = ndp->ni_vp; 276 277 if (!vn_isdisk(devvp, &error)) { 278 vrele(devvp); 279 return (error); 280 } 281 282 /* 283 * If mount by non-root, then verify that user has necessary 284 * permissions on the device. 285 */ 286 if (p->p_ucred->cr_uid != 0) { 287 accessmode = VREAD; 288 if ((mp->mnt_flag & MNT_RDONLY) == 0) 289 accessmode |= VWRITE; 290 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 291 if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { 292 vput(devvp); 293 return (error); 294 } 295 VOP_UNLOCK(devvp, 0, p); 296 } 297 298 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 299 error = ext2_mountfs(devvp, mp, p); 300 } else { 301 if (devvp != ump->um_devvp) 302 error = EINVAL; /* needs translation */ 303 else 304 vrele(devvp); 305 } 306 if (error) { 307 vrele(devvp); 308 return (error); 309 } 310 ump = VFSTOUFS(mp); 311 fs = ump->um_e2fs; 312 (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); 313 bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); 314 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 315 MNAMELEN); 316 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 317 &size); 318 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 319 (void)ext2_statfs(mp, &mp->mnt_stat, p); 320 return (0); 321} 322 323/* 324 * checks that the data in the descriptor blocks make sense 325 * this is taken from ext2/super.c 326 */ 327static int ext2_check_descriptors (struct ext2_sb_info * sb) 328{ 329 int i; 330 int desc_block = 0; 331 unsigned long block = sb->s_es->s_first_data_block; 332 struct ext2_group_desc * gdp = NULL; 333 334 /* ext2_debug ("Checking group descriptors"); */ 335 336 for (i = 0; i < sb->s_groups_count; i++) 337 { 338 /* examine next descriptor block */ 339 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 340 gdp = (struct ext2_group_desc *) 341 sb->s_group_desc[desc_block++]->b_data; 342 if (gdp->bg_block_bitmap < block || 343 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 344 { 345 printf ("ext2_check_descriptors: " 346 "Block bitmap for group %d" 347 " not in group (block %lu)!\n", 348 i, (unsigned long) gdp->bg_block_bitmap); 349 return 0; 350 } 351 if (gdp->bg_inode_bitmap < block || 352 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 353 { 354 printf ("ext2_check_descriptors: " 355 "Inode bitmap for group %d" 356 " not in group (block %lu)!\n", 357 i, (unsigned long) gdp->bg_inode_bitmap); 358 return 0; 359 } 360 if (gdp->bg_inode_table < block || 361 gdp->bg_inode_table + sb->s_itb_per_group >= 362 block + EXT2_BLOCKS_PER_GROUP(sb)) 363 { 364 printf ("ext2_check_descriptors: " 365 "Inode table for group %d" 366 " not in group (block %lu)!\n", 367 i, (unsigned long) gdp->bg_inode_table); 368 return 0; 369 } 370 block += EXT2_BLOCKS_PER_GROUP(sb); 371 gdp++; 372 } 373 return 1; 374} 375 376static int 377ext2_check_sb_compat(es, dev, ronly) 378 struct ext2_super_block *es; 379 dev_t dev; 380 int ronly; 381{ 382 383 if (es->s_magic != EXT2_SUPER_MAGIC) { 384 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 385 devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); 386 return (1); 387 } 388 if (es->s_rev_level > EXT2_GOOD_OLD_REV) { 389 if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { 390 printf( 391"WARNING: mount of %s denied due to unsupported optional features\n", 392 devtoname(dev)); 393 return (1); 394 } 395 if (!ronly && 396 (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { 397 printf( 398"WARNING: R/W mount of %s denied due to unsupported optional features\n", 399 devtoname(dev)); 400 return (1); 401 } 402 } 403 return (0); 404} 405 406/* 407 * this computes the fields of the ext2_sb_info structure from the 408 * data in the ext2_super_block structure read in 409 */ 410static int compute_sb_data(devvp, es, fs) 411 struct vnode * devvp; 412 struct ext2_super_block * es; 413 struct ext2_sb_info * fs; 414{ 415 int db_count, error; 416 int i, j; 417 int logic_sb_block = 1; /* XXX for now */ 418 419#if 1 420#define V(v) 421#else 422#define V(v) printf(#v"= %d\n", fs->v); 423#endif 424 425 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 426 V(s_blocksize) 427 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 428 V(s_bshift) 429 fs->s_fsbtodb = es->s_log_block_size + 1; 430 V(s_fsbtodb) 431 fs->s_qbmask = fs->s_blocksize - 1; 432 V(s_bmask) 433 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 434 V(s_blocksize_bits) 435 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 436 V(s_frag_size) 437 if (fs->s_frag_size) 438 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 439 V(s_frags_per_block) 440 fs->s_blocks_per_group = es->s_blocks_per_group; 441 V(s_blocks_per_group) 442 fs->s_frags_per_group = es->s_frags_per_group; 443 V(s_frags_per_group) 444 fs->s_inodes_per_group = es->s_inodes_per_group; 445 V(s_inodes_per_group) 446 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 447 V(s_inodes_per_block) 448 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 449 V(s_itb_per_group) 450 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 451 V(s_desc_per_block) 452 /* s_resuid / s_resgid ? */ 453 fs->s_groups_count = (es->s_blocks_count - 454 es->s_first_data_block + 455 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 456 EXT2_BLOCKS_PER_GROUP(fs); 457 V(s_groups_count) 458 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 459 EXT2_DESC_PER_BLOCK(fs); 460 fs->s_db_per_group = db_count; 461 V(s_db_per_group) 462 463 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 464 M_UFSMNT, M_WAITOK); 465 466 /* adjust logic_sb_block */ 467 if(fs->s_blocksize > SBSIZE) 468 /* Godmar thinks: if the blocksize is greater than 1024, then 469 the superblock is logically part of block zero. 470 */ 471 logic_sb_block = 0; 472 473 for (i = 0; i < db_count; i++) { 474 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 475 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 476 if(error) { 477 for (j = 0; j < i; j++) 478 brelse(fs->s_group_desc[j]); 479 bsd_free(fs->s_group_desc, M_UFSMNT); 480 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 481 return EIO; 482 } 483 /* Set the B_LOCKED flag on the buffer, then brelse() it */ 484 LCK_BUF(fs->s_group_desc[i]) 485 } 486 if(!ext2_check_descriptors(fs)) { 487 for (j = 0; j < db_count; j++) 488 ULCK_BUF(fs->s_group_desc[j]) 489 bsd_free(fs->s_group_desc, M_UFSMNT); 490 printf("EXT2-fs: (ext2_check_descriptors failure) " 491 "unable to read group descriptors\n"); 492 return EIO; 493 } 494 495 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 496 fs->s_inode_bitmap_number[i] = 0; 497 fs->s_inode_bitmap[i] = NULL; 498 fs->s_block_bitmap_number[i] = 0; 499 fs->s_block_bitmap[i] = NULL; 500 } 501 fs->s_loaded_inode_bitmaps = 0; 502 fs->s_loaded_block_bitmaps = 0; 503 return 0; 504} 505 506/* 507 * Reload all incore data for a filesystem (used after running fsck on 508 * the root filesystem and finding things to fix). The filesystem must 509 * be mounted read-only. 510 * 511 * Things to do to update the mount: 512 * 1) invalidate all cached meta-data. 513 * 2) re-read superblock from disk. 514 * 3) re-read summary information from disk. 515 * 4) invalidate all inactive vnodes. 516 * 5) invalidate all cached file data. 517 * 6) re-read inode data for all active vnodes. 518 */ 519static int 520ext2_reload(mountp, cred, p) 521 register struct mount *mountp; 522 struct ucred *cred; 523 struct proc *p; 524{ 525 register struct vnode *vp, *nvp, *devvp; 526 struct inode *ip; 527 struct buf *bp; 528 struct ext2_super_block * es; 529 struct ext2_sb_info *fs; 530 int error; 531 532 if ((mountp->mnt_flag & MNT_RDONLY) == 0) 533 return (EINVAL); 534 /* 535 * Step 1: invalidate all cached meta-data. 536 */ 537 devvp = VFSTOUFS(mountp)->um_devvp; 538 if (vinvalbuf(devvp, 0, cred, p, 0, 0)) 539 panic("ext2_reload: dirty1"); 540 /* 541 * Step 2: re-read superblock from disk. 542 * constants have been adjusted for ext2 543 */ 544 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 545 return (error); 546 es = (struct ext2_super_block *)bp->b_data; 547 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 548 brelse(bp); 549 return (EIO); /* XXX needs translation */ 550 } 551 fs = VFSTOUFS(mountp)->um_e2fs; 552 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 553 554 if((error = compute_sb_data(devvp, es, fs)) != 0) { 555 brelse(bp); 556 return error; 557 } 558#ifdef UNKLAR 559 if (fs->fs_sbsize < SBSIZE) 560 bp->b_flags |= B_INVAL; 561#endif 562 brelse(bp); 563 564loop: 565 simple_lock(&mntvnode_slock); 566 for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 567 if (vp->v_mount != mountp) { 568 simple_unlock(&mntvnode_slock); 569 goto loop; 570 } 571 nvp = vp->v_mntvnodes.le_next; 572 /* 573 * Step 4: invalidate all inactive vnodes. 574 */ 575 if (vrecycle(vp, &mntvnode_slock, p)) 576 goto loop; 577 /* 578 * Step 5: invalidate all cached file data. 579 */ 580 mtx_enter(&vp->v_interlock, MTX_DEF); 581 simple_unlock(&mntvnode_slock); 582 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 583 goto loop; 584 } 585 if (vinvalbuf(vp, 0, cred, p, 0, 0)) 586 panic("ext2_reload: dirty2"); 587 /* 588 * Step 6: re-read inode data for all active vnodes. 589 */ 590 ip = VTOI(vp); 591 error = 592 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 593 (int)fs->s_blocksize, NOCRED, &bp); 594 if (error) { 595 vput(vp); 596 return (error); 597 } 598 ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + 599 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), 600 &ip->i_din); 601 brelse(bp); 602 vput(vp); 603 simple_lock(&mntvnode_slock); 604 } 605 simple_unlock(&mntvnode_slock); 606 return (0); 607} 608 609/* 610 * Common code for mount and mountroot 611 */ 612static int 613ext2_mountfs(devvp, mp, p) 614 register struct vnode *devvp; 615 struct mount *mp; 616 struct proc *p; 617{ 618 register struct ufsmount *ump; 619 struct buf *bp; 620 register struct ext2_sb_info *fs; 621 struct ext2_super_block * es; 622 dev_t dev = devvp->v_rdev; 623 struct partinfo dpart; 624 int havepart = 0; 625 int error, i, size; 626 int ronly; 627 628 /* 629 * Disallow multiple mounts of the same device. 630 * Disallow mounting of a device that is currently in use 631 * (except for root, which might share swap device for miniroot). 632 * Flush out any old buffers remaining from a previous use. 633 */ 634 if ((error = vfs_mountedon(devvp)) != 0) 635 return (error); 636 if (vcount(devvp) > 1 && devvp != rootvp) 637 return (EBUSY); 638 if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) 639 return (error); 640#ifdef READONLY 641/* turn on this to force it to be read-only */ 642 mp->mnt_flag |= MNT_RDONLY; 643#endif 644 645 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 646 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 647 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); 648 VOP_UNLOCK(devvp, 0, p); 649 if (error) 650 return (error); 651 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) 652 size = DEV_BSIZE; 653 else { 654 havepart = 1; 655 size = dpart.disklab->d_secsize; 656 } 657 658 bp = NULL; 659 ump = NULL; 660 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 661 goto out; 662 es = (struct ext2_super_block *)bp->b_data; 663 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 664 error = EINVAL; /* XXX needs translation */ 665 goto out; 666 } 667 if ((es->s_state & EXT2_VALID_FS) == 0 || 668 (es->s_state & EXT2_ERROR_FS)) { 669 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 670 printf( 671"WARNING: Filesystem was not properly dismounted\n"); 672 } else { 673 printf( 674"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 675 error = EPERM; 676 goto out; 677 } 678 } 679 ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 680 bzero((caddr_t)ump, sizeof *ump); 681 ump->um_malloctype = M_EXT2NODE; 682 ump->um_blkatoff = ext2_blkatoff; 683 ump->um_truncate = ext2_truncate; 684 ump->um_update = ext2_update; 685 ump->um_valloc = ext2_valloc; 686 ump->um_vfree = ext2_vfree; 687 /* I don't know whether this is the right strategy. Note that 688 we dynamically allocate both a ext2_sb_info and a ext2_super_block 689 while Linux keeps the super block in a locked buffer 690 */ 691 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 692 M_UFSMNT, M_WAITOK); 693 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 694 M_UFSMNT, M_WAITOK); 695 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 696 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 697 goto out; 698 /* 699 * We don't free the group descriptors allocated by compute_sb_data() 700 * until ext2_unmount(). This is OK since the mount will succeed. 701 */ 702 brelse(bp); 703 bp = NULL; 704 fs = ump->um_e2fs; 705 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 706 /* if the fs is not mounted read-only, make sure the super block is 707 always written back on a sync() 708 */ 709 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 710 if (ronly == 0) { 711 fs->s_dirt = 1; /* mark it modified */ 712 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 713 } 714 mp->mnt_data = (qaddr_t)ump; 715 mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); 716 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 717 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 718 mp->mnt_flag |= MNT_LOCAL; 719 ump->um_mountp = mp; 720 ump->um_dev = dev; 721 ump->um_devvp = devvp; 722 /* setting those two parameters allows us to use 723 ufs_bmap w/o changse ! 724 */ 725 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 726 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 727 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 728 for (i = 0; i < MAXQUOTAS; i++) 729 ump->um_quotas[i] = NULLVP; 730 devvp->v_rdev->si_mountpoint = mp; 731 if (ronly == 0) 732 ext2_sbupdate(ump, MNT_WAIT); 733 return (0); 734out: 735 if (bp) 736 brelse(bp); 737 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); 738 if (ump) { 739 bsd_free(ump->um_e2fs->s_es, M_UFSMNT); 740 bsd_free(ump->um_e2fs, M_UFSMNT); 741 bsd_free(ump, M_UFSMNT); 742 mp->mnt_data = (qaddr_t)0; 743 } 744 return (error); 745} 746 747/* 748 * unmount system call 749 */ 750static int 751ext2_unmount(mp, mntflags, p) 752 struct mount *mp; 753 int mntflags; 754 struct proc *p; 755{ 756 register struct ufsmount *ump; 757 register struct ext2_sb_info *fs; 758 int error, flags, ronly, i; 759 760 flags = 0; 761 if (mntflags & MNT_FORCE) { 762 if (mp->mnt_flag & MNT_ROOTFS) 763 return (EINVAL); 764 flags |= FORCECLOSE; 765 } 766 if ((error = ext2_flushfiles(mp, flags, p)) != 0) 767 return (error); 768 ump = VFSTOUFS(mp); 769 fs = ump->um_e2fs; 770 ronly = fs->s_rd_only; 771 if (ronly == 0) { 772 if (fs->s_wasvalid) 773 fs->s_es->s_state |= EXT2_VALID_FS; 774 ext2_sbupdate(ump, MNT_WAIT); 775 } 776 777 /* release buffers containing group descriptors */ 778 for(i = 0; i < fs->s_db_per_group; i++) 779 ULCK_BUF(fs->s_group_desc[i]) 780 bsd_free(fs->s_group_desc, M_UFSMNT); 781 782 /* release cached inode/block bitmaps */ 783 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 784 if (fs->s_inode_bitmap[i]) 785 ULCK_BUF(fs->s_inode_bitmap[i]) 786 787 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 788 if (fs->s_block_bitmap[i]) 789 ULCK_BUF(fs->s_block_bitmap[i]) 790 791 ump->um_devvp->v_rdev->si_mountpoint = NULL; 792 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, 793 NOCRED, p); 794 vrele(ump->um_devvp); 795 bsd_free(fs->s_es, M_UFSMNT); 796 bsd_free(fs, M_UFSMNT); 797 bsd_free(ump, M_UFSMNT); 798 mp->mnt_data = (qaddr_t)0; 799 mp->mnt_flag &= ~MNT_LOCAL; 800 return (error); 801} 802 803/* 804 * Flush out all the files in a filesystem. 805 */ 806static int 807ext2_flushfiles(mp, flags, p) 808 register struct mount *mp; 809 int flags; 810 struct proc *p; 811{ 812 register struct ufsmount *ump; 813 int error; 814#if QUOTA 815 int i; 816#endif 817 818 ump = VFSTOUFS(mp); 819#if QUOTA 820 if (mp->mnt_flag & MNT_QUOTA) { 821 if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0) 822 return (error); 823 for (i = 0; i < MAXQUOTAS; i++) { 824 if (ump->um_quotas[i] == NULLVP) 825 continue; 826 quotaoff(p, mp, i); 827 } 828 /* 829 * Here we fall through to vflush again to ensure 830 * that we have gotten rid of all the system vnodes. 831 */ 832 } 833#endif 834 error = vflush(mp, NULLVP, flags); 835 return (error); 836} 837 838/* 839 * Get file system statistics. 840 * taken from ext2/super.c ext2_statfs 841 */ 842static int 843ext2_statfs(mp, sbp, p) 844 struct mount *mp; 845 register struct statfs *sbp; 846 struct proc *p; 847{ 848 unsigned long overhead; 849 unsigned long overhead_per_group; 850 851 register struct ufsmount *ump; 852 register struct ext2_sb_info *fs; 853 register struct ext2_super_block *es; 854 855 ump = VFSTOUFS(mp); 856 fs = ump->um_e2fs; 857 es = fs->s_es; 858 859 if (es->s_magic != EXT2_SUPER_MAGIC) 860 panic("ext2_statfs - magic number spoiled"); 861 862 /* 863 * Compute the overhead (FS structures) 864 */ 865 overhead_per_group = 1 /* super block */ + 866 fs->s_db_per_group + 867 1 /* block bitmap */ + 868 1 /* inode bitmap */ + 869 fs->s_itb_per_group; 870 overhead = es->s_first_data_block + 871 fs->s_groups_count * overhead_per_group; 872 873 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 874 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 875 sbp->f_blocks = es->s_blocks_count - overhead; 876 sbp->f_bfree = es->s_free_blocks_count; 877 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 878 sbp->f_files = es->s_inodes_count; 879 sbp->f_ffree = es->s_free_inodes_count; 880 if (sbp != &mp->mnt_stat) { 881 sbp->f_type = mp->mnt_vfc->vfc_typenum; 882 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 883 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 884 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 885 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 886 } 887 return (0); 888} 889 890/* 891 * Go through the disk queues to initiate sandbagged IO; 892 * go through the inodes to write those that have been modified; 893 * initiate the writing of the super block if it has been modified. 894 * 895 * Note: we are always called with the filesystem marked `MPBUSY'. 896 */ 897static int 898ext2_sync(mp, waitfor, cred, p) 899 struct mount *mp; 900 int waitfor; 901 struct ucred *cred; 902 struct proc *p; 903{ 904 struct vnode *nvp, *vp; 905 struct inode *ip; 906 struct ufsmount *ump = VFSTOUFS(mp); 907 struct ext2_sb_info *fs; 908 int error, allerror = 0; 909 910 fs = ump->um_e2fs; 911 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 912 printf("fs = %s\n", fs->fs_fsmnt); 913 panic("ext2_sync: rofs mod"); 914 } 915 /* 916 * Write back each (modified) inode. 917 */ 918 simple_lock(&mntvnode_slock); 919loop: 920 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 921 /* 922 * If the vnode that we are about to sync is no longer 923 * associated with this mount point, start over. 924 */ 925 if (vp->v_mount != mp) 926 goto loop; 927 mtx_enter(&vp->v_interlock, MTX_DEF); 928 nvp = vp->v_mntvnodes.le_next; 929 ip = VTOI(vp); 930 if (vp->v_type == VNON || 931 ((ip->i_flag & 932 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 933 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { 934 mtx_exit(&vp->v_interlock, MTX_DEF); 935 continue; 936 } 937 simple_unlock(&mntvnode_slock); 938 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); 939 if (error) { 940 simple_lock(&mntvnode_slock); 941 if (error == ENOENT) 942 goto loop; 943 continue; 944 } 945 if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) 946 allerror = error; 947 VOP_UNLOCK(vp, 0, p); 948 vrele(vp); 949 simple_lock(&mntvnode_slock); 950 } 951 simple_unlock(&mntvnode_slock); 952 /* 953 * Force stale file system control information to be flushed. 954 */ 955 if (waitfor != MNT_LAZY) { 956 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 957 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) 958 allerror = error; 959 VOP_UNLOCK(ump->um_devvp, 0, p); 960 } 961#if QUOTA 962 qsync(mp); 963#endif 964 /* 965 * Write back modified superblock. 966 */ 967 if (fs->s_dirt != 0) { 968 fs->s_dirt = 0; 969 fs->s_es->s_wtime = time_second; 970 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 971 allerror = error; 972 } 973 return (allerror); 974} 975 976/* 977 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it 978 * in from disk. If it is in core, wait for the lock bit to clear, then 979 * return the inode locked. Detection and handling of mount points must be 980 * done by the calling routine. 981 */ 982static int 983ext2_vget(mp, ino, vpp) 984 struct mount *mp; 985 ino_t ino; 986 struct vnode **vpp; 987{ 988 register struct ext2_sb_info *fs; 989 register struct inode *ip; 990 struct ufsmount *ump; 991 struct buf *bp; 992 struct vnode *vp; 993 dev_t dev; 994 int i, error; 995 int used_blocks; 996 997 ump = VFSTOUFS(mp); 998 dev = ump->um_dev; 999restart: 1000 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) 1001 return (0); 1002 1003 /* 1004 * Lock out the creation of new entries in the FFS hash table in 1005 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1006 * may occur! 1007 */ 1008 if (ext2fs_inode_hash_lock) { 1009 while (ext2fs_inode_hash_lock) { 1010 ext2fs_inode_hash_lock = -1; 1011 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 1012 } 1013 goto restart; 1014 } 1015 ext2fs_inode_hash_lock = 1; 1016 1017 /* 1018 * If this MALLOC() is performed after the getnewvnode() 1019 * it might block, leaving a vnode with a NULL v_data to be 1020 * found by ext2_sync() if a sync happens to fire right then, 1021 * which will cause a panic because ext2_sync() blindly 1022 * dereferences vp->v_data (as well it should). 1023 */ 1024 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 1025 1026 /* Allocate a new vnode/inode. */ 1027 if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) { 1028 if (ext2fs_inode_hash_lock < 0) 1029 wakeup(&ext2fs_inode_hash_lock); 1030 ext2fs_inode_hash_lock = 0; 1031 *vpp = NULL; 1032 FREE(ip, M_EXT2NODE); 1033 return (error); 1034 } 1035 bzero((caddr_t)ip, sizeof(struct inode)); 1036 lockinit(&vp->v_lock, PINOD, "ext2in", 0, 0); 1037 vp->v_data = ip; 1038 ip->i_vnode = vp; 1039 ip->i_e2fs = fs = ump->um_e2fs; 1040 ip->i_dev = dev; 1041 ip->i_number = ino; 1042#if QUOTA 1043 for (i = 0; i < MAXQUOTAS; i++) 1044 ip->i_dquot[i] = NODQUOT; 1045#endif 1046 /* 1047 * Put it onto its hash chain and lock it so that other requests for 1048 * this inode will block if they arrive while we are sleeping waiting 1049 * for old data structures to be purged or for the contents of the 1050 * disk portion of this inode to be read. 1051 */ 1052 ufs_ihashins(ip); 1053 1054 if (ext2fs_inode_hash_lock < 0) 1055 wakeup(&ext2fs_inode_hash_lock); 1056 ext2fs_inode_hash_lock = 0; 1057 1058 /* Read in the disk contents for the inode, copy into the inode. */ 1059#if 0 1060printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1061#endif 1062 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1063 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1064 /* 1065 * The inode does not contain anything useful, so it would 1066 * be misleading to leave it on its hash chain. With mode 1067 * still zero, it will be unlinked and returned to the free 1068 * list by vput(). 1069 */ 1070 vput(vp); 1071 brelse(bp); 1072 *vpp = NULL; 1073 return (error); 1074 } 1075 /* convert ext2 inode to dinode */ 1076 ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1077 ino_to_fsbo(fs, ino)), &ip->i_din); 1078 ip->i_block_group = ino_to_cg(fs, ino); 1079 ip->i_next_alloc_block = 0; 1080 ip->i_next_alloc_goal = 0; 1081 ip->i_prealloc_count = 0; 1082 ip->i_prealloc_block = 0; 1083 /* now we want to make sure that block pointers for unused 1084 blocks are zeroed out - ext2_balloc depends on this 1085 although for regular files and directories only 1086 */ 1087 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1088 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1089 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1090 ip->i_db[i] = 0; 1091 } 1092/* 1093 ext2_print_inode(ip); 1094*/ 1095 brelse(bp); 1096 1097 /* 1098 * Initialize the vnode from the inode, check for aliases. 1099 * Note that the underlying vnode may have changed. 1100 */ 1101 if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { 1102 vput(vp); 1103 *vpp = NULL; 1104 return (error); 1105 } 1106 /* 1107 * Finish inode initialization now that aliasing has been resolved. 1108 */ 1109 ip->i_devvp = ump->um_devvp; 1110 VREF(ip->i_devvp); 1111 /* 1112 * Set up a generation number for this inode if it does not 1113 * already have one. This should only happen on old filesystems. 1114 */ 1115 if (ip->i_gen == 0) { 1116 ip->i_gen = random() / 2 + 1; 1117 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1118 ip->i_flag |= IN_MODIFIED; 1119 } 1120 *vpp = vp; 1121 return (0); 1122} 1123 1124/* 1125 * File handle to vnode 1126 * 1127 * Have to be really careful about stale file handles: 1128 * - check that the inode number is valid 1129 * - call ext2_vget() to get the locked inode 1130 * - check for an unallocated inode (i_mode == 0) 1131 * - check that the given client host has export rights and return 1132 * those rights via. exflagsp and credanonp 1133 */ 1134static int 1135ext2_fhtovp(mp, fhp, vpp) 1136 register struct mount *mp; 1137 struct fid *fhp; 1138 struct vnode **vpp; 1139{ 1140 register struct ufid *ufhp; 1141 struct ext2_sb_info *fs; 1142 1143 ufhp = (struct ufid *)fhp; 1144 fs = VFSTOUFS(mp)->um_e2fs; 1145 if (ufhp->ufid_ino < ROOTINO || 1146 ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) 1147 return (ESTALE); 1148 return (ufs_fhtovp(mp, ufhp, vpp)); 1149} 1150 1151/* 1152 * Vnode pointer to File handle 1153 */ 1154/* ARGSUSED */ 1155static int 1156ext2_vptofh(vp, fhp) 1157 struct vnode *vp; 1158 struct fid *fhp; 1159{ 1160 register struct inode *ip; 1161 register struct ufid *ufhp; 1162 1163 ip = VTOI(vp); 1164 ufhp = (struct ufid *)fhp; 1165 ufhp->ufid_len = sizeof(struct ufid); 1166 ufhp->ufid_ino = ip->i_number; 1167 ufhp->ufid_gen = ip->i_gen; 1168 return (0); 1169} 1170 1171/* 1172 * Write a superblock and associated information back to disk. 1173 */ 1174static int 1175ext2_sbupdate(mp, waitfor) 1176 struct ufsmount *mp; 1177 int waitfor; 1178{ 1179 register struct ext2_sb_info *fs = mp->um_e2fs; 1180 register struct ext2_super_block *es = fs->s_es; 1181 register struct buf *bp; 1182 int error = 0; 1183/* 1184printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1185*/ 1186 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); 1187 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1188 if (waitfor == MNT_WAIT) 1189 error = bwrite(bp); 1190 else 1191 bawrite(bp); 1192 1193 /* 1194 * The buffers for group descriptors, inode bitmaps and block bitmaps 1195 * are not busy at this point and are (hopefully) written by the 1196 * usual sync mechanism. No need to write them here 1197 */ 1198 1199 return (error); 1200} 1201