/* ext2_vfsops.c revision 39678 */
1/* 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 40 */ 41 42#include "opt_quota.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/namei.h> 47#include <sys/proc.h> 48#include <sys/kernel.h> 49#include <sys/vnode.h> 50#include <sys/mount.h> 51#include <sys/buf.h> 52#include <sys/conf.h> 53#include <sys/fcntl.h> 54#include <sys/disklabel.h> 55#include <sys/malloc.h> 56#include <sys/stat.h> 57 58#include <miscfs/specfs/specdev.h> 59 60#include <ufs/ufs/quota.h> 61#include <ufs/ufs/ufsmount.h> 62#include <ufs/ufs/inode.h> 63#include <ufs/ufs/ufs_extern.h> 64 65#include <gnu/ext2fs/fs.h> 66#include <gnu/ext2fs/ext2_extern.h> 67#include <gnu/ext2fs/ext2_fs.h> 68#include <gnu/ext2fs/ext2_fs_sb.h> 69 70static int ext2_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, 71 struct vnode **, int *, struct ucred **)); 72static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); 73static int ext2_mount __P((struct mount *, 74 char *, caddr_t, struct nameidata *, struct proc *)); 75static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); 76static int ext2_reload __P((struct mount *mountp, struct ucred *cred, 77 struct proc *p)); 78static int ext2_sbupdate __P((struct ufsmount *, int)); 79static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); 80static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); 81static int ext2_unmount 
__P((struct mount *, int, struct proc *)); 82static int ext2_vget __P((struct mount *, ino_t, struct vnode **)); 83static int ext2_vptofh __P((struct vnode *, struct fid *)); 84 85static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 86 87static struct vfsops ext2fs_vfsops = { 88 ext2_mount, 89 ufs_start, /* empty function */ 90 ext2_unmount, 91 ufs_root, /* root inode via vget */ 92 ufs_quotactl, /* does operations associated with quotas */ 93 ext2_statfs, 94 ext2_sync, 95 ext2_vget, 96 ext2_fhtovp, 97 ext2_vptofh, 98 ext2_init, 99}; 100 101VFS_SET(ext2fs_vfsops, ext2fs, 0); 102#define bsd_malloc malloc 103#define bsd_free free 104 105static int ext2fs_inode_hash_lock; 106 107static int compute_sb_data __P((struct vnode * devvp, 108 struct ext2_super_block * es, 109 struct ext2_sb_info * fs)); 110 111#ifdef notyet 112static int ext2_mountroot __P((void)); 113 114/* 115 * Called by main() when ext2fs is going to be mounted as root. 116 * 117 * Name is updated by mount(8) after booting. 
118 */ 119#define ROOTNAME "root_device" 120 121static int 122ext2_mountroot() 123{ 124 register struct ext2_sb_info *fs; 125 register struct mount *mp; 126 struct proc *p = curproc; 127 struct ufsmount *ump; 128 u_int size; 129 int error; 130 131 if ((error = bdevvp(rootdev, &rootvp))) { 132 printf("ext2_mountroot: can't find rootvp"); 133 return (error); 134 } 135 mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 136 bzero((char *)mp, (u_long)sizeof(struct mount)); 137 mp->mnt_op = &ext2fs_vfsops; 138 mp->mnt_flag = MNT_RDONLY; 139 if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) 140 mp->mnt_flag |= MNT_NOCLUSTERR; 141 if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW) 142 mp->mnt_flag |= MNT_NOCLUSTERW; 143 if (error = ext2_mountfs(rootvp, mp, p)) { 144 bsd_free(mp, M_MOUNT); 145 return (error); 146 } 147 if (error = vfs_lock(mp)) { 148 (void)ext2_unmount(mp, 0, p); 149 bsd_free(mp, M_MOUNT); 150 return (error); 151 } 152 CIRCLEQ_INSERT_HEAD(&mountlist, mp, mnt_list); 153 mp->mnt_flag |= MNT_ROOTFS; 154 mp->mnt_vnodecovered = NULLVP; 155 ump = VFSTOUFS(mp); 156 fs = ump->um_e2fs; 157 bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); 158 fs->fs_fsmnt[0] = '/'; 159 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 160 MNAMELEN); 161 (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 162 &size); 163 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 164 (void)ext2_statfs(mp, &mp->mnt_stat, p); 165 vfs_unlock(mp); 166 inittodr(fs->s_es->s_wtime); /* this helps to set the time */ 167 return (0); 168} 169#endif 170 171/* 172 * VFS Operations. 
173 * 174 * mount system call 175 */ 176static int 177ext2_mount(mp, path, data, ndp, p) 178 register struct mount *mp; 179 char *path; 180 caddr_t data; /* this is actually a (struct ufs_args *) */ 181 struct nameidata *ndp; 182 struct proc *p; 183{ 184 struct vnode *devvp; 185 struct ufs_args args; 186 struct ufsmount *ump = 0; 187 register struct ext2_sb_info *fs; 188 u_int size; 189 int error, flags; 190 mode_t accessmode; 191 192 if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) 193 return (error); 194 /* 195 * If updating, check whether changing from read-only to 196 * read/write; if there is no device name, that's all we do. 197 * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags, 198 * if block device requests. 199 */ 200 if (mp->mnt_flag & MNT_UPDATE) { 201 ump = VFSTOUFS(mp); 202 fs = ump->um_e2fs; 203 error = 0; 204 if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR) 205 mp->mnt_flag |= MNT_NOCLUSTERR; 206 if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW) 207 mp->mnt_flag |= MNT_NOCLUSTERW; 208 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 209 flags = WRITECLOSE; 210 if (mp->mnt_flag & MNT_FORCE) 211 flags |= FORCECLOSE; 212 if (vfs_busy(mp, LK_NOWAIT, 0, p)) 213 return (EBUSY); 214 error = ext2_flushfiles(mp, flags, p); 215 vfs_unbusy(mp, p); 216 if (!error && fs->s_wasvalid) { 217 fs->s_es->s_state |= EXT2_VALID_FS; 218 ext2_sbupdate(ump, MNT_WAIT); 219 } 220 fs->s_rd_only = 1; 221 } 222 if (!error && (mp->mnt_flag & MNT_RELOAD)) 223 error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); 224 if (error) 225 return (error); 226 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 227 /* 228 * If upgrade to read-write by non-root, then verify 229 * that user has necessary permissions on the device. 
230 */ 231 if (p->p_ucred->cr_uid != 0) { 232 devvp = ump->um_devvp; 233 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 234 if (error = VOP_ACCESS(devvp, VREAD | VWRITE, 235 p->p_ucred, p)) { 236 VOP_UNLOCK(devvp, 0, p); 237 return (error); 238 } 239 VOP_UNLOCK(devvp, 0, p); 240 } 241 242 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 243 (fs->s_es->s_state & EXT2_ERROR_FS)) { 244 if (mp->mnt_flag & MNT_FORCE) { 245 printf( 246"WARNING: %s was not properly dismounted\n", 247 fs->fs_fsmnt); 248 } else { 249 printf( 250"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 251 fs->fs_fsmnt); 252 return (EPERM); 253 } 254 } 255 fs->s_es->s_state &= ~EXT2_VALID_FS; 256 ext2_sbupdate(ump, MNT_WAIT); 257 fs->s_rd_only = 0; 258 } 259 if (args.fspec == 0) { 260 /* 261 * Process export requests. 262 */ 263 return (vfs_export(mp, &ump->um_export, &args.export)); 264 } 265 } 266 /* 267 * Not an update, or updating the name: look up the name 268 * and verify that it refers to a sensible block device. 269 */ 270 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); 271 if (error = namei(ndp)) 272 return (error); 273 devvp = ndp->ni_vp; 274 275 if (devvp->v_type != VBLK) { 276 vrele(devvp); 277 return (ENOTBLK); 278 } 279 if (major(devvp->v_rdev) >= nblkdev) { 280 vrele(devvp); 281 return (ENXIO); 282 } 283 284 /* 285 * If mount by non-root, then verify that user has necessary 286 * permissions on the device. 
287 */ 288 if (p->p_ucred->cr_uid != 0) { 289 accessmode = VREAD; 290 if ((mp->mnt_flag & MNT_RDONLY) == 0) 291 accessmode |= VWRITE; 292 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 293 if (error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) { 294 vput(devvp); 295 return (error); 296 } 297 VOP_UNLOCK(devvp, 0, p); 298 } 299 300 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 301 if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) 302 mp->mnt_flag |= MNT_NOCLUSTERR; 303 if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW) 304 mp->mnt_flag |= MNT_NOCLUSTERW; 305 error = ext2_mountfs(devvp, mp, p); 306 } else { 307 if (devvp != ump->um_devvp) 308 error = EINVAL; /* needs translation */ 309 else 310 vrele(devvp); 311 } 312 if (error) { 313 vrele(devvp); 314 return (error); 315 } 316 ump = VFSTOUFS(mp); 317 fs = ump->um_e2fs; 318 (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); 319 bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); 320 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 321 MNAMELEN); 322 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 323 &size); 324 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 325 (void)ext2_statfs(mp, &mp->mnt_stat, p); 326 return (0); 327} 328 329/* 330 * checks that the data in the descriptor blocks make sense 331 * this is taken from ext2/super.c 332 */ 333static int ext2_check_descriptors (struct ext2_sb_info * sb) 334{ 335 int i; 336 int desc_block = 0; 337 unsigned long block = sb->s_es->s_first_data_block; 338 struct ext2_group_desc * gdp = NULL; 339 340 /* ext2_debug ("Checking group descriptors"); */ 341 342 for (i = 0; i < sb->s_groups_count; i++) 343 { 344 /* examine next descriptor block */ 345 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 346 gdp = (struct ext2_group_desc *) 347 sb->s_group_desc[desc_block++]->b_data; 348 if (gdp->bg_block_bitmap < block || 349 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 350 { 351 printf 
("ext2_check_descriptors: " 352 "Block bitmap for group %d" 353 " not in group (block %lu)!\n", 354 i, (unsigned long) gdp->bg_block_bitmap); 355 return 0; 356 } 357 if (gdp->bg_inode_bitmap < block || 358 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 359 { 360 printf ("ext2_check_descriptors: " 361 "Inode bitmap for group %d" 362 " not in group (block %lu)!\n", 363 i, (unsigned long) gdp->bg_inode_bitmap); 364 return 0; 365 } 366 if (gdp->bg_inode_table < block || 367 gdp->bg_inode_table + sb->s_itb_per_group >= 368 block + EXT2_BLOCKS_PER_GROUP(sb)) 369 { 370 printf ("ext2_check_descriptors: " 371 "Inode table for group %d" 372 " not in group (block %lu)!\n", 373 i, (unsigned long) gdp->bg_inode_table); 374 return 0; 375 } 376 block += EXT2_BLOCKS_PER_GROUP(sb); 377 gdp++; 378 } 379 return 1; 380} 381 382/* 383 * this computes the fields of the ext2_sb_info structure from the 384 * data in the ext2_super_block structure read in 385 */ 386static int compute_sb_data(devvp, es, fs) 387 struct vnode * devvp; 388 struct ext2_super_block * es; 389 struct ext2_sb_info * fs; 390{ 391 int db_count, error; 392 int i, j; 393 int logic_sb_block = 1; /* XXX for now */ 394 395#if 1 396#define V(v) 397#else 398#define V(v) printf(#v"= %d\n", fs->v); 399#endif 400 401 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 402 V(s_blocksize) 403 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 404 V(s_bshift) 405 fs->s_fsbtodb = es->s_log_block_size + 1; 406 V(s_fsbtodb) 407 fs->s_qbmask = fs->s_blocksize - 1; 408 V(s_bmask) 409 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 410 V(s_blocksize_bits) 411 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 412 V(s_frag_size) 413 if (fs->s_frag_size) 414 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 415 V(s_frags_per_block) 416 fs->s_blocks_per_group = es->s_blocks_per_group; 417 V(s_blocks_per_group) 418 fs->s_frags_per_group = es->s_frags_per_group; 419 V(s_frags_per_group) 
420 fs->s_inodes_per_group = es->s_inodes_per_group; 421 V(s_inodes_per_group) 422 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 423 V(s_inodes_per_block) 424 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 425 V(s_itb_per_group) 426 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 427 V(s_desc_per_block) 428 /* s_resuid / s_resgid ? */ 429 fs->s_groups_count = (es->s_blocks_count - 430 es->s_first_data_block + 431 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 432 EXT2_BLOCKS_PER_GROUP(fs); 433 V(s_groups_count) 434 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 435 EXT2_DESC_PER_BLOCK(fs); 436 fs->s_db_per_group = db_count; 437 V(s_db_per_group) 438 439 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 440 M_UFSMNT, M_WAITOK); 441 442 /* adjust logic_sb_block */ 443 if(fs->s_blocksize > SBSIZE) 444 /* Godmar thinks: if the blocksize is greater than 1024, then 445 the superblock is logically part of block zero. 
446 */ 447 logic_sb_block = 0; 448 449 for (i = 0; i < db_count; i++) { 450 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 451 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 452 if(error) { 453 for (j = 0; j < i; j++) 454 brelse(fs->s_group_desc[j]); 455 bsd_free(fs->s_group_desc, M_UFSMNT); 456 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 457 return EIO; 458 } 459 /* Set the B_LOCKED flag on the buffer, then brelse() it */ 460 LCK_BUF(fs->s_group_desc[i]) 461 } 462 if(!ext2_check_descriptors(fs)) { 463 for (j = 0; j < db_count; j++) 464 ULCK_BUF(fs->s_group_desc[j]) 465 bsd_free(fs->s_group_desc, M_UFSMNT); 466 printf("EXT2-fs: (ext2_check_descriptors failure) " 467 "unable to read group descriptors\n"); 468 return EIO; 469 } 470 471 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 472 fs->s_inode_bitmap_number[i] = 0; 473 fs->s_inode_bitmap[i] = NULL; 474 fs->s_block_bitmap_number[i] = 0; 475 fs->s_block_bitmap[i] = NULL; 476 } 477 fs->s_loaded_inode_bitmaps = 0; 478 fs->s_loaded_block_bitmaps = 0; 479 return 0; 480} 481 482/* 483 * Reload all incore data for a filesystem (used after running fsck on 484 * the root filesystem and finding things to fix). The filesystem must 485 * be mounted read-only. 486 * 487 * Things to do to update the mount: 488 * 1) invalidate all cached meta-data. 489 * 2) re-read superblock from disk. 490 * 3) re-read summary information from disk. 491 * 4) invalidate all inactive vnodes. 492 * 5) invalidate all cached file data. 493 * 6) re-read inode data for all active vnodes. 494 */ 495static int 496ext2_reload(mountp, cred, p) 497 register struct mount *mountp; 498 struct ucred *cred; 499 struct proc *p; 500{ 501 register struct vnode *vp, *nvp, *devvp; 502 struct inode *ip; 503 struct buf *bp; 504 struct ext2_super_block * es; 505 struct ext2_sb_info *fs; 506 int error; 507 508 if ((mountp->mnt_flag & MNT_RDONLY) == 0) 509 return (EINVAL); 510 /* 511 * Step 1: invalidate all cached meta-data. 
512 */ 513 devvp = VFSTOUFS(mountp)->um_devvp; 514 if (vinvalbuf(devvp, 0, cred, p, 0, 0)) 515 panic("ext2_reload: dirty1"); 516 /* 517 * Step 2: re-read superblock from disk. 518 * constants have been adjusted for ext2 519 */ 520 if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) 521 return (error); 522 es = (struct ext2_super_block *)bp->b_data; 523 if (es->s_magic != EXT2_SUPER_MAGIC) { 524 if(es->s_magic == EXT2_PRE_02B_MAGIC) 525 printf("This filesystem bears the magic number of a pre " 526 "0.2b version of ext2. This is not supported by " 527 "Lites.\n"); 528 else 529 printf("Wrong magic number: %x (expected %x for ext2 fs\n", 530 es->s_magic, EXT2_SUPER_MAGIC); 531 brelse(bp); 532 return (EIO); /* XXX needs translation */ 533 } 534 fs = VFSTOUFS(mountp)->um_e2fs; 535 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 536 537 if(error = compute_sb_data(devvp, es, fs)) { 538 brelse(bp); 539 return error; 540 } 541#ifdef UNKLAR 542 if (fs->fs_sbsize < SBSIZE) 543 bp->b_flags |= B_INVAL; 544#endif 545 brelse(bp); 546 547loop: 548 simple_lock(&mntvnode_slock); 549 for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 550 if (vp->v_mount != mountp) { 551 simple_unlock(&mntvnode_slock); 552 goto loop; 553 } 554 nvp = vp->v_mntvnodes.le_next; 555 /* 556 * Step 4: invalidate all inactive vnodes. 557 */ 558 if (vrecycle(vp, &mntvnode_slock, p)) 559 goto loop; 560 /* 561 * Step 5: invalidate all cached file data. 562 */ 563 simple_lock(&vp->v_interlock); 564 simple_unlock(&mntvnode_slock); 565 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 566 goto loop; 567 } 568 if (vinvalbuf(vp, 0, cred, p, 0, 0)) 569 panic("ext2_reload: dirty2"); 570 /* 571 * Step 6: re-read inode data for all active vnodes. 
572 */ 573 ip = VTOI(vp); 574 error = 575 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 576 (int)fs->s_blocksize, NOCRED, &bp); 577 if (error) { 578 vput(vp); 579 return (error); 580 } 581 ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + 582 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), 583 &ip->i_din); 584 brelse(bp); 585 vput(vp); 586 simple_lock(&mntvnode_slock); 587 } 588 simple_unlock(&mntvnode_slock); 589 return (0); 590} 591 592/* 593 * Common code for mount and mountroot 594 */ 595static int 596ext2_mountfs(devvp, mp, p) 597 register struct vnode *devvp; 598 struct mount *mp; 599 struct proc *p; 600{ 601 register struct ufsmount *ump; 602 struct buf *bp; 603 register struct ext2_sb_info *fs; 604 struct ext2_super_block * es; 605 dev_t dev = devvp->v_rdev; 606 struct partinfo dpart; 607 int havepart = 0; 608 int error, i, size; 609 int ronly; 610 611 /* 612 * Disallow multiple mounts of the same device. 613 * Disallow mounting of a device that is currently in use 614 * (except for root, which might share swap device for miniroot). 615 * Flush out any old buffers remaining from a previous use. 616 */ 617 if (error = vfs_mountedon(devvp)) 618 return (error); 619 if (vcount(devvp) > 1 && devvp != rootvp) 620 return (EBUSY); 621 if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) 622 return (error); 623#ifdef READONLY 624/* turn on this to force it to be read-only */ 625 mp->mnt_flag |= MNT_RDONLY; 626#endif 627 628 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 629 if (error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, p)) 630 return (error); 631 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) 632 size = DEV_BSIZE; 633 else { 634 havepart = 1; 635 size = dpart.disklab->d_secsize; 636 } 637 638 bp = NULL; 639 ump = NULL; 640 if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) 641 goto out; 642 es = (struct ext2_super_block *)bp->b_data; 643 if (es->s_magic != EXT2_SUPER_MAGIC) { 644 if(es->s_magic == EXT2_PRE_02B_MAGIC) 645 printf("This filesystem bears the magic number of a pre " 646 "0.2b version of ext2. This is not supported by " 647 "Lites.\n"); 648 else 649 printf("Wrong magic number: %x (expected %x for EXT2FS)\n", 650 es->s_magic, EXT2_SUPER_MAGIC); 651 error = EINVAL; /* XXX needs translation */ 652 goto out; 653 } 654 if ((es->s_state & EXT2_VALID_FS) == 0 || 655 (es->s_state & EXT2_ERROR_FS)) { 656 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 657 printf( 658"WARNING: Filesystem was not properly dismounted\n"); 659 } else { 660 printf( 661"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 662 error = EPERM; 663 goto out; 664 } 665 } 666 ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 667 bzero((caddr_t)ump, sizeof *ump); 668 ump->um_malloctype = M_EXT2NODE; 669 ump->um_blkatoff = ext2_blkatoff; 670 ump->um_truncate = ext2_truncate; 671 ump->um_update = ext2_update; 672 ump->um_valloc = ext2_valloc; 673 ump->um_vfree = ext2_vfree; 674 /* I don't know whether this is the right strategy. 
Note that 675 we dynamically allocate both a ext2_sb_info and a ext2_super_block 676 while Linux keeps the super block in a locked buffer 677 */ 678 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 679 M_UFSMNT, M_WAITOK); 680 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 681 M_UFSMNT, M_WAITOK); 682 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 683 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 684 goto out; 685 /* 686 * We don't free the group descriptors allocated by compute_sb_data() 687 * until ext2_unmount(). This is OK since the mount will succeed. 688 */ 689 brelse(bp); 690 bp = NULL; 691 fs = ump->um_e2fs; 692 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 693 /* if the fs is not mounted read-only, make sure the super block is 694 always written back on a sync() 695 */ 696 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 697 if (ronly == 0) { 698 fs->s_dirt = 1; /* mark it modified */ 699 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 700 } 701 mp->mnt_data = (qaddr_t)ump; 702 mp->mnt_stat.f_fsid.val[0] = (long)dev; 703 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 704 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 705 mp->mnt_flag |= MNT_LOCAL; 706 ump->um_mountp = mp; 707 ump->um_dev = dev; 708 ump->um_devvp = devvp; 709 /* setting those two parameters allows us to use 710 ufs_bmap w/o changse ! 711 */ 712 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 713 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 714 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 715 for (i = 0; i < MAXQUOTAS; i++) 716 ump->um_quotas[i] = NULLVP; 717 devvp->v_specmountpoint = mp; 718 if (ronly == 0) 719 ext2_sbupdate(ump, MNT_WAIT); 720 return (0); 721out: 722 if (bp) 723 brelse(bp); 724 (void)VOP_CLOSE(devvp, ronly ? 
FREAD : FREAD|FWRITE, NOCRED, p); 725 if (ump) { 726 bsd_free(ump->um_e2fs->s_es, M_UFSMNT); 727 bsd_free(ump->um_e2fs, M_UFSMNT); 728 bsd_free(ump, M_UFSMNT); 729 mp->mnt_data = (qaddr_t)0; 730 } 731 return (error); 732} 733 734/* 735 * unmount system call 736 */ 737static int 738ext2_unmount(mp, mntflags, p) 739 struct mount *mp; 740 int mntflags; 741 struct proc *p; 742{ 743 register struct ufsmount *ump; 744 register struct ext2_sb_info *fs; 745 int error, flags, ronly, i; 746 747 flags = 0; 748 if (mntflags & MNT_FORCE) { 749 if (mp->mnt_flag & MNT_ROOTFS) 750 return (EINVAL); 751 flags |= FORCECLOSE; 752 } 753 if (error = ext2_flushfiles(mp, flags, p)) 754 return (error); 755 ump = VFSTOUFS(mp); 756 fs = ump->um_e2fs; 757 ronly = fs->s_rd_only; 758 if (ronly == 0) { 759 if (fs->s_wasvalid) 760 fs->s_es->s_state |= EXT2_VALID_FS; 761 ext2_sbupdate(ump, MNT_WAIT); 762 } 763 764 /* release buffers containing group descriptors */ 765 for(i = 0; i < fs->s_db_per_group; i++) 766 ULCK_BUF(fs->s_group_desc[i]) 767 bsd_free(fs->s_group_desc, M_UFSMNT); 768 769 /* release cached inode/block bitmaps */ 770 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 771 if (fs->s_inode_bitmap[i]) 772 ULCK_BUF(fs->s_inode_bitmap[i]) 773 774 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 775 if (fs->s_block_bitmap[i]) 776 ULCK_BUF(fs->s_block_bitmap[i]) 777 778 ump->um_devvp->v_specmountpoint = NULL; 779 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, 780 NOCRED, p); 781 vrele(ump->um_devvp); 782 bsd_free(fs->s_es, M_UFSMNT); 783 bsd_free(fs, M_UFSMNT); 784 bsd_free(ump, M_UFSMNT); 785 mp->mnt_data = (qaddr_t)0; 786 mp->mnt_flag &= ~MNT_LOCAL; 787 return (error); 788} 789 790/* 791 * Flush out all the files in a filesystem. 
792 */ 793static int 794ext2_flushfiles(mp, flags, p) 795 register struct mount *mp; 796 int flags; 797 struct proc *p; 798{ 799 register struct ufsmount *ump; 800 int error; 801#if QUOTA 802 int i; 803#endif 804 805 ump = VFSTOUFS(mp); 806#if QUOTA 807 if (mp->mnt_flag & MNT_QUOTA) { 808 if (error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) 809 return (error); 810 for (i = 0; i < MAXQUOTAS; i++) { 811 if (ump->um_quotas[i] == NULLVP) 812 continue; 813 quotaoff(p, mp, i); 814 } 815 /* 816 * Here we fall through to vflush again to ensure 817 * that we have gotten rid of all the system vnodes. 818 */ 819 } 820#endif 821 error = vflush(mp, NULLVP, flags); 822 return (error); 823} 824 825/* 826 * Get file system statistics. 827 * taken from ext2/super.c ext2_statfs 828 */ 829static int 830ext2_statfs(mp, sbp, p) 831 struct mount *mp; 832 register struct statfs *sbp; 833 struct proc *p; 834{ 835 unsigned long overhead; 836 unsigned long overhead_per_group; 837 838 register struct ufsmount *ump; 839 register struct ext2_sb_info *fs; 840 register struct ext2_super_block *es; 841 842 ump = VFSTOUFS(mp); 843 fs = ump->um_e2fs; 844 es = fs->s_es; 845 846 if (es->s_magic != EXT2_SUPER_MAGIC) 847 panic("ext2_statfs - magic number spoiled"); 848 849 /* 850 * Compute the overhead (FS structures) 851 */ 852 overhead_per_group = 1 /* super block */ + 853 fs->s_db_per_group + 854 1 /* block bitmap */ + 855 1 /* inode bitmap */ + 856 fs->s_itb_per_group; 857 overhead = es->s_first_data_block + 858 fs->s_groups_count * overhead_per_group; 859 860 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 861 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 862 sbp->f_blocks = es->s_blocks_count - overhead; 863 sbp->f_bfree = es->s_free_blocks_count; 864 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 865 sbp->f_files = es->s_inodes_count; 866 sbp->f_ffree = es->s_free_inodes_count; 867 if (sbp != &mp->mnt_stat) { 868 sbp->f_type = mp->mnt_vfc->vfc_typenum; 869 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 870 
(caddr_t)&sbp->f_mntonname[0], MNAMELEN); 871 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 872 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 873 } 874 return (0); 875} 876 877/* 878 * Go through the disk queues to initiate sandbagged IO; 879 * go through the inodes to write those that have been modified; 880 * initiate the writing of the super block if it has been modified. 881 * 882 * Note: we are always called with the filesystem marked `MPBUSY'. 883 */ 884static int 885ext2_sync(mp, waitfor, cred, p) 886 struct mount *mp; 887 int waitfor; 888 struct ucred *cred; 889 struct proc *p; 890{ 891 struct vnode *nvp, *vp; 892 struct inode *ip; 893 struct ufsmount *ump = VFSTOUFS(mp); 894 struct ext2_sb_info *fs; 895 struct timeval tv; 896 int error, allerror = 0; 897 898 fs = ump->um_e2fs; 899 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 900 printf("fs = %s\n", fs->fs_fsmnt); 901 panic("ext2_sync: rofs mod"); 902 } 903 /* 904 * Write back each (modified) inode. 905 */ 906 simple_lock(&mntvnode_slock); 907loop: 908 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 909 /* 910 * If the vnode that we are about to sync is no longer 911 * associated with this mount point, start over. 
912 */ 913 if (vp->v_mount != mp) 914 goto loop; 915 simple_lock(&vp->v_interlock); 916 nvp = vp->v_mntvnodes.le_next; 917 ip = VTOI(vp); 918 if (vp->v_type == VNON || 919 (ip->i_flag & 920 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 921 (vp->v_dirtyblkhd.lh_first == NULL || 922 waitfor == MNT_LAZY)) { 923 simple_unlock(&vp->v_interlock); 924 continue; 925 } 926 simple_unlock(&mntvnode_slock); 927 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); 928 if (error) { 929 simple_lock(&mntvnode_slock); 930 if (error == ENOENT) 931 goto loop; 932 continue; 933 } 934 if (error = VOP_FSYNC(vp, cred, waitfor, p)) 935 allerror = error; 936 VOP_UNLOCK(vp, 0, p); 937 vrele(vp); 938 simple_lock(&mntvnode_slock); 939 } 940 simple_unlock(&mntvnode_slock); 941 /* 942 * Force stale file system control information to be flushed. 943 */ 944 if (waitfor != MNT_LAZY) { 945 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 946 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) 947 allerror = error; 948 VOP_UNLOCK(ump->um_devvp, 0, p); 949 } 950#if QUOTA 951 qsync(mp); 952#endif 953 /* 954 * Write back modified superblock. 955 */ 956 if (fs->s_dirt != 0) { 957 fs->s_dirt = 0; 958 fs->s_es->s_wtime = time_second; 959 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 960 allerror = error; 961 } 962 return (allerror); 963} 964 965/* 966 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it 967 * in from disk. If it is in core, wait for the lock bit to clear, then 968 * return the inode locked. Detection and handling of mount points must be 969 * done by the calling routine. 
970 */ 971static int 972ext2_vget(mp, ino, vpp) 973 struct mount *mp; 974 ino_t ino; 975 struct vnode **vpp; 976{ 977 register struct ext2_sb_info *fs; 978 register struct inode *ip; 979 struct ufsmount *ump; 980 struct buf *bp; 981 struct vnode *vp; 982 dev_t dev; 983 int i, error; 984 int used_blocks; 985 986 ump = VFSTOUFS(mp); 987 dev = ump->um_dev; 988restart: 989 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) 990 return (0); 991 992 /* 993 * Lock out the creation of new entries in the FFS hash table in 994 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 995 * may occur! 996 */ 997 if (ext2fs_inode_hash_lock) { 998 while (ext2fs_inode_hash_lock) { 999 ext2fs_inode_hash_lock = -1; 1000 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 1001 } 1002 goto restart; 1003 } 1004 ext2fs_inode_hash_lock = 1; 1005 1006 /* 1007 * If this MALLOC() is performed after the getnewvnode() 1008 * it might block, leaving a vnode with a NULL v_data to be 1009 * found by ext2_sync() if a sync happens to fire right then, 1010 * which will cause a panic because ext2_sync() blindly 1011 * dereferences vp->v_data (as well it should). 1012 */ 1013 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 1014 1015 /* Allocate a new vnode/inode. 
*/ 1016 if (error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) { 1017 if (ext2fs_inode_hash_lock < 0) 1018 wakeup(&ext2fs_inode_hash_lock); 1019 ext2fs_inode_hash_lock = 0; 1020 *vpp = NULL; 1021 FREE(ip, M_EXT2NODE); 1022 return (error); 1023 } 1024 bzero((caddr_t)ip, sizeof(struct inode)); 1025 lockinit(&ip->i_lock, PINOD, "ext2in", 0, 0); 1026 vp->v_data = ip; 1027 ip->i_vnode = vp; 1028 ip->i_e2fs = fs = ump->um_e2fs; 1029 ip->i_dev = dev; 1030 ip->i_number = ino; 1031#if QUOTA 1032 for (i = 0; i < MAXQUOTAS; i++) 1033 ip->i_dquot[i] = NODQUOT; 1034#endif 1035 /* 1036 * Put it onto its hash chain and lock it so that other requests for 1037 * this inode will block if they arrive while we are sleeping waiting 1038 * for old data structures to be purged or for the contents of the 1039 * disk portion of this inode to be read. 1040 */ 1041 ufs_ihashins(ip); 1042 1043 if (ext2fs_inode_hash_lock < 0) 1044 wakeup(&ext2fs_inode_hash_lock); 1045 ext2fs_inode_hash_lock = 0; 1046 1047 /* Read in the disk contents for the inode, copy into the inode. */ 1048#if 0 1049printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1050#endif 1051 if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1052 (int)fs->s_blocksize, NOCRED, &bp)) { 1053 /* 1054 * The inode does not contain anything useful, so it would 1055 * be misleading to leave it on its hash chain. With mode 1056 * still zero, it will be unlinked and returned to the free 1057 * list by vput(). 
1058 */ 1059 vput(vp); 1060 brelse(bp); 1061 *vpp = NULL; 1062 return (error); 1063 } 1064 /* convert ext2 inode to dinode */ 1065 ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1066 ino_to_fsbo(fs, ino)), &ip->i_din); 1067 ip->i_block_group = ino_to_cg(fs, ino); 1068 ip->i_next_alloc_block = 0; 1069 ip->i_next_alloc_goal = 0; 1070 ip->i_prealloc_count = 0; 1071 ip->i_prealloc_block = 0; 1072 /* now we want to make sure that block pointers for unused 1073 blocks are zeroed out - ext2_balloc depends on this 1074 although for regular files and directories only 1075 */ 1076 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1077 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1078 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1079 ip->i_db[i] = 0; 1080 } 1081/* 1082 ext2_print_inode(ip); 1083*/ 1084 brelse(bp); 1085 1086 /* 1087 * Initialize the vnode from the inode, check for aliases. 1088 * Note that the underlying vnode may have changed. 1089 */ 1090 if (error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) { 1091 vput(vp); 1092 *vpp = NULL; 1093 return (error); 1094 } 1095 /* 1096 * Finish inode initialization now that aliasing has been resolved. 1097 */ 1098 ip->i_devvp = ump->um_devvp; 1099 VREF(ip->i_devvp); 1100 /* 1101 * Set up a generation number for this inode if it does not 1102 * already have one. This should only happen on old filesystems. 1103 */ 1104 if (ip->i_gen == 0) { 1105 ip->i_gen = random() / 2 + 1; 1106 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1107 ip->i_flag |= IN_MODIFIED; 1108 } 1109 *vpp = vp; 1110 return (0); 1111} 1112 1113/* 1114 * File handle to vnode 1115 * 1116 * Have to be really careful about stale file handles: 1117 * - check that the inode number is valid 1118 * - call ext2_vget() to get the locked inode 1119 * - check for an unallocated inode (i_mode == 0) 1120 * - check that the given client host has export rights and return 1121 * those rights via. 
exflagsp and credanonp 1122 */ 1123static int 1124ext2_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) 1125 register struct mount *mp; 1126 struct fid *fhp; 1127 struct sockaddr *nam; 1128 struct vnode **vpp; 1129 int *exflagsp; 1130 struct ucred **credanonp; 1131{ 1132 register struct ufid *ufhp; 1133 struct ext2_sb_info *fs; 1134 1135 ufhp = (struct ufid *)fhp; 1136 fs = VFSTOUFS(mp)->um_e2fs; 1137 if (ufhp->ufid_ino < ROOTINO || 1138 ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) 1139 return (ESTALE); 1140 return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); 1141} 1142 1143/* 1144 * Vnode pointer to File handle 1145 */ 1146/* ARGSUSED */ 1147static int 1148ext2_vptofh(vp, fhp) 1149 struct vnode *vp; 1150 struct fid *fhp; 1151{ 1152 register struct inode *ip; 1153 register struct ufid *ufhp; 1154 1155 ip = VTOI(vp); 1156 ufhp = (struct ufid *)fhp; 1157 ufhp->ufid_len = sizeof(struct ufid); 1158 ufhp->ufid_ino = ip->i_number; 1159 ufhp->ufid_gen = ip->i_gen; 1160 return (0); 1161} 1162 1163/* 1164 * Write a superblock and associated information back to disk. 1165 */ 1166static int 1167ext2_sbupdate(mp, waitfor) 1168 struct ufsmount *mp; 1169 int waitfor; 1170{ 1171 register struct ext2_sb_info *fs = mp->um_e2fs; 1172 register struct ext2_super_block *es = fs->s_es; 1173 register struct buf *bp; 1174 int i, error = 0; 1175/* 1176printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1177*/ 1178 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); 1179 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1180 if (waitfor == MNT_WAIT) 1181 error = bwrite(bp); 1182 else 1183 bawrite(bp); 1184 1185 /* 1186 * The buffers for group descriptors, inode bitmaps and block bitmaps 1187 * are not busy at this point and are (hopefully) written by the 1188 * usual sync mechanism. No need to write them here 1189 */ 1190 1191 return (error); 1192} 1193