ext2_vfsops.c revision 49535
1/* 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7/* 8 * Copyright (c) 1989, 1991, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 40 */ 41 42#include "opt_quota.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/namei.h> 47#include <sys/proc.h> 48#include <sys/kernel.h> 49#include <sys/vnode.h> 50#include <sys/mount.h> 51#include <sys/buf.h> 52#include <sys/conf.h> 53#include <sys/fcntl.h> 54#include <sys/disklabel.h> 55#include <sys/malloc.h> 56#include <sys/stat.h> 57 58#include <ufs/ufs/quota.h> 59#include <ufs/ufs/ufsmount.h> 60#include <ufs/ufs/inode.h> 61#include <ufs/ufs/ufs_extern.h> 62 63#include <gnu/ext2fs/fs.h> 64#include <gnu/ext2fs/ext2_extern.h> 65#include <gnu/ext2fs/ext2_fs.h> 66#include <gnu/ext2fs/ext2_fs_sb.h> 67 68static int ext2_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, 69 struct vnode **, int *, struct ucred **)); 70static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); 71static int ext2_mount __P((struct mount *, 72 char *, caddr_t, struct nameidata *, struct proc *)); 73static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); 74static int ext2_reload __P((struct mount *mountp, struct ucred *cred, 75 struct proc *p)); 76static int ext2_sbupdate __P((struct ufsmount *, int)); 77static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); 78static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); 79static int ext2_unmount __P((struct mount *, int, struct proc *)); 80static int ext2_vget __P((struct mount *, ino_t, struct vnode **)); 81static int ext2_vptofh __P((struct vnode *, struct fid *)); 82 83static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); 84 85static struct vfsops ext2fs_vfsops = { 86 ext2_mount, 87 ufs_start, /* empty function */ 88 ext2_unmount, 89 ufs_root, /* root inode via vget */ 90 ufs_quotactl, /* does operations associated with quotas */ 91 ext2_statfs, 92 ext2_sync, 93 ext2_vget, 94 ext2_fhtovp, 95 ext2_vptofh, 96 ext2_init, 97}; 98 99VFS_SET(ext2fs_vfsops, ext2fs, 0); 100#define bsd_malloc malloc 101#define bsd_free free 102 103static int ext2fs_inode_hash_lock; 104 105static int compute_sb_data __P((struct vnode * devvp, 106 struct ext2_super_block * es, 107 struct ext2_sb_info * fs)); 108 109#ifdef notyet 110static int ext2_mountroot __P((void)); 111 112/* 113 * Called by main() when ext2fs is going to be mounted as root. 114 * 115 * Name is updated by mount(8) after booting. 116 */ 117#define ROOTNAME "root_device" 118 119static int 120ext2_mountroot() 121{ 122 register struct ext2_sb_info *fs; 123 register struct mount *mp; 124 struct proc *p = curproc; 125 struct ufsmount *ump; 126 u_int size; 127 int error; 128 129 if ((error = bdevvp(rootdev, &rootvp))) { 130 printf("ext2_mountroot: can't find rootvp"); 131 return (error); 132 } 133 mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 134 bzero((char *)mp, (u_long)sizeof(struct mount)); 135 mp->mnt_op = &ext2fs_vfsops; 136 mp->mnt_flag = MNT_RDONLY; 137 if (bdevsw(rootdev)->d_flags & D_NOCLUSTERR) 138 mp->mnt_flag |= MNT_NOCLUSTERR; 139 if (bdevsw(rootdev)->d_flags & D_NOCLUSTERW) 140 mp->mnt_flag |= MNT_NOCLUSTERW; 141 if (error = ext2_mountfs(rootvp, mp, p)) { 142 bsd_free(mp, M_MOUNT); 143 return (error); 144 } 145 if (error = vfs_lock(mp)) { 146 (void)ext2_unmount(mp, 0, p); 147 bsd_free(mp, M_MOUNT); 148 return (error); 149 } 150 CIRCLEQ_INSERT_HEAD(&mountlist, mp, mnt_list); 151 mp->mnt_flag |= MNT_ROOTFS; 152 mp->mnt_vnodecovered = NULLVP; 153 ump = VFSTOUFS(mp); 154 fs = ump->um_e2fs; 155 bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); 156 fs->fs_fsmnt[0] = '/'; 157 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 158 MNAMELEN); 159 (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 160 &size); 161 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 162 (void)ext2_statfs(mp, &mp->mnt_stat, p); 163 vfs_unlock(mp); 164 inittodr(fs->s_es->s_wtime); /* this helps to set the time */ 165 return (0); 166} 167#endif 168 169/* 170 * VFS Operations. 171 * 172 * mount system call 173 */ 174static int 175ext2_mount(mp, path, data, ndp, p) 176 register struct mount *mp; 177 char *path; 178 caddr_t data; /* this is actually a (struct ufs_args *) */ 179 struct nameidata *ndp; 180 struct proc *p; 181{ 182 struct vnode *devvp; 183 struct ufs_args args; 184 struct ufsmount *ump = 0; 185 register struct ext2_sb_info *fs; 186 u_int size; 187 int error, flags; 188 mode_t accessmode; 189 190 if ((error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) != 0) 191 return (error); 192 /* 193 * If updating, check whether changing from read-only to 194 * read/write; if there is no device name, that's all we do. 195 * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags, 196 * if block device requests. 197 */ 198 if (mp->mnt_flag & MNT_UPDATE) { 199 ump = VFSTOUFS(mp); 200 fs = ump->um_e2fs; 201 error = 0; 202 if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERR) 203 mp->mnt_flag |= MNT_NOCLUSTERR; 204 if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERW) 205 mp->mnt_flag |= MNT_NOCLUSTERW; 206 if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { 207 flags = WRITECLOSE; 208 if (mp->mnt_flag & MNT_FORCE) 209 flags |= FORCECLOSE; 210 if (vfs_busy(mp, LK_NOWAIT, 0, p)) 211 return (EBUSY); 212 error = ext2_flushfiles(mp, flags, p); 213 vfs_unbusy(mp, p); 214 if (!error && fs->s_wasvalid) { 215 fs->s_es->s_state |= EXT2_VALID_FS; 216 ext2_sbupdate(ump, MNT_WAIT); 217 } 218 fs->s_rd_only = 1; 219 } 220 if (!error && (mp->mnt_flag & MNT_RELOAD)) 221 error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); 222 if (error) 223 return (error); 224 if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 225 /* 226 * If upgrade to read-write by non-root, then verify 227 * that user has necessary permissions on the device. 228 */ 229 if (p->p_ucred->cr_uid != 0) { 230 devvp = ump->um_devvp; 231 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 232 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, 233 p->p_ucred, p)) != 0) { 234 VOP_UNLOCK(devvp, 0, p); 235 return (error); 236 } 237 VOP_UNLOCK(devvp, 0, p); 238 } 239 240 if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || 241 (fs->s_es->s_state & EXT2_ERROR_FS)) { 242 if (mp->mnt_flag & MNT_FORCE) { 243 printf( 244"WARNING: %s was not properly dismounted\n", 245 fs->fs_fsmnt); 246 } else { 247 printf( 248"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 249 fs->fs_fsmnt); 250 return (EPERM); 251 } 252 } 253 fs->s_es->s_state &= ~EXT2_VALID_FS; 254 ext2_sbupdate(ump, MNT_WAIT); 255 fs->s_rd_only = 0; 256 } 257 if (args.fspec == 0) { 258 /* 259 * Process export requests. 260 */ 261 return (vfs_export(mp, &ump->um_export, &args.export)); 262 } 263 } 264 /* 265 * Not an update, or updating the name: look up the name 266 * and verify that it refers to a sensible block device. 267 */ 268 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); 269 if ((error = namei(ndp)) != 0) 270 return (error); 271 devvp = ndp->ni_vp; 272 273 if (devvp->v_type != VBLK) { 274 vrele(devvp); 275 return (ENOTBLK); 276 } 277 if (bdevsw(devvp->v_rdev) == NULL) { 278 vrele(devvp); 279 return (ENXIO); 280 } 281 282 /* 283 * If mount by non-root, then verify that user has necessary 284 * permissions on the device. 285 */ 286 if (p->p_ucred->cr_uid != 0) { 287 accessmode = VREAD; 288 if ((mp->mnt_flag & MNT_RDONLY) == 0) 289 accessmode |= VWRITE; 290 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); 291 if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { 292 vput(devvp); 293 return (error); 294 } 295 VOP_UNLOCK(devvp, 0, p); 296 } 297 298 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 299 if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERR) 300 mp->mnt_flag |= MNT_NOCLUSTERR; 301 if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERW) 302 mp->mnt_flag |= MNT_NOCLUSTERW; 303 error = ext2_mountfs(devvp, mp, p); 304 } else { 305 if (devvp != ump->um_devvp) 306 error = EINVAL; /* needs translation */ 307 else 308 vrele(devvp); 309 } 310 if (error) { 311 vrele(devvp); 312 return (error); 313 } 314 ump = VFSTOUFS(mp); 315 fs = ump->um_e2fs; 316 (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); 317 bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); 318 bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, 319 MNAMELEN); 320 (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 321 &size); 322 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 323 (void)ext2_statfs(mp, &mp->mnt_stat, p); 324 return (0); 325} 326 327/* 328 * checks that the data in the descriptor blocks make sense 329 * this is taken from ext2/super.c 330 */ 331static int ext2_check_descriptors (struct ext2_sb_info * sb) 332{ 333 int i; 334 int desc_block = 0; 335 unsigned long block = sb->s_es->s_first_data_block; 336 struct ext2_group_desc * gdp = NULL; 337 338 /* ext2_debug ("Checking group descriptors"); */ 339 340 for (i = 0; i < sb->s_groups_count; i++) 341 { 342 /* examine next descriptor block */ 343 if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) 344 gdp = (struct ext2_group_desc *) 345 sb->s_group_desc[desc_block++]->b_data; 346 if (gdp->bg_block_bitmap < block || 347 gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 348 { 349 printf ("ext2_check_descriptors: " 350 "Block bitmap for group %d" 351 " not in group (block %lu)!\n", 352 i, (unsigned long) gdp->bg_block_bitmap); 353 return 0; 354 } 355 if (gdp->bg_inode_bitmap < block || 356 gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) 357 { 358 printf ("ext2_check_descriptors: " 359 "Inode bitmap for group %d" 360 " not in group (block %lu)!\n", 361 i, (unsigned long) gdp->bg_inode_bitmap); 362 return 0; 363 } 364 if (gdp->bg_inode_table < block || 365 gdp->bg_inode_table + sb->s_itb_per_group >= 366 block + EXT2_BLOCKS_PER_GROUP(sb)) 367 { 368 printf ("ext2_check_descriptors: " 369 "Inode table for group %d" 370 " not in group (block %lu)!\n", 371 i, (unsigned long) gdp->bg_inode_table); 372 return 0; 373 } 374 block += EXT2_BLOCKS_PER_GROUP(sb); 375 gdp++; 376 } 377 return 1; 378} 379 380/* 381 * this computes the fields of the ext2_sb_info structure from the 382 * data in the ext2_super_block structure read in 383 */ 384static int compute_sb_data(devvp, es, fs) 385 struct vnode * devvp; 386 struct ext2_super_block * es; 387 struct ext2_sb_info * fs; 388{ 389 int db_count, error; 390 int i, j; 391 int logic_sb_block = 1; /* XXX for now */ 392 393#if 1 394#define V(v) 395#else 396#define V(v) printf(#v"= %d\n", fs->v); 397#endif 398 399 fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; 400 V(s_blocksize) 401 fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; 402 V(s_bshift) 403 fs->s_fsbtodb = es->s_log_block_size + 1; 404 V(s_fsbtodb) 405 fs->s_qbmask = fs->s_blocksize - 1; 406 V(s_bmask) 407 fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); 408 V(s_blocksize_bits) 409 fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; 410 V(s_frag_size) 411 if (fs->s_frag_size) 412 fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; 413 V(s_frags_per_block) 414 fs->s_blocks_per_group = es->s_blocks_per_group; 415 V(s_blocks_per_group) 416 fs->s_frags_per_group = es->s_frags_per_group; 417 V(s_frags_per_group) 418 fs->s_inodes_per_group = es->s_inodes_per_group; 419 V(s_inodes_per_group) 420 fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; 421 V(s_inodes_per_block) 422 fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; 423 V(s_itb_per_group) 424 fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); 425 V(s_desc_per_block) 426 /* s_resuid / s_resgid ? */ 427 fs->s_groups_count = (es->s_blocks_count - 428 es->s_first_data_block + 429 EXT2_BLOCKS_PER_GROUP(fs) - 1) / 430 EXT2_BLOCKS_PER_GROUP(fs); 431 V(s_groups_count) 432 db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / 433 EXT2_DESC_PER_BLOCK(fs); 434 fs->s_db_per_group = db_count; 435 V(s_db_per_group) 436 437 fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), 438 M_UFSMNT, M_WAITOK); 439 440 /* adjust logic_sb_block */ 441 if(fs->s_blocksize > SBSIZE) 442 /* Godmar thinks: if the blocksize is greater than 1024, then 443 the superblock is logically part of block zero. 444 */ 445 logic_sb_block = 0; 446 447 for (i = 0; i < db_count; i++) { 448 error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), 449 fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); 450 if(error) { 451 for (j = 0; j < i; j++) 452 brelse(fs->s_group_desc[j]); 453 bsd_free(fs->s_group_desc, M_UFSMNT); 454 printf("EXT2-fs: unable to read group descriptors (%d)\n", error); 455 return EIO; 456 } 457 /* Set the B_LOCKED flag on the buffer, then brelse() it */ 458 LCK_BUF(fs->s_group_desc[i]) 459 } 460 if(!ext2_check_descriptors(fs)) { 461 for (j = 0; j < db_count; j++) 462 ULCK_BUF(fs->s_group_desc[j]) 463 bsd_free(fs->s_group_desc, M_UFSMNT); 464 printf("EXT2-fs: (ext2_check_descriptors failure) " 465 "unable to read group descriptors\n"); 466 return EIO; 467 } 468 469 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { 470 fs->s_inode_bitmap_number[i] = 0; 471 fs->s_inode_bitmap[i] = NULL; 472 fs->s_block_bitmap_number[i] = 0; 473 fs->s_block_bitmap[i] = NULL; 474 } 475 fs->s_loaded_inode_bitmaps = 0; 476 fs->s_loaded_block_bitmaps = 0; 477 return 0; 478} 479 480/* 481 * Reload all incore data for a filesystem (used after running fsck on 482 * the root filesystem and finding things to fix). The filesystem must 483 * be mounted read-only. 484 * 485 * Things to do to update the mount: 486 * 1) invalidate all cached meta-data. 487 * 2) re-read superblock from disk. 488 * 3) re-read summary information from disk. 489 * 4) invalidate all inactive vnodes. 490 * 5) invalidate all cached file data. 491 * 6) re-read inode data for all active vnodes. 492 */ 493static int 494ext2_reload(mountp, cred, p) 495 register struct mount *mountp; 496 struct ucred *cred; 497 struct proc *p; 498{ 499 register struct vnode *vp, *nvp, *devvp; 500 struct inode *ip; 501 struct buf *bp; 502 struct ext2_super_block * es; 503 struct ext2_sb_info *fs; 504 int error; 505 506 if ((mountp->mnt_flag & MNT_RDONLY) == 0) 507 return (EINVAL); 508 /* 509 * Step 1: invalidate all cached meta-data. 510 */ 511 devvp = VFSTOUFS(mountp)->um_devvp; 512 if (vinvalbuf(devvp, 0, cred, p, 0, 0)) 513 panic("ext2_reload: dirty1"); 514 /* 515 * Step 2: re-read superblock from disk. 516 * constants have been adjusted for ext2 517 */ 518 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 519 return (error); 520 es = (struct ext2_super_block *)bp->b_data; 521 if (es->s_magic != EXT2_SUPER_MAGIC) { 522 if(es->s_magic == EXT2_PRE_02B_MAGIC) 523 printf("This filesystem bears the magic number of a pre " 524 "0.2b version of ext2. This is not supported by " 525 "Lites.\n"); 526 else 527 printf("Wrong magic number: %x (expected %x for ext2 fs\n", 528 es->s_magic, EXT2_SUPER_MAGIC); 529 brelse(bp); 530 return (EIO); /* XXX needs translation */ 531 } 532 fs = VFSTOUFS(mountp)->um_e2fs; 533 bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); 534 535 if((error = compute_sb_data(devvp, es, fs)) != 0) { 536 brelse(bp); 537 return error; 538 } 539#ifdef UNKLAR 540 if (fs->fs_sbsize < SBSIZE) 541 bp->b_flags |= B_INVAL; 542#endif 543 brelse(bp); 544 545loop: 546 simple_lock(&mntvnode_slock); 547 for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 548 if (vp->v_mount != mountp) { 549 simple_unlock(&mntvnode_slock); 550 goto loop; 551 } 552 nvp = vp->v_mntvnodes.le_next; 553 /* 554 * Step 4: invalidate all inactive vnodes. 555 */ 556 if (vrecycle(vp, &mntvnode_slock, p)) 557 goto loop; 558 /* 559 * Step 5: invalidate all cached file data. 560 */ 561 simple_lock(&vp->v_interlock); 562 simple_unlock(&mntvnode_slock); 563 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 564 goto loop; 565 } 566 if (vinvalbuf(vp, 0, cred, p, 0, 0)) 567 panic("ext2_reload: dirty2"); 568 /* 569 * Step 6: re-read inode data for all active vnodes. 570 */ 571 ip = VTOI(vp); 572 error = 573 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 574 (int)fs->s_blocksize, NOCRED, &bp); 575 if (error) { 576 vput(vp); 577 return (error); 578 } 579 ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + 580 EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), 581 &ip->i_din); 582 brelse(bp); 583 vput(vp); 584 simple_lock(&mntvnode_slock); 585 } 586 simple_unlock(&mntvnode_slock); 587 return (0); 588} 589 590/* 591 * Common code for mount and mountroot 592 */ 593static int 594ext2_mountfs(devvp, mp, p) 595 register struct vnode *devvp; 596 struct mount *mp; 597 struct proc *p; 598{ 599 register struct ufsmount *ump; 600 struct buf *bp; 601 register struct ext2_sb_info *fs; 602 struct ext2_super_block * es; 603 dev_t dev = devvp->v_rdev; 604 struct partinfo dpart; 605 int havepart = 0; 606 int error, i, size; 607 int ronly; 608 609 /* 610 * Disallow multiple mounts of the same device. 611 * Disallow mounting of a device that is currently in use 612 * (except for root, which might share swap device for miniroot). 613 * Flush out any old buffers remaining from a previous use. 614 */ 615 if ((error = vfs_mountedon(devvp)) != 0) 616 return (error); 617 if (vcount(devvp) > 1 && devvp != rootvp) 618 return (EBUSY); 619 if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) 620 return (error); 621#ifdef READONLY 622/* turn on this to force it to be read-only */ 623 mp->mnt_flag |= MNT_RDONLY; 624#endif 625 626 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 627 if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) != 0) 628 return (error); 629 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) 630 size = DEV_BSIZE; 631 else { 632 havepart = 1; 633 size = dpart.disklab->d_secsize; 634 } 635 636 bp = NULL; 637 ump = NULL; 638 if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) 639 goto out; 640 es = (struct ext2_super_block *)bp->b_data; 641 if (es->s_magic != EXT2_SUPER_MAGIC) { 642 if(es->s_magic == EXT2_PRE_02B_MAGIC) 643 printf("This filesystem bears the magic number of a pre " 644 "0.2b version of ext2. This is not supported by " 645 "Lites.\n"); 646 else 647 printf("Wrong magic number: %x (expected %x for EXT2FS)\n", 648 es->s_magic, EXT2_SUPER_MAGIC); 649 error = EINVAL; /* XXX needs translation */ 650 goto out; 651 } 652 if ((es->s_state & EXT2_VALID_FS) == 0 || 653 (es->s_state & EXT2_ERROR_FS)) { 654 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 655 printf( 656"WARNING: Filesystem was not properly dismounted\n"); 657 } else { 658 printf( 659"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 660 error = EPERM; 661 goto out; 662 } 663 } 664 ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); 665 bzero((caddr_t)ump, sizeof *ump); 666 ump->um_malloctype = M_EXT2NODE; 667 ump->um_blkatoff = ext2_blkatoff; 668 ump->um_truncate = ext2_truncate; 669 ump->um_update = ext2_update; 670 ump->um_valloc = ext2_valloc; 671 ump->um_vfree = ext2_vfree; 672 /* I don't know whether this is the right strategy. Note that 673 we dynamically allocate both a ext2_sb_info and a ext2_super_block 674 while Linux keeps the super block in a locked buffer 675 */ 676 ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), 677 M_UFSMNT, M_WAITOK); 678 ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), 679 M_UFSMNT, M_WAITOK); 680 bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); 681 if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) 682 goto out; 683 /* 684 * We don't free the group descriptors allocated by compute_sb_data() 685 * until ext2_unmount(). This is OK since the mount will succeed. 686 */ 687 brelse(bp); 688 bp = NULL; 689 fs = ump->um_e2fs; 690 fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ 691 /* if the fs is not mounted read-only, make sure the super block is 692 always written back on a sync() 693 */ 694 fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; 695 if (ronly == 0) { 696 fs->s_dirt = 1; /* mark it modified */ 697 fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ 698 } 699 mp->mnt_data = (qaddr_t)ump; 700 mp->mnt_stat.f_fsid.val[0] = (long)dev; 701 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 702 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 703 mp->mnt_flag |= MNT_LOCAL; 704 ump->um_mountp = mp; 705 ump->um_dev = dev; 706 ump->um_devvp = devvp; 707 /* setting those two parameters allows us to use 708 ufs_bmap w/o changse ! 709 */ 710 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 711 ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; 712 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 713 for (i = 0; i < MAXQUOTAS; i++) 714 ump->um_quotas[i] = NULLVP; 715 devvp->v_specmountpoint = mp; 716 if (ronly == 0) 717 ext2_sbupdate(ump, MNT_WAIT); 718 return (0); 719out: 720 if (bp) 721 brelse(bp); 722 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); 723 if (ump) { 724 bsd_free(ump->um_e2fs->s_es, M_UFSMNT); 725 bsd_free(ump->um_e2fs, M_UFSMNT); 726 bsd_free(ump, M_UFSMNT); 727 mp->mnt_data = (qaddr_t)0; 728 } 729 return (error); 730} 731 732/* 733 * unmount system call 734 */ 735static int 736ext2_unmount(mp, mntflags, p) 737 struct mount *mp; 738 int mntflags; 739 struct proc *p; 740{ 741 register struct ufsmount *ump; 742 register struct ext2_sb_info *fs; 743 int error, flags, ronly, i; 744 745 flags = 0; 746 if (mntflags & MNT_FORCE) { 747 if (mp->mnt_flag & MNT_ROOTFS) 748 return (EINVAL); 749 flags |= FORCECLOSE; 750 } 751 if ((error = ext2_flushfiles(mp, flags, p)) != 0) 752 return (error); 753 ump = VFSTOUFS(mp); 754 fs = ump->um_e2fs; 755 ronly = fs->s_rd_only; 756 if (ronly == 0) { 757 if (fs->s_wasvalid) 758 fs->s_es->s_state |= EXT2_VALID_FS; 759 ext2_sbupdate(ump, MNT_WAIT); 760 } 761 762 /* release buffers containing group descriptors */ 763 for(i = 0; i < fs->s_db_per_group; i++) 764 ULCK_BUF(fs->s_group_desc[i]) 765 bsd_free(fs->s_group_desc, M_UFSMNT); 766 767 /* release cached inode/block bitmaps */ 768 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 769 if (fs->s_inode_bitmap[i]) 770 ULCK_BUF(fs->s_inode_bitmap[i]) 771 772 for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) 773 if (fs->s_block_bitmap[i]) 774 ULCK_BUF(fs->s_block_bitmap[i]) 775 776 ump->um_devvp->v_specmountpoint = NULL; 777 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, 778 NOCRED, p); 779 vrele(ump->um_devvp); 780 bsd_free(fs->s_es, M_UFSMNT); 781 bsd_free(fs, M_UFSMNT); 782 bsd_free(ump, M_UFSMNT); 783 mp->mnt_data = (qaddr_t)0; 784 mp->mnt_flag &= ~MNT_LOCAL; 785 return (error); 786} 787 788/* 789 * Flush out all the files in a filesystem. 790 */ 791static int 792ext2_flushfiles(mp, flags, p) 793 register struct mount *mp; 794 int flags; 795 struct proc *p; 796{ 797 register struct ufsmount *ump; 798 int error; 799#if QUOTA 800 int i; 801#endif 802 803 ump = VFSTOUFS(mp); 804#if QUOTA 805 if (mp->mnt_flag & MNT_QUOTA) { 806 if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0) 807 return (error); 808 for (i = 0; i < MAXQUOTAS; i++) { 809 if (ump->um_quotas[i] == NULLVP) 810 continue; 811 quotaoff(p, mp, i); 812 } 813 /* 814 * Here we fall through to vflush again to ensure 815 * that we have gotten rid of all the system vnodes. 816 */ 817 } 818#endif 819 error = vflush(mp, NULLVP, flags); 820 return (error); 821} 822 823/* 824 * Get file system statistics. 825 * taken from ext2/super.c ext2_statfs 826 */ 827static int 828ext2_statfs(mp, sbp, p) 829 struct mount *mp; 830 register struct statfs *sbp; 831 struct proc *p; 832{ 833 unsigned long overhead; 834 unsigned long overhead_per_group; 835 836 register struct ufsmount *ump; 837 register struct ext2_sb_info *fs; 838 register struct ext2_super_block *es; 839 840 ump = VFSTOUFS(mp); 841 fs = ump->um_e2fs; 842 es = fs->s_es; 843 844 if (es->s_magic != EXT2_SUPER_MAGIC) 845 panic("ext2_statfs - magic number spoiled"); 846 847 /* 848 * Compute the overhead (FS structures) 849 */ 850 overhead_per_group = 1 /* super block */ + 851 fs->s_db_per_group + 852 1 /* block bitmap */ + 853 1 /* inode bitmap */ + 854 fs->s_itb_per_group; 855 overhead = es->s_first_data_block + 856 fs->s_groups_count * overhead_per_group; 857 858 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 859 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 860 sbp->f_blocks = es->s_blocks_count - overhead; 861 sbp->f_bfree = es->s_free_blocks_count; 862 sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; 863 sbp->f_files = es->s_inodes_count; 864 sbp->f_ffree = es->s_free_inodes_count; 865 if (sbp != &mp->mnt_stat) { 866 sbp->f_type = mp->mnt_vfc->vfc_typenum; 867 bcopy((caddr_t)mp->mnt_stat.f_mntonname, 868 (caddr_t)&sbp->f_mntonname[0], MNAMELEN); 869 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 870 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 871 } 872 return (0); 873} 874 875/* 876 * Go through the disk queues to initiate sandbagged IO; 877 * go through the inodes to write those that have been modified; 878 * initiate the writing of the super block if it has been modified. 879 * 880 * Note: we are always called with the filesystem marked `MPBUSY'. 881 */ 882static int 883ext2_sync(mp, waitfor, cred, p) 884 struct mount *mp; 885 int waitfor; 886 struct ucred *cred; 887 struct proc *p; 888{ 889 struct vnode *nvp, *vp; 890 struct inode *ip; 891 struct ufsmount *ump = VFSTOUFS(mp); 892 struct ext2_sb_info *fs; 893 int error, allerror = 0; 894 895 fs = ump->um_e2fs; 896 if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ 897 printf("fs = %s\n", fs->fs_fsmnt); 898 panic("ext2_sync: rofs mod"); 899 } 900 /* 901 * Write back each (modified) inode. 902 */ 903 simple_lock(&mntvnode_slock); 904loop: 905 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 906 /* 907 * If the vnode that we are about to sync is no longer 908 * associated with this mount point, start over. 909 */ 910 if (vp->v_mount != mp) 911 goto loop; 912 simple_lock(&vp->v_interlock); 913 nvp = vp->v_mntvnodes.le_next; 914 ip = VTOI(vp); 915 if (vp->v_type == VNON || 916 ((ip->i_flag & 917 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 918 (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { 919 simple_unlock(&vp->v_interlock); 920 continue; 921 } 922 simple_unlock(&mntvnode_slock); 923 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); 924 if (error) { 925 simple_lock(&mntvnode_slock); 926 if (error == ENOENT) 927 goto loop; 928 continue; 929 } 930 if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) 931 allerror = error; 932 VOP_UNLOCK(vp, 0, p); 933 vrele(vp); 934 simple_lock(&mntvnode_slock); 935 } 936 simple_unlock(&mntvnode_slock); 937 /* 938 * Force stale file system control information to be flushed. 939 */ 940 if (waitfor != MNT_LAZY) { 941 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); 942 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) 943 allerror = error; 944 VOP_UNLOCK(ump->um_devvp, 0, p); 945 } 946#if QUOTA 947 qsync(mp); 948#endif 949 /* 950 * Write back modified superblock. 951 */ 952 if (fs->s_dirt != 0) { 953 fs->s_dirt = 0; 954 fs->s_es->s_wtime = time_second; 955 if ((error = ext2_sbupdate(ump, waitfor)) != 0) 956 allerror = error; 957 } 958 return (allerror); 959} 960 961/* 962 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it 963 * in from disk. If it is in core, wait for the lock bit to clear, then 964 * return the inode locked. Detection and handling of mount points must be 965 * done by the calling routine. 966 */ 967static int 968ext2_vget(mp, ino, vpp) 969 struct mount *mp; 970 ino_t ino; 971 struct vnode **vpp; 972{ 973 register struct ext2_sb_info *fs; 974 register struct inode *ip; 975 struct ufsmount *ump; 976 struct buf *bp; 977 struct vnode *vp; 978 dev_t dev; 979 int i, error; 980 int used_blocks; 981 982 ump = VFSTOUFS(mp); 983 dev = ump->um_dev; 984restart: 985 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) 986 return (0); 987 988 /* 989 * Lock out the creation of new entries in the FFS hash table in 990 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 991 * may occur! 992 */ 993 if (ext2fs_inode_hash_lock) { 994 while (ext2fs_inode_hash_lock) { 995 ext2fs_inode_hash_lock = -1; 996 tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); 997 } 998 goto restart; 999 } 1000 ext2fs_inode_hash_lock = 1; 1001 1002 /* 1003 * If this MALLOC() is performed after the getnewvnode() 1004 * it might block, leaving a vnode with a NULL v_data to be 1005 * found by ext2_sync() if a sync happens to fire right then, 1006 * which will cause a panic because ext2_sync() blindly 1007 * dereferences vp->v_data (as well it should). 1008 */ 1009 MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); 1010 1011 /* Allocate a new vnode/inode. */ 1012 if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) { 1013 if (ext2fs_inode_hash_lock < 0) 1014 wakeup(&ext2fs_inode_hash_lock); 1015 ext2fs_inode_hash_lock = 0; 1016 *vpp = NULL; 1017 FREE(ip, M_EXT2NODE); 1018 return (error); 1019 } 1020 bzero((caddr_t)ip, sizeof(struct inode)); 1021 lockinit(&ip->i_lock, PINOD, "ext2in", 0, 0); 1022 vp->v_data = ip; 1023 ip->i_vnode = vp; 1024 ip->i_e2fs = fs = ump->um_e2fs; 1025 ip->i_dev = dev; 1026 ip->i_number = ino; 1027#if QUOTA 1028 for (i = 0; i < MAXQUOTAS; i++) 1029 ip->i_dquot[i] = NODQUOT; 1030#endif 1031 /* 1032 * Put it onto its hash chain and lock it so that other requests for 1033 * this inode will block if they arrive while we are sleeping waiting 1034 * for old data structures to be purged or for the contents of the 1035 * disk portion of this inode to be read. 1036 */ 1037 ufs_ihashins(ip); 1038 1039 if (ext2fs_inode_hash_lock < 0) 1040 wakeup(&ext2fs_inode_hash_lock); 1041 ext2fs_inode_hash_lock = 0; 1042 1043 /* Read in the disk contents for the inode, copy into the inode. */ 1044#if 0 1045printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); 1046#endif 1047 if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), 1048 (int)fs->s_blocksize, NOCRED, &bp)) != 0) { 1049 /* 1050 * The inode does not contain anything useful, so it would 1051 * be misleading to leave it on its hash chain. With mode 1052 * still zero, it will be unlinked and returned to the free 1053 * list by vput(). 1054 */ 1055 vput(vp); 1056 brelse(bp); 1057 *vpp = NULL; 1058 return (error); 1059 } 1060 /* convert ext2 inode to dinode */ 1061 ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * 1062 ino_to_fsbo(fs, ino)), &ip->i_din); 1063 ip->i_block_group = ino_to_cg(fs, ino); 1064 ip->i_next_alloc_block = 0; 1065 ip->i_next_alloc_goal = 0; 1066 ip->i_prealloc_count = 0; 1067 ip->i_prealloc_block = 0; 1068 /* now we want to make sure that block pointers for unused 1069 blocks are zeroed out - ext2_balloc depends on this 1070 although for regular files and directories only 1071 */ 1072 if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { 1073 used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; 1074 for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1075 ip->i_db[i] = 0; 1076 } 1077/* 1078 ext2_print_inode(ip); 1079*/ 1080 brelse(bp); 1081 1082 /* 1083 * Initialize the vnode from the inode, check for aliases. 1084 * Note that the underlying vnode may have changed. 1085 */ 1086 if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { 1087 vput(vp); 1088 *vpp = NULL; 1089 return (error); 1090 } 1091 /* 1092 * Finish inode initialization now that aliasing has been resolved. 1093 */ 1094 ip->i_devvp = ump->um_devvp; 1095 VREF(ip->i_devvp); 1096 /* 1097 * Set up a generation number for this inode if it does not 1098 * already have one. This should only happen on old filesystems. 1099 */ 1100 if (ip->i_gen == 0) { 1101 ip->i_gen = random() / 2 + 1; 1102 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1103 ip->i_flag |= IN_MODIFIED; 1104 } 1105 *vpp = vp; 1106 return (0); 1107} 1108 1109/* 1110 * File handle to vnode 1111 * 1112 * Have to be really careful about stale file handles: 1113 * - check that the inode number is valid 1114 * - call ext2_vget() to get the locked inode 1115 * - check for an unallocated inode (i_mode == 0) 1116 * - check that the given client host has export rights and return 1117 * those rights via. exflagsp and credanonp 1118 */ 1119static int 1120ext2_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) 1121 register struct mount *mp; 1122 struct fid *fhp; 1123 struct sockaddr *nam; 1124 struct vnode **vpp; 1125 int *exflagsp; 1126 struct ucred **credanonp; 1127{ 1128 register struct ufid *ufhp; 1129 struct ext2_sb_info *fs; 1130 1131 ufhp = (struct ufid *)fhp; 1132 fs = VFSTOUFS(mp)->um_e2fs; 1133 if (ufhp->ufid_ino < ROOTINO || 1134 ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) 1135 return (ESTALE); 1136 return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); 1137} 1138 1139/* 1140 * Vnode pointer to File handle 1141 */ 1142/* ARGSUSED */ 1143static int 1144ext2_vptofh(vp, fhp) 1145 struct vnode *vp; 1146 struct fid *fhp; 1147{ 1148 register struct inode *ip; 1149 register struct ufid *ufhp; 1150 1151 ip = VTOI(vp); 1152 ufhp = (struct ufid *)fhp; 1153 ufhp->ufid_len = sizeof(struct ufid); 1154 ufhp->ufid_ino = ip->i_number; 1155 ufhp->ufid_gen = ip->i_gen; 1156 return (0); 1157} 1158 1159/* 1160 * Write a superblock and associated information back to disk. 1161 */ 1162static int 1163ext2_sbupdate(mp, waitfor) 1164 struct ufsmount *mp; 1165 int waitfor; 1166{ 1167 register struct ext2_sb_info *fs = mp->um_e2fs; 1168 register struct ext2_super_block *es = fs->s_es; 1169 register struct buf *bp; 1170 int error = 0; 1171/* 1172printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); 1173*/ 1174 bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); 1175 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); 1176 if (waitfor == MNT_WAIT) 1177 error = bwrite(bp); 1178 else 1179 bawrite(bp); 1180 1181 /* 1182 * The buffers for group descriptors, inode bitmaps and block bitmaps 1183 * are not busy at this point and are (hopefully) written by the 1184 * usual sync mechanism. No need to write them here 1185 */ 1186 1187 return (error); 1188} 1189