1/* $NetBSD: mfs_vfsops.c,v 1.103 2011/06/12 03:36:01 rmind Exp $ */ 2 3/* 4 * Copyright (c) 1989, 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)mfs_vfsops.c 8.11 (Berkeley) 6/19/95 32 */ 33 34#include <sys/cdefs.h> 35__KERNEL_RCSID(0, "$NetBSD: mfs_vfsops.c,v 1.103 2011/06/12 03:36:01 rmind Exp $"); 36 37#if defined(_KERNEL_OPT) 38#include "opt_compat_netbsd.h" 39#endif 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/sysctl.h> 44#include <sys/time.h> 45#include <sys/kernel.h> 46#include <sys/proc.h> 47#include <sys/buf.h> 48#include <sys/bufq.h> 49#include <sys/mount.h> 50#include <sys/signalvar.h> 51#include <sys/vnode.h> 52#include <sys/kmem.h> 53#include <sys/module.h> 54 55#include <miscfs/genfs/genfs.h> 56#include <miscfs/specfs/specdev.h> 57 58#include <ufs/ufs/quota.h> 59#include <ufs/ufs/inode.h> 60#include <ufs/ufs/ufsmount.h> 61#include <ufs/ufs/ufs_extern.h> 62 63#include <ufs/ffs/fs.h> 64#include <ufs/ffs/ffs_extern.h> 65 66#include <ufs/mfs/mfsnode.h> 67#include <ufs/mfs/mfs_extern.h> 68 69MODULE(MODULE_CLASS_VFS, mfs, "ffs"); 70 71kmutex_t mfs_lock; /* global lock */ 72 73/* used for building internal dev_t, minor == 0 reserved for miniroot */ 74static int mfs_minor = 1; 75static int mfs_initcnt; 76 77extern int (**mfs_vnodeop_p)(void *); 78 79static struct sysctllog *mfs_sysctl_log; 80 81/* 82 * mfs vfs operations. 83 */ 84 85extern const struct vnodeopv_desc mfs_vnodeop_opv_desc; 86 87const struct vnodeopv_desc * const mfs_vnodeopv_descs[] = { 88 &mfs_vnodeop_opv_desc, 89 NULL, 90}; 91 92struct vfsops mfs_vfsops = { 93 MOUNT_MFS, 94 sizeof (struct mfs_args), 95 mfs_mount, 96 mfs_start, 97 ffs_unmount, 98 ufs_root, 99 ufs_quotactl, 100 mfs_statvfs, 101 ffs_sync, 102 ffs_vget, 103 ffs_fhtovp, 104 ffs_vptofh, 105 mfs_init, 106 mfs_reinit, 107 mfs_done, 108 NULL, 109 (int (*)(struct mount *, struct vnode *, struct timespec *)) eopnotsupp, 110 vfs_stdextattrctl, 111 (void *)eopnotsupp, /* vfs_suspendctl */ 112 genfs_renamelock_enter, 113 genfs_renamelock_exit, 114 (void *)eopnotsupp, 115 mfs_vnodeopv_descs, 116 0, 117 { NULL, NULL }, 118}; 119 120static int 121mfs_modcmd(modcmd_t cmd, void *arg) 122{ 123 int error; 124 125 switch (cmd) { 126 case MODULE_CMD_INIT: 127 error = vfs_attach(&mfs_vfsops); 128 if (error != 0) 129 break; 130 sysctl_createv(&mfs_sysctl_log, 0, NULL, NULL, 131 CTLFLAG_PERMANENT, 132 CTLTYPE_NODE, "vfs", NULL, 133 NULL, 0, NULL, 0, 134 CTL_VFS, CTL_EOL); 135 sysctl_createv(&mfs_sysctl_log, 0, NULL, NULL, 136 CTLFLAG_PERMANENT|CTLFLAG_ALIAS, 137 CTLTYPE_NODE, "mfs", 138 SYSCTL_DESCR("Memory based file system"), 139 NULL, 1, NULL, 0, 140 CTL_VFS, 3, CTL_EOL); 141 /* 142 * XXX the "1" and the "3" above could be dynamic, thereby 143 * eliminating one more instance of the "number to vfs" 144 * mapping problem, but they are in order as taken from 145 * sys/mount.h 146 */ 147 break; 148 case MODULE_CMD_FINI: 149 error = vfs_detach(&mfs_vfsops); 150 if (error != 0) 151 break; 152 sysctl_teardown(&mfs_sysctl_log); 153 break; 154 default: 155 error = ENOTTY; 156 break; 157 } 158 159 return (error); 160} 161 162/* 163 * Memory based filesystem initialization. 164 */ 165void 166mfs_init(void) 167{ 168 169 if (mfs_initcnt++ == 0) { 170 mutex_init(&mfs_lock, MUTEX_DEFAULT, IPL_NONE); 171 ffs_init(); 172 } 173} 174 175void 176mfs_reinit(void) 177{ 178 179 ffs_reinit(); 180} 181 182void 183mfs_done(void) 184{ 185 186 if (--mfs_initcnt == 0) { 187 ffs_done(); 188 mutex_destroy(&mfs_lock); 189 } 190} 191 192/* 193 * Called by main() when mfs is going to be mounted as root. 194 */ 195 196int 197mfs_mountroot(void) 198{ 199 struct fs *fs; 200 struct mount *mp; 201 struct lwp *l = curlwp; /* XXX */ 202 struct ufsmount *ump; 203 struct mfsnode *mfsp; 204 int error = 0; 205 206 if ((error = vfs_rootmountalloc(MOUNT_MFS, "mfs_root", &mp))) { 207 vrele(rootvp); 208 return (error); 209 } 210 211 mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP); 212 rootvp->v_data = mfsp; 213 rootvp->v_op = mfs_vnodeop_p; 214 rootvp->v_tag = VT_MFS; 215 mfsp->mfs_baseoff = mfs_rootbase; 216 mfsp->mfs_size = mfs_rootsize; 217 mfsp->mfs_vnode = rootvp; 218 mfsp->mfs_proc = NULL; /* indicate kernel space */ 219 mfsp->mfs_shutdown = 0; 220 cv_init(&mfsp->mfs_cv, "mfs"); 221 mfsp->mfs_refcnt = 1; 222 bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0); 223 if ((error = ffs_mountfs(rootvp, mp, l)) != 0) { 224 vfs_unbusy(mp, false, NULL); 225 bufq_free(mfsp->mfs_buflist); 226 vfs_destroy(mp); 227 kmem_free(mfsp, sizeof(*mfsp)); 228 return (error); 229 } 230 mutex_enter(&mountlist_lock); 231 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); 232 mutex_exit(&mountlist_lock); 233 mp->mnt_vnodecovered = NULLVP; 234 ump = VFSTOUFS(mp); 235 fs = ump->um_fs; 236 (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); 237 (void)ffs_statvfs(mp, &mp->mnt_stat); 238 vfs_unbusy(mp, false, NULL); 239 return (0); 240} 241 242/* 243 * VFS Operations. 244 * 245 * mount system call 246 */ 247/* ARGSUSED */ 248int 249mfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) 250{ 251 struct lwp *l = curlwp; 252 struct vnode *devvp; 253 struct mfs_args *args = data; 254 struct ufsmount *ump; 255 struct fs *fs; 256 struct mfsnode *mfsp; 257 struct proc *p; 258 int flags, error = 0; 259 260 if (args == NULL) 261 return EINVAL; 262 if (*data_len < sizeof *args) 263 return EINVAL; 264 265 p = l->l_proc; 266 if (mp->mnt_flag & MNT_GETARGS) { 267 struct vnode *vp; 268 269 ump = VFSTOUFS(mp); 270 if (ump == NULL) 271 return EIO; 272 273 vp = ump->um_devvp; 274 if (vp == NULL) 275 return EIO; 276 277 mfsp = VTOMFS(vp); 278 if (mfsp == NULL) 279 return EIO; 280 281 args->fspec = NULL; 282 args->base = mfsp->mfs_baseoff; 283 args->size = mfsp->mfs_size; 284 *data_len = sizeof *args; 285 return 0; 286 } 287 /* 288 * XXX turn off async to avoid hangs when writing lots of data. 289 * the problem is that MFS needs to allocate pages to clean pages, 290 * so if we wait until the last minute to clean pages then there 291 * may not be any pages available to do the cleaning. 292 * ... and since the default partially-synchronous mode turns out 293 * to not be sufficient under heavy load, make it full synchronous. 294 */ 295 mp->mnt_flag &= ~MNT_ASYNC; 296 mp->mnt_flag |= MNT_SYNCHRONOUS; 297 298 /* 299 * If updating, check whether changing from read-only to 300 * read/write; if there is no device name, that's all we do. 301 */ 302 if (mp->mnt_flag & MNT_UPDATE) { 303 ump = VFSTOUFS(mp); 304 fs = ump->um_fs; 305 if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 306 flags = WRITECLOSE; 307 if (mp->mnt_flag & MNT_FORCE) 308 flags |= FORCECLOSE; 309 error = ffs_flushfiles(mp, flags, l); 310 if (error) 311 return (error); 312 } 313 if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) 314 fs->fs_ronly = 0; 315 if (args->fspec == NULL) 316 return EINVAL; 317 return (0); 318 } 319 error = getnewvnode(VT_MFS, NULL, mfs_vnodeop_p, NULL, &devvp); 320 if (error) 321 return (error); 322 devvp->v_vflag |= VV_MPSAFE; 323 devvp->v_type = VBLK; 324 spec_node_init(devvp, makedev(255, mfs_minor)); 325 mfs_minor++; 326 mfsp = kmem_alloc(sizeof(*mfsp), KM_SLEEP); 327 devvp->v_data = mfsp; 328 mfsp->mfs_baseoff = args->base; 329 mfsp->mfs_size = args->size; 330 mfsp->mfs_vnode = devvp; 331 mfsp->mfs_proc = p; 332 mfsp->mfs_shutdown = 0; 333 cv_init(&mfsp->mfs_cv, "mfsidl"); 334 mfsp->mfs_refcnt = 1; 335 bufq_alloc(&mfsp->mfs_buflist, "fcfs", 0); 336 if ((error = ffs_mountfs(devvp, mp, l)) != 0) { 337 mfsp->mfs_shutdown = 1; 338 vrele(devvp); 339 return (error); 340 } 341 ump = VFSTOUFS(mp); 342 fs = ump->um_fs; 343 error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, 344 UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); 345 if (error) 346 return error; 347 (void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, 348 sizeof(fs->fs_fsmnt)); 349 fs->fs_fsmnt[sizeof(fs->fs_fsmnt) - 1] = '\0'; 350 /* XXX: cleanup on error */ 351 return 0; 352} 353 354/* 355 * Used to grab the process and keep it in the kernel to service 356 * memory filesystem I/O requests. 357 * 358 * Loop servicing I/O requests. 359 * Copy the requested data into or out of the memory filesystem 360 * address space. 361 */ 362/* ARGSUSED */ 363int 364mfs_start(struct mount *mp, int flags) 365{ 366 struct vnode *vp; 367 struct mfsnode *mfsp; 368 struct proc *p; 369 struct buf *bp; 370 void *base; 371 int sleepreturn = 0, refcnt, error; 372 ksiginfoq_t kq; 373 374 /* 375 * Ensure that file system is still mounted when getting mfsnode. 376 * Add a reference to the mfsnode to prevent it disappearing in 377 * this routine. 378 */ 379 if ((error = vfs_busy(mp, NULL)) != 0) 380 return error; 381 vp = VFSTOUFS(mp)->um_devvp; 382 mfsp = VTOMFS(vp); 383 mutex_enter(&mfs_lock); 384 mfsp->mfs_refcnt++; 385 mutex_exit(&mfs_lock); 386 vfs_unbusy(mp, false, NULL); 387 388 base = mfsp->mfs_baseoff; 389 mutex_enter(&mfs_lock); 390 while (mfsp->mfs_shutdown != 1) { 391 while ((bp = bufq_get(mfsp->mfs_buflist)) != NULL) { 392 mutex_exit(&mfs_lock); 393 mfs_doio(bp, base); 394 mutex_enter(&mfs_lock); 395 } 396 /* 397 * If a non-ignored signal is received, try to unmount. 398 * If that fails, or the filesystem is already in the 399 * process of being unmounted, clear the signal (it has been 400 * "processed"), otherwise we will loop here, as tsleep 401 * will always return EINTR/ERESTART. 402 */ 403 if (sleepreturn != 0) { 404 mutex_exit(&mfs_lock); 405 if (dounmount(mp, 0, curlwp) != 0) { 406 p = curproc; 407 ksiginfo_queue_init(&kq); 408 mutex_enter(p->p_lock); 409 sigclearall(p, NULL, &kq); 410 mutex_exit(p->p_lock); 411 ksiginfo_queue_drain(&kq); 412 } 413 sleepreturn = 0; 414 mutex_enter(&mfs_lock); 415 continue; 416 } 417 418 sleepreturn = cv_wait_sig(&mfsp->mfs_cv, &mfs_lock); 419 } 420 KASSERT(bufq_peek(mfsp->mfs_buflist) == NULL); 421 refcnt = --mfsp->mfs_refcnt; 422 mutex_exit(&mfs_lock); 423 if (refcnt == 0) { 424 bufq_free(mfsp->mfs_buflist); 425 cv_destroy(&mfsp->mfs_cv); 426 kmem_free(mfsp, sizeof(*mfsp)); 427 } 428 return (sleepreturn); 429} 430 431/* 432 * Get file system statistics. 433 */ 434int 435mfs_statvfs(struct mount *mp, struct statvfs *sbp) 436{ 437 int error; 438 439 error = ffs_statvfs(mp, sbp); 440 if (error) 441 return error; 442 (void)strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, 443 sizeof(sbp->f_fstypename)); 444 sbp->f_fstypename[sizeof(sbp->f_fstypename) - 1] = '\0'; 445 return 0; 446} 447