1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
| 1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
|
39 * $Id: vfs_subr.c,v 1.179 1999/01/05 18:12:29 eivind Exp $
| 39 * $Id: vfs_subr.c,v 1.180 1999/01/05 18:49:53 eivind Exp $
|
40 */ 41 42/* 43 * External virtual filesystem routines 44 */ 45#include "opt_ddb.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/conf.h> 50#include <sys/fcntl.h> 51#include <sys/kernel.h> 52#include <sys/proc.h> 53#include <sys/malloc.h> 54#include <sys/mount.h> 55#include <sys/socket.h> 56#include <sys/vnode.h> 57#include <sys/stat.h> 58#include <sys/buf.h> 59#include <sys/domain.h> 60#include <sys/dirent.h> 61#include <sys/vmmeter.h> 62 63#include <machine/limits.h> 64 65#include <vm/vm.h> 66#include <vm/vm_object.h> 67#include <vm/vm_extern.h> 68#include <vm/pmap.h> 69#include <vm/vm_map.h> 70#include <vm/vm_pager.h> 71#include <vm/vnode_pager.h> 72#include <vm/vm_zone.h> 73#include <sys/sysctl.h> 74 75#include <miscfs/specfs/specdev.h> 76 77static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 78 79static void insmntque __P((struct vnode *vp, struct mount *mp)); 80static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 81static void vfree __P((struct vnode *)); 82static void vgonel __P((struct vnode *vp, struct proc *p)); 83static unsigned long numvnodes; 84SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 85 86enum vtype iftovt_tab[16] = { 87 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 88 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 89}; 90int vttoif_tab[9] = { 91 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 92 S_IFSOCK, S_IFIFO, S_IFMT, 93}; 94 95static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 96struct tobefreelist vnode_tobefree_list; /* vnode free list */ 97 98static u_long wantfreevnodes = 25; 99SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 100static u_long freevnodes = 0; 101SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 102 103int vfs_ioopt = 0; 104#ifdef ENABLE_VFS_IOOPT 105SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 106#endif 107 108struct 
mntlist mountlist; /* mounted filesystem list */ 109struct simplelock mountlist_slock; 110struct simplelock mntvnode_slock; 111int nfs_mount_type = -1; 112#ifndef NULL_SIMPLELOCKS 113static struct simplelock mntid_slock; 114static struct simplelock vnode_free_list_slock; 115static struct simplelock spechash_slock; 116#endif 117struct nfs_public nfs_pub; /* publicly exported FS */ 118static vm_zone_t vnode_zone; 119 120/* 121 * The workitem queue. 122 */ 123#define SYNCER_MAXDELAY 32 124static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 125time_t syncdelay = 30; 126int rushjob; /* number of slots to run ASAP */ 127 128static int syncer_delayno = 0; 129static long syncer_mask; 130LIST_HEAD(synclist, vnode); 131static struct synclist *syncer_workitem_pending; 132 133int desiredvnodes; 134SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 135 136static void vfs_free_addrlist __P((struct netexport *nep)); 137static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 138static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 139 struct export_args *argp)); 140 141/* 142 * Initialize the vnode management data structures. 143 */ 144void 145vntblinit() 146{ 147 148 desiredvnodes = maxproc + cnt.v_page_count / 4; 149 simple_lock_init(&mntvnode_slock); 150 simple_lock_init(&mntid_slock); 151 simple_lock_init(&spechash_slock); 152 TAILQ_INIT(&vnode_free_list); 153 TAILQ_INIT(&vnode_tobefree_list); 154 simple_lock_init(&vnode_free_list_slock); 155 CIRCLEQ_INIT(&mountlist); 156 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 157 /* 158 * Initialize the filesystem syncer. 159 */ 160 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 161 &syncer_mask); 162 syncer_maxdelay = syncer_mask + 1; 163} 164 165/* 166 * Mark a mount point as busy. Used to synchronize access and to delay 167 * unmounting. Interlock is not released on failure. 
168 */ 169int 170vfs_busy(mp, flags, interlkp, p) 171 struct mount *mp; 172 int flags; 173 struct simplelock *interlkp; 174 struct proc *p; 175{ 176 int lkflags; 177 178 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 179 if (flags & LK_NOWAIT) 180 return (ENOENT); 181 mp->mnt_kern_flag |= MNTK_MWAIT; 182 if (interlkp) { 183 simple_unlock(interlkp); 184 } 185 /* 186 * Since all busy locks are shared except the exclusive 187 * lock granted when unmounting, the only place that a 188 * wakeup needs to be done is at the release of the 189 * exclusive lock at the end of dounmount. 190 */ 191 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 192 if (interlkp) { 193 simple_lock(interlkp); 194 } 195 return (ENOENT); 196 } 197 lkflags = LK_SHARED | LK_NOPAUSE; 198 if (interlkp) 199 lkflags |= LK_INTERLOCK; 200 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 201 panic("vfs_busy: unexpected lock failure"); 202 return (0); 203} 204 205/* 206 * Free a busy filesystem. 207 */ 208void 209vfs_unbusy(mp, p) 210 struct mount *mp; 211 struct proc *p; 212{ 213 214 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 215} 216 217/* 218 * Lookup a filesystem type, and if found allocate and initialize 219 * a mount structure for it. 220 * 221 * Devname is usually updated by mount(8) after booting. 
222 */ 223int 224vfs_rootmountalloc(fstypename, devname, mpp) 225 char *fstypename; 226 char *devname; 227 struct mount **mpp; 228{ 229 struct proc *p = curproc; /* XXX */ 230 struct vfsconf *vfsp; 231 struct mount *mp; 232 233 if (fstypename == NULL) 234 return (ENODEV); 235 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 236 if (!strcmp(vfsp->vfc_name, fstypename)) 237 break; 238 if (vfsp == NULL) 239 return (ENODEV); 240 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 241 bzero((char *)mp, (u_long)sizeof(struct mount)); 242 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 243 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 244 LIST_INIT(&mp->mnt_vnodelist); 245 mp->mnt_vfc = vfsp; 246 mp->mnt_op = vfsp->vfc_vfsops; 247 mp->mnt_flag = MNT_RDONLY; 248 mp->mnt_vnodecovered = NULLVP; 249 vfsp->vfc_refcount++; 250 mp->mnt_stat.f_type = vfsp->vfc_typenum; 251 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 252 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 253 mp->mnt_stat.f_mntonname[0] = '/'; 254 mp->mnt_stat.f_mntonname[1] = 0; 255 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 256 *mpp = mp; 257 return (0); 258} 259 260/* 261 * Find an appropriate filesystem to use for the root. If a filesystem 262 * has not been preselected, walk through the list of known filesystems 263 * trying those that have mountroot routines, and try them until one 264 * works or we have tried them all. 
265 */ 266#ifdef notdef /* XXX JH */ 267int 268lite2_vfs_mountroot() 269{ 270 struct vfsconf *vfsp; 271 extern int (*lite2_mountroot) __P((void)); 272 int error; 273 274 if (lite2_mountroot != NULL) 275 return ((*lite2_mountroot)()); 276 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 277 if (vfsp->vfc_mountroot == NULL) 278 continue; 279 if ((error = (*vfsp->vfc_mountroot)()) == 0) 280 return (0); 281 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 282 } 283 return (ENODEV); 284} 285#endif 286 287/* 288 * Lookup a mount point by filesystem identifier. 289 */ 290struct mount * 291vfs_getvfs(fsid) 292 fsid_t *fsid; 293{ 294 register struct mount *mp; 295 296 simple_lock(&mountlist_slock); 297 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 298 mp = mp->mnt_list.cqe_next) { 299 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 300 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 301 simple_unlock(&mountlist_slock); 302 return (mp); 303 } 304 } 305 simple_unlock(&mountlist_slock); 306 return ((struct mount *) 0); 307} 308 309/* 310 * Get a new unique fsid 311 */ 312void 313vfs_getnewfsid(mp) 314 struct mount *mp; 315{ 316 static u_short xxxfs_mntid; 317 318 fsid_t tfsid; 319 int mtype; 320 321 simple_lock(&mntid_slock); 322 mtype = mp->mnt_vfc->vfc_typenum; 323 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 324 mp->mnt_stat.f_fsid.val[1] = mtype; 325 if (xxxfs_mntid == 0) 326 ++xxxfs_mntid; 327 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 328 tfsid.val[1] = mtype; 329 if (mountlist.cqh_first != (void *)&mountlist) { 330 while (vfs_getvfs(&tfsid)) { 331 tfsid.val[0]++; 332 xxxfs_mntid++; 333 } 334 } 335 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 336 simple_unlock(&mntid_slock); 337} 338 339/* 340 * Set vnode attributes to VNOVAL 341 */ 342void 343vattr_null(vap) 344 register struct vattr *vap; 345{ 346 347 vap->va_type = VNON; 348 vap->va_size = VNOVAL; 349 vap->va_bytes = VNOVAL; 350 vap->va_mode = VNOVAL; 351 vap->va_nlink = 
VNOVAL; 352 vap->va_uid = VNOVAL; 353 vap->va_gid = VNOVAL; 354 vap->va_fsid = VNOVAL; 355 vap->va_fileid = VNOVAL; 356 vap->va_blocksize = VNOVAL; 357 vap->va_rdev = VNOVAL; 358 vap->va_atime.tv_sec = VNOVAL; 359 vap->va_atime.tv_nsec = VNOVAL; 360 vap->va_mtime.tv_sec = VNOVAL; 361 vap->va_mtime.tv_nsec = VNOVAL; 362 vap->va_ctime.tv_sec = VNOVAL; 363 vap->va_ctime.tv_nsec = VNOVAL; 364 vap->va_flags = VNOVAL; 365 vap->va_gen = VNOVAL; 366 vap->va_vaflags = 0; 367} 368 369/* 370 * Routines having to do with the management of the vnode table. 371 */ 372extern vop_t **dead_vnodeop_p; 373 374/* 375 * Return the next vnode from the free list. 376 */ 377int 378getnewvnode(tag, mp, vops, vpp) 379 enum vtagtype tag; 380 struct mount *mp; 381 vop_t **vops; 382 struct vnode **vpp; 383{ 384 int s; 385 struct proc *p = curproc; /* XXX */ 386 struct vnode *vp, *tvp, *nvp; 387 vm_object_t object; 388 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 389 390 /* 391 * We take the least recently used vnode from the freelist 392 * if we can get it and it has no cached pages, and no 393 * namecache entries are relative to it. 
394 * Otherwise we allocate a new vnode 395 */ 396 397 s = splbio(); 398 simple_lock(&vnode_free_list_slock); 399 TAILQ_INIT(&vnode_tmp_list); 400 401 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 402 nvp = TAILQ_NEXT(vp, v_freelist); 403 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 404 if (vp->v_flag & VAGE) { 405 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 406 } else { 407 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 408 } 409 vp->v_flag &= ~(VTBFREE|VAGE); 410 vp->v_flag |= VFREE; 411 if (vp->v_usecount) 412 panic("tobe free vnode isn't"); 413 freevnodes++; 414 } 415 416 if (wantfreevnodes && freevnodes < wantfreevnodes) { 417 vp = NULL; 418 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 419 /* 420 * XXX: this is only here to be backwards compatible 421 */ 422 vp = NULL; 423 } else { 424 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 425 nvp = TAILQ_NEXT(vp, v_freelist); 426 if (!simple_lock_try(&vp->v_interlock)) 427 continue; 428 if (vp->v_usecount) 429 panic("free vnode isn't"); 430 431 object = vp->v_object; 432 if (object && (object->resident_page_count || object->ref_count)) { 433 printf("object inconsistant state: RPC: %d, RC: %d\n", 434 object->resident_page_count, object->ref_count); 435 /* Don't recycle if it's caching some pages */ 436 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 437 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 438 continue; 439 } else if (LIST_FIRST(&vp->v_cache_src)) { 440 /* Don't recycle if active in the namecache */ 441 simple_unlock(&vp->v_interlock); 442 continue; 443 } else { 444 break; 445 } 446 } 447 } 448 449 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 450 nvp = TAILQ_NEXT(tvp, v_freelist); 451 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 452 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 453 simple_unlock(&tvp->v_interlock); 454 } 455 456 if (vp) { 457 vp->v_flag |= VDOOMED; 458 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 
459 freevnodes--; 460 simple_unlock(&vnode_free_list_slock); 461 cache_purge(vp); 462 vp->v_lease = NULL; 463 if (vp->v_type != VBAD) { 464 vgonel(vp, p); 465 } else { 466 simple_unlock(&vp->v_interlock); 467 } 468
| 40 */ 41 42/* 43 * External virtual filesystem routines 44 */ 45#include "opt_ddb.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/conf.h> 50#include <sys/fcntl.h> 51#include <sys/kernel.h> 52#include <sys/proc.h> 53#include <sys/malloc.h> 54#include <sys/mount.h> 55#include <sys/socket.h> 56#include <sys/vnode.h> 57#include <sys/stat.h> 58#include <sys/buf.h> 59#include <sys/domain.h> 60#include <sys/dirent.h> 61#include <sys/vmmeter.h> 62 63#include <machine/limits.h> 64 65#include <vm/vm.h> 66#include <vm/vm_object.h> 67#include <vm/vm_extern.h> 68#include <vm/pmap.h> 69#include <vm/vm_map.h> 70#include <vm/vm_pager.h> 71#include <vm/vnode_pager.h> 72#include <vm/vm_zone.h> 73#include <sys/sysctl.h> 74 75#include <miscfs/specfs/specdev.h> 76 77static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 78 79static void insmntque __P((struct vnode *vp, struct mount *mp)); 80static void vclean __P((struct vnode *vp, int flags, struct proc *p)); 81static void vfree __P((struct vnode *)); 82static void vgonel __P((struct vnode *vp, struct proc *p)); 83static unsigned long numvnodes; 84SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 85 86enum vtype iftovt_tab[16] = { 87 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 88 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 89}; 90int vttoif_tab[9] = { 91 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 92 S_IFSOCK, S_IFIFO, S_IFMT, 93}; 94 95static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 96struct tobefreelist vnode_tobefree_list; /* vnode free list */ 97 98static u_long wantfreevnodes = 25; 99SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 100static u_long freevnodes = 0; 101SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 102 103int vfs_ioopt = 0; 104#ifdef ENABLE_VFS_IOOPT 105SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 106#endif 107 108struct 
mntlist mountlist; /* mounted filesystem list */ 109struct simplelock mountlist_slock; 110struct simplelock mntvnode_slock; 111int nfs_mount_type = -1; 112#ifndef NULL_SIMPLELOCKS 113static struct simplelock mntid_slock; 114static struct simplelock vnode_free_list_slock; 115static struct simplelock spechash_slock; 116#endif 117struct nfs_public nfs_pub; /* publicly exported FS */ 118static vm_zone_t vnode_zone; 119 120/* 121 * The workitem queue. 122 */ 123#define SYNCER_MAXDELAY 32 124static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ 125time_t syncdelay = 30; 126int rushjob; /* number of slots to run ASAP */ 127 128static int syncer_delayno = 0; 129static long syncer_mask; 130LIST_HEAD(synclist, vnode); 131static struct synclist *syncer_workitem_pending; 132 133int desiredvnodes; 134SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); 135 136static void vfs_free_addrlist __P((struct netexport *nep)); 137static int vfs_free_netcred __P((struct radix_node *rn, void *w)); 138static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, 139 struct export_args *argp)); 140 141/* 142 * Initialize the vnode management data structures. 143 */ 144void 145vntblinit() 146{ 147 148 desiredvnodes = maxproc + cnt.v_page_count / 4; 149 simple_lock_init(&mntvnode_slock); 150 simple_lock_init(&mntid_slock); 151 simple_lock_init(&spechash_slock); 152 TAILQ_INIT(&vnode_free_list); 153 TAILQ_INIT(&vnode_tobefree_list); 154 simple_lock_init(&vnode_free_list_slock); 155 CIRCLEQ_INIT(&mountlist); 156 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 157 /* 158 * Initialize the filesystem syncer. 159 */ 160 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 161 &syncer_mask); 162 syncer_maxdelay = syncer_mask + 1; 163} 164 165/* 166 * Mark a mount point as busy. Used to synchronize access and to delay 167 * unmounting. Interlock is not released on failure. 
168 */ 169int 170vfs_busy(mp, flags, interlkp, p) 171 struct mount *mp; 172 int flags; 173 struct simplelock *interlkp; 174 struct proc *p; 175{ 176 int lkflags; 177 178 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 179 if (flags & LK_NOWAIT) 180 return (ENOENT); 181 mp->mnt_kern_flag |= MNTK_MWAIT; 182 if (interlkp) { 183 simple_unlock(interlkp); 184 } 185 /* 186 * Since all busy locks are shared except the exclusive 187 * lock granted when unmounting, the only place that a 188 * wakeup needs to be done is at the release of the 189 * exclusive lock at the end of dounmount. 190 */ 191 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 192 if (interlkp) { 193 simple_lock(interlkp); 194 } 195 return (ENOENT); 196 } 197 lkflags = LK_SHARED | LK_NOPAUSE; 198 if (interlkp) 199 lkflags |= LK_INTERLOCK; 200 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 201 panic("vfs_busy: unexpected lock failure"); 202 return (0); 203} 204 205/* 206 * Free a busy filesystem. 207 */ 208void 209vfs_unbusy(mp, p) 210 struct mount *mp; 211 struct proc *p; 212{ 213 214 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 215} 216 217/* 218 * Lookup a filesystem type, and if found allocate and initialize 219 * a mount structure for it. 220 * 221 * Devname is usually updated by mount(8) after booting. 
222 */ 223int 224vfs_rootmountalloc(fstypename, devname, mpp) 225 char *fstypename; 226 char *devname; 227 struct mount **mpp; 228{ 229 struct proc *p = curproc; /* XXX */ 230 struct vfsconf *vfsp; 231 struct mount *mp; 232 233 if (fstypename == NULL) 234 return (ENODEV); 235 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 236 if (!strcmp(vfsp->vfc_name, fstypename)) 237 break; 238 if (vfsp == NULL) 239 return (ENODEV); 240 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 241 bzero((char *)mp, (u_long)sizeof(struct mount)); 242 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 243 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 244 LIST_INIT(&mp->mnt_vnodelist); 245 mp->mnt_vfc = vfsp; 246 mp->mnt_op = vfsp->vfc_vfsops; 247 mp->mnt_flag = MNT_RDONLY; 248 mp->mnt_vnodecovered = NULLVP; 249 vfsp->vfc_refcount++; 250 mp->mnt_stat.f_type = vfsp->vfc_typenum; 251 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 252 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 253 mp->mnt_stat.f_mntonname[0] = '/'; 254 mp->mnt_stat.f_mntonname[1] = 0; 255 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 256 *mpp = mp; 257 return (0); 258} 259 260/* 261 * Find an appropriate filesystem to use for the root. If a filesystem 262 * has not been preselected, walk through the list of known filesystems 263 * trying those that have mountroot routines, and try them until one 264 * works or we have tried them all. 
265 */ 266#ifdef notdef /* XXX JH */ 267int 268lite2_vfs_mountroot() 269{ 270 struct vfsconf *vfsp; 271 extern int (*lite2_mountroot) __P((void)); 272 int error; 273 274 if (lite2_mountroot != NULL) 275 return ((*lite2_mountroot)()); 276 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 277 if (vfsp->vfc_mountroot == NULL) 278 continue; 279 if ((error = (*vfsp->vfc_mountroot)()) == 0) 280 return (0); 281 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 282 } 283 return (ENODEV); 284} 285#endif 286 287/* 288 * Lookup a mount point by filesystem identifier. 289 */ 290struct mount * 291vfs_getvfs(fsid) 292 fsid_t *fsid; 293{ 294 register struct mount *mp; 295 296 simple_lock(&mountlist_slock); 297 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 298 mp = mp->mnt_list.cqe_next) { 299 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 300 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 301 simple_unlock(&mountlist_slock); 302 return (mp); 303 } 304 } 305 simple_unlock(&mountlist_slock); 306 return ((struct mount *) 0); 307} 308 309/* 310 * Get a new unique fsid 311 */ 312void 313vfs_getnewfsid(mp) 314 struct mount *mp; 315{ 316 static u_short xxxfs_mntid; 317 318 fsid_t tfsid; 319 int mtype; 320 321 simple_lock(&mntid_slock); 322 mtype = mp->mnt_vfc->vfc_typenum; 323 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 324 mp->mnt_stat.f_fsid.val[1] = mtype; 325 if (xxxfs_mntid == 0) 326 ++xxxfs_mntid; 327 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 328 tfsid.val[1] = mtype; 329 if (mountlist.cqh_first != (void *)&mountlist) { 330 while (vfs_getvfs(&tfsid)) { 331 tfsid.val[0]++; 332 xxxfs_mntid++; 333 } 334 } 335 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 336 simple_unlock(&mntid_slock); 337} 338 339/* 340 * Set vnode attributes to VNOVAL 341 */ 342void 343vattr_null(vap) 344 register struct vattr *vap; 345{ 346 347 vap->va_type = VNON; 348 vap->va_size = VNOVAL; 349 vap->va_bytes = VNOVAL; 350 vap->va_mode = VNOVAL; 351 vap->va_nlink = 
VNOVAL; 352 vap->va_uid = VNOVAL; 353 vap->va_gid = VNOVAL; 354 vap->va_fsid = VNOVAL; 355 vap->va_fileid = VNOVAL; 356 vap->va_blocksize = VNOVAL; 357 vap->va_rdev = VNOVAL; 358 vap->va_atime.tv_sec = VNOVAL; 359 vap->va_atime.tv_nsec = VNOVAL; 360 vap->va_mtime.tv_sec = VNOVAL; 361 vap->va_mtime.tv_nsec = VNOVAL; 362 vap->va_ctime.tv_sec = VNOVAL; 363 vap->va_ctime.tv_nsec = VNOVAL; 364 vap->va_flags = VNOVAL; 365 vap->va_gen = VNOVAL; 366 vap->va_vaflags = 0; 367} 368 369/* 370 * Routines having to do with the management of the vnode table. 371 */ 372extern vop_t **dead_vnodeop_p; 373 374/* 375 * Return the next vnode from the free list. 376 */ 377int 378getnewvnode(tag, mp, vops, vpp) 379 enum vtagtype tag; 380 struct mount *mp; 381 vop_t **vops; 382 struct vnode **vpp; 383{ 384 int s; 385 struct proc *p = curproc; /* XXX */ 386 struct vnode *vp, *tvp, *nvp; 387 vm_object_t object; 388 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 389 390 /* 391 * We take the least recently used vnode from the freelist 392 * if we can get it and it has no cached pages, and no 393 * namecache entries are relative to it. 
394 * Otherwise we allocate a new vnode 395 */ 396 397 s = splbio(); 398 simple_lock(&vnode_free_list_slock); 399 TAILQ_INIT(&vnode_tmp_list); 400 401 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 402 nvp = TAILQ_NEXT(vp, v_freelist); 403 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 404 if (vp->v_flag & VAGE) { 405 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 406 } else { 407 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 408 } 409 vp->v_flag &= ~(VTBFREE|VAGE); 410 vp->v_flag |= VFREE; 411 if (vp->v_usecount) 412 panic("tobe free vnode isn't"); 413 freevnodes++; 414 } 415 416 if (wantfreevnodes && freevnodes < wantfreevnodes) { 417 vp = NULL; 418 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 419 /* 420 * XXX: this is only here to be backwards compatible 421 */ 422 vp = NULL; 423 } else { 424 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 425 nvp = TAILQ_NEXT(vp, v_freelist); 426 if (!simple_lock_try(&vp->v_interlock)) 427 continue; 428 if (vp->v_usecount) 429 panic("free vnode isn't"); 430 431 object = vp->v_object; 432 if (object && (object->resident_page_count || object->ref_count)) { 433 printf("object inconsistant state: RPC: %d, RC: %d\n", 434 object->resident_page_count, object->ref_count); 435 /* Don't recycle if it's caching some pages */ 436 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 437 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 438 continue; 439 } else if (LIST_FIRST(&vp->v_cache_src)) { 440 /* Don't recycle if active in the namecache */ 441 simple_unlock(&vp->v_interlock); 442 continue; 443 } else { 444 break; 445 } 446 } 447 } 448 449 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 450 nvp = TAILQ_NEXT(tvp, v_freelist); 451 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 452 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 453 simple_unlock(&tvp->v_interlock); 454 } 455 456 if (vp) { 457 vp->v_flag |= VDOOMED; 458 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 
459 freevnodes--; 460 simple_unlock(&vnode_free_list_slock); 461 cache_purge(vp); 462 vp->v_lease = NULL; 463 if (vp->v_type != VBAD) { 464 vgonel(vp, p); 465 } else { 466 simple_unlock(&vp->v_interlock); 467 } 468
|
469#ifdef DIAGNOSTIC
| 469#ifdef INVARIANTS
|
470 { 471 int s; 472 473 if (vp->v_data) 474 panic("cleaned vnode isn't"); 475 s = splbio(); 476 if (vp->v_numoutput) 477 panic("Clean vnode has pending I/O's"); 478 splx(s); 479 } 480#endif 481 vp->v_flag = 0; 482 vp->v_lastr = 0; 483 vp->v_lastw = 0; 484 vp->v_lasta = 0; 485 vp->v_cstart = 0; 486 vp->v_clen = 0; 487 vp->v_socket = 0; 488 vp->v_writecount = 0; /* XXX */ 489 vp->v_maxio = 0; 490 } else { 491 simple_unlock(&vnode_free_list_slock); 492 vp = (struct vnode *) zalloc(vnode_zone); 493 bzero((char *) vp, sizeof *vp); 494 simple_lock_init(&vp->v_interlock); 495 vp->v_dd = vp; 496 cache_purge(vp); 497 LIST_INIT(&vp->v_cache_src); 498 TAILQ_INIT(&vp->v_cache_dst); 499 numvnodes++; 500 } 501 502 TAILQ_INIT(&vp->v_cleanblkhd); 503 TAILQ_INIT(&vp->v_dirtyblkhd); 504 vp->v_type = VNON; 505 vp->v_tag = tag; 506 vp->v_op = vops; 507 insmntque(vp, mp); 508 *vpp = vp; 509 vp->v_usecount = 1; 510 vp->v_data = 0; 511 splx(s); 512 513 vfs_object_create(vp, p, p->p_ucred); 514 return (0); 515} 516 517/* 518 * Move a vnode from one mount queue to another. 519 */ 520static void 521insmntque(vp, mp) 522 register struct vnode *vp; 523 register struct mount *mp; 524{ 525 526 simple_lock(&mntvnode_slock); 527 /* 528 * Delete from old mount point vnode list, if on one. 529 */ 530 if (vp->v_mount != NULL) 531 LIST_REMOVE(vp, v_mntvnodes); 532 /* 533 * Insert into list of vnodes for the new mount point, if available. 534 */ 535 if ((vp->v_mount = mp) == NULL) { 536 simple_unlock(&mntvnode_slock); 537 return; 538 } 539 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 540 simple_unlock(&mntvnode_slock); 541} 542 543/* 544 * Update outstanding I/O count and do wakeup if requested. 
545 */ 546void 547vwakeup(bp) 548 register struct buf *bp; 549{ 550 register struct vnode *vp; 551 552 bp->b_flags &= ~B_WRITEINPROG; 553 if ((vp = bp->b_vp)) { 554 vp->v_numoutput--; 555 if (vp->v_numoutput < 0) 556 panic("vwakeup: neg numoutput"); 557 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 558 vp->v_flag &= ~VBWAIT; 559 wakeup((caddr_t) &vp->v_numoutput); 560 } 561 } 562} 563 564/* 565 * Flush out and invalidate all buffers associated with a vnode. 566 * Called with the underlying object locked. 567 */ 568int 569vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 570 register struct vnode *vp; 571 int flags; 572 struct ucred *cred; 573 struct proc *p; 574 int slpflag, slptimeo; 575{ 576 register struct buf *bp; 577 struct buf *nbp, *blist; 578 int s, error; 579 vm_object_t object; 580 581 if (flags & V_SAVE) { 582 s = splbio(); 583 while (vp->v_numoutput) { 584 vp->v_flag |= VBWAIT; 585 error = tsleep((caddr_t)&vp->v_numoutput, 586 slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); 587 if (error) { 588 splx(s); 589 return (error); 590 } 591 } 592 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 593 splx(s); 594 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 595 return (error); 596 s = splbio(); 597 if (vp->v_numoutput > 0 || 598 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 599 panic("vinvalbuf: dirty bufs"); 600 } 601 splx(s); 602 } 603 s = splbio(); 604 for (;;) { 605 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 606 if (!blist) 607 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 608 if (!blist) 609 break; 610 611 for (bp = blist; bp; bp = nbp) { 612 nbp = TAILQ_NEXT(bp, b_vnbufs); 613 if (bp->b_flags & B_BUSY) { 614 bp->b_flags |= B_WANTED; 615 error = tsleep((caddr_t) bp, 616 slpflag | (PRIBIO + 4), "vinvalbuf", 617 slptimeo); 618 if (error) { 619 splx(s); 620 return (error); 621 } 622 break; 623 } 624 /* 625 * XXX Since there are no node locks for NFS, I 626 * believe there is a slight chance that a delayed 627 * write will occur while sleeping just above, so 628 * check for it. 
Note that vfs_bio_awrite expects 629 * buffers to reside on a queue, while VOP_BWRITE and 630 * brelse do not. 631 */ 632 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 633 (flags & V_SAVE)) { 634 635 if (bp->b_vp == vp) { 636 if (bp->b_flags & B_CLUSTEROK) { 637 vfs_bio_awrite(bp); 638 } else { 639 bremfree(bp); 640 bp->b_flags |= (B_BUSY | B_ASYNC); 641 VOP_BWRITE(bp); 642 } 643 } else { 644 bremfree(bp); 645 bp->b_flags |= B_BUSY; 646 (void) VOP_BWRITE(bp); 647 } 648 break; 649 } 650 bremfree(bp); 651 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY); 652 bp->b_flags &= ~B_ASYNC; 653 brelse(bp); 654 } 655 } 656 657 while (vp->v_numoutput > 0) { 658 vp->v_flag |= VBWAIT; 659 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 660 } 661 662 splx(s); 663 664 /* 665 * Destroy the copy in the VM cache, too. 666 */ 667 simple_lock(&vp->v_interlock); 668 object = vp->v_object; 669 if (object != NULL) { 670 vm_object_page_remove(object, 0, 0, 671 (flags & V_SAVE) ? TRUE : FALSE); 672 } 673 simple_unlock(&vp->v_interlock); 674 675 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) 676 panic("vinvalbuf: flush failed"); 677 return (0); 678} 679 680/* 681 * Truncate a file's buffer and pages to a specified length. This 682 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 683 * sync activity. 684 */ 685int 686vtruncbuf(vp, cred, p, length, blksize) 687 register struct vnode *vp; 688 struct ucred *cred; 689 struct proc *p; 690 off_t length; 691 int blksize; 692{ 693 register struct buf *bp; 694 struct buf *nbp; 695 int s, anyfreed; 696 int trunclbn; 697 698 /* 699 * Round up to the *next* lbn. 
700 */ 701 trunclbn = (length + blksize - 1) / blksize; 702 703 s = splbio(); 704restart: 705 anyfreed = 1; 706 for (;anyfreed;) { 707 anyfreed = 0; 708 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 709 nbp = TAILQ_NEXT(bp, b_vnbufs); 710 if (bp->b_lblkno >= trunclbn) { 711 if (bp->b_flags & B_BUSY) { 712 bp->b_flags |= B_WANTED; 713 tsleep(bp, PRIBIO + 4, "vtrb1", 0); 714 goto restart; 715 } else { 716 bremfree(bp); 717 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 718 bp->b_flags &= ~B_ASYNC; 719 brelse(bp); 720 anyfreed = 1; 721 } 722 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| 723 (nbp->b_vp != vp) || 724 (nbp->b_flags & B_DELWRI))) { 725 goto restart; 726 } 727 } 728 } 729 730 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 731 nbp = TAILQ_NEXT(bp, b_vnbufs); 732 if (bp->b_lblkno >= trunclbn) { 733 if (bp->b_flags & B_BUSY) { 734 bp->b_flags |= B_WANTED; 735 tsleep(bp, PRIBIO + 4, "vtrb2", 0); 736 goto restart; 737 } else { 738 bremfree(bp); 739 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 740 bp->b_flags &= ~B_ASYNC; 741 brelse(bp); 742 anyfreed = 1; 743 } 744 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| 745 (nbp->b_vp != vp) || 746 (nbp->b_flags & B_DELWRI) == 0)) { 747 goto restart; 748 } 749 } 750 } 751 } 752 753 if (length > 0) { 754restartsync: 755 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 756 nbp = TAILQ_NEXT(bp, b_vnbufs); 757 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 758 if (bp->b_flags & B_BUSY) { 759 bp->b_flags |= B_WANTED; 760 tsleep(bp, PRIBIO, "vtrb3", 0); 761 } else { 762 bremfree(bp); 763 bp->b_flags |= B_BUSY; 764 if (bp->b_vp == vp) { 765 bp->b_flags |= B_ASYNC; 766 } else { 767 bp->b_flags &= ~B_ASYNC; 768 } 769 VOP_BWRITE(bp); 770 } 771 goto restartsync; 772 } 773 774 } 775 } 776 777 while (vp->v_numoutput > 0) { 778 vp->v_flag |= VBWAIT; 779 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 780 } 781 782 splx(s); 783 784 vnode_pager_setsize(vp, length); 785 786 return (0); 787} 788 
789/* 790 * Associate a buffer with a vnode. 791 */ 792void 793bgetvp(vp, bp) 794 register struct vnode *vp; 795 register struct buf *bp; 796{ 797 int s; 798
| 470 { 471 int s; 472 473 if (vp->v_data) 474 panic("cleaned vnode isn't"); 475 s = splbio(); 476 if (vp->v_numoutput) 477 panic("Clean vnode has pending I/O's"); 478 splx(s); 479 } 480#endif 481 vp->v_flag = 0; 482 vp->v_lastr = 0; 483 vp->v_lastw = 0; 484 vp->v_lasta = 0; 485 vp->v_cstart = 0; 486 vp->v_clen = 0; 487 vp->v_socket = 0; 488 vp->v_writecount = 0; /* XXX */ 489 vp->v_maxio = 0; 490 } else { 491 simple_unlock(&vnode_free_list_slock); 492 vp = (struct vnode *) zalloc(vnode_zone); 493 bzero((char *) vp, sizeof *vp); 494 simple_lock_init(&vp->v_interlock); 495 vp->v_dd = vp; 496 cache_purge(vp); 497 LIST_INIT(&vp->v_cache_src); 498 TAILQ_INIT(&vp->v_cache_dst); 499 numvnodes++; 500 } 501 502 TAILQ_INIT(&vp->v_cleanblkhd); 503 TAILQ_INIT(&vp->v_dirtyblkhd); 504 vp->v_type = VNON; 505 vp->v_tag = tag; 506 vp->v_op = vops; 507 insmntque(vp, mp); 508 *vpp = vp; 509 vp->v_usecount = 1; 510 vp->v_data = 0; 511 splx(s); 512 513 vfs_object_create(vp, p, p->p_ucred); 514 return (0); 515} 516 517/* 518 * Move a vnode from one mount queue to another. 519 */ 520static void 521insmntque(vp, mp) 522 register struct vnode *vp; 523 register struct mount *mp; 524{ 525 526 simple_lock(&mntvnode_slock); 527 /* 528 * Delete from old mount point vnode list, if on one. 529 */ 530 if (vp->v_mount != NULL) 531 LIST_REMOVE(vp, v_mntvnodes); 532 /* 533 * Insert into list of vnodes for the new mount point, if available. 534 */ 535 if ((vp->v_mount = mp) == NULL) { 536 simple_unlock(&mntvnode_slock); 537 return; 538 } 539 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 540 simple_unlock(&mntvnode_slock); 541} 542 543/* 544 * Update outstanding I/O count and do wakeup if requested. 
545 */ 546void 547vwakeup(bp) 548 register struct buf *bp; 549{ 550 register struct vnode *vp; 551 552 bp->b_flags &= ~B_WRITEINPROG; 553 if ((vp = bp->b_vp)) { 554 vp->v_numoutput--; 555 if (vp->v_numoutput < 0) 556 panic("vwakeup: neg numoutput"); 557 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 558 vp->v_flag &= ~VBWAIT; 559 wakeup((caddr_t) &vp->v_numoutput); 560 } 561 } 562} 563 564/* 565 * Flush out and invalidate all buffers associated with a vnode. 566 * Called with the underlying object locked. 567 */ 568int 569vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 570 register struct vnode *vp; 571 int flags; 572 struct ucred *cred; 573 struct proc *p; 574 int slpflag, slptimeo; 575{ 576 register struct buf *bp; 577 struct buf *nbp, *blist; 578 int s, error; 579 vm_object_t object; 580 581 if (flags & V_SAVE) { 582 s = splbio(); 583 while (vp->v_numoutput) { 584 vp->v_flag |= VBWAIT; 585 error = tsleep((caddr_t)&vp->v_numoutput, 586 slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); 587 if (error) { 588 splx(s); 589 return (error); 590 } 591 } 592 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 593 splx(s); 594 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 595 return (error); 596 s = splbio(); 597 if (vp->v_numoutput > 0 || 598 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 599 panic("vinvalbuf: dirty bufs"); 600 } 601 splx(s); 602 } 603 s = splbio(); 604 for (;;) { 605 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 606 if (!blist) 607 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 608 if (!blist) 609 break; 610 611 for (bp = blist; bp; bp = nbp) { 612 nbp = TAILQ_NEXT(bp, b_vnbufs); 613 if (bp->b_flags & B_BUSY) { 614 bp->b_flags |= B_WANTED; 615 error = tsleep((caddr_t) bp, 616 slpflag | (PRIBIO + 4), "vinvalbuf", 617 slptimeo); 618 if (error) { 619 splx(s); 620 return (error); 621 } 622 break; 623 } 624 /* 625 * XXX Since there are no node locks for NFS, I 626 * believe there is a slight chance that a delayed 627 * write will occur while sleeping just above, so 628 * check for it. 
Note that vfs_bio_awrite expects 629 * buffers to reside on a queue, while VOP_BWRITE and 630 * brelse do not. 631 */ 632 if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && 633 (flags & V_SAVE)) { 634 635 if (bp->b_vp == vp) { 636 if (bp->b_flags & B_CLUSTEROK) { 637 vfs_bio_awrite(bp); 638 } else { 639 bremfree(bp); 640 bp->b_flags |= (B_BUSY | B_ASYNC); 641 VOP_BWRITE(bp); 642 } 643 } else { 644 bremfree(bp); 645 bp->b_flags |= B_BUSY; 646 (void) VOP_BWRITE(bp); 647 } 648 break; 649 } 650 bremfree(bp); 651 bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY); 652 bp->b_flags &= ~B_ASYNC; 653 brelse(bp); 654 } 655 } 656 657 while (vp->v_numoutput > 0) { 658 vp->v_flag |= VBWAIT; 659 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 660 } 661 662 splx(s); 663 664 /* 665 * Destroy the copy in the VM cache, too. 666 */ 667 simple_lock(&vp->v_interlock); 668 object = vp->v_object; 669 if (object != NULL) { 670 vm_object_page_remove(object, 0, 0, 671 (flags & V_SAVE) ? TRUE : FALSE); 672 } 673 simple_unlock(&vp->v_interlock); 674 675 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) 676 panic("vinvalbuf: flush failed"); 677 return (0); 678} 679 680/* 681 * Truncate a file's buffer and pages to a specified length. This 682 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 683 * sync activity. 684 */ 685int 686vtruncbuf(vp, cred, p, length, blksize) 687 register struct vnode *vp; 688 struct ucred *cred; 689 struct proc *p; 690 off_t length; 691 int blksize; 692{ 693 register struct buf *bp; 694 struct buf *nbp; 695 int s, anyfreed; 696 int trunclbn; 697 698 /* 699 * Round up to the *next* lbn. 
700 */ 701 trunclbn = (length + blksize - 1) / blksize; 702 703 s = splbio(); 704restart: 705 anyfreed = 1; 706 for (;anyfreed;) { 707 anyfreed = 0; 708 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 709 nbp = TAILQ_NEXT(bp, b_vnbufs); 710 if (bp->b_lblkno >= trunclbn) { 711 if (bp->b_flags & B_BUSY) { 712 bp->b_flags |= B_WANTED; 713 tsleep(bp, PRIBIO + 4, "vtrb1", 0); 714 goto restart; 715 } else { 716 bremfree(bp); 717 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 718 bp->b_flags &= ~B_ASYNC; 719 brelse(bp); 720 anyfreed = 1; 721 } 722 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| 723 (nbp->b_vp != vp) || 724 (nbp->b_flags & B_DELWRI))) { 725 goto restart; 726 } 727 } 728 } 729 730 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 731 nbp = TAILQ_NEXT(bp, b_vnbufs); 732 if (bp->b_lblkno >= trunclbn) { 733 if (bp->b_flags & B_BUSY) { 734 bp->b_flags |= B_WANTED; 735 tsleep(bp, PRIBIO + 4, "vtrb2", 0); 736 goto restart; 737 } else { 738 bremfree(bp); 739 bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF); 740 bp->b_flags &= ~B_ASYNC; 741 brelse(bp); 742 anyfreed = 1; 743 } 744 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| 745 (nbp->b_vp != vp) || 746 (nbp->b_flags & B_DELWRI) == 0)) { 747 goto restart; 748 } 749 } 750 } 751 } 752 753 if (length > 0) { 754restartsync: 755 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 756 nbp = TAILQ_NEXT(bp, b_vnbufs); 757 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 758 if (bp->b_flags & B_BUSY) { 759 bp->b_flags |= B_WANTED; 760 tsleep(bp, PRIBIO, "vtrb3", 0); 761 } else { 762 bremfree(bp); 763 bp->b_flags |= B_BUSY; 764 if (bp->b_vp == vp) { 765 bp->b_flags |= B_ASYNC; 766 } else { 767 bp->b_flags &= ~B_ASYNC; 768 } 769 VOP_BWRITE(bp); 770 } 771 goto restartsync; 772 } 773 774 } 775 } 776 777 while (vp->v_numoutput > 0) { 778 vp->v_flag |= VBWAIT; 779 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 780 } 781 782 splx(s); 783 784 vnode_pager_setsize(vp, length); 785 786 return (0); 787} 788 
789/* 790 * Associate a buffer with a vnode. 791 */ 792void 793bgetvp(vp, bp) 794 register struct vnode *vp; 795 register struct buf *bp; 796{ 797 int s; 798
|
799#if defined(DIAGNOSTIC) 800 if (bp->b_vp) 801 panic("bgetvp: not free"); 802#endif
| 799 KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
|
803 vhold(vp); 804 bp->b_vp = vp; 805 if (vp->v_type == VBLK || vp->v_type == VCHR) 806 bp->b_dev = vp->v_rdev; 807 else 808 bp->b_dev = NODEV; 809 /* 810 * Insert onto list for new vnode. 811 */ 812 s = splbio(); 813 bp->b_xflags |= B_VNCLEAN; 814 bp->b_xflags &= ~B_VNDIRTY; 815 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 816 splx(s); 817} 818 819/* 820 * Disassociate a buffer from a vnode. 821 */ 822void 823brelvp(bp) 824 register struct buf *bp; 825{ 826 struct vnode *vp; 827 struct buflists *listheadp; 828 int s; 829
| 800 vhold(vp); 801 bp->b_vp = vp; 802 if (vp->v_type == VBLK || vp->v_type == VCHR) 803 bp->b_dev = vp->v_rdev; 804 else 805 bp->b_dev = NODEV; 806 /* 807 * Insert onto list for new vnode. 808 */ 809 s = splbio(); 810 bp->b_xflags |= B_VNCLEAN; 811 bp->b_xflags &= ~B_VNDIRTY; 812 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 813 splx(s); 814} 815 816/* 817 * Disassociate a buffer from a vnode. 818 */ 819void 820brelvp(bp) 821 register struct buf *bp; 822{ 823 struct vnode *vp; 824 struct buflists *listheadp; 825 int s; 826
|
830#if defined(DIAGNOSTIC) 831 if (bp->b_vp == (struct vnode *) 0) 832 panic("brelvp: NULL"); 833#endif
| 827 KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
|
834 835 /* 836 * Delete from old vnode list, if on one. 837 */ 838 vp = bp->b_vp; 839 s = splbio(); 840 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 841 if (bp->b_xflags & B_VNDIRTY) 842 listheadp = &vp->v_dirtyblkhd; 843 else 844 listheadp = &vp->v_cleanblkhd; 845 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 846 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 847 } 848 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 849 vp->v_flag &= ~VONWORKLST; 850 LIST_REMOVE(vp, v_synclist); 851 } 852 splx(s); 853 bp->b_vp = (struct vnode *) 0; 854 vdrop(vp); 855} 856 857/* 858 * The workitem queue. 859 * 860 * It is useful to delay writes of file data and filesystem metadata 861 * for tens of seconds so that quickly created and deleted files need 862 * not waste disk bandwidth being created and removed. To realize this, 863 * we append vnodes to a "workitem" queue. When running with a soft 864 * updates implementation, most pending metadata dependencies should 865 * not wait for more than a few seconds. Thus, mounted on block devices 866 * are delayed only about a half the time that file data is delayed. 867 * Similarly, directory updates are more critical, so are only delayed 868 * about a third the time that file data is delayed. Thus, there are 869 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 870 * one each second (driven off the filesystem syner process). The 871 * syncer_delayno variable indicates the next queue that is to be processed. 872 * Items that need to be processed soon are placed in this queue: 873 * 874 * syncer_workitem_pending[syncer_delayno] 875 * 876 * A delay of fifteen seconds is done by placing the request fifteen 877 * entries later in the queue: 878 * 879 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 880 * 881 */ 882 883/* 884 * Add an item to the syncer work queue. 
885 */ 886void 887vn_syncer_add_to_worklist(vp, delay) 888 struct vnode *vp; 889 int delay; 890{ 891 int s, slot; 892 893 s = splbio(); 894 895 if (vp->v_flag & VONWORKLST) { 896 LIST_REMOVE(vp, v_synclist); 897 } 898 899 if (delay > syncer_maxdelay - 2) 900 delay = syncer_maxdelay - 2; 901 slot = (syncer_delayno + delay) & syncer_mask; 902 903 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 904 vp->v_flag |= VONWORKLST; 905 splx(s); 906} 907 908static void sched_sync __P((void)); 909static struct proc *updateproc; 910static struct kproc_desc up_kp = { 911 "syncer", 912 sched_sync, 913 &updateproc 914}; 915SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 916 917/* 918 * System filesystem synchronizer daemon. 919 */ 920void 921sched_sync(void) 922{ 923 struct synclist *slp; 924 struct vnode *vp; 925 long starttime; 926 int s; 927 struct proc *p = updateproc; 928 929 for (;;) { 930 starttime = time_second; 931 932 /* 933 * Push files whose dirty time has expired. 934 */ 935 s = splbio(); 936 slp = &syncer_workitem_pending[syncer_delayno]; 937 syncer_delayno += 1; 938 if (syncer_delayno == syncer_maxdelay) 939 syncer_delayno = 0; 940 splx(s); 941 942 while ((vp = LIST_FIRST(slp)) != NULL) { 943 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 944 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 945 VOP_UNLOCK(vp, 0, p); 946 if (LIST_FIRST(slp) == vp) { 947 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 948 vp->v_type != VBLK) 949 panic("sched_sync: fsync failed"); 950 /* 951 * Move ourselves to the back of the sync list. 952 */ 953 LIST_REMOVE(vp, v_synclist); 954 vn_syncer_add_to_worklist(vp, syncdelay); 955 } 956 } 957 958 /* 959 * Do soft update processing. 960 */ 961 if (bioops.io_sync) 962 (*bioops.io_sync)(NULL); 963 964 /* 965 * The variable rushjob allows the kernel to speed up the 966 * processing of the filesystem syncer process. 
A rushjob 967 * value of N tells the filesystem syncer to process the next 968 * N seconds worth of work on its queue ASAP. Currently rushjob 969 * is used by the soft update code to speed up the filesystem 970 * syncer process when the incore state is getting so far 971 * ahead of the disk that the kernel memory pool is being 972 * threatened with exhaustion. 973 */ 974 if (rushjob > 0) { 975 rushjob -= 1; 976 continue; 977 } 978 /* 979 * If it has taken us less than a second to process the 980 * current work, then wait. Otherwise start right over 981 * again. We can still lose time if any single round 982 * takes more than two seconds, but it does not really 983 * matter as we are just trying to generally pace the 984 * filesystem activity. 985 */ 986 if (time_second == starttime) 987 tsleep(&lbolt, PPAUSE, "syncer", 0); 988 } 989} 990 991/* 992 * Associate a p-buffer with a vnode. 993 */ 994void 995pbgetvp(vp, bp) 996 register struct vnode *vp; 997 register struct buf *bp; 998{
| 828 829 /* 830 * Delete from old vnode list, if on one. 831 */ 832 vp = bp->b_vp; 833 s = splbio(); 834 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 835 if (bp->b_xflags & B_VNDIRTY) 836 listheadp = &vp->v_dirtyblkhd; 837 else 838 listheadp = &vp->v_cleanblkhd; 839 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 840 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 841 } 842 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 843 vp->v_flag &= ~VONWORKLST; 844 LIST_REMOVE(vp, v_synclist); 845 } 846 splx(s); 847 bp->b_vp = (struct vnode *) 0; 848 vdrop(vp); 849} 850 851/* 852 * The workitem queue. 853 * 854 * It is useful to delay writes of file data and filesystem metadata 855 * for tens of seconds so that quickly created and deleted files need 856 * not waste disk bandwidth being created and removed. To realize this, 857 * we append vnodes to a "workitem" queue. When running with a soft 858 * updates implementation, most pending metadata dependencies should 859 * not wait for more than a few seconds. Thus, mounted on block devices 860 * are delayed only about a half the time that file data is delayed. 861 * Similarly, directory updates are more critical, so are only delayed 862 * about a third the time that file data is delayed. Thus, there are 863 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 864 * one each second (driven off the filesystem syner process). The 865 * syncer_delayno variable indicates the next queue that is to be processed. 866 * Items that need to be processed soon are placed in this queue: 867 * 868 * syncer_workitem_pending[syncer_delayno] 869 * 870 * A delay of fifteen seconds is done by placing the request fifteen 871 * entries later in the queue: 872 * 873 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 874 * 875 */ 876 877/* 878 * Add an item to the syncer work queue. 
879 */ 880void 881vn_syncer_add_to_worklist(vp, delay) 882 struct vnode *vp; 883 int delay; 884{ 885 int s, slot; 886 887 s = splbio(); 888 889 if (vp->v_flag & VONWORKLST) { 890 LIST_REMOVE(vp, v_synclist); 891 } 892 893 if (delay > syncer_maxdelay - 2) 894 delay = syncer_maxdelay - 2; 895 slot = (syncer_delayno + delay) & syncer_mask; 896 897 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 898 vp->v_flag |= VONWORKLST; 899 splx(s); 900} 901 902static void sched_sync __P((void)); 903static struct proc *updateproc; 904static struct kproc_desc up_kp = { 905 "syncer", 906 sched_sync, 907 &updateproc 908}; 909SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 910 911/* 912 * System filesystem synchronizer daemon. 913 */ 914void 915sched_sync(void) 916{ 917 struct synclist *slp; 918 struct vnode *vp; 919 long starttime; 920 int s; 921 struct proc *p = updateproc; 922 923 for (;;) { 924 starttime = time_second; 925 926 /* 927 * Push files whose dirty time has expired. 928 */ 929 s = splbio(); 930 slp = &syncer_workitem_pending[syncer_delayno]; 931 syncer_delayno += 1; 932 if (syncer_delayno == syncer_maxdelay) 933 syncer_delayno = 0; 934 splx(s); 935 936 while ((vp = LIST_FIRST(slp)) != NULL) { 937 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 938 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 939 VOP_UNLOCK(vp, 0, p); 940 if (LIST_FIRST(slp) == vp) { 941 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 942 vp->v_type != VBLK) 943 panic("sched_sync: fsync failed"); 944 /* 945 * Move ourselves to the back of the sync list. 946 */ 947 LIST_REMOVE(vp, v_synclist); 948 vn_syncer_add_to_worklist(vp, syncdelay); 949 } 950 } 951 952 /* 953 * Do soft update processing. 954 */ 955 if (bioops.io_sync) 956 (*bioops.io_sync)(NULL); 957 958 /* 959 * The variable rushjob allows the kernel to speed up the 960 * processing of the filesystem syncer process. 
A rushjob 961 * value of N tells the filesystem syncer to process the next 962 * N seconds worth of work on its queue ASAP. Currently rushjob 963 * is used by the soft update code to speed up the filesystem 964 * syncer process when the incore state is getting so far 965 * ahead of the disk that the kernel memory pool is being 966 * threatened with exhaustion. 967 */ 968 if (rushjob > 0) { 969 rushjob -= 1; 970 continue; 971 } 972 /* 973 * If it has taken us less than a second to process the 974 * current work, then wait. Otherwise start right over 975 * again. We can still lose time if any single round 976 * takes more than two seconds, but it does not really 977 * matter as we are just trying to generally pace the 978 * filesystem activity. 979 */ 980 if (time_second == starttime) 981 tsleep(&lbolt, PPAUSE, "syncer", 0); 982 } 983} 984 985/* 986 * Associate a p-buffer with a vnode. 987 */ 988void 989pbgetvp(vp, bp) 990 register struct vnode *vp; 991 register struct buf *bp; 992{
|
999#if defined(DIAGNOSTIC) 1000 if (bp->b_vp) 1001 panic("pbgetvp: not free"); 1002#endif
| 993 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); 994
|
1003 bp->b_vp = vp; 1004 if (vp->v_type == VBLK || vp->v_type == VCHR) 1005 bp->b_dev = vp->v_rdev; 1006 else 1007 bp->b_dev = NODEV; 1008} 1009 1010/* 1011 * Disassociate a p-buffer from a vnode. 1012 */ 1013void 1014pbrelvp(bp) 1015 register struct buf *bp; 1016{ 1017
| 995 bp->b_vp = vp; 996 if (vp->v_type == VBLK || vp->v_type == VCHR) 997 bp->b_dev = vp->v_rdev; 998 else 999 bp->b_dev = NODEV; 1000} 1001 1002/* 1003 * Disassociate a p-buffer from a vnode. 1004 */ 1005void 1006pbrelvp(bp) 1007 register struct buf *bp; 1008{ 1009
|
1018#if defined(DIAGNOSTIC) 1019 if (bp->b_vp == (struct vnode *) 0) 1020 panic("pbrelvp: NULL"); 1021#endif
| 1010 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
|
1022 1023 bp->b_vp = (struct vnode *) 0; 1024} 1025 1026/* 1027 * Reassign a buffer from one vnode to another. 1028 * Used to assign file specific control information 1029 * (indirect blocks) to the vnode to which they belong. 1030 */ 1031void 1032reassignbuf(bp, newvp) 1033 register struct buf *bp; 1034 register struct vnode *newvp; 1035{ 1036 struct buflists *listheadp; 1037 struct vnode *oldvp; 1038 int delay; 1039 int s; 1040 1041 if (newvp == NULL) { 1042 printf("reassignbuf: NULL"); 1043 return; 1044 } 1045 1046 s = splbio(); 1047 /* 1048 * Delete from old vnode list, if on one. 1049 */ 1050 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 1051 oldvp = bp->b_vp; 1052 if (bp->b_xflags & B_VNDIRTY) 1053 listheadp = &oldvp->v_dirtyblkhd; 1054 else 1055 listheadp = &oldvp->v_cleanblkhd; 1056 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1057 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 1058 vdrop(oldvp); 1059 } 1060 /* 1061 * If dirty, put on list of dirty buffers; otherwise insert onto list 1062 * of clean buffers. 
1063 */ 1064 if (bp->b_flags & B_DELWRI) { 1065 struct buf *tbp; 1066 1067 listheadp = &newvp->v_dirtyblkhd; 1068 if ((newvp->v_flag & VONWORKLST) == 0) { 1069 switch (newvp->v_type) { 1070 case VDIR: 1071 delay = syncdelay / 3; 1072 break; 1073 case VBLK: 1074 if (newvp->v_specmountpoint != NULL) { 1075 delay = syncdelay / 2; 1076 break; 1077 } 1078 /* fall through */ 1079 default: 1080 delay = syncdelay; 1081 } 1082 vn_syncer_add_to_worklist(newvp, delay); 1083 } 1084 bp->b_xflags |= B_VNDIRTY; 1085 tbp = TAILQ_FIRST(listheadp); 1086 if (tbp == NULL || 1087 (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) { 1088 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1089 } else { 1090 if (bp->b_lblkno >= 0) { 1091 struct buf *ttbp; 1092 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1093 (ttbp->b_lblkno < bp->b_lblkno)) { 1094 tbp = ttbp; 1095 } 1096 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1097 } else { 1098 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1099 } 1100 } 1101 } else { 1102 bp->b_xflags |= B_VNCLEAN; 1103 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1104 if ((newvp->v_flag & VONWORKLST) && 1105 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1106 newvp->v_flag &= ~VONWORKLST; 1107 LIST_REMOVE(newvp, v_synclist); 1108 } 1109 } 1110 bp->b_vp = newvp; 1111 vhold(bp->b_vp); 1112 splx(s); 1113} 1114 1115/* 1116 * Create a vnode for a block device. 1117 * Used for mounting the root file system. 1118 */ 1119int 1120bdevvp(dev, vpp) 1121 dev_t dev; 1122 struct vnode **vpp; 1123{ 1124 register struct vnode *vp; 1125 struct vnode *nvp; 1126 int error; 1127 1128 /* XXX 255 is for mfs. 
*/ 1129 if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev || 1130 bdevsw[major(dev)] == NULL))) { 1131 *vpp = NULLVP; 1132 return (ENXIO); 1133 } 1134 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 1135 if (error) { 1136 *vpp = NULLVP; 1137 return (error); 1138 } 1139 vp = nvp; 1140 vp->v_type = VBLK; 1141 if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) { 1142 vput(vp); 1143 vp = nvp; 1144 } 1145 *vpp = vp; 1146 return (0); 1147} 1148 1149/* 1150 * Check to see if the new vnode represents a special device 1151 * for which we already have a vnode (either because of 1152 * bdevvp() or because of a different vnode representing 1153 * the same block device). If such an alias exists, deallocate 1154 * the existing contents and return the aliased vnode. The 1155 * caller is responsible for filling it with its new contents. 1156 */ 1157struct vnode * 1158checkalias(nvp, nvp_rdev, mp) 1159 register struct vnode *nvp; 1160 dev_t nvp_rdev; 1161 struct mount *mp; 1162{ 1163 struct proc *p = curproc; /* XXX */ 1164 struct vnode *vp; 1165 struct vnode **vpp; 1166 1167 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1168 return (NULLVP); 1169 1170 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1171loop: 1172 simple_lock(&spechash_slock); 1173 for (vp = *vpp; vp; vp = vp->v_specnext) { 1174 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1175 continue; 1176 /* 1177 * Alias, but not in use, so flush it out. 1178 * Only alias active device nodes. 1179 * Not sure why we don't re-use this like we do below. 1180 */ 1181 simple_lock(&vp->v_interlock); 1182 if (vp->v_usecount == 0) { 1183 simple_unlock(&spechash_slock); 1184 vgonel(vp, p); 1185 goto loop; 1186 } 1187 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 1188 /* 1189 * It dissappeared, and we may have slept. 
1190 * Restart from the beginning 1191 */ 1192 simple_unlock(&spechash_slock); 1193 goto loop; 1194 } 1195 break; 1196 } 1197 /* 1198 * It would be a lot clearer what is going on here if 1199 * this had been expressed as: 1200 * if ( vp && (vp->v_tag == VT_NULL)) 1201 * and the clauses had been swapped. 1202 */ 1203 if (vp == NULL || vp->v_tag != VT_NON) { 1204 /* 1205 * Put the new vnode into the hash chain. 1206 * and if there was an alias, connect them. 1207 */ 1208 MALLOC(nvp->v_specinfo, struct specinfo *, 1209 sizeof(struct specinfo), M_VNODE, M_WAITOK); 1210 nvp->v_rdev = nvp_rdev; 1211 nvp->v_hashchain = vpp; 1212 nvp->v_specnext = *vpp; 1213 nvp->v_specmountpoint = NULL; 1214 simple_unlock(&spechash_slock); 1215 *vpp = nvp; 1216 if (vp != NULLVP) { 1217 nvp->v_flag |= VALIASED; 1218 vp->v_flag |= VALIASED; 1219 vput(vp); 1220 } 1221 return (NULLVP); 1222 } 1223 /* 1224 * if ( vp && (vp->v_tag == VT_NULL)) 1225 * We have a vnode alias, but it is a trashed. 1226 * Make it look like it's newley allocated. (by getnewvnode()) 1227 * The caller should use this instead. 1228 */ 1229 simple_unlock(&spechash_slock); 1230 VOP_UNLOCK(vp, 0, p); 1231 simple_lock(&vp->v_interlock); 1232 vclean(vp, 0, p); 1233 vp->v_op = nvp->v_op; 1234 vp->v_tag = nvp->v_tag; 1235 nvp->v_type = VNON; 1236 insmntque(vp, mp); 1237 return (vp); 1238} 1239 1240/* 1241 * Grab a particular vnode from the free list, increment its 1242 * reference count and lock it. The vnode lock bit is set the 1243 * vnode is being eliminated in vgone. The process is awakened 1244 * when the transition is completed, and an error returned to 1245 * indicate that the vnode is no longer usable (possibly having 1246 * been changed to a new file system type). 
1247 */ 1248int 1249vget(vp, flags, p) 1250 register struct vnode *vp; 1251 int flags; 1252 struct proc *p; 1253{ 1254 int error; 1255 1256 /* 1257 * If the vnode is in the process of being cleaned out for 1258 * another use, we wait for the cleaning to finish and then 1259 * return failure. Cleaning is determined by checking that 1260 * the VXLOCK flag is set. 1261 */ 1262 if ((flags & LK_INTERLOCK) == 0) { 1263 simple_lock(&vp->v_interlock); 1264 } 1265 if (vp->v_flag & VXLOCK) { 1266 vp->v_flag |= VXWANT; 1267 simple_unlock(&vp->v_interlock); 1268 tsleep((caddr_t)vp, PINOD, "vget", 0); 1269 return (ENOENT); 1270 } 1271 1272 vp->v_usecount++; 1273 1274 if (VSHOULDBUSY(vp)) 1275 vbusy(vp); 1276 if (flags & LK_TYPE_MASK) { 1277 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 1278 /* 1279 * must expand vrele here because we do not want 1280 * to call VOP_INACTIVE if the reference count 1281 * drops back to zero since it was never really 1282 * active. We must remove it from the free list 1283 * before sleeping so that multiple processes do 1284 * not try to recycle it. 1285 */ 1286 simple_lock(&vp->v_interlock); 1287 vp->v_usecount--; 1288 if (VSHOULDFREE(vp)) 1289 vfree(vp); 1290 simple_unlock(&vp->v_interlock); 1291 } 1292 return (error); 1293 } 1294 simple_unlock(&vp->v_interlock); 1295 return (0); 1296} 1297 1298void 1299vref(struct vnode *vp) 1300{ 1301 simple_lock(&vp->v_interlock); 1302 vp->v_usecount++; 1303 simple_unlock(&vp->v_interlock); 1304} 1305 1306/* 1307 * Vnode put/release. 1308 * If count drops to zero, call inactive routine and return to freelist. 1309 */ 1310void 1311vrele(vp) 1312 struct vnode *vp; 1313{ 1314 struct proc *p = curproc; /* XXX */ 1315
| 1011 1012 bp->b_vp = (struct vnode *) 0; 1013} 1014 1015/* 1016 * Reassign a buffer from one vnode to another. 1017 * Used to assign file specific control information 1018 * (indirect blocks) to the vnode to which they belong. 1019 */ 1020void 1021reassignbuf(bp, newvp) 1022 register struct buf *bp; 1023 register struct vnode *newvp; 1024{ 1025 struct buflists *listheadp; 1026 struct vnode *oldvp; 1027 int delay; 1028 int s; 1029 1030 if (newvp == NULL) { 1031 printf("reassignbuf: NULL"); 1032 return; 1033 } 1034 1035 s = splbio(); 1036 /* 1037 * Delete from old vnode list, if on one. 1038 */ 1039 if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { 1040 oldvp = bp->b_vp; 1041 if (bp->b_xflags & B_VNDIRTY) 1042 listheadp = &oldvp->v_dirtyblkhd; 1043 else 1044 listheadp = &oldvp->v_cleanblkhd; 1045 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1046 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); 1047 vdrop(oldvp); 1048 } 1049 /* 1050 * If dirty, put on list of dirty buffers; otherwise insert onto list 1051 * of clean buffers. 
1052 */ 1053 if (bp->b_flags & B_DELWRI) { 1054 struct buf *tbp; 1055 1056 listheadp = &newvp->v_dirtyblkhd; 1057 if ((newvp->v_flag & VONWORKLST) == 0) { 1058 switch (newvp->v_type) { 1059 case VDIR: 1060 delay = syncdelay / 3; 1061 break; 1062 case VBLK: 1063 if (newvp->v_specmountpoint != NULL) { 1064 delay = syncdelay / 2; 1065 break; 1066 } 1067 /* fall through */ 1068 default: 1069 delay = syncdelay; 1070 } 1071 vn_syncer_add_to_worklist(newvp, delay); 1072 } 1073 bp->b_xflags |= B_VNDIRTY; 1074 tbp = TAILQ_FIRST(listheadp); 1075 if (tbp == NULL || 1076 (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) { 1077 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1078 } else { 1079 if (bp->b_lblkno >= 0) { 1080 struct buf *ttbp; 1081 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1082 (ttbp->b_lblkno < bp->b_lblkno)) { 1083 tbp = ttbp; 1084 } 1085 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1086 } else { 1087 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1088 } 1089 } 1090 } else { 1091 bp->b_xflags |= B_VNCLEAN; 1092 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1093 if ((newvp->v_flag & VONWORKLST) && 1094 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1095 newvp->v_flag &= ~VONWORKLST; 1096 LIST_REMOVE(newvp, v_synclist); 1097 } 1098 } 1099 bp->b_vp = newvp; 1100 vhold(bp->b_vp); 1101 splx(s); 1102} 1103 1104/* 1105 * Create a vnode for a block device. 1106 * Used for mounting the root file system. 1107 */ 1108int 1109bdevvp(dev, vpp) 1110 dev_t dev; 1111 struct vnode **vpp; 1112{ 1113 register struct vnode *vp; 1114 struct vnode *nvp; 1115 int error; 1116 1117 /* XXX 255 is for mfs. 
*/ 1118 if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev || 1119 bdevsw[major(dev)] == NULL))) { 1120 *vpp = NULLVP; 1121 return (ENXIO); 1122 } 1123 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 1124 if (error) { 1125 *vpp = NULLVP; 1126 return (error); 1127 } 1128 vp = nvp; 1129 vp->v_type = VBLK; 1130 if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) { 1131 vput(vp); 1132 vp = nvp; 1133 } 1134 *vpp = vp; 1135 return (0); 1136} 1137 1138/* 1139 * Check to see if the new vnode represents a special device 1140 * for which we already have a vnode (either because of 1141 * bdevvp() or because of a different vnode representing 1142 * the same block device). If such an alias exists, deallocate 1143 * the existing contents and return the aliased vnode. The 1144 * caller is responsible for filling it with its new contents. 1145 */ 1146struct vnode * 1147checkalias(nvp, nvp_rdev, mp) 1148 register struct vnode *nvp; 1149 dev_t nvp_rdev; 1150 struct mount *mp; 1151{ 1152 struct proc *p = curproc; /* XXX */ 1153 struct vnode *vp; 1154 struct vnode **vpp; 1155 1156 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1157 return (NULLVP); 1158 1159 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1160loop: 1161 simple_lock(&spechash_slock); 1162 for (vp = *vpp; vp; vp = vp->v_specnext) { 1163 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1164 continue; 1165 /* 1166 * Alias, but not in use, so flush it out. 1167 * Only alias active device nodes. 1168 * Not sure why we don't re-use this like we do below. 1169 */ 1170 simple_lock(&vp->v_interlock); 1171 if (vp->v_usecount == 0) { 1172 simple_unlock(&spechash_slock); 1173 vgonel(vp, p); 1174 goto loop; 1175 } 1176 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 1177 /* 1178 * It dissappeared, and we may have slept. 
1179 * Restart from the beginning 1180 */ 1181 simple_unlock(&spechash_slock); 1182 goto loop; 1183 } 1184 break; 1185 } 1186 /* 1187 * It would be a lot clearer what is going on here if 1188 * this had been expressed as: 1189 * if ( vp && (vp->v_tag == VT_NULL)) 1190 * and the clauses had been swapped. 1191 */ 1192 if (vp == NULL || vp->v_tag != VT_NON) { 1193 /* 1194 * Put the new vnode into the hash chain. 1195 * and if there was an alias, connect them. 1196 */ 1197 MALLOC(nvp->v_specinfo, struct specinfo *, 1198 sizeof(struct specinfo), M_VNODE, M_WAITOK); 1199 nvp->v_rdev = nvp_rdev; 1200 nvp->v_hashchain = vpp; 1201 nvp->v_specnext = *vpp; 1202 nvp->v_specmountpoint = NULL; 1203 simple_unlock(&spechash_slock); 1204 *vpp = nvp; 1205 if (vp != NULLVP) { 1206 nvp->v_flag |= VALIASED; 1207 vp->v_flag |= VALIASED; 1208 vput(vp); 1209 } 1210 return (NULLVP); 1211 } 1212 /* 1213 * if ( vp && (vp->v_tag == VT_NULL)) 1214 * We have a vnode alias, but it is a trashed. 1215 * Make it look like it's newley allocated. (by getnewvnode()) 1216 * The caller should use this instead. 1217 */ 1218 simple_unlock(&spechash_slock); 1219 VOP_UNLOCK(vp, 0, p); 1220 simple_lock(&vp->v_interlock); 1221 vclean(vp, 0, p); 1222 vp->v_op = nvp->v_op; 1223 vp->v_tag = nvp->v_tag; 1224 nvp->v_type = VNON; 1225 insmntque(vp, mp); 1226 return (vp); 1227} 1228 1229/* 1230 * Grab a particular vnode from the free list, increment its 1231 * reference count and lock it. The vnode lock bit is set the 1232 * vnode is being eliminated in vgone. The process is awakened 1233 * when the transition is completed, and an error returned to 1234 * indicate that the vnode is no longer usable (possibly having 1235 * been changed to a new file system type). 
1236 */ 1237int 1238vget(vp, flags, p) 1239 register struct vnode *vp; 1240 int flags; 1241 struct proc *p; 1242{ 1243 int error; 1244 1245 /* 1246 * If the vnode is in the process of being cleaned out for 1247 * another use, we wait for the cleaning to finish and then 1248 * return failure. Cleaning is determined by checking that 1249 * the VXLOCK flag is set. 1250 */ 1251 if ((flags & LK_INTERLOCK) == 0) { 1252 simple_lock(&vp->v_interlock); 1253 } 1254 if (vp->v_flag & VXLOCK) { 1255 vp->v_flag |= VXWANT; 1256 simple_unlock(&vp->v_interlock); 1257 tsleep((caddr_t)vp, PINOD, "vget", 0); 1258 return (ENOENT); 1259 } 1260 1261 vp->v_usecount++; 1262 1263 if (VSHOULDBUSY(vp)) 1264 vbusy(vp); 1265 if (flags & LK_TYPE_MASK) { 1266 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { 1267 /* 1268 * must expand vrele here because we do not want 1269 * to call VOP_INACTIVE if the reference count 1270 * drops back to zero since it was never really 1271 * active. We must remove it from the free list 1272 * before sleeping so that multiple processes do 1273 * not try to recycle it. 1274 */ 1275 simple_lock(&vp->v_interlock); 1276 vp->v_usecount--; 1277 if (VSHOULDFREE(vp)) 1278 vfree(vp); 1279 simple_unlock(&vp->v_interlock); 1280 } 1281 return (error); 1282 } 1283 simple_unlock(&vp->v_interlock); 1284 return (0); 1285} 1286 1287void 1288vref(struct vnode *vp) 1289{ 1290 simple_lock(&vp->v_interlock); 1291 vp->v_usecount++; 1292 simple_unlock(&vp->v_interlock); 1293} 1294 1295/* 1296 * Vnode put/release. 1297 * If count drops to zero, call inactive routine and return to freelist. 1298 */ 1299void 1300vrele(vp) 1301 struct vnode *vp; 1302{ 1303 struct proc *p = curproc; /* XXX */ 1304
|
1316#ifdef DIAGNOSTIC 1317 if (vp == NULL) 1318 panic("vrele: null vp"); 1319#endif
| 1305 KASSERT(vp, ("vrele: null vp")); 1306
|
1320 simple_lock(&vp->v_interlock); 1321 1322 if (vp->v_usecount > 1) { 1323 1324 vp->v_usecount--; 1325 simple_unlock(&vp->v_interlock); 1326 1327 return; 1328 } 1329 1330 if (vp->v_usecount == 1) { 1331 1332 vp->v_usecount--; 1333 if (VSHOULDFREE(vp)) 1334 vfree(vp); 1335 /* 1336 * If we are doing a vput, the node is already locked, and we must 1337 * call VOP_INACTIVE with the node locked. So, in the case of 1338 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1339 */ 1340 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1341 VOP_INACTIVE(vp, p); 1342 } 1343 1344 } else { 1345#ifdef DIAGNOSTIC 1346 vprint("vrele: negative ref count", vp); 1347 simple_unlock(&vp->v_interlock); 1348#endif 1349 panic("vrele: negative ref cnt"); 1350 } 1351} 1352 1353void 1354vput(vp) 1355 struct vnode *vp; 1356{ 1357 struct proc *p = curproc; /* XXX */ 1358
| 1307 simple_lock(&vp->v_interlock); 1308 1309 if (vp->v_usecount > 1) { 1310 1311 vp->v_usecount--; 1312 simple_unlock(&vp->v_interlock); 1313 1314 return; 1315 } 1316 1317 if (vp->v_usecount == 1) { 1318 1319 vp->v_usecount--; 1320 if (VSHOULDFREE(vp)) 1321 vfree(vp); 1322 /* 1323 * If we are doing a vput, the node is already locked, and we must 1324 * call VOP_INACTIVE with the node locked. So, in the case of 1325 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1326 */ 1327 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { 1328 VOP_INACTIVE(vp, p); 1329 } 1330 1331 } else { 1332#ifdef DIAGNOSTIC 1333 vprint("vrele: negative ref count", vp); 1334 simple_unlock(&vp->v_interlock); 1335#endif 1336 panic("vrele: negative ref cnt"); 1337 } 1338} 1339 1340void 1341vput(vp) 1342 struct vnode *vp; 1343{ 1344 struct proc *p = curproc; /* XXX */ 1345
|
1359#ifdef DIAGNOSTIC 1360 if (vp == NULL) 1361 panic("vput: null vp"); 1362#endif
| 1346 KASSERT(vp != NULL, ("vput: null vp"));
|
1363 1364 simple_lock(&vp->v_interlock); 1365 1366 if (vp->v_usecount > 1) { 1367 1368 vp->v_usecount--; 1369 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1370 return; 1371 1372 } 1373 1374 if (vp->v_usecount == 1) { 1375 1376 vp->v_usecount--; 1377 if (VSHOULDFREE(vp)) 1378 vfree(vp); 1379 /* 1380 * If we are doing a vput, the node is already locked, and we must 1381 * call VOP_INACTIVE with the node locked. So, in the case of 1382 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1383 */ 1384 simple_unlock(&vp->v_interlock); 1385 VOP_INACTIVE(vp, p); 1386 1387 } else { 1388#ifdef DIAGNOSTIC 1389 vprint("vput: negative ref count", vp); 1390#endif 1391 panic("vput: negative ref cnt"); 1392 } 1393} 1394 1395/* 1396 * Somebody doesn't want the vnode recycled. 1397 */ 1398void 1399vhold(vp) 1400 register struct vnode *vp; 1401{ 1402 int s; 1403 1404 s = splbio(); 1405 vp->v_holdcnt++; 1406 if (VSHOULDBUSY(vp)) 1407 vbusy(vp); 1408 splx(s); 1409} 1410 1411/* 1412 * One less who cares about this vnode. 1413 */ 1414void 1415vdrop(vp) 1416 register struct vnode *vp; 1417{ 1418 int s; 1419 1420 s = splbio(); 1421 if (vp->v_holdcnt <= 0) 1422 panic("vdrop: holdcnt"); 1423 vp->v_holdcnt--; 1424 if (VSHOULDFREE(vp)) 1425 vfree(vp); 1426 splx(s); 1427} 1428 1429/* 1430 * Remove any vnodes in the vnode table belonging to mount point mp. 1431 * 1432 * If MNT_NOFORCE is specified, there should not be any active ones, 1433 * return error if any are found (nb: this is a user error, not a 1434 * system error). If MNT_FORCE is specified, detach any active vnodes 1435 * that are found. 
1436 */ 1437#ifdef DIAGNOSTIC 1438static int busyprt = 0; /* print out busy vnodes */ 1439SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1440#endif 1441 1442int 1443vflush(mp, skipvp, flags) 1444 struct mount *mp; 1445 struct vnode *skipvp; 1446 int flags; 1447{ 1448 struct proc *p = curproc; /* XXX */ 1449 struct vnode *vp, *nvp; 1450 int busy = 0; 1451 1452 simple_lock(&mntvnode_slock); 1453loop: 1454 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1455 /* 1456 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1457 * Start over if it has (it won't be on the list anymore). 1458 */ 1459 if (vp->v_mount != mp) 1460 goto loop; 1461 nvp = vp->v_mntvnodes.le_next; 1462 /* 1463 * Skip over a selected vnode. 1464 */ 1465 if (vp == skipvp) 1466 continue; 1467 1468 simple_lock(&vp->v_interlock); 1469 /* 1470 * Skip over a vnodes marked VSYSTEM. 1471 */ 1472 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1473 simple_unlock(&vp->v_interlock); 1474 continue; 1475 } 1476 /* 1477 * If WRITECLOSE is set, only flush out regular file vnodes 1478 * open for writing. 1479 */ 1480 if ((flags & WRITECLOSE) && 1481 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1482 simple_unlock(&vp->v_interlock); 1483 continue; 1484 } 1485 1486 /* 1487 * With v_usecount == 0, all we need to do is clear out the 1488 * vnode data structures and we are done. 1489 */ 1490 if (vp->v_usecount == 0) { 1491 simple_unlock(&mntvnode_slock); 1492 vgonel(vp, p); 1493 simple_lock(&mntvnode_slock); 1494 continue; 1495 } 1496 1497 /* 1498 * If FORCECLOSE is set, forcibly close the vnode. For block 1499 * or character devices, revert to an anonymous device. For 1500 * all other files, just kill them. 
1501 */ 1502 if (flags & FORCECLOSE) { 1503 simple_unlock(&mntvnode_slock); 1504 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1505 vgonel(vp, p); 1506 } else { 1507 vclean(vp, 0, p); 1508 vp->v_op = spec_vnodeop_p; 1509 insmntque(vp, (struct mount *) 0); 1510 } 1511 simple_lock(&mntvnode_slock); 1512 continue; 1513 } 1514#ifdef DIAGNOSTIC 1515 if (busyprt) 1516 vprint("vflush: busy vnode", vp); 1517#endif 1518 simple_unlock(&vp->v_interlock); 1519 busy++; 1520 } 1521 simple_unlock(&mntvnode_slock); 1522 if (busy) 1523 return (EBUSY); 1524 return (0); 1525} 1526 1527/* 1528 * Disassociate the underlying file system from a vnode. 1529 */ 1530static void 1531vclean(vp, flags, p) 1532 struct vnode *vp; 1533 int flags; 1534 struct proc *p; 1535{ 1536 int active; 1537 vm_object_t obj; 1538 1539 /* 1540 * Check to see if the vnode is in use. If so we have to reference it 1541 * before we clean it out so that its count cannot fall to zero and 1542 * generate a race against ourselves to recycle it. 1543 */ 1544 if ((active = vp->v_usecount)) 1545 vp->v_usecount++; 1546 1547 /* 1548 * Prevent the vnode from being recycled or brought into use while we 1549 * clean it out. 1550 */ 1551 if (vp->v_flag & VXLOCK) 1552 panic("vclean: deadlock"); 1553 vp->v_flag |= VXLOCK; 1554 /* 1555 * Even if the count is zero, the VOP_INACTIVE routine may still 1556 * have the object locked while it cleans it out. The VOP_LOCK 1557 * ensures that the VOP_INACTIVE routine is done with its work. 1558 * For active vnodes, it ensures that no other activity can 1559 * occur while the underlying object is being cleaned out. 1560 */ 1561 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1562 1563 /* 1564 * Clean out any buffers associated with the vnode. 1565 */ 1566 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1567 if (obj = vp->v_object) { 1568 if (obj->ref_count == 0) { 1569 /* 1570 * This is a normal way of shutting down the object/vnode 1571 * association. 
1572 */ 1573 vm_object_terminate(obj); 1574 } else { 1575 /* 1576 * Woe to the process that tries to page now :-). 1577 */ 1578 vm_pager_deallocate(obj); 1579 } 1580 } 1581 1582 /* 1583 * If purging an active vnode, it must be closed and 1584 * deactivated before being reclaimed. Note that the 1585 * VOP_INACTIVE will unlock the vnode. 1586 */ 1587 if (active) { 1588 if (flags & DOCLOSE) 1589 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1590 VOP_INACTIVE(vp, p); 1591 } else { 1592 /* 1593 * Any other processes trying to obtain this lock must first 1594 * wait for VXLOCK to clear, then call the new lock operation. 1595 */ 1596 VOP_UNLOCK(vp, 0, p); 1597 } 1598 /* 1599 * Reclaim the vnode. 1600 */ 1601 if (VOP_RECLAIM(vp, p)) 1602 panic("vclean: cannot reclaim"); 1603 1604 if (active) 1605 vrele(vp); 1606 1607 cache_purge(vp); 1608 if (vp->v_vnlock) { 1609#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ 1610#ifdef DIAGNOSTIC 1611 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1612 vprint("vclean: lock not drained", vp); 1613#endif 1614#endif 1615 FREE(vp->v_vnlock, M_VNODE); 1616 vp->v_vnlock = NULL; 1617 } 1618 1619 if (VSHOULDFREE(vp)) 1620 vfree(vp); 1621 1622 /* 1623 * Done with purge, notify sleepers of the grim news. 1624 */ 1625 vp->v_op = dead_vnodeop_p; 1626 vn_pollgone(vp); 1627 vp->v_tag = VT_NON; 1628 vp->v_flag &= ~VXLOCK; 1629 if (vp->v_flag & VXWANT) { 1630 vp->v_flag &= ~VXWANT; 1631 wakeup((caddr_t) vp); 1632 } 1633} 1634 1635/* 1636 * Eliminate all activity associated with the requested vnode 1637 * and with all vnodes aliased to the requested vnode. 1638 */ 1639int 1640vop_revoke(ap) 1641 struct vop_revoke_args /* { 1642 struct vnode *a_vp; 1643 int a_flags; 1644 } */ *ap; 1645{ 1646 struct vnode *vp, *vq; 1647 struct proc *p = curproc; /* XXX */ 1648
| 1347 1348 simple_lock(&vp->v_interlock); 1349 1350 if (vp->v_usecount > 1) { 1351 1352 vp->v_usecount--; 1353 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1354 return; 1355 1356 } 1357 1358 if (vp->v_usecount == 1) { 1359 1360 vp->v_usecount--; 1361 if (VSHOULDFREE(vp)) 1362 vfree(vp); 1363 /* 1364 * If we are doing a vput, the node is already locked, and we must 1365 * call VOP_INACTIVE with the node locked. So, in the case of 1366 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1367 */ 1368 simple_unlock(&vp->v_interlock); 1369 VOP_INACTIVE(vp, p); 1370 1371 } else { 1372#ifdef DIAGNOSTIC 1373 vprint("vput: negative ref count", vp); 1374#endif 1375 panic("vput: negative ref cnt"); 1376 } 1377} 1378 1379/* 1380 * Somebody doesn't want the vnode recycled. 1381 */ 1382void 1383vhold(vp) 1384 register struct vnode *vp; 1385{ 1386 int s; 1387 1388 s = splbio(); 1389 vp->v_holdcnt++; 1390 if (VSHOULDBUSY(vp)) 1391 vbusy(vp); 1392 splx(s); 1393} 1394 1395/* 1396 * One less who cares about this vnode. 1397 */ 1398void 1399vdrop(vp) 1400 register struct vnode *vp; 1401{ 1402 int s; 1403 1404 s = splbio(); 1405 if (vp->v_holdcnt <= 0) 1406 panic("vdrop: holdcnt"); 1407 vp->v_holdcnt--; 1408 if (VSHOULDFREE(vp)) 1409 vfree(vp); 1410 splx(s); 1411} 1412 1413/* 1414 * Remove any vnodes in the vnode table belonging to mount point mp. 1415 * 1416 * If MNT_NOFORCE is specified, there should not be any active ones, 1417 * return error if any are found (nb: this is a user error, not a 1418 * system error). If MNT_FORCE is specified, detach any active vnodes 1419 * that are found. 
1420 */ 1421#ifdef DIAGNOSTIC 1422static int busyprt = 0; /* print out busy vnodes */ 1423SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1424#endif 1425 1426int 1427vflush(mp, skipvp, flags) 1428 struct mount *mp; 1429 struct vnode *skipvp; 1430 int flags; 1431{ 1432 struct proc *p = curproc; /* XXX */ 1433 struct vnode *vp, *nvp; 1434 int busy = 0; 1435 1436 simple_lock(&mntvnode_slock); 1437loop: 1438 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1439 /* 1440 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1441 * Start over if it has (it won't be on the list anymore). 1442 */ 1443 if (vp->v_mount != mp) 1444 goto loop; 1445 nvp = vp->v_mntvnodes.le_next; 1446 /* 1447 * Skip over a selected vnode. 1448 */ 1449 if (vp == skipvp) 1450 continue; 1451 1452 simple_lock(&vp->v_interlock); 1453 /* 1454 * Skip over a vnodes marked VSYSTEM. 1455 */ 1456 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1457 simple_unlock(&vp->v_interlock); 1458 continue; 1459 } 1460 /* 1461 * If WRITECLOSE is set, only flush out regular file vnodes 1462 * open for writing. 1463 */ 1464 if ((flags & WRITECLOSE) && 1465 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1466 simple_unlock(&vp->v_interlock); 1467 continue; 1468 } 1469 1470 /* 1471 * With v_usecount == 0, all we need to do is clear out the 1472 * vnode data structures and we are done. 1473 */ 1474 if (vp->v_usecount == 0) { 1475 simple_unlock(&mntvnode_slock); 1476 vgonel(vp, p); 1477 simple_lock(&mntvnode_slock); 1478 continue; 1479 } 1480 1481 /* 1482 * If FORCECLOSE is set, forcibly close the vnode. For block 1483 * or character devices, revert to an anonymous device. For 1484 * all other files, just kill them. 
1485 */ 1486 if (flags & FORCECLOSE) { 1487 simple_unlock(&mntvnode_slock); 1488 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1489 vgonel(vp, p); 1490 } else { 1491 vclean(vp, 0, p); 1492 vp->v_op = spec_vnodeop_p; 1493 insmntque(vp, (struct mount *) 0); 1494 } 1495 simple_lock(&mntvnode_slock); 1496 continue; 1497 } 1498#ifdef DIAGNOSTIC 1499 if (busyprt) 1500 vprint("vflush: busy vnode", vp); 1501#endif 1502 simple_unlock(&vp->v_interlock); 1503 busy++; 1504 } 1505 simple_unlock(&mntvnode_slock); 1506 if (busy) 1507 return (EBUSY); 1508 return (0); 1509} 1510 1511/* 1512 * Disassociate the underlying file system from a vnode. 1513 */ 1514static void 1515vclean(vp, flags, p) 1516 struct vnode *vp; 1517 int flags; 1518 struct proc *p; 1519{ 1520 int active; 1521 vm_object_t obj; 1522 1523 /* 1524 * Check to see if the vnode is in use. If so we have to reference it 1525 * before we clean it out so that its count cannot fall to zero and 1526 * generate a race against ourselves to recycle it. 1527 */ 1528 if ((active = vp->v_usecount)) 1529 vp->v_usecount++; 1530 1531 /* 1532 * Prevent the vnode from being recycled or brought into use while we 1533 * clean it out. 1534 */ 1535 if (vp->v_flag & VXLOCK) 1536 panic("vclean: deadlock"); 1537 vp->v_flag |= VXLOCK; 1538 /* 1539 * Even if the count is zero, the VOP_INACTIVE routine may still 1540 * have the object locked while it cleans it out. The VOP_LOCK 1541 * ensures that the VOP_INACTIVE routine is done with its work. 1542 * For active vnodes, it ensures that no other activity can 1543 * occur while the underlying object is being cleaned out. 1544 */ 1545 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 1546 1547 /* 1548 * Clean out any buffers associated with the vnode. 1549 */ 1550 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1551 if (obj = vp->v_object) { 1552 if (obj->ref_count == 0) { 1553 /* 1554 * This is a normal way of shutting down the object/vnode 1555 * association. 
1556 */ 1557 vm_object_terminate(obj); 1558 } else { 1559 /* 1560 * Woe to the process that tries to page now :-). 1561 */ 1562 vm_pager_deallocate(obj); 1563 } 1564 } 1565 1566 /* 1567 * If purging an active vnode, it must be closed and 1568 * deactivated before being reclaimed. Note that the 1569 * VOP_INACTIVE will unlock the vnode. 1570 */ 1571 if (active) { 1572 if (flags & DOCLOSE) 1573 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1574 VOP_INACTIVE(vp, p); 1575 } else { 1576 /* 1577 * Any other processes trying to obtain this lock must first 1578 * wait for VXLOCK to clear, then call the new lock operation. 1579 */ 1580 VOP_UNLOCK(vp, 0, p); 1581 } 1582 /* 1583 * Reclaim the vnode. 1584 */ 1585 if (VOP_RECLAIM(vp, p)) 1586 panic("vclean: cannot reclaim"); 1587 1588 if (active) 1589 vrele(vp); 1590 1591 cache_purge(vp); 1592 if (vp->v_vnlock) { 1593#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ 1594#ifdef DIAGNOSTIC 1595 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1596 vprint("vclean: lock not drained", vp); 1597#endif 1598#endif 1599 FREE(vp->v_vnlock, M_VNODE); 1600 vp->v_vnlock = NULL; 1601 } 1602 1603 if (VSHOULDFREE(vp)) 1604 vfree(vp); 1605 1606 /* 1607 * Done with purge, notify sleepers of the grim news. 1608 */ 1609 vp->v_op = dead_vnodeop_p; 1610 vn_pollgone(vp); 1611 vp->v_tag = VT_NON; 1612 vp->v_flag &= ~VXLOCK; 1613 if (vp->v_flag & VXWANT) { 1614 vp->v_flag &= ~VXWANT; 1615 wakeup((caddr_t) vp); 1616 } 1617} 1618 1619/* 1620 * Eliminate all activity associated with the requested vnode 1621 * and with all vnodes aliased to the requested vnode. 1622 */ 1623int 1624vop_revoke(ap) 1625 struct vop_revoke_args /* { 1626 struct vnode *a_vp; 1627 int a_flags; 1628 } */ *ap; 1629{ 1630 struct vnode *vp, *vq; 1631 struct proc *p = curproc; /* XXX */ 1632
|
1649#ifdef DIAGNOSTIC 1650 if ((ap->a_flags & REVOKEALL) == 0) 1651 panic("vop_revoke"); 1652#endif
| 1633 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
|

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		/* vgone() each alias on the hash chain until none remain. */
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				/* Drop the hash lock before vgone() sleeps. */
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
		if (vp->v_flag & VXWANT) {
			vp->v_flag &= ~VXWANT;
			wakeup(vp);
		}
	}
	/* Interlock is held here; vgonel() consumes it. */
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 * Returns 1 if the vnode was recycled, 0 if it was still in use.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		/* vgonel() consumes the vnode interlock. */
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	/* Take the interlock; vgonel() consumes it. */
	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		/* Unlink vp from its hash chain. */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If only one alias remains (vx), clear its
			 * VALIASED flag as well.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 * Returns 1 and sets *vpp on a match, 0 otherwise.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			/* The chain may have changed while unlocked. */
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

/*
 * Print a one-line (plus optional VOP_PRINT) description of vp,
 * prefixed with "label: " when label is non-NULL.  Diagnostic aid.
 */
void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	/* 96 bytes is enough for every "|FLAG" string below concatenated. */
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);	/* &buf[1] skips leading '|' */
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
1956 */ 1957DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 1958{ 1959 struct proc *p = curproc; /* XXX */ 1960 struct mount *mp, *nmp; 1961 struct vnode *vp; 1962 1963 printf("Locked vnodes\n"); 1964 simple_lock(&mountlist_slock); 1965 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1966 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1967 nmp = mp->mnt_list.cqe_next; 1968 continue; 1969 } 1970 for (vp = mp->mnt_vnodelist.lh_first; 1971 vp != NULL; 1972 vp = vp->v_mntvnodes.le_next) { 1973 if (VOP_ISLOCKED(vp)) 1974 vprint((char *)0, vp); 1975 } 1976 simple_lock(&mountlist_slock); 1977 nmp = mp->mnt_list.cqe_next; 1978 vfs_unbusy(mp, p); 1979 } 1980 simple_unlock(&mountlist_slock); 1981} 1982#endif 1983 1984/* 1985 * Top level filesystem related information gathering. 1986 */ 1987static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 1988 1989static int 1990vfs_sysctl SYSCTL_HANDLER_ARGS 1991{ 1992 int *name = (int *)arg1 - 1; /* XXX */ 1993 u_int namelen = arg2 + 1; /* XXX */ 1994 struct vfsconf *vfsp; 1995 1996#if 1 || defined(COMPAT_PRELITE2) 1997 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
*/ 1998 if (namelen == 1) 1999 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 2000#endif 2001 2002#ifdef notyet 2003 /* all sysctl names at this level are at least name and field */ 2004 if (namelen < 2) 2005 return (ENOTDIR); /* overloaded */ 2006 if (name[0] != VFS_GENERIC) { 2007 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2008 if (vfsp->vfc_typenum == name[0]) 2009 break; 2010 if (vfsp == NULL) 2011 return (EOPNOTSUPP); 2012 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2013 oldp, oldlenp, newp, newlen, p)); 2014 } 2015#endif 2016 switch (name[1]) { 2017 case VFS_MAXTYPENUM: 2018 if (namelen != 2) 2019 return (ENOTDIR); 2020 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2021 case VFS_CONF: 2022 if (namelen != 3) 2023 return (ENOTDIR); /* overloaded */ 2024 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2025 if (vfsp->vfc_typenum == name[2]) 2026 break; 2027 if (vfsp == NULL) 2028 return (EOPNOTSUPP); 2029 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2030 } 2031 return (EOPNOTSUPP); 2032} 2033 2034SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2035 "Generic filesystem"); 2036 2037#if 1 || defined(COMPAT_PRELITE2) 2038 2039static int 2040sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2041{ 2042 int error; 2043 struct vfsconf *vfsp; 2044 struct ovfsconf ovfs; 2045 2046 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2047 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2048 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2049 ovfs.vfc_index = vfsp->vfc_typenum; 2050 ovfs.vfc_refcount = vfsp->vfc_refcount; 2051 ovfs.vfc_flags = vfsp->vfc_flags; 2052 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2053 if (error) 2054 return error; 2055 } 2056 return 0; 2057} 2058 2059#endif /* 1 || COMPAT_PRELITE2 */ 2060 2061#if 0 2062#define KINFO_VNODESLOP 10 2063/* 2064 * Dump vnode list (via sysctl). 2065 * Copyout address of vnode followed by vnode. 
 */
/* ARGSUSED */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			/*
			 * mntvnode_slock is dropped around SYSCTL_OUT
			 * because the copyout may sleep/fault.
			 */
			simple_unlock(&mntvnode_slock);
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
/*
 * Returns EBUSY if the device vnode itself, or any alias of it on the
 * spechash chain, has a filesystem mounted on it; 0 otherwise.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			/* Only vnodes for the same device are aliases. */
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		/* Grab the predecessor first: dounmount() frees mp. */
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
/*
 * Register one export entry from argp on mount point mp.  An entry with
 * ex_addrlen == 0 becomes the default export; otherwise the (address,
 * mask) pair is copied in from userland and inserted into the per-AF
 * radix tree hanging off nep.  Returns 0 or an errno.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		/* Default export: at most one per mount. */
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/* netcred record with the address and mask appended in one chunk. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	/* Clamp user-supplied sa_len to what was actually copied in. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
/* rnh_walktree callback: delete one export entry from the radix tree. */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export state of mount point mp per argp->ex_flags:
 * MNT_DELEXPORT tears everything down, MNT_EXPORTED (re)installs an
 * export entry, MNT_EXPUBLIC additionally makes it the WebNFS public FS.
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS).
Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055)
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported. May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	/*
	 * NOTE(review): if VFS_VPTOFH fails, rvp is returned without
	 * the vput() below -- looks like a leaked root vnode reference;
	 * confirm against VFS_ROOT()'s contract.
	 */
	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

/*
 * Find the export credentials for the client address nam on mount mp:
 * first an exact/longest match in the per-AF radix tree, then the
 * default export.  Returns NULL if the client is not allowed.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = nam;
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				/* The radix tree root node is not an export. */
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * perform msync on all vnodes under a mount point
 * the mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags) {
	struct vnode *vp, *nvp;
	struct vm_object *obj;
	int anyio, tries;

	tries = 5;
loop:
	anyio = 0;
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		nvp = vp->v_mntvnodes.le_next;

		/* Vnode was recycled off this mount: restart the scan. */
		if (vp->v_mount != mp) {
			goto loop;
		}

		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
			continue;

		/* Cheap unlocked screen for the non-waiting case. */
		if (flags != MNT_WAIT) {
			obj = vp->v_object;
			if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
				continue;
			if (VOP_ISLOCKED(vp))
				continue;
		}

		simple_lock(&vp->v_interlock);
		if (vp->v_object &&
		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			if (!vget(vp,
				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
				if (vp->v_object) {
					vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0);
					anyio = 1;
				}
				vput(vp);
			}
		} else {
			simple_unlock(&vp->v_interlock);
		}
	}
	/* Re-scan while progress is being made, up to 5 passes. */
	if (anyio && (--tries > 0))
		goto loop;
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems might
 * afford the additional metadata buffering capability of the
 * VMIO code by making the device node be VMIO mode also.
 *
 * vp must be locked when vfs_object_create is called.
 */
int
vfs_object_create(vp, p, cred)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

	if ((vp->v_type != VREG) && (vp->v_type != VBLK))
		return 0;

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
		} else if (major(vp->v_rdev) < nblkdev &&
		    bdevsw[major(vp->v_rdev)] != NULL) {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode.  This should be fixed, but doesn't
			 * cause any problems (yet).
2529 */ 2530 object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2531 } 2532 object->ref_count--; 2533 vp->v_usecount--; 2534 } else { 2535 if (object->flags & OBJ_DEAD) { 2536 VOP_UNLOCK(vp, 0, p); 2537 tsleep(object, PVM, "vodead", 0); 2538 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 2539 goto retry; 2540 } 2541 } 2542 2543 if (vp->v_object) 2544 vp->v_flag |= VOBJBUF; 2545 2546retn: 2547 return error; 2548} 2549 2550static void 2551vfree(vp) 2552 struct vnode *vp; 2553{ 2554 int s; 2555 2556 s = splbio(); 2557 simple_lock(&vnode_free_list_slock); 2558 if (vp->v_flag & VTBFREE) { 2559 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2560 vp->v_flag &= ~VTBFREE; 2561 } 2562 if (vp->v_flag & VAGE) { 2563 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2564 } else { 2565 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2566 } 2567 freevnodes++; 2568 simple_unlock(&vnode_free_list_slock); 2569 vp->v_flag &= ~VAGE; 2570 vp->v_flag |= VFREE; 2571 splx(s); 2572} 2573 2574void 2575vbusy(vp) 2576 struct vnode *vp; 2577{ 2578 int s; 2579 2580 s = splbio(); 2581 simple_lock(&vnode_free_list_slock); 2582 if (vp->v_flag & VTBFREE) { 2583 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2584 vp->v_flag &= ~VTBFREE; 2585 } else { 2586 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2587 freevnodes--; 2588 } 2589 simple_unlock(&vnode_free_list_slock); 2590 vp->v_flag &= ~(VFREE|VAGE); 2591 splx(s); 2592} 2593 2594/* 2595 * Record a process's interest in events which might happen to 2596 * a vnode. Because poll uses the historic select-style interface 2597 * internally, this routine serves as both the ``check for any 2598 * pending events'' and the ``record my interest in future events'' 2599 * functions. (These are done together, while the lock is held, 2600 * to avoid race conditions.) 
 */
int
vn_pollrecord(vp, p, events)
	struct vnode *vp;
	struct proc *p;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_revents & events) {
		/*
		 * This leaves events we are not interested
		 * in available for the other process which
		 * presumably had requested them
		 * (otherwise they would never have been
		 * recorded).
		 */
		events &= vp->v_pollinfo.vpi_revents;
		vp->v_pollinfo.vpi_revents &= ~events;

		simple_unlock(&vp->v_pollinfo.vpi_lock);
		return events;
	}
	/* Nothing pending: remember the interest and register for wakeup. */
	vp->v_pollinfo.vpi_events |= events;
	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
	simple_unlock(&vp->v_pollinfo.vpi_lock);
	return 0;
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}



/*
 * Routine to create and manage a filesystem syncer vnode.
 */
/* Ops that are no-ops or stubs borrow the generic vop_* implementations. */
#define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
static int	sync_fsync __P((struct  vop_fsync_args *));
static int	sync_inactive __P((struct  vop_inactive_args *));
static int	sync_reclaim  __P((struct  vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
static int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist. We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
	 */
	simple_lock(&mountlist_slock);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
		simple_unlock(&mountlist_slock);
		return (0);
	}
	/* Temporarily force synchronous semantics for the sweep. */
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vfs_unbusy(mp, p);
	return (0);
}

/*
 * The syncer vnode is no longer referenced.
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}
	/*
	 * NOTE(review): this is the tail of a function whose header lies
	 * before this chunk (presumably vop_revokeall, judging by the
	 * tsleep wmesg below -- confirm against the full file).
	 */
	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP) {
				simple_unlock(&spechash_slock);
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
		if (vp->v_flag & VXWANT) {
			vp->v_flag &= ~VXWANT;
			wakeup(vp);
		}
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp) {
			simple_unlock(inter_lkp);
		}
		/* vgonel() consumes the interlock taken above. */
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
static void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int s;
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	simple_lock(&vp->v_interlock);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			/* Unlink vp from the middle of the spechash chain. */
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If exactly one other alias for the device remains
			 * (loop exits with vq == NULL), it is no longer
			 * aliased; clear its VALIASED flag too.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 */
	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
		s = splbio();
		simple_lock(&vnode_free_list_slock);
		if (vp->v_flag & VFREE) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		} else if (vp->v_flag & VTBFREE) {
			TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
			vp->v_flag &= ~VTBFREE;
			freevnodes++;
		} else
			freevnodes++;
		vp->v_flag |= VFREE;
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
		splx(s);
	}

	vp->v_type = VBAD;
	simple_unlock(&vp->v_interlock);
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			/* Chain changed under us; start the count over. */
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
/*
 * Print out a description of a vnode.
 */
/*
 * NOTE(review): this definition (and those following, through
 * vfs_unmountall) duplicates content that also appears earlier in this
 * extraction -- the chunk overlaps itself.  Printable vtype names,
 * indexed by vp->v_type.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

/*
 * Print a one-line (plus optional VOP_PRINT continuation) description
 * of a vnode: address, type, reference counts and set v_flag bits.
 */
void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: %p: ", label, (void *)vp);
	else
		printf("%p: ", (void *)vp);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	/* Collect flag names as "|NAME|NAME..."; &buf[1] skips the lead '|'. */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (vp->v_flag & VDOOMED)
		strcat(buf, "|VDOOMED");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int	sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);

/*
 * Handler for the vfs.generic sysctl subtree; name/namelen are backed
 * up one component so name[0] is the VFS_GENERIC level (hence the XXX).
 */
static int
vfs_sysctl SYSCTL_HANDLER_ARGS
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC.
*/ 1979 if (namelen == 1) 1980 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 1981#endif 1982 1983#ifdef notyet 1984 /* all sysctl names at this level are at least name and field */ 1985 if (namelen < 2) 1986 return (ENOTDIR); /* overloaded */ 1987 if (name[0] != VFS_GENERIC) { 1988 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1989 if (vfsp->vfc_typenum == name[0]) 1990 break; 1991 if (vfsp == NULL) 1992 return (EOPNOTSUPP); 1993 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 1994 oldp, oldlenp, newp, newlen, p)); 1995 } 1996#endif 1997 switch (name[1]) { 1998 case VFS_MAXTYPENUM: 1999 if (namelen != 2) 2000 return (ENOTDIR); 2001 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2002 case VFS_CONF: 2003 if (namelen != 3) 2004 return (ENOTDIR); /* overloaded */ 2005 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2006 if (vfsp->vfc_typenum == name[2]) 2007 break; 2008 if (vfsp == NULL) 2009 return (EOPNOTSUPP); 2010 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); 2011 } 2012 return (EOPNOTSUPP); 2013} 2014 2015SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2016 "Generic filesystem"); 2017 2018#if 1 || defined(COMPAT_PRELITE2) 2019 2020static int 2021sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2022{ 2023 int error; 2024 struct vfsconf *vfsp; 2025 struct ovfsconf ovfs; 2026 2027 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2028 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ 2029 strcpy(ovfs.vfc_name, vfsp->vfc_name); 2030 ovfs.vfc_index = vfsp->vfc_typenum; 2031 ovfs.vfc_refcount = vfsp->vfc_refcount; 2032 ovfs.vfc_flags = vfsp->vfc_flags; 2033 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2034 if (error) 2035 return error; 2036 } 2037 return 0; 2038} 2039 2040#endif /* 1 || COMPAT_PRELITE2 */ 2041 2042#if 0 2043#define KINFO_VNODESLOP 10 2044/* 2045 * Dump vnode list (via sysctl). 2046 * Copyout address of vnode followed by vnode. 
 */
/* ARGSUSED */
/*
 * Disabled (see the XXX below the #endif): walk every mount and copy
 * out, for each vnode, its kernel address followed by the vnode body.
 */
static int
sysctl_vnode SYSCTL_HANDLER_ARGS
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)

	req->lock = 0;
	if (!req->oldptr) /* Make an estimate */
		return (SYSCTL_OUT(req, 0,
			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			/* Drop the lock across the copyout; may sleep. */
			simple_unlock(&mntvnode_slock);
			/*
			 * NOTE(review): these error returns leave mp
			 * busied (no vfs_unbusy) and mountlist_slock
			 * untouched — a leak if this code is ever
			 * re-enabled.  TODO confirm before enabling.
			 */
			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
			    (error = SYSCTL_OUT(req, vp, VNODESZ)))
				return (error);
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	return (0);
}
#endif

/*
 * XXX
 * Exporting the vnode list on large systems causes them to crash.
 * Exporting the vnode list on medium systems causes sysctl to coredump.
 */
#if 0
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_vnode, "S,vnode", "");
#endif

/*
 * Check to see if a filesystem is mounted on a block device.
 */
/*
 * Returns EBUSY if the device vnode vp (or any alias of it) already
 * has a filesystem mounted on it, 0 otherwise.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/*
		 * Aliased device: scan the spec hash chain for other
		 * vnodes with the same rdev/type and check them too.
		 */
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p;
	int error;

	if (curproc != NULL)
		p = curproc;
	else
		p = initproc;	/* XXX XXX should this be proc0? */
	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		error = dounmount(mp, MNT_FORCE, p);
		if (error) {
			/* Report the failure but keep unmounting. */
			printf("unmount of %s failed (",
			    mp->mnt_stat.f_mntonname);
			if (error == EBUSY)
				printf("BUSY)\n");
			else
				printf("%d)\n", error);
		}
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	/*
	 * Zero address length means "export to everyone": record the
	 * default credentials directly in the netexport structure.
	 */
	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/*
	 * One allocation holds the netcred plus the address and mask
	 * sockaddrs, laid out immediately after it.
	 */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	/* Clamp user-supplied sa_len to what was actually copied in. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
/*
 * rnh_walktree callback: delete one export entry from the radix tree
 * and free it.  w is the radix_node_head being torn down.
 */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			free((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export list of a mount point according to argp->ex_flags:
 * MNT_DELEXPORT tears down existing exports (including the public
 * WebNFS one), MNT_EXPORTED (re)installs them.
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS).
Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055)
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported. May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	/*
	 * NOTE(review): on VFS_VPTOFH failure rvp is returned locked
	 * and referenced without a vput() — presumably a leak; confirm
	 * against VFS_ROOT's contract.
	 */
	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

/*
 * Look up the export credentials that apply to the client address nam
 * on mount mp: first the per-address radix tree, then the default
 * export.  Returns NULL if the client has no access.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct sockaddr *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = nam;
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				/* The tree root is not a real export entry. */
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * perform msync on all vnodes under a mount point
 * the mount point must be locked.
 */
void
vfs_msync(struct mount *mp, int flags) {
	struct vnode *vp, *nvp;
	struct vm_object *obj;
	int anyio, tries;

	tries = 5;
loop:
	anyio = 0;
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {

		nvp = vp->v_mntvnodes.le_next;

		/* Vnode was recycled away from mp: restart the scan. */
		if (vp->v_mount != mp) {
			goto loop;
		}

		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
			continue;

		/*
		 * For a non-blocking sync, skip vnodes that are clean
		 * or whose lock we would have to wait for.
		 */
		if (flags != MNT_WAIT) {
			obj = vp->v_object;
			if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
				continue;
			if (VOP_ISLOCKED(vp))
				continue;
		}

		simple_lock(&vp->v_interlock);
		if (vp->v_object &&
		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
			if (!vget(vp,
				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
				if (vp->v_object) {
					vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0);
					anyio = 1;
				}
				vput(vp);
			}
		} else {
			simple_unlock(&vp->v_interlock);
		}
	}
	/* If anything was written, rescan (bounded by 5 passes). */
	if (anyio && (--tries > 0))
		goto loop;
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems might
 * afford the additional metadata buffering capability of the
 * VMIO code by making the device node be VMIO mode also.
 *
 * vp must be locked when vfs_object_create is called.
 */
int
vfs_object_create(vp, p, cred)
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
{
	struct vattr vat;
	vm_object_t object;
	int error = 0;

	/* Only regular files and block devices get VM objects. */
	if ((vp->v_type != VREG) && (vp->v_type != VBLK))
		return 0;

retry:
	if ((object = vp->v_object) == NULL) {
		if (vp->v_type == VREG) {
			/* Need the file size to size the object. */
			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
				goto retn;
			object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
		} else if (major(vp->v_rdev) < nblkdev &&
		    bdevsw[major(vp->v_rdev)] != NULL) {
			/*
			 * This simply allocates the biggest object possible
			 * for a VBLK vnode.  This should be fixed, but doesn't
			 * cause any problems (yet).
			 */
			object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
		}
		/*
		 * NOTE(review): if vp is VBLK and the major number has
		 * no bdevsw entry, neither branch above assigns
		 * object, and this dereferences NULL — looks like it
		 * relies on callers never passing such a vnode; TODO
		 * confirm.
		 *
		 * Drop the extra references vnode_pager_alloc took.
		 */
		object->ref_count--;
		vp->v_usecount--;
	} else {
		if (object->flags & OBJ_DEAD) {
			/*
			 * Object is being torn down: wait for it to go
			 * away (unlocking vp so the teardown can make
			 * progress), then retry.
			 */
			VOP_UNLOCK(vp, 0, p);
			tsleep(object, PVM, "vodead", 0);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			goto retry;
		}
	}

	if (vp->v_object)
		vp->v_flag |= VOBJBUF;

retn:
	return error;
}

/*
 * Move a vnode onto the free list (it is no longer in use).
 * Must not already be on the to-be-freed list.
 */
static void
vfree(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	}
	/* VAGE vnodes go to the head so they are reclaimed first. */
	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	} else {
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
	splx(s);
}

/*
 * Take a vnode off the free (or to-be-freed) list because it is
 * coming back into use.
 */
void
vbusy(vp)
	struct vnode *vp;
{
	int s;

	s = splbio();
	simple_lock(&vnode_free_list_slock);
	if (vp->v_flag & VTBFREE) {
		TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
		vp->v_flag &= ~VTBFREE;
	} else {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		freevnodes--;
	}
	simple_unlock(&vnode_free_list_slock);
	vp->v_flag &= ~(VFREE|VAGE);
	splx(s);
}

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(vp, p, events)
	struct vnode *vp;
	struct proc *p;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_revents & events) {
		/*
		 * This leaves events we are not interested
		 * in available for the other process which
		 * which presumably had requested them
		 * (otherwise they would never have been
		 * recorded).
		 */
		/* Consume only the bits the caller asked about. */
		events &= vp->v_pollinfo.vpi_revents;
		vp->v_pollinfo.vpi_revents &= ~events;

		simple_unlock(&vp->v_pollinfo.vpi_lock);
		return events;
	}
	/* Nothing pending: record interest for a future wakeup. */
	vp->v_pollinfo.vpi_events |= events;
	selrecord(p, &vp->v_pollinfo.vpi_selinfo);
	simple_unlock(&vp->v_pollinfo.vpi_lock);
	return 0;
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(vp, events)
	struct vnode *vp;
	short events;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_pollinfo.vpi_lock);
	if (vp->v_pollinfo.vpi_events) {
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	simple_unlock(&vp->v_pollinfo.vpi_lock);
}



/*
 * Routine to create and manage a filesystem syncer vnode.
 */
/* Operations the syncer vnode does not implement are mapped to no-ops. */
#define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
static int	sync_fsync __P((struct  vop_fsync_args *));
static int	sync_inactive __P((struct  vop_inactive_args *));
static int	sync_reclaim  __P((struct  vop_reclaim_args *));
#define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
#define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
static int	sync_print __P((struct vop_print_args *));
#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)

/* Vnode operations vector for the syncer pseudo-vnode. */
static vop_t **sync_vnodeop_p;
static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
	{ NULL, NULL }
};
static struct vnodeopv_desc sync_vnodeop_opv_desc =
	{ &sync_vnodeop_p, sync_vnodeop_entries };

VNODEOP_SET(sync_vnodeop_opv_desc);

/*
 * Create a new filesystem syncer vnode for the specified mount point.
 */
int
vfs_allocate_syncvnode(mp)
	struct mount *mp;
{
	struct vnode *vp;
	static long start, incr, next;
	int error;

	/* Allocate a new vnode */
	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
		mp->mnt_syncer = NULL;
		return (error);
	}
	vp->v_type = VNON;
	/*
	 * Place the vnode onto the syncer worklist. We attempt to
	 * scatter them about on the list so that they will go off
	 * at evenly distributed times even if all the filesystems
	 * are mounted at once.
	 */
	next += incr;
	if (next == 0 || next > syncer_maxdelay) {
		/*
		 * Halve the stride each wraparound; on the first call
		 * (all statics zero) this seeds start/incr from
		 * syncer_maxdelay.
		 */
		start /= 2;
		incr /= 2;
		if (start == 0) {
			start = syncer_maxdelay / 2;
			incr = syncer_maxdelay;
		}
		next = start;
	}
	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
	mp->mnt_syncer = vp;
	return (0);
}

/*
 * Do a lazy sync of the filesystem.
 */
static int
sync_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *syncvp = ap->a_vp;
	struct mount *mp = syncvp->v_mount;
	struct proc *p = ap->a_p;
	int asyncflag;

	/*
	 * We only need to do something if this is a lazy evaluation.
	 */
	if (ap->a_waitfor != MNT_LAZY)
		return (0);

	/*
	 * Move ourselves to the back of the sync list.
	 */
	vn_syncer_add_to_worklist(syncvp, syncdelay);

	/*
	 * Walk the list of vnodes pushing all that are dirty and
	 * not already on the sync list.
	 */
	simple_lock(&mountlist_slock);
	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
		simple_unlock(&mountlist_slock);
		return (0);
	}
	/* Temporarily force synchronous writes for the sync pass. */
	asyncflag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	vfs_msync(mp, MNT_NOWAIT);
	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
	if (asyncflag)
		mp->mnt_flag |= MNT_ASYNC;
	vfs_unbusy(mp, p);
	return (0);
}

/*
 * The syncer vnode is no longer referenced; get rid of it.
 */
static int
sync_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	vgone(ap->a_vp);
	return (0);
}

/*
 * The syncer vnode is no longer needed and is being decommissioned.
 */
static int
sync_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	/* Detach from the mount and from the syncer worklist. */
	vp->v_mount->mnt_syncer = NULL;
	if (vp->v_flag & VONWORKLST) {
		LIST_REMOVE(vp, v_synclist);
		vp->v_flag &= ~VONWORKLST;
	}

	return (0);
}

/*
 * Print out a syncer vnode.
 */
static int
sync_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("syncer vnode");
	if (vp->v_vnlock != NULL)
		lockmgr_printinfo(vp->v_vnlock);
	printf("\n");
	return (0);
}
|