Cross Reference: /freebsd-10.0-release/sys/kern/vfs

Deleted Added

sdiff udiff text old ( 48777 ) new ( 48859 )

full compact

vfs_export.c (48777)	vfs_export.c (48859)
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95	1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
39 * $Id: vfs_subr.c,v 1.207 1999/07/08 06:05:55 mckusick Exp $	39 * $Id: vfs_subr.c,v 1.208 1999/07/12 15:02:50 kris Exp $
40 / 41 42/ 43 * External virtual filesystem routines 44 / 45#include "opt_ddb.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/conf.h> 50#include <sys/fcntl.h> 51#include <sys/kernel.h> 52#include <sys/proc.h> 53#include <sys/kthread.h> 54#include <sys/malloc.h> 55#include <sys/mount.h> 56#include <sys/socket.h> 57#include <sys/vnode.h> 58#include <sys/stat.h> 59#include <sys/buf.h> 60#include <sys/domain.h> 61#include <sys/dirent.h> 62#include <sys/vmmeter.h> 63 64#include <machine/limits.h> 65 66#include <vm/vm.h> 67#include <vm/vm_param.h> 68#include <vm/vm_prot.h> 69#include <vm/vm_object.h> 70#include <vm/vm_extern.h> 71#include <vm/pmap.h> 72#include <vm/vm_map.h> 73#include <vm/vm_page.h> 74#include <vm/vm_pager.h> 75#include <vm/vnode_pager.h> 76#include <vm/vm_zone.h> 77#include <sys/sysctl.h> 78 79#include <miscfs/specfs/specdev.h> 80 81static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 82 83static void insmntque __P((struct vnode vp, struct mount mp)); 84static void vclean __P((struct vnode vp, int flags, struct proc p)); 85static void vfree __P((struct vnode )); 86static void vgonel __P((struct vnode vp, struct proc p)); 87static unsigned long numvnodes; 88SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 89 90enum vtype iftovt_tab[16] = { 91 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 92 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 93}; 94int vttoif_tab[9] = { 95 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 96 S_IFSOCK, S_IFIFO, S_IFMT, 97}; 98 99static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list / 100struct tobefreelist vnode_tobefree_list; / vnode free list / 101* 102static u_long wantfreevnodes = 25; 103SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 104static u_long freevnodes = 0; 105SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 106 107static int reassignbufcalls; 108SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); 109static int reassignbufloops; 110SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); 111static int reassignbufsortgood; 112SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); 113static int reassignbufsortbad; 114SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); 115static int reassignbufmethod = 1; 116SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); 117 118#ifdef ENABLE_VFS_IOOPT 119int vfs_ioopt = 0; 120SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 121#endif 122 123struct mntlist mountlist; /* mounted filesystem list / 124struct simplelock mountlist_slock; 125struct simplelock mntvnode_slock; 126int nfs_mount_type = -1; 127#ifndef NULL_SIMPLELOCKS 128static struct simplelock mntid_slock; 129static struct simplelock vnode_free_list_slock; 130static struct simplelock spechash_slock; 131#endif 132struct nfs_public nfs_pub; / publicly exported FS / 133static vm_zone_t vnode_zone; 134* 135/* 136 * The workitem queue. 137 / 138#define SYNCER_MAXDELAY 32 139static int syncer_maxdelay = SYNCER_MAXDELAY; / maximum delay time / 140time_t syncdelay = 30; / max time to delay syncing data / 141time_t filedelay = 30; / time to delay syncing files / 142SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); 143time_t dirdelay = 29; / time to delay syncing directories / 144SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); 145time_t metadelay = 28; / time to delay syncing metadata / 146SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); 147static int rushjob; / number of slots to run ASAP / 148static int stat_rush_requests; / number of times I/O speeded up / 149SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); 150* 151static int syncer_delayno = 0; 152static long syncer_mask; 153LIST_HEAD(synclist, vnode); 154static struct synclist syncer_workitem_pending; 155* 156int desiredvnodes; 157SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 158 &desiredvnodes, 0, "Maximum number of vnodes"); 159 160static void vfs_free_addrlist __P((struct netexport nep)); 161static int vfs_free_netcred __P((struct radix_node rn, void w)); 162static int vfs_hang_addrlist __P((struct mount mp, struct netexport nep, 163* struct export_args argp)); 164* 165/* 166 * Initialize the vnode management data structures. 167 / 168void 169vntblinit() 170{ 171* 172 desiredvnodes = maxproc + cnt.v_page_count / 4; 173 simple_lock_init(&mntvnode_slock); 174 simple_lock_init(&mntid_slock); 175 simple_lock_init(&spechash_slock); 176 TAILQ_INIT(&vnode_free_list); 177 TAILQ_INIT(&vnode_tobefree_list); 178 simple_lock_init(&vnode_free_list_slock); 179 CIRCLEQ_INIT(&mountlist); 180 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 181 /* 182 * Initialize the filesystem syncer. 183 / 184* syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 185 &syncer_mask); 186 syncer_maxdelay = syncer_mask + 1; 187} 188 189/* 190 * Mark a mount point as busy. Used to synchronize access and to delay 191 * unmounting. Interlock is not released on failure. 192 / 193int 194vfs_busy(mp, flags, interlkp, p) 195* struct mount mp; 196* int flags; 197 struct simplelock interlkp; 198* struct proc p; 199{ 200* int lkflags; 201 202 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 203 if (flags & LK_NOWAIT) 204 return (ENOENT); 205 mp->mnt_kern_flag \|= MNTK_MWAIT; 206 if (interlkp) { 207 simple_unlock(interlkp); 208 } 209 /* 210 * Since all busy locks are shared except the exclusive 211 * lock granted when unmounting, the only place that a 212 * wakeup needs to be done is at the release of the 213 * exclusive lock at the end of dounmount. 214 / 215* tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 216 if (interlkp) { 217 simple_lock(interlkp); 218 } 219 return (ENOENT); 220 } 221 lkflags = LK_SHARED \| LK_NOPAUSE; 222 if (interlkp) 223 lkflags \|= LK_INTERLOCK; 224 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 225 panic("vfs_busy: unexpected lock failure"); 226 return (0); 227} 228 229/* 230 * Free a busy filesystem. 231 / 232void 233vfs_unbusy(mp, p) 234* struct mount mp; 235* struct proc p; 236{ 237* 238 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 239} 240 241/* 242 * Lookup a filesystem type, and if found allocate and initialize 243 * a mount structure for it. 244 * 245 * Devname is usually updated by mount(8) after booting. 246 / 247int 248vfs_rootmountalloc(fstypename, devname, mpp) 249* char fstypename; 250* char devname; 251* struct mount *mpp; 252{ 253* struct proc p = curproc; / XXX / 254* struct vfsconf vfsp; 255* struct mount mp; 256* 257 if (fstypename == NULL) 258 return (ENODEV); 259 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 260 if (!strcmp(vfsp->vfc_name, fstypename)) 261 break; 262 if (vfsp == NULL) 263 return (ENODEV); 264 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 265 bzero((char )mp, (u_long)sizeof(struct mount)); 266* lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 267 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 268 LIST_INIT(&mp->mnt_vnodelist); 269 mp->mnt_vfc = vfsp; 270 mp->mnt_op = vfsp->vfc_vfsops; 271 mp->mnt_flag = MNT_RDONLY; 272 mp->mnt_vnodecovered = NULLVP; 273 vfsp->vfc_refcount++; 274 mp->mnt_stat.f_type = vfsp->vfc_typenum; 275 mp->mnt_flag \|= vfsp->vfc_flags & MNT_VISFLAGMASK; 276 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 277 mp->mnt_stat.f_mntonname[0] = '/'; 278 mp->mnt_stat.f_mntonname[1] = 0; 279 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 280 mpp = mp; 281* return (0); 282} 283 284/* 285 * Find an appropriate filesystem to use for the root. If a filesystem 286 * has not been preselected, walk through the list of known filesystems 287 * trying those that have mountroot routines, and try them until one 288 * works or we have tried them all. 289 / 290#ifdef notdef / XXX JH / 291int 292lite2_vfs_mountroot() 293{ 294* struct vfsconf vfsp; 295* extern int (lite2_mountroot) __P((void)); 296* int error; 297 298 if (lite2_mountroot != NULL) 299 return ((lite2_mountroot)()); 300* for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 301 if (vfsp->vfc_mountroot == NULL) 302 continue; 303 if ((error = (vfsp->vfc_mountroot)()) == 0) 304* return (0); 305 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 306 } 307 return (ENODEV); 308} 309#endif 310 311/* 312 * Lookup a mount point by filesystem identifier. 313 / 314struct mount 315vfs_getvfs(fsid) 316 fsid_t fsid; 317{ 318* register struct mount mp; 319* 320 simple_lock(&mountlist_slock); 321 for (mp = mountlist.cqh_first; mp != (void )&mountlist; 322* mp = mp->mnt_list.cqe_next) { 323 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 324 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 325 simple_unlock(&mountlist_slock); 326 return (mp); 327 } 328 } 329 simple_unlock(&mountlist_slock); 330 return ((struct mount ) 0); 331} 332* 333/* 334 * Get a new unique fsid 335 / 336void 337vfs_getnewfsid(mp) 338* struct mount mp; 339{ 340* static u_short xxxfs_mntid; 341 342 fsid_t tfsid; 343 int mtype; 344 345 simple_lock(&mntid_slock); 346 mtype = mp->mnt_vfc->vfc_typenum;	40 / 41 42/ 43 * External virtual filesystem routines 44 / 45#include "opt_ddb.h" 46 47#include <sys/param.h> 48#include <sys/systm.h> 49#include <sys/conf.h> 50#include <sys/fcntl.h> 51#include <sys/kernel.h> 52#include <sys/proc.h> 53#include <sys/kthread.h> 54#include <sys/malloc.h> 55#include <sys/mount.h> 56#include <sys/socket.h> 57#include <sys/vnode.h> 58#include <sys/stat.h> 59#include <sys/buf.h> 60#include <sys/domain.h> 61#include <sys/dirent.h> 62#include <sys/vmmeter.h> 63 64#include <machine/limits.h> 65 66#include <vm/vm.h> 67#include <vm/vm_param.h> 68#include <vm/vm_prot.h> 69#include <vm/vm_object.h> 70#include <vm/vm_extern.h> 71#include <vm/pmap.h> 72#include <vm/vm_map.h> 73#include <vm/vm_page.h> 74#include <vm/vm_pager.h> 75#include <vm/vnode_pager.h> 76#include <vm/vm_zone.h> 77#include <sys/sysctl.h> 78 79#include <miscfs/specfs/specdev.h> 80 81static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 82 83static void insmntque __P((struct vnode vp, struct mount mp)); 84static void vclean __P((struct vnode vp, int flags, struct proc p)); 85static void vfree __P((struct vnode )); 86static void vgonel __P((struct vnode vp, struct proc p)); 87static unsigned long numvnodes; 88SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); 89 90enum vtype iftovt_tab[16] = { 91 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 92 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 93}; 94int vttoif_tab[9] = { 95 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 96 S_IFSOCK, S_IFIFO, S_IFMT, 97}; 98 99static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list / 100struct tobefreelist vnode_tobefree_list; / vnode free list / 101* 102static u_long wantfreevnodes = 25; 103SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); 104static u_long freevnodes = 0; 105SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); 106 107static int reassignbufcalls; 108SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); 109static int reassignbufloops; 110SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); 111static int reassignbufsortgood; 112SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); 113static int reassignbufsortbad; 114SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); 115static int reassignbufmethod = 1; 116SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); 117 118#ifdef ENABLE_VFS_IOOPT 119int vfs_ioopt = 0; 120SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); 121#endif 122 123struct mntlist mountlist; /* mounted filesystem list / 124struct simplelock mountlist_slock; 125struct simplelock mntvnode_slock; 126int nfs_mount_type = -1; 127#ifndef NULL_SIMPLELOCKS 128static struct simplelock mntid_slock; 129static struct simplelock vnode_free_list_slock; 130static struct simplelock spechash_slock; 131#endif 132struct nfs_public nfs_pub; / publicly exported FS / 133static vm_zone_t vnode_zone; 134* 135/* 136 * The workitem queue. 137 / 138#define SYNCER_MAXDELAY 32 139static int syncer_maxdelay = SYNCER_MAXDELAY; / maximum delay time / 140time_t syncdelay = 30; / max time to delay syncing data / 141time_t filedelay = 30; / time to delay syncing files / 142SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); 143time_t dirdelay = 29; / time to delay syncing directories / 144SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); 145time_t metadelay = 28; / time to delay syncing metadata / 146SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); 147static int rushjob; / number of slots to run ASAP / 148static int stat_rush_requests; / number of times I/O speeded up / 149SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); 150* 151static int syncer_delayno = 0; 152static long syncer_mask; 153LIST_HEAD(synclist, vnode); 154static struct synclist syncer_workitem_pending; 155* 156int desiredvnodes; 157SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 158 &desiredvnodes, 0, "Maximum number of vnodes"); 159 160static void vfs_free_addrlist __P((struct netexport nep)); 161static int vfs_free_netcred __P((struct radix_node rn, void w)); 162static int vfs_hang_addrlist __P((struct mount mp, struct netexport nep, 163* struct export_args argp)); 164* 165/* 166 * Initialize the vnode management data structures. 167 / 168void 169vntblinit() 170{ 171* 172 desiredvnodes = maxproc + cnt.v_page_count / 4; 173 simple_lock_init(&mntvnode_slock); 174 simple_lock_init(&mntid_slock); 175 simple_lock_init(&spechash_slock); 176 TAILQ_INIT(&vnode_free_list); 177 TAILQ_INIT(&vnode_tobefree_list); 178 simple_lock_init(&vnode_free_list_slock); 179 CIRCLEQ_INIT(&mountlist); 180 vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); 181 /* 182 * Initialize the filesystem syncer. 183 / 184* syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 185 &syncer_mask); 186 syncer_maxdelay = syncer_mask + 1; 187} 188 189/* 190 * Mark a mount point as busy. Used to synchronize access and to delay 191 * unmounting. Interlock is not released on failure. 192 / 193int 194vfs_busy(mp, flags, interlkp, p) 195* struct mount mp; 196* int flags; 197 struct simplelock interlkp; 198* struct proc p; 199{ 200* int lkflags; 201 202 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 203 if (flags & LK_NOWAIT) 204 return (ENOENT); 205 mp->mnt_kern_flag \|= MNTK_MWAIT; 206 if (interlkp) { 207 simple_unlock(interlkp); 208 } 209 /* 210 * Since all busy locks are shared except the exclusive 211 * lock granted when unmounting, the only place that a 212 * wakeup needs to be done is at the release of the 213 * exclusive lock at the end of dounmount. 214 / 215* tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 216 if (interlkp) { 217 simple_lock(interlkp); 218 } 219 return (ENOENT); 220 } 221 lkflags = LK_SHARED \| LK_NOPAUSE; 222 if (interlkp) 223 lkflags \|= LK_INTERLOCK; 224 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 225 panic("vfs_busy: unexpected lock failure"); 226 return (0); 227} 228 229/* 230 * Free a busy filesystem. 231 / 232void 233vfs_unbusy(mp, p) 234* struct mount mp; 235* struct proc p; 236{ 237* 238 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 239} 240 241/* 242 * Lookup a filesystem type, and if found allocate and initialize 243 * a mount structure for it. 244 * 245 * Devname is usually updated by mount(8) after booting. 246 / 247int 248vfs_rootmountalloc(fstypename, devname, mpp) 249* char fstypename; 250* char devname; 251* struct mount *mpp; 252{ 253* struct proc p = curproc; / XXX / 254* struct vfsconf vfsp; 255* struct mount mp; 256* 257 if (fstypename == NULL) 258 return (ENODEV); 259 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 260 if (!strcmp(vfsp->vfc_name, fstypename)) 261 break; 262 if (vfsp == NULL) 263 return (ENODEV); 264 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 265 bzero((char )mp, (u_long)sizeof(struct mount)); 266* lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); 267 (void)vfs_busy(mp, LK_NOWAIT, 0, p); 268 LIST_INIT(&mp->mnt_vnodelist); 269 mp->mnt_vfc = vfsp; 270 mp->mnt_op = vfsp->vfc_vfsops; 271 mp->mnt_flag = MNT_RDONLY; 272 mp->mnt_vnodecovered = NULLVP; 273 vfsp->vfc_refcount++; 274 mp->mnt_stat.f_type = vfsp->vfc_typenum; 275 mp->mnt_flag \|= vfsp->vfc_flags & MNT_VISFLAGMASK; 276 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 277 mp->mnt_stat.f_mntonname[0] = '/'; 278 mp->mnt_stat.f_mntonname[1] = 0; 279 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 280 mpp = mp; 281* return (0); 282} 283 284/* 285 * Find an appropriate filesystem to use for the root. If a filesystem 286 * has not been preselected, walk through the list of known filesystems 287 * trying those that have mountroot routines, and try them until one 288 * works or we have tried them all. 289 / 290#ifdef notdef / XXX JH / 291int 292lite2_vfs_mountroot() 293{ 294* struct vfsconf vfsp; 295* extern int (lite2_mountroot) __P((void)); 296* int error; 297 298 if (lite2_mountroot != NULL) 299 return ((lite2_mountroot)()); 300* for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 301 if (vfsp->vfc_mountroot == NULL) 302 continue; 303 if ((error = (vfsp->vfc_mountroot)()) == 0) 304* return (0); 305 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 306 } 307 return (ENODEV); 308} 309#endif 310 311/* 312 * Lookup a mount point by filesystem identifier. 313 / 314struct mount 315vfs_getvfs(fsid) 316 fsid_t fsid; 317{ 318* register struct mount mp; 319* 320 simple_lock(&mountlist_slock); 321 for (mp = mountlist.cqh_first; mp != (void )&mountlist; 322* mp = mp->mnt_list.cqe_next) { 323 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 324 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 325 simple_unlock(&mountlist_slock); 326 return (mp); 327 } 328 } 329 simple_unlock(&mountlist_slock); 330 return ((struct mount ) 0); 331} 332* 333/* 334 * Get a new unique fsid 335 / 336void 337vfs_getnewfsid(mp) 338* struct mount mp; 339{ 340* static u_short xxxfs_mntid; 341 342 fsid_t tfsid; 343 int mtype; 344 345 simple_lock(&mntid_slock); 346 mtype = mp->mnt_vfc->vfc_typenum;
347 mp->mnt_stat.f_fsid.val[0] = umakedev(255, mtype);	347 mp->mnt_stat.f_fsid.val[0] = makeudev(255, mtype);
348 mp->mnt_stat.f_fsid.val[1] = mtype; 349 if (xxxfs_mntid == 0) 350 ++xxxfs_mntid;	348 mp->mnt_stat.f_fsid.val[1] = mtype; 349 if (xxxfs_mntid == 0) 350 ++xxxfs_mntid;
351 tfsid.val[0] = umakedev(255, mtype + (xxxfs_mntid << 16));	351 tfsid.val[0] = makeudev(255, mtype + (xxxfs_mntid << 16));
352 tfsid.val[1] = mtype; 353 if (mountlist.cqh_first != (void )&mountlist) { 354* while (vfs_getvfs(&tfsid)) { 355 xxxfs_mntid++;	352 tfsid.val[1] = mtype; 353 if (mountlist.cqh_first != (void )&mountlist) { 354* while (vfs_getvfs(&tfsid)) { 355 xxxfs_mntid++;
356 tfsid.val[0] = umakedev(255,	356 tfsid.val[0] = makeudev(255,
357 mtype + (xxxfs_mntid << 16)); 358 } 359 } 360 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 361 simple_unlock(&mntid_slock); 362} 363 364/* 365 * Set vnode attributes to VNOVAL 366 / 367void 368vattr_null(vap) 369* register struct vattr vap; 370{ 371* 372 vap->va_type = VNON; 373 vap->va_size = VNOVAL; 374 vap->va_bytes = VNOVAL; 375 vap->va_mode = VNOVAL; 376 vap->va_nlink = VNOVAL; 377 vap->va_uid = VNOVAL; 378 vap->va_gid = VNOVAL; 379 vap->va_fsid = VNOVAL; 380 vap->va_fileid = VNOVAL; 381 vap->va_blocksize = VNOVAL; 382 vap->va_rdev = VNOVAL; 383 vap->va_atime.tv_sec = VNOVAL; 384 vap->va_atime.tv_nsec = VNOVAL; 385 vap->va_mtime.tv_sec = VNOVAL; 386 vap->va_mtime.tv_nsec = VNOVAL; 387 vap->va_ctime.tv_sec = VNOVAL; 388 vap->va_ctime.tv_nsec = VNOVAL; 389 vap->va_flags = VNOVAL; 390 vap->va_gen = VNOVAL; 391 vap->va_vaflags = 0; 392} 393 394/* 395 * Routines having to do with the management of the vnode table. 396 / 397extern vop_t dead_vnodeop_p; 398* 399/* 400 * Return the next vnode from the free list. 401 / 402int 403getnewvnode(tag, mp, vops, vpp) 404* enum vtagtype tag; 405 struct mount mp; 406* vop_t *vops; 407* struct vnode *vpp; 408{ 409* int s; 410 struct proc p = curproc; / XXX / 411* struct vnode vp, tvp, nvp; 412* vm_object_t object; 413 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 414 415 /* 416 * We take the least recently used vnode from the freelist 417 * if we can get it and it has no cached pages, and no 418 * namecache entries are relative to it. 419 * Otherwise we allocate a new vnode 420 / 421* 422 s = splbio(); 423 simple_lock(&vnode_free_list_slock); 424 TAILQ_INIT(&vnode_tmp_list); 425 426 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 427 nvp = TAILQ_NEXT(vp, v_freelist); 428 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 429 if (vp->v_flag & VAGE) { 430 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 431 } else { 432 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 433 } 434 vp->v_flag &= ~(VTBFREE\|VAGE); 435 vp->v_flag \|= VFREE; 436 if (vp->v_usecount) 437 panic("tobe free vnode isn't"); 438 freevnodes++; 439 } 440 441 if (wantfreevnodes && freevnodes < wantfreevnodes) { 442 vp = NULL; 443 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 444 /* 445 * XXX: this is only here to be backwards compatible 446 / 447* vp = NULL; 448 } else { 449 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 450 nvp = TAILQ_NEXT(vp, v_freelist); 451 if (!simple_lock_try(&vp->v_interlock)) 452 continue; 453 if (vp->v_usecount) 454 panic("free vnode isn't"); 455 456 object = vp->v_object; 457 if (object && (object->resident_page_count \|\| object->ref_count)) { 458 printf("object inconsistant state: RPC: %d, RC: %d\n", 459 object->resident_page_count, object->ref_count); 460 /* Don't recycle if it's caching some pages / 461* TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 462 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 463 continue; 464 } else if (LIST_FIRST(&vp->v_cache_src)) { 465 /* Don't recycle if active in the namecache / 466* simple_unlock(&vp->v_interlock); 467 continue; 468 } else { 469 break; 470 } 471 } 472 } 473 474 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 475 nvp = TAILQ_NEXT(tvp, v_freelist); 476 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 477 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 478 simple_unlock(&tvp->v_interlock); 479 } 480 481 if (vp) { 482 vp->v_flag \|= VDOOMED; 483 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 484 freevnodes--; 485 simple_unlock(&vnode_free_list_slock); 486 cache_purge(vp); 487 vp->v_lease = NULL; 488 if (vp->v_type != VBAD) { 489 vgonel(vp, p); 490 } else { 491 simple_unlock(&vp->v_interlock); 492 } 493 494#ifdef INVARIANTS 495 { 496 int s; 497 498 if (vp->v_data) 499 panic("cleaned vnode isn't"); 500 s = splbio(); 501 if (vp->v_numoutput) 502 panic("Clean vnode has pending I/O's"); 503 splx(s); 504 } 505#endif 506 vp->v_flag = 0; 507 vp->v_lastr = 0; 508 vp->v_lastw = 0; 509 vp->v_lasta = 0; 510 vp->v_cstart = 0; 511 vp->v_clen = 0; 512 vp->v_socket = 0; 513 vp->v_writecount = 0; /* XXX / 514* vp->v_maxio = 0; 515 } else { 516 simple_unlock(&vnode_free_list_slock); 517 vp = (struct vnode ) zalloc(vnode_zone); 518* bzero((char ) vp, sizeof vp); 519 simple_lock_init(&vp->v_interlock); 520 vp->v_dd = vp; 521 cache_purge(vp); 522 LIST_INIT(&vp->v_cache_src); 523 TAILQ_INIT(&vp->v_cache_dst); 524 numvnodes++; 525 } 526 527 TAILQ_INIT(&vp->v_cleanblkhd); 528 TAILQ_INIT(&vp->v_dirtyblkhd); 529 vp->v_type = VNON; 530 vp->v_tag = tag; 531 vp->v_op = vops; 532 insmntque(vp, mp); 533 vpp = vp; 534* vp->v_usecount = 1; 535 vp->v_data = 0; 536 splx(s); 537 538 vfs_object_create(vp, p, p->p_ucred); 539 return (0); 540} 541 542/* 543 * Move a vnode from one mount queue to another. 544 / 545static void 546insmntque(vp, mp) 547* register struct vnode vp; 548* register struct mount mp; 549{ 550* 551 simple_lock(&mntvnode_slock); 552 /* 553 * Delete from old mount point vnode list, if on one. 554 / 555* if (vp->v_mount != NULL) 556 LIST_REMOVE(vp, v_mntvnodes); 557 /* 558 * Insert into list of vnodes for the new mount point, if available. 559 / 560* if ((vp->v_mount = mp) == NULL) { 561 simple_unlock(&mntvnode_slock); 562 return; 563 } 564 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 565 simple_unlock(&mntvnode_slock); 566} 567 568/* 569 * Update outstanding I/O count and do wakeup if requested. 570 / 571void 572vwakeup(bp) 573* register struct buf bp; 574{ 575* register struct vnode vp; 576* 577 bp->b_flags &= ~B_WRITEINPROG; 578 if ((vp = bp->b_vp)) { 579 vp->v_numoutput--; 580 if (vp->v_numoutput < 0) 581 panic("vwakeup: neg numoutput"); 582 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 583 vp->v_flag &= ~VBWAIT; 584 wakeup((caddr_t) &vp->v_numoutput); 585 } 586 } 587} 588 589/* 590 * Flush out and invalidate all buffers associated with a vnode. 591 * Called with the underlying object locked. 592 / 593int 594vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 595* register struct vnode vp; 596* int flags; 597 struct ucred cred; 598* struct proc p; 599* int slpflag, slptimeo; 600{ 601 register struct buf bp; 602* struct buf nbp, blist; 603 int s, error; 604 vm_object_t object; 605 606 if (flags & V_SAVE) { 607 s = splbio(); 608 while (vp->v_numoutput) { 609 vp->v_flag \|= VBWAIT; 610 error = tsleep((caddr_t)&vp->v_numoutput, 611 slpflag \| (PRIBIO + 1), "vinvlbuf", slptimeo); 612 if (error) { 613 splx(s); 614 return (error); 615 } 616 } 617 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 618 splx(s); 619 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 620 return (error); 621 s = splbio(); 622 if (vp->v_numoutput > 0 \|\| 623 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 624 panic("vinvalbuf: dirty bufs"); 625 } 626 splx(s); 627 } 628 s = splbio(); 629 for (;;) { 630 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 631 if (!blist) 632 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 633 if (!blist) 634 break; 635 636 for (bp = blist; bp; bp = nbp) { 637 nbp = TAILQ_NEXT(bp, b_vnbufs); 638 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 639 error = BUF_TIMELOCK(bp, 640 LK_EXCLUSIVE \| LK_SLEEPFAIL, 641 "vinvalbuf", slpflag, slptimeo); 642 if (error == ENOLCK) 643 break; 644 splx(s); 645 return (error); 646 } 647 /* 648 * XXX Since there are no node locks for NFS, I 649 * believe there is a slight chance that a delayed 650 * write will occur while sleeping just above, so 651 * check for it. Note that vfs_bio_awrite expects 652 * buffers to reside on a queue, while VOP_BWRITE and 653 * brelse do not. 654 / 655* if (((bp->b_flags & (B_DELWRI \| B_INVAL)) == B_DELWRI) && 656 (flags & V_SAVE)) { 657 658 if (bp->b_vp == vp) { 659 if (bp->b_flags & B_CLUSTEROK) { 660 BUF_UNLOCK(bp); 661 vfs_bio_awrite(bp); 662 } else { 663 bremfree(bp); 664 bp->b_flags \|= B_ASYNC; 665 VOP_BWRITE(bp->b_vp, bp); 666 } 667 } else { 668 bremfree(bp); 669 (void) VOP_BWRITE(bp->b_vp, bp); 670 } 671 break; 672 } 673 bremfree(bp); 674 bp->b_flags \|= (B_INVAL \| B_NOCACHE \| B_RELBUF); 675 bp->b_flags &= ~B_ASYNC; 676 brelse(bp); 677 } 678 } 679 680 while (vp->v_numoutput > 0) { 681 vp->v_flag \|= VBWAIT; 682 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 683 } 684 685 splx(s); 686 687 /* 688 * Destroy the copy in the VM cache, too. 689 / 690* simple_lock(&vp->v_interlock); 691 object = vp->v_object; 692 if (object != NULL) { 693 vm_object_page_remove(object, 0, 0, 694 (flags & V_SAVE) ? TRUE : FALSE); 695 } 696 simple_unlock(&vp->v_interlock); 697 698 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) \|\| !TAILQ_EMPTY(&vp->v_cleanblkhd)) 699 panic("vinvalbuf: flush failed"); 700 return (0); 701} 702 703/* 704 * Truncate a file's buffer and pages to a specified length. This 705 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 706 * sync activity. 707 / 708int 709vtruncbuf(vp, cred, p, length, blksize) 710* register struct vnode vp; 711* struct ucred cred; 712* struct proc p; 713* off_t length; 714 int blksize; 715{ 716 register struct buf bp; 717* struct buf nbp; 718* int s, anyfreed; 719 int trunclbn; 720 721 /* 722 * Round up to the next lbn. 723 / 724* trunclbn = (length + blksize - 1) / blksize; 725 726 s = splbio(); 727restart: 728 anyfreed = 1; 729 for (;anyfreed;) { 730 anyfreed = 0; 731 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 732 nbp = TAILQ_NEXT(bp, b_vnbufs); 733 if (bp->b_lblkno >= trunclbn) { 734 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 735 BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL); 736 goto restart; 737 } else { 738 bremfree(bp); 739 bp->b_flags \|= (B_INVAL \| B_RELBUF); 740 bp->b_flags &= ~B_ASYNC; 741 brelse(bp); 742 anyfreed = 1; 743 } 744 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)\|\| 745 (nbp->b_vp != vp) \|\| 746 (nbp->b_flags & B_DELWRI))) { 747 goto restart; 748 } 749 } 750 } 751 752 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 753 nbp = TAILQ_NEXT(bp, b_vnbufs); 754 if (bp->b_lblkno >= trunclbn) { 755 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 756 BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL); 757 goto restart; 758 } else { 759 bremfree(bp); 760 bp->b_flags \|= (B_INVAL \| B_RELBUF); 761 bp->b_flags &= ~B_ASYNC; 762 brelse(bp); 763 anyfreed = 1; 764 } 765 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)\|\| 766 (nbp->b_vp != vp) \|\| 767 (nbp->b_flags & B_DELWRI) == 0)) { 768 goto restart; 769 } 770 } 771 } 772 } 773 774 if (length > 0) { 775restartsync: 776 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 777 nbp = TAILQ_NEXT(bp, b_vnbufs); 778 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 779 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 780 BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL); 781 goto restart; 782 } else { 783 bremfree(bp); 784 if (bp->b_vp == vp) { 785 bp->b_flags \|= B_ASYNC; 786 } else { 787 bp->b_flags &= ~B_ASYNC; 788 } 789 VOP_BWRITE(bp->b_vp, bp); 790 } 791 goto restartsync; 792 } 793 794 } 795 } 796 797 while (vp->v_numoutput > 0) { 798 vp->v_flag \|= VBWAIT; 799 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 800 } 801 802 splx(s); 803 804 vnode_pager_setsize(vp, length); 805 806 return (0); 807} 808 809/* 810 * Associate a buffer with a vnode. 811 / 812void 813bgetvp(vp, bp) 814* register struct vnode vp; 815* register struct buf bp; 816{ 817* int s; 818 819 KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); 820 821 vhold(vp); 822 bp->b_vp = vp; 823 if (vp->v_type == VBLK \|\| vp->v_type == VCHR) 824 bp->b_dev = vp->v_rdev; 825 else 826 bp->b_dev = NODEV; 827 /* 828 * Insert onto list for new vnode. 829 / 830* s = splbio(); 831 bp->b_xflags \|= B_VNCLEAN; 832 bp->b_xflags &= ~B_VNDIRTY; 833 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 834 splx(s); 835} 836 837/* 838 * Disassociate a buffer from a vnode. 839 / 840void 841brelvp(bp) 842* register struct buf bp; 843{ 844* struct vnode vp; 845* struct buflists listheadp; 846* int s; 847 848 KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); 849 850 /* 851 * Delete from old vnode list, if on one. 852 / 853* vp = bp->b_vp; 854 s = splbio(); 855 if (bp->b_xflags & (B_VNDIRTY\|B_VNCLEAN)) { 856 if (bp->b_xflags & B_VNDIRTY) 857 listheadp = &vp->v_dirtyblkhd; 858 else 859 listheadp = &vp->v_cleanblkhd; 860 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 861 bp->b_xflags &= ~(B_VNDIRTY\|B_VNCLEAN); 862 } 863 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 864 vp->v_flag &= ~VONWORKLST; 865 LIST_REMOVE(vp, v_synclist); 866 } 867 splx(s); 868 bp->b_vp = (struct vnode ) 0; 869* vdrop(vp); 870} 871 872/* 873 * The workitem queue. 874 * 875 * It is useful to delay writes of file data and filesystem metadata 876 * for tens of seconds so that quickly created and deleted files need 877 * not waste disk bandwidth being created and removed. To realize this, 878 * we append vnodes to a "workitem" queue. When running with a soft 879 * updates implementation, most pending metadata dependencies should 880 * not wait for more than a few seconds. Thus, mounted on block devices 881 * are delayed only about a half the time that file data is delayed. 882 * Similarly, directory updates are more critical, so are only delayed 883 * about a third the time that file data is delayed. Thus, there are 884 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 885 * one each second (driven off the filesystem syncer process). The 886 * syncer_delayno variable indicates the next queue that is to be processed. 887 * Items that need to be processed soon are placed in this queue: 888 * 889 * syncer_workitem_pending[syncer_delayno] 890 * 891 * A delay of fifteen seconds is done by placing the request fifteen 892 * entries later in the queue: 893 * 894 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 895 * 896 / 897* 898/* 899 * Add an item to the syncer work queue. 900 / 901static void 902vn_syncer_add_to_worklist(struct vnode vp, int delay) 903{ 904 int s, slot; 905 906 s = splbio(); 907 908 if (vp->v_flag & VONWORKLST) { 909 LIST_REMOVE(vp, v_synclist); 910 } 911 912 if (delay > syncer_maxdelay - 2) 913 delay = syncer_maxdelay - 2; 914 slot = (syncer_delayno + delay) & syncer_mask; 915 916 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 917 vp->v_flag \|= VONWORKLST; 918 splx(s); 919} 920 921struct proc updateproc; 922static void sched_sync __P((void)); 923static struct kproc_desc up_kp = { 924* "syncer", 925 sched_sync, 926 &updateproc 927}; 928SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 929 930/* 931 * System filesystem synchronizer daemon. 932 / 933void 934sched_sync(void) 935{ 936* struct synclist slp; 937* struct vnode vp; 938* long starttime; 939 int s; 940 struct proc p = updateproc; 941* 942 p->p_flag \|= P_BUFEXHAUST; 943 944 for (;;) { 945 starttime = time_second; 946 947 /* 948 * Push files whose dirty time has expired. Be careful 949 * of interrupt race on slp queue. 950 / 951* s = splbio(); 952 slp = &syncer_workitem_pending[syncer_delayno]; 953 syncer_delayno += 1; 954 if (syncer_delayno == syncer_maxdelay) 955 syncer_delayno = 0; 956 splx(s); 957 958 while ((vp = LIST_FIRST(slp)) != NULL) { 959 if (VOP_ISLOCKED(vp) == 0) { 960 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, p); 961 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 962 VOP_UNLOCK(vp, 0, p); 963 } 964 s = splbio(); 965 if (LIST_FIRST(slp) == vp) { 966 /* 967 * Note: v_tag VT_VFS vps can remain on the 968 * worklist too with no dirty blocks, but 969 * since sync_fsync() moves it to a different 970 * slot we are safe. 971 / 972* if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 973 vp->v_type != VBLK) 974 panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); 975 /* 976 * Put us back on the worklist. The worklist 977 * routine will remove us from our current 978 * position and then add us back in at a later 979 * position. 980 / 981* vn_syncer_add_to_worklist(vp, syncdelay); 982 } 983 splx(s); 984 } 985 986 /* 987 * Do soft update processing. 988 / 989* if (bioops.io_sync) 990 (bioops.io_sync)(NULL); 991* 992 /* 993 * The variable rushjob allows the kernel to speed up the 994 * processing of the filesystem syncer process. A rushjob 995 * value of N tells the filesystem syncer to process the next 996 * N seconds worth of work on its queue ASAP. Currently rushjob 997 * is used by the soft update code to speed up the filesystem 998 * syncer process when the incore state is getting so far 999 * ahead of the disk that the kernel memory pool is being 1000 * threatened with exhaustion. 1001 / 1002* if (rushjob > 0) { 1003 rushjob -= 1; 1004 continue; 1005 } 1006 /* 1007 * If it has taken us less than a second to process the 1008 * current work, then wait. Otherwise start right over 1009 * again. We can still lose time if any single round 1010 * takes more than two seconds, but it does not really 1011 * matter as we are just trying to generally pace the 1012 * filesystem activity. 1013 / 1014* if (time_second == starttime) 1015 tsleep(&lbolt, PPAUSE, "syncer", 0); 1016 } 1017} 1018 1019/* 1020 * Request the syncer daemon to speed up its work. 1021 * We never push it to speed up more than half of its 1022 * normal turn time, otherwise it could take over the cpu. 1023 / 1024int 1025speedup_syncer() 1026{ 1027* int s; 1028 1029 s = splhigh(); 1030 if (updateproc->p_wchan == &lbolt) 1031 setrunnable(updateproc); 1032 splx(s); 1033 if (rushjob < syncdelay / 2) { 1034 rushjob += 1; 1035 stat_rush_requests += 1; 1036 return (1); 1037 } 1038 return(0); 1039} 1040 1041/* 1042 * Associate a p-buffer with a vnode. 1043 * 1044 * Also sets B_PAGING flag to indicate that vnode is not fully associated 1045 * with the buffer. i.e. the bp has not been linked into the vnode or 1046 * ref-counted. 1047 / 1048void 1049pbgetvp(vp, bp) 1050* register struct vnode vp; 1051* register struct buf bp; 1052{ 1053* 1054 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); 1055 1056 bp->b_vp = vp; 1057 bp->b_flags \|= B_PAGING; 1058 if (vp->v_type == VBLK \|\| vp->v_type == VCHR) 1059 bp->b_dev = vp->v_rdev; 1060 else 1061 bp->b_dev = NODEV; 1062} 1063 1064/* 1065 * Disassociate a p-buffer from a vnode. 1066 / 1067void 1068pbrelvp(bp) 1069* register struct buf bp; 1070{ 1071* 1072 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); 1073 1074#if !defined(MAX_PERF) 1075 /* XXX REMOVE ME / 1076* if (bp->b_vnbufs.tqe_next != NULL) { 1077 panic( 1078 "relpbuf(): b_vp was probably reassignbuf()d %p %x", 1079 bp, 1080 (int)bp->b_flags 1081 ); 1082 } 1083#endif 1084 bp->b_vp = (struct vnode ) 0; 1085* bp->b_flags &= ~B_PAGING; 1086} 1087 1088void 1089pbreassignbuf(bp, newvp) 1090 struct buf bp; 1091* struct vnode newvp; 1092{ 1093#if !defined(MAX_PERF) 1094* if ((bp->b_flags & B_PAGING) == 0) { 1095 panic( 1096 "pbreassignbuf() on non phys bp %p", 1097 bp 1098 ); 1099 } 1100#endif 1101 bp->b_vp = newvp; 1102} 1103 1104/* 1105 * Reassign a buffer from one vnode to another. 1106 * Used to assign file specific control information 1107 * (indirect blocks) to the vnode to which they belong. 1108 / 1109void 1110reassignbuf(bp, newvp) 1111* register struct buf bp; 1112* register struct vnode newvp; 1113{ 1114* struct buflists listheadp; 1115* int delay; 1116 int s; 1117 1118 if (newvp == NULL) { 1119 printf("reassignbuf: NULL"); 1120 return; 1121 } 1122 ++reassignbufcalls; 1123 1124#if !defined(MAX_PERF) 1125 /* 1126 * B_PAGING flagged buffers cannot be reassigned because their vp 1127 * is not fully linked in. 1128 / 1129* if (bp->b_flags & B_PAGING) 1130 panic("cannot reassign paging buffer"); 1131#endif 1132 1133 s = splbio(); 1134 /* 1135 * Delete from old vnode list, if on one. 1136 / 1137* if (bp->b_xflags & (B_VNDIRTY\|B_VNCLEAN)) { 1138 if (bp->b_xflags & B_VNDIRTY) 1139 listheadp = &bp->b_vp->v_dirtyblkhd; 1140 else 1141 listheadp = &bp->b_vp->v_cleanblkhd; 1142 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1143 bp->b_xflags &= ~(B_VNDIRTY\|B_VNCLEAN); 1144 if (bp->b_vp != newvp) { 1145 vdrop(bp->b_vp); 1146 bp->b_vp = NULL; /* for clarification / 1147* } 1148 } 1149 /* 1150 * If dirty, put on list of dirty buffers; otherwise insert onto list 1151 * of clean buffers. 1152 / 1153* if (bp->b_flags & B_DELWRI) { 1154 struct buf tbp; 1155* 1156 listheadp = &newvp->v_dirtyblkhd; 1157 if ((newvp->v_flag & VONWORKLST) == 0) { 1158 switch (newvp->v_type) { 1159 case VDIR: 1160 delay = dirdelay; 1161 break; 1162 case VBLK: 1163 if (newvp->v_specmountpoint != NULL) { 1164 delay = metadelay; 1165 break; 1166 } 1167 /* fall through / 1168* default: 1169 delay = filedelay; 1170 } 1171 vn_syncer_add_to_worklist(newvp, delay); 1172 } 1173 bp->b_xflags \|= B_VNDIRTY; 1174 tbp = TAILQ_FIRST(listheadp); 1175 if (tbp == NULL \|\| 1176 bp->b_lblkno == 0 \|\| 1177 (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { 1178 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1179 ++reassignbufsortgood; 1180 } else if (bp->b_lblkno < 0) { 1181 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1182 ++reassignbufsortgood; 1183 } else if (reassignbufmethod == 1) { 1184 /* 1185 * New sorting algorithm, only handle sequential case, 1186 * otherwise guess. 1187 / 1188* if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && 1189 (tbp->b_xflags & B_VNDIRTY)) { 1190 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1191 ++reassignbufsortgood; 1192 } else { 1193 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1194 ++reassignbufsortbad; 1195 } 1196 } else { 1197 /* 1198 * Old sorting algorithm, scan queue and insert 1199 / 1200* struct buf ttbp; 1201* while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1202 (ttbp->b_lblkno < bp->b_lblkno)) { 1203 ++reassignbufloops; 1204 tbp = ttbp; 1205 } 1206 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1207 } 1208 } else { 1209 bp->b_xflags \|= B_VNCLEAN; 1210 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1211 if ((newvp->v_flag & VONWORKLST) && 1212 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1213 newvp->v_flag &= ~VONWORKLST; 1214 LIST_REMOVE(newvp, v_synclist); 1215 } 1216 } 1217 if (bp->b_vp != newvp) { 1218 bp->b_vp = newvp; 1219 vhold(bp->b_vp); 1220 } 1221 splx(s); 1222} 1223 1224/* 1225 * Create a vnode for a block device. 1226 * Used for mounting the root file system. 1227 / 1228int 1229bdevvp(dev, vpp) 1230* dev_t dev; 1231 struct vnode *vpp; 1232{ 1233* register struct vnode vp; 1234* struct vnode nvp; 1235* int error; 1236 1237 if (dev == NODEV) { 1238 vpp = NULLVP; 1239* return (ENXIO); 1240 } 1241 error = getnewvnode(VT_NON, (struct mount )0, spec_vnodeop_p, &nvp); 1242* if (error) { 1243 vpp = NULLVP; 1244* return (error); 1245 } 1246 vp = nvp; 1247 vp->v_type = VBLK; 1248 if ((nvp = checkalias(vp, dev2udev(dev), (struct mount )0)) != NULL) { 1249* vput(vp); 1250 vp = nvp; 1251 } 1252 vpp = vp; 1253* return (0); 1254} 1255 1256/* 1257 * Check to see if the new vnode represents a special device 1258 * for which we already have a vnode (either because of 1259 * bdevvp() or because of a different vnode representing 1260 * the same block device). If such an alias exists, deallocate 1261 * the existing contents and return the aliased vnode. The 1262 * caller is responsible for filling it with its new contents. 1263 / 1264struct vnode 1265checkalias(nvp, nvp_rdev, mp) 1266 register struct vnode nvp; 1267* udev_t nvp_rdev; 1268 struct mount mp; 1269{ 1270* struct proc p = curproc; / XXX / 1271* struct vnode vp; 1272* struct vnode *vpp; 1273* dev_t dev; 1274 1275 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1276 return (NULLVP); 1277 1278 dev = udev2dev(nvp_rdev, 2); 1279 1280 vpp = &speclisth[SPECHASH(dev)]; 1281loop: 1282 simple_lock(&spechash_slock); 1283 for (vp = vpp; vp; vp = vp->v_specnext) { 1284* if (dev != vp->v_rdev \|\| nvp->v_type != vp->v_type) 1285 continue; 1286 /* 1287 * Alias, but not in use, so flush it out. 1288 * Only alias active device nodes. 1289 * Not sure why we don't re-use this like we do below. 1290 / 1291* simple_lock(&vp->v_interlock); 1292 if (vp->v_usecount == 0) { 1293 simple_unlock(&spechash_slock); 1294 vgonel(vp, p); 1295 goto loop; 1296 } 1297 if (vget(vp, LK_EXCLUSIVE \| LK_INTERLOCK, p)) { 1298 /* 1299 * It dissappeared, and we may have slept. 1300 * Restart from the beginning 1301 / 1302* simple_unlock(&spechash_slock); 1303 goto loop; 1304 } 1305 break; 1306 } 1307 /* 1308 * It would be a lot clearer what is going on here if 1309 * this had been expressed as: 1310 * if ( vp && (vp->v_tag == VT_NULL)) 1311 * and the clauses had been swapped. 1312 / 1313* if (vp == NULL \|\| vp->v_tag != VT_NON) { 1314 struct specinfo sinfo; 1315* 1316 /* 1317 * Put the new vnode into the hash chain. 1318 * and if there was an alias, connect them. 1319 / 1320* MALLOC(sinfo, struct specinfo , 1321* sizeof(struct specinfo), M_VNODE, M_WAITOK); 1322 bzero(sinfo, sizeof(struct specinfo)); 1323 nvp->v_specinfo = sinfo; 1324 sinfo->si_rdev = dev; 1325 sinfo->si_hashchain = vpp; 1326 sinfo->si_specnext = vpp; 1327* sinfo->si_bsize_phys = DEV_BSIZE; 1328 sinfo->si_bsize_best = BLKDEV_IOSIZE; 1329 sinfo->si_bsize_max = MAXBSIZE; 1330 1331 /* 1332 * Ask the device to fix up specinfo. Typically the 1333 * si_bsize_* parameters may need fixing up. 1334 / 1335* 1336 if (nvp->v_type == VBLK) { 1337 if (bdevsw(dev) && bdevsw(dev)->d_parms) 1338 (bdevsw(dev)->d_parms)(dev, sinfo, DPARM_GET); 1339* } else if (nvp->v_type == VCHR) { 1340 if (devsw(dev) && devsw(dev)->d_parms) 1341 (devsw(dev)->d_parms)(dev, sinfo, DPARM_GET); 1342* } 1343 1344 simple_unlock(&spechash_slock); 1345 vpp = nvp; 1346* if (vp != NULLVP) { 1347 nvp->v_flag \|= VALIASED; 1348 vp->v_flag \|= VALIASED; 1349 vput(vp); 1350 } 1351 return (NULLVP); 1352 } 1353 /* 1354 * if ( vp && (vp->v_tag == VT_NULL)) 1355 * We have a vnode alias, but it is a trashed. 1356 * Make it look like it's newley allocated. (by getnewvnode()) 1357 * The caller should use this instead. 1358 / 1359* simple_unlock(&spechash_slock); 1360 VOP_UNLOCK(vp, 0, p); 1361 simple_lock(&vp->v_interlock); 1362 vclean(vp, 0, p); 1363 vp->v_op = nvp->v_op; 1364 vp->v_tag = nvp->v_tag; 1365 nvp->v_type = VNON; 1366 insmntque(vp, mp); 1367 return (vp); 1368} 1369 1370/* 1371 * Grab a particular vnode from the free list, increment its 1372 * reference count and lock it. The vnode lock bit is set if the 1373 * vnode is being eliminated in vgone. The process is awakened 1374 * when the transition is completed, and an error returned to 1375 * indicate that the vnode is no longer usable (possibly having 1376 * been changed to a new file system type). 1377 / 1378int 1379vget(vp, flags, p) 1380* register struct vnode vp; 1381* int flags; 1382 struct proc p; 1383{ 1384* int error; 1385 1386 /* 1387 * If the vnode is in the process of being cleaned out for 1388 * another use, we wait for the cleaning to finish and then 1389 * return failure. Cleaning is determined by checking that 1390 * the VXLOCK flag is set. 1391 / 1392* if ((flags & LK_INTERLOCK) == 0) { 1393 simple_lock(&vp->v_interlock); 1394 } 1395 if (vp->v_flag & VXLOCK) { 1396 vp->v_flag \|= VXWANT; 1397 simple_unlock(&vp->v_interlock); 1398 tsleep((caddr_t)vp, PINOD, "vget", 0); 1399 return (ENOENT); 1400 } 1401 1402 vp->v_usecount++; 1403 1404 if (VSHOULDBUSY(vp)) 1405 vbusy(vp); 1406 if (flags & LK_TYPE_MASK) { 1407 if ((error = vn_lock(vp, flags \| LK_INTERLOCK, p)) != 0) { 1408 /* 1409 * must expand vrele here because we do not want 1410 * to call VOP_INACTIVE if the reference count 1411 * drops back to zero since it was never really 1412 * active. We must remove it from the free list 1413 * before sleeping so that multiple processes do 1414 * not try to recycle it. 1415 / 1416* simple_lock(&vp->v_interlock); 1417 vp->v_usecount--; 1418 if (VSHOULDFREE(vp)) 1419 vfree(vp); 1420 simple_unlock(&vp->v_interlock); 1421 } 1422 return (error); 1423 } 1424 simple_unlock(&vp->v_interlock); 1425 return (0); 1426} 1427 1428void 1429vref(struct vnode vp) 1430{ 1431* simple_lock(&vp->v_interlock); 1432 vp->v_usecount++; 1433 simple_unlock(&vp->v_interlock); 1434} 1435 1436/* 1437 * Vnode put/release. 1438 * If count drops to zero, call inactive routine and return to freelist. 1439 / 1440void 1441vrele(vp) 1442* struct vnode vp; 1443{ 1444* struct proc p = curproc; / XXX / 1445* 1446 KASSERT(vp != NULL, ("vrele: null vp")); 1447 1448 simple_lock(&vp->v_interlock); 1449 1450 if (vp->v_usecount > 1) { 1451 1452 vp->v_usecount--; 1453 simple_unlock(&vp->v_interlock); 1454 1455 return; 1456 } 1457 1458 if (vp->v_usecount == 1) { 1459 1460 vp->v_usecount--; 1461 if (VSHOULDFREE(vp)) 1462 vfree(vp); 1463 /* 1464 * If we are doing a vput, the node is already locked, and we must 1465 * call VOP_INACTIVE with the node locked. So, in the case of 1466 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1467 / 1468* if (vn_lock(vp, LK_EXCLUSIVE \| LK_INTERLOCK, p) == 0) { 1469 VOP_INACTIVE(vp, p); 1470 } 1471 1472 } else { 1473#ifdef DIAGNOSTIC 1474 vprint("vrele: negative ref count", vp); 1475 simple_unlock(&vp->v_interlock); 1476#endif 1477 panic("vrele: negative ref cnt"); 1478 } 1479} 1480 1481void 1482vput(vp) 1483 struct vnode vp; 1484{ 1485* struct proc p = curproc; / XXX / 1486* 1487 KASSERT(vp != NULL, ("vput: null vp")); 1488 1489 simple_lock(&vp->v_interlock); 1490 1491 if (vp->v_usecount > 1) { 1492 1493 vp->v_usecount--; 1494 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1495 return; 1496 1497 } 1498 1499 if (vp->v_usecount == 1) { 1500 1501 vp->v_usecount--; 1502 if (VSHOULDFREE(vp)) 1503 vfree(vp); 1504 /* 1505 * If we are doing a vput, the node is already locked, and we must 1506 * call VOP_INACTIVE with the node locked. So, in the case of 1507 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1508 / 1509* simple_unlock(&vp->v_interlock); 1510 VOP_INACTIVE(vp, p); 1511 1512 } else { 1513#ifdef DIAGNOSTIC 1514 vprint("vput: negative ref count", vp); 1515#endif 1516 panic("vput: negative ref cnt"); 1517 } 1518} 1519 1520/* 1521 * Somebody doesn't want the vnode recycled. 1522 / 1523void 1524vhold(vp) 1525* register struct vnode vp; 1526{ 1527* int s; 1528 1529 s = splbio(); 1530 vp->v_holdcnt++; 1531 if (VSHOULDBUSY(vp)) 1532 vbusy(vp); 1533 splx(s); 1534} 1535 1536/* 1537 * One less who cares about this vnode. 1538 / 1539void 1540vdrop(vp) 1541* register struct vnode vp; 1542{ 1543* int s; 1544 1545 s = splbio(); 1546 if (vp->v_holdcnt <= 0) 1547 panic("vdrop: holdcnt"); 1548 vp->v_holdcnt--; 1549 if (VSHOULDFREE(vp)) 1550 vfree(vp); 1551 splx(s); 1552} 1553 1554/* 1555 * Remove any vnodes in the vnode table belonging to mount point mp. 1556 * 1557 * If MNT_NOFORCE is specified, there should not be any active ones, 1558 * return error if any are found (nb: this is a user error, not a 1559 * system error). If MNT_FORCE is specified, detach any active vnodes 1560 * that are found. 1561 / 1562#ifdef DIAGNOSTIC 1563static int busyprt = 0; / print out busy vnodes / 1564SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1565#endif 1566* 1567int 1568vflush(mp, skipvp, flags) 1569 struct mount mp; 1570* struct vnode skipvp; 1571* int flags; 1572{ 1573 struct proc p = curproc; / XXX / 1574* struct vnode vp, nvp; 1575 int busy = 0; 1576 1577 simple_lock(&mntvnode_slock); 1578loop: 1579 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1580 /* 1581 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1582 * Start over if it has (it won't be on the list anymore). 1583 / 1584* if (vp->v_mount != mp) 1585 goto loop; 1586 nvp = vp->v_mntvnodes.le_next; 1587 /* 1588 * Skip over a selected vnode. 1589 / 1590* if (vp == skipvp) 1591 continue; 1592 1593 simple_lock(&vp->v_interlock); 1594 /* 1595 * Skip over a vnodes marked VSYSTEM. 1596 / 1597* if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1598 simple_unlock(&vp->v_interlock); 1599 continue; 1600 } 1601 /* 1602 * If WRITECLOSE is set, only flush out regular file vnodes 1603 * open for writing. 1604 / 1605* if ((flags & WRITECLOSE) && 1606 (vp->v_writecount == 0 \|\| vp->v_type != VREG)) { 1607 simple_unlock(&vp->v_interlock); 1608 continue; 1609 } 1610 1611 /* 1612 * With v_usecount == 0, all we need to do is clear out the 1613 * vnode data structures and we are done. 1614 / 1615* if (vp->v_usecount == 0) { 1616 simple_unlock(&mntvnode_slock); 1617 vgonel(vp, p); 1618 simple_lock(&mntvnode_slock); 1619 continue; 1620 } 1621 1622 /* 1623 * If FORCECLOSE is set, forcibly close the vnode. For block 1624 * or character devices, revert to an anonymous device. For 1625 * all other files, just kill them. 1626 / 1627* if (flags & FORCECLOSE) { 1628 simple_unlock(&mntvnode_slock); 1629 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1630 vgonel(vp, p); 1631 } else { 1632 vclean(vp, 0, p); 1633 vp->v_op = spec_vnodeop_p; 1634 insmntque(vp, (struct mount ) 0); 1635* } 1636 simple_lock(&mntvnode_slock); 1637 continue; 1638 } 1639#ifdef DIAGNOSTIC 1640 if (busyprt) 1641 vprint("vflush: busy vnode", vp); 1642#endif 1643 simple_unlock(&vp->v_interlock); 1644 busy++; 1645 } 1646 simple_unlock(&mntvnode_slock); 1647 if (busy) 1648 return (EBUSY); 1649 return (0); 1650} 1651 1652/* 1653 * Disassociate the underlying file system from a vnode. 1654 / 1655static void 1656vclean(vp, flags, p) 1657* struct vnode vp; 1658* int flags; 1659 struct proc p; 1660{ 1661* int active; 1662 vm_object_t obj; 1663 1664 /* 1665 * Check to see if the vnode is in use. If so we have to reference it 1666 * before we clean it out so that its count cannot fall to zero and 1667 * generate a race against ourselves to recycle it. 1668 / 1669* if ((active = vp->v_usecount)) 1670 vp->v_usecount++; 1671 1672 /* 1673 * Prevent the vnode from being recycled or brought into use while we 1674 * clean it out. 1675 / 1676* if (vp->v_flag & VXLOCK) 1677 panic("vclean: deadlock"); 1678 vp->v_flag \|= VXLOCK; 1679 /* 1680 * Even if the count is zero, the VOP_INACTIVE routine may still 1681 * have the object locked while it cleans it out. The VOP_LOCK 1682 * ensures that the VOP_INACTIVE routine is done with its work. 1683 * For active vnodes, it ensures that no other activity can 1684 * occur while the underlying object is being cleaned out. 1685 / 1686* VOP_LOCK(vp, LK_DRAIN \| LK_INTERLOCK, p); 1687 1688 /* 1689 * Clean out any buffers associated with the vnode. 1690 / 1691* vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1692 if ((obj = vp->v_object) != NULL) { 1693 if (obj->ref_count == 0) { 1694 /* 1695 * This is a normal way of shutting down the object/vnode 1696 * association. 1697 / 1698* vm_object_terminate(obj); 1699 } else { 1700 /* 1701 * Woe to the process that tries to page now :-). 1702 / 1703* vm_pager_deallocate(obj); 1704 } 1705 } 1706 1707 /* 1708 * If purging an active vnode, it must be closed and 1709 * deactivated before being reclaimed. Note that the 1710 * VOP_INACTIVE will unlock the vnode. 1711 / 1712* if (active) { 1713 if (flags & DOCLOSE) 1714 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1715 VOP_INACTIVE(vp, p); 1716 } else { 1717 /* 1718 * Any other processes trying to obtain this lock must first 1719 * wait for VXLOCK to clear, then call the new lock operation. 1720 / 1721* VOP_UNLOCK(vp, 0, p); 1722 } 1723 /* 1724 * Reclaim the vnode. 1725 / 1726* if (VOP_RECLAIM(vp, p)) 1727 panic("vclean: cannot reclaim"); 1728 1729 if (active) 1730 vrele(vp); 1731 1732 cache_purge(vp); 1733 if (vp->v_vnlock) { 1734#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? / 1735#ifdef DIAGNOSTIC 1736* if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1737 vprint("vclean: lock not drained", vp); 1738#endif 1739#endif 1740 FREE(vp->v_vnlock, M_VNODE); 1741 vp->v_vnlock = NULL; 1742 } 1743 1744 if (VSHOULDFREE(vp)) 1745 vfree(vp); 1746 1747 /* 1748 * Done with purge, notify sleepers of the grim news. 1749 / 1750* vp->v_op = dead_vnodeop_p; 1751 vn_pollgone(vp); 1752 vp->v_tag = VT_NON; 1753 vp->v_flag &= ~VXLOCK; 1754 if (vp->v_flag & VXWANT) { 1755 vp->v_flag &= ~VXWANT; 1756 wakeup((caddr_t) vp); 1757 } 1758} 1759 1760/* 1761 * Eliminate all activity associated with the requested vnode 1762 * and with all vnodes aliased to the requested vnode. 1763 / 1764int 1765vop_revoke(ap) 1766* struct vop_revoke_args /* { 1767 struct vnode a_vp; 1768* int a_flags; 1769 } / ap; 1770{ 1771 struct vnode vp, vq; 1772 struct proc p = curproc; / XXX / 1773* 1774 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); 1775 1776 vp = ap->a_vp; 1777 simple_lock(&vp->v_interlock); 1778 1779 if (vp->v_flag & VALIASED) { 1780 /* 1781 * If a vgone (or vclean) is already in progress, 1782 * wait until it is done and return. 1783 / 1784* if (vp->v_flag & VXLOCK) { 1785 vp->v_flag \|= VXWANT; 1786 simple_unlock(&vp->v_interlock); 1787 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1788 return (0); 1789 } 1790 /* 1791 * Ensure that vp will not be vgone'd while we 1792 * are eliminating its aliases. 1793 / 1794* vp->v_flag \|= VXLOCK; 1795 simple_unlock(&vp->v_interlock); 1796 while (vp->v_flag & VALIASED) { 1797 simple_lock(&spechash_slock); 1798 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 1799* if (vq->v_rdev != vp->v_rdev \|\| 1800 vq->v_type != vp->v_type \|\| vp == vq) 1801 continue; 1802 simple_unlock(&spechash_slock); 1803 vgone(vq); 1804 break; 1805 } 1806 if (vq == NULLVP) { 1807 simple_unlock(&spechash_slock); 1808 } 1809 } 1810 /* 1811 * Remove the lock so that vgone below will 1812 * really eliminate the vnode after which time 1813 * vgone will awaken any sleepers. 1814 / 1815* simple_lock(&vp->v_interlock); 1816 vp->v_flag &= ~VXLOCK; 1817 if (vp->v_flag & VXWANT) { 1818 vp->v_flag &= ~VXWANT; 1819 wakeup(vp); 1820 } 1821 } 1822 vgonel(vp, p); 1823 return (0); 1824} 1825 1826/* 1827 * Recycle an unused vnode to the front of the free list. 1828 * Release the passed interlock if the vnode will be recycled. 1829 / 1830int 1831vrecycle(vp, inter_lkp, p) 1832* struct vnode vp; 1833* struct simplelock inter_lkp; 1834* struct proc p; 1835{ 1836* 1837 simple_lock(&vp->v_interlock); 1838 if (vp->v_usecount == 0) { 1839 if (inter_lkp) { 1840 simple_unlock(inter_lkp); 1841 } 1842 vgonel(vp, p); 1843 return (1); 1844 } 1845 simple_unlock(&vp->v_interlock); 1846 return (0); 1847} 1848 1849/* 1850 * Eliminate all activity associated with a vnode 1851 * in preparation for reuse. 1852 / 1853void 1854vgone(vp) 1855* register struct vnode vp; 1856{ 1857* struct proc p = curproc; / XXX / 1858* 1859 simple_lock(&vp->v_interlock); 1860 vgonel(vp, p); 1861} 1862 1863/* 1864 * vgone, with the vp interlock held. 1865 / 1866static void 1867vgonel(vp, p) 1868* struct vnode vp; 1869* struct proc p; 1870{ 1871* int s; 1872 struct vnode vq; 1873* struct vnode vx; 1874* 1875 /* 1876 * If a vgone (or vclean) is already in progress, 1877 * wait until it is done and return. 1878 / 1879* if (vp->v_flag & VXLOCK) { 1880 vp->v_flag \|= VXWANT; 1881 simple_unlock(&vp->v_interlock); 1882 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1883 return; 1884 } 1885 1886 /* 1887 * Clean out the filesystem specific data. 1888 / 1889* vclean(vp, DOCLOSE, p); 1890 simple_lock(&vp->v_interlock); 1891 1892 /* 1893 * Delete from old mount point vnode list, if on one. 1894 / 1895* if (vp->v_mount != NULL) 1896 insmntque(vp, (struct mount )0); 1897* /* 1898 * If special device, remove it from special device alias list 1899 * if it is on one. 1900 / 1901* if ((vp->v_type == VBLK \|\| vp->v_type == VCHR) && vp->v_specinfo != 0) { 1902 simple_lock(&spechash_slock); 1903 if (vp->v_hashchain == vp) { 1904* vp->v_hashchain = vp->v_specnext; 1905* } else { 1906 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 1907* if (vq->v_specnext != vp) 1908 continue; 1909 vq->v_specnext = vp->v_specnext; 1910 break; 1911 } 1912 if (vq == NULL) 1913 panic("missing bdev"); 1914 } 1915 if (vp->v_flag & VALIASED) { 1916 vx = NULL; 1917 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 1918* if (vq->v_rdev != vp->v_rdev \|\| 1919 vq->v_type != vp->v_type) 1920 continue; 1921 if (vx) 1922 break; 1923 vx = vq; 1924 } 1925 if (vx == NULL) 1926 panic("missing alias"); 1927 if (vq == NULL) 1928 vx->v_flag &= ~VALIASED; 1929 vp->v_flag &= ~VALIASED; 1930 } 1931 simple_unlock(&spechash_slock); 1932 FREE(vp->v_specinfo, M_VNODE); 1933 vp->v_specinfo = NULL; 1934 } 1935 1936 /* 1937 * If it is on the freelist and not already at the head, 1938 * move it to the head of the list. The test of the back 1939 * pointer and the reference count of zero is because 1940 * it will be removed from the free list by getnewvnode, 1941 * but will not have its reference count incremented until 1942 * after calling vgone. If the reference count were 1943 * incremented first, vgone would (incorrectly) try to 1944 * close the previous instance of the underlying object. 1945 / 1946* if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1947 s = splbio(); 1948 simple_lock(&vnode_free_list_slock); 1949 if (vp->v_flag & VFREE) { 1950 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1951 } else if (vp->v_flag & VTBFREE) { 1952 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1953 vp->v_flag &= ~VTBFREE; 1954 freevnodes++; 1955 } else 1956 freevnodes++; 1957 vp->v_flag \|= VFREE; 1958 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1959 simple_unlock(&vnode_free_list_slock); 1960 splx(s); 1961 } 1962 1963 vp->v_type = VBAD; 1964 simple_unlock(&vp->v_interlock); 1965} 1966 1967/* 1968 * Lookup a vnode by device number. 1969 / 1970int 1971vfinddev(dev, type, vpp) 1972* dev_t dev; 1973 enum vtype type; 1974 struct vnode *vpp; 1975{ 1976* register struct vnode vp; 1977* int rc = 0; 1978 1979 simple_lock(&spechash_slock); 1980 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1981 if (dev != vp->v_rdev \|\| type != vp->v_type) 1982 continue; 1983 vpp = vp; 1984* rc = 1; 1985 break; 1986 } 1987 simple_unlock(&spechash_slock); 1988 return (rc); 1989} 1990 1991/* 1992 * Calculate the total number of references to a special device. 1993 / 1994int 1995vcount(vp) 1996* register struct vnode vp; 1997{ 1998* struct vnode vq, vnext; 1999 int count; 2000 2001loop: 2002 if ((vp->v_flag & VALIASED) == 0) 2003 return (vp->v_usecount); 2004 simple_lock(&spechash_slock); 2005 for (count = 0, vq = vp->v_hashchain; vq; vq = vnext) { 2006* vnext = vq->v_specnext; 2007 if (vq->v_rdev != vp->v_rdev \|\| vq->v_type != vp->v_type) 2008 continue; 2009 /* 2010 * Alias, but not in use, so flush it out. 2011 / 2012* if (vq->v_usecount == 0 && vq != vp) { 2013 simple_unlock(&spechash_slock); 2014 vgone(vq); 2015 goto loop; 2016 } 2017 count += vq->v_usecount; 2018 } 2019 simple_unlock(&spechash_slock); 2020 return (count); 2021} 2022/* 2023 * Print out a description of a vnode. 2024 / 2025static char typename[] = 2026{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 2027 2028void 2029vprint(label, vp) 2030 char label; 2031* register struct vnode vp; 2032{ 2033* char buf[96]; 2034 2035 if (label != NULL) 2036 printf("%s: %p: ", label, (void )vp); 2037* else 2038 printf("%p: ", (void )vp); 2039* printf("type %s, usecount %d, writecount %d, refcount %d,", 2040 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 2041 vp->v_holdcnt); 2042 buf[0] = '\0'; 2043 if (vp->v_flag & VROOT) 2044 strcat(buf, "\|VROOT"); 2045 if (vp->v_flag & VTEXT) 2046 strcat(buf, "\|VTEXT"); 2047 if (vp->v_flag & VSYSTEM) 2048 strcat(buf, "\|VSYSTEM"); 2049 if (vp->v_flag & VXLOCK) 2050 strcat(buf, "\|VXLOCK"); 2051 if (vp->v_flag & VXWANT) 2052 strcat(buf, "\|VXWANT"); 2053 if (vp->v_flag & VBWAIT) 2054 strcat(buf, "\|VBWAIT"); 2055 if (vp->v_flag & VALIASED) 2056 strcat(buf, "\|VALIASED"); 2057 if (vp->v_flag & VDOOMED) 2058 strcat(buf, "\|VDOOMED"); 2059 if (vp->v_flag & VFREE) 2060 strcat(buf, "\|VFREE"); 2061 if (vp->v_flag & VOBJBUF) 2062 strcat(buf, "\|VOBJBUF"); 2063 if (buf[0] != '\0') 2064 printf(" flags (%s)", &buf[1]); 2065 if (vp->v_data == NULL) { 2066 printf("\n"); 2067 } else { 2068 printf("\n\t"); 2069 VOP_PRINT(vp); 2070 } 2071} 2072 2073#ifdef DDB 2074#include <ddb/ddb.h> 2075/* 2076 * List all of the locked vnodes in the system. 2077 * Called when debugging the kernel. 2078 / 2079DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 2080{ 2081* struct proc p = curproc; / XXX / 2082* struct mount mp, nmp; 2083 struct vnode vp; 2084* 2085 printf("Locked vnodes\n"); 2086 simple_lock(&mountlist_slock); 2087 for (mp = mountlist.cqh_first; mp != (void )&mountlist; mp = nmp) { 2088* if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2089 nmp = mp->mnt_list.cqe_next; 2090 continue; 2091 } 2092 for (vp = mp->mnt_vnodelist.lh_first; 2093 vp != NULL; 2094 vp = vp->v_mntvnodes.le_next) { 2095 if (VOP_ISLOCKED(vp)) 2096 vprint((char )0, vp); 2097* } 2098 simple_lock(&mountlist_slock); 2099 nmp = mp->mnt_list.cqe_next; 2100 vfs_unbusy(mp, p); 2101 } 2102 simple_unlock(&mountlist_slock); 2103} 2104#endif 2105 2106/* 2107 * Top level filesystem related information gathering. 2108 / 2109static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 2110* 2111static int 2112vfs_sysctl SYSCTL_HANDLER_ARGS 2113{ 2114 int name = (int )arg1 - 1; /* XXX / 2115* u_int namelen = arg2 + 1; /* XXX / 2116* struct vfsconf vfsp; 2117* 2118#if 1 \|\| defined(COMPAT_PRELITE2) 2119 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. / 2120* if (namelen == 1) 2121 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 2122#endif 2123 2124#ifdef notyet 2125 /* all sysctl names at this level are at least name and field / 2126* if (namelen < 2) 2127 return (ENOTDIR); /* overloaded / 2128* if (name[0] != VFS_GENERIC) { 2129 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2130 if (vfsp->vfc_typenum == name[0]) 2131 break; 2132 if (vfsp == NULL) 2133 return (EOPNOTSUPP); 2134 return ((vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2135* oldp, oldlenp, newp, newlen, p)); 2136 } 2137#endif 2138 switch (name[1]) { 2139 case VFS_MAXTYPENUM: 2140 if (namelen != 2) 2141 return (ENOTDIR); 2142 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2143 case VFS_CONF: 2144 if (namelen != 3) 2145 return (ENOTDIR); /* overloaded / 2146* for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2147 if (vfsp->vfc_typenum == name[2]) 2148 break; 2149 if (vfsp == NULL) 2150 return (EOPNOTSUPP); 2151 return (SYSCTL_OUT(req, vfsp, sizeof vfsp)); 2152* } 2153 return (EOPNOTSUPP); 2154} 2155 2156SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2157 "Generic filesystem"); 2158 2159#if 1 \|\| defined(COMPAT_PRELITE2) 2160 2161static int 2162sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2163{ 2164 int error; 2165 struct vfsconf vfsp; 2166* struct ovfsconf ovfs; 2167 2168 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2169 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag / 2170* strcpy(ovfs.vfc_name, vfsp->vfc_name); 2171 ovfs.vfc_index = vfsp->vfc_typenum; 2172 ovfs.vfc_refcount = vfsp->vfc_refcount; 2173 ovfs.vfc_flags = vfsp->vfc_flags; 2174 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2175 if (error) 2176 return error; 2177 } 2178 return 0; 2179} 2180 2181#endif /* 1 \|\| COMPAT_PRELITE2 / 2182* 2183#if 0 2184#define KINFO_VNODESLOP 10 2185/* 2186 * Dump vnode list (via sysctl). 2187 * Copyout address of vnode followed by vnode. 2188 / 2189/ ARGSUSED / 2190static int 2191sysctl_vnode SYSCTL_HANDLER_ARGS 2192{ 2193* struct proc p = curproc; / XXX / 2194* struct mount mp, nmp; 2195 struct vnode nvp, vp; 2196 int error; 2197 2198#define VPTRSZ sizeof (struct vnode ) 2199#define VNODESZ sizeof (struct vnode) 2200* 2201 req->lock = 0; 2202 if (!req->oldptr) /* Make an estimate / 2203* return (SYSCTL_OUT(req, 0, 2204 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2205 2206 simple_lock(&mountlist_slock); 2207 for (mp = mountlist.cqh_first; mp != (void )&mountlist; mp = nmp) { 2208* if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2209 nmp = mp->mnt_list.cqe_next; 2210 continue; 2211 } 2212again: 2213 simple_lock(&mntvnode_slock); 2214 for (vp = mp->mnt_vnodelist.lh_first; 2215 vp != NULL; 2216 vp = nvp) { 2217 /* 2218 * Check that the vp is still associated with 2219 * this filesystem. RACE: could have been 2220 * recycled onto the same filesystem. 2221 / 2222* if (vp->v_mount != mp) { 2223 simple_unlock(&mntvnode_slock); 2224 goto again; 2225 } 2226 nvp = vp->v_mntvnodes.le_next; 2227 simple_unlock(&mntvnode_slock); 2228 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) \|\| 2229 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2230 return (error); 2231 simple_lock(&mntvnode_slock); 2232 } 2233 simple_unlock(&mntvnode_slock); 2234 simple_lock(&mountlist_slock); 2235 nmp = mp->mnt_list.cqe_next; 2236 vfs_unbusy(mp, p); 2237 } 2238 simple_unlock(&mountlist_slock); 2239 2240 return (0); 2241} 2242#endif 2243 2244/* 2245 * XXX 2246 * Exporting the vnode list on large systems causes them to crash. 2247 * Exporting the vnode list on medium systems causes sysctl to coredump. 2248 / 2249#if 0 2250SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE\|CTLFLAG_RD, 2251* 0, 0, sysctl_vnode, "S,vnode", ""); 2252#endif 2253 2254/* 2255 * Check to see if a filesystem is mounted on a block device. 2256 / 2257int 2258vfs_mountedon(vp) 2259* struct vnode vp; 2260{ 2261* struct vnode vq; 2262* int error = 0; 2263 2264 if (vp->v_specmountpoint != NULL) 2265 return (EBUSY); 2266 if (vp->v_flag & VALIASED) { 2267 simple_lock(&spechash_slock); 2268 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 2269* if (vq->v_rdev != vp->v_rdev \|\| 2270 vq->v_type != vp->v_type) 2271 continue; 2272 if (vq->v_specmountpoint != NULL) { 2273 error = EBUSY; 2274 break; 2275 } 2276 } 2277 simple_unlock(&spechash_slock); 2278 } 2279 return (error); 2280} 2281 2282/* 2283 * Unmount all filesystems. The list is traversed in reverse order 2284 * of mounting to avoid dependencies. 2285 / 2286void 2287vfs_unmountall() 2288{ 2289* struct mount mp, nmp; 2290 struct proc p; 2291* int error; 2292 2293 if (curproc != NULL) 2294 p = curproc; 2295 else 2296 p = initproc; /* XXX XXX should this be proc0? / 2297* /* 2298 * Since this only runs when rebooting, it is not interlocked. 2299 / 2300* for (mp = mountlist.cqh_last; mp != (void )&mountlist; mp = nmp) { 2301* nmp = mp->mnt_list.cqe_prev; 2302 error = dounmount(mp, MNT_FORCE, p); 2303 if (error) { 2304 printf("unmount of %s failed (", 2305 mp->mnt_stat.f_mntonname); 2306 if (error == EBUSY) 2307 printf("BUSY)\n"); 2308 else 2309 printf("%d)\n", error); 2310 } 2311 } 2312} 2313 2314/* 2315 * Build hash lists of net addresses and hang them off the mount point. 2316 * Called by ufs_mount() to set up the lists of export addresses. 2317 / 2318static int 2319vfs_hang_addrlist(mp, nep, argp) 2320* struct mount mp; 2321* struct netexport nep; 2322* struct export_args argp; 2323{ 2324* register struct netcred np; 2325* register struct radix_node_head rnh; 2326* register int i; 2327 struct radix_node rn; 2328* struct sockaddr saddr, smask = 0; 2329 struct domain dom; 2330* int error; 2331 2332 if (argp->ex_addrlen == 0) { 2333 if (mp->mnt_flag & MNT_DEFEXPORTED) 2334 return (EPERM); 2335 np = &nep->ne_defexported; 2336 np->netc_exflags = argp->ex_flags; 2337 np->netc_anon = argp->ex_anon; 2338 np->netc_anon.cr_ref = 1; 2339 mp->mnt_flag \|= MNT_DEFEXPORTED; 2340 return (0); 2341 } 2342 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2343 np = (struct netcred ) malloc(i, M_NETADDR, M_WAITOK); 2344* bzero((caddr_t) np, i); 2345 saddr = (struct sockaddr ) (np + 1); 2346* if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2347 goto out; 2348 if (saddr->sa_len > argp->ex_addrlen) 2349 saddr->sa_len = argp->ex_addrlen; 2350 if (argp->ex_masklen) { 2351 smask = (struct sockaddr ) ((caddr_t) saddr + argp->ex_addrlen); 2352* error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2353 if (error) 2354 goto out; 2355 if (smask->sa_len > argp->ex_masklen) 2356 smask->sa_len = argp->ex_masklen; 2357 } 2358 i = saddr->sa_family; 2359 if ((rnh = nep->ne_rtable[i]) == 0) { 2360 /* 2361 * Seems silly to initialize every AF when most are not used, 2362 * do so on demand here 2363 / 2364* for (dom = domains; dom; dom = dom->dom_next) 2365 if (dom->dom_family == i && dom->dom_rtattach) { 2366 dom->dom_rtattach((void *) &nep->ne_rtable[i], 2367* dom->dom_rtoffset); 2368 break; 2369 } 2370 if ((rnh = nep->ne_rtable[i]) == 0) { 2371 error = ENOBUFS; 2372 goto out; 2373 } 2374 } 2375 rn = (rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2376* np->netc_rnodes); 2377 if (rn == 0 \|\| np != (struct netcred ) rn) { / already exists / 2378* error = EPERM; 2379 goto out; 2380 } 2381 np->netc_exflags = argp->ex_flags; 2382 np->netc_anon = argp->ex_anon; 2383 np->netc_anon.cr_ref = 1; 2384 return (0); 2385out: 2386 free(np, M_NETADDR); 2387 return (error); 2388} 2389 2390/* ARGSUSED / 2391static int 2392vfs_free_netcred(rn, w) 2393* struct radix_node rn; 2394* void w; 2395{ 2396* register struct radix_node_head rnh = (struct radix_node_head ) w; 2397 2398 (rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2399* free((caddr_t) rn, M_NETADDR); 2400 return (0); 2401} 2402 2403/* 2404 * Free the net address hash lists that are hanging off the mount points. 2405 / 2406static void 2407vfs_free_addrlist(nep) 2408* struct netexport nep; 2409{ 2410* register int i; 2411 register struct radix_node_head rnh; 2412* 2413 for (i = 0; i <= AF_MAX; i++) 2414 if ((rnh = nep->ne_rtable[i])) { 2415 (rnh->rnh_walktree) (rnh, vfs_free_netcred, 2416* (caddr_t) rnh); 2417 free((caddr_t) rnh, M_RTABLE); 2418 nep->ne_rtable[i] = 0; 2419 } 2420} 2421 2422int 2423vfs_export(mp, nep, argp) 2424 struct mount mp; 2425* struct netexport nep; 2426* struct export_args argp; 2427{ 2428* int error; 2429 2430 if (argp->ex_flags & MNT_DELEXPORT) { 2431 if (mp->mnt_flag & MNT_EXPUBLIC) { 2432 vfs_setpublicfs(NULL, NULL, NULL); 2433 mp->mnt_flag &= ~MNT_EXPUBLIC; 2434 } 2435 vfs_free_addrlist(nep); 2436 mp->mnt_flag &= ~(MNT_EXPORTED \| MNT_DEFEXPORTED); 2437 } 2438 if (argp->ex_flags & MNT_EXPORTED) { 2439 if (argp->ex_flags & MNT_EXPUBLIC) { 2440 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2441 return (error); 2442 mp->mnt_flag \|= MNT_EXPUBLIC; 2443 } 2444 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2445 return (error); 2446 mp->mnt_flag \|= MNT_EXPORTED; 2447 } 2448 return (0); 2449} 2450 2451 2452/* 2453 * Set the publicly exported filesystem (WebNFS). Currently, only 2454 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2455 / 2456int 2457vfs_setpublicfs(mp, nep, argp) 2458* struct mount mp; 2459* struct netexport nep; 2460* struct export_args argp; 2461{ 2462* int error; 2463 struct vnode rvp; 2464* char cp; 2465* 2466 /* 2467 * mp == NULL -> invalidate the current info, the FS is 2468 * no longer exported. May be called from either vfs_export 2469 * or unmount, so check if it hasn't already been done. 2470 / 2471* if (mp == NULL) { 2472 if (nfs_pub.np_valid) { 2473 nfs_pub.np_valid = 0; 2474 if (nfs_pub.np_index != NULL) { 2475 FREE(nfs_pub.np_index, M_TEMP); 2476 nfs_pub.np_index = NULL; 2477 } 2478 } 2479 return (0); 2480 } 2481 2482 /* 2483 * Only one allowed at a time. 2484 / 2485* if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2486 return (EBUSY); 2487 2488 /* 2489 * Get real filehandle for root of exported FS. 2490 / 2491* bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2492 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2493 2494 if ((error = VFS_ROOT(mp, &rvp))) 2495 return (error); 2496 2497 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2498 return (error); 2499 2500 vput(rvp); 2501 2502 /* 2503 * If an indexfile was specified, pull it in. 2504 / 2505* if (argp->ex_indexfile != NULL) { 2506 MALLOC(nfs_pub.np_index, char , MAXNAMLEN + 1, M_TEMP, 2507* M_WAITOK); 2508 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2509 MAXNAMLEN, (size_t )0); 2510* if (!error) { 2511 /* 2512 * Check for illegal filenames. 2513 / 2514* for (cp = nfs_pub.np_index; cp; cp++) { 2515* if (cp == '/') { 2516* error = EINVAL; 2517 break; 2518 } 2519 } 2520 } 2521 if (error) { 2522 FREE(nfs_pub.np_index, M_TEMP); 2523 return (error); 2524 } 2525 } 2526 2527 nfs_pub.np_mount = mp; 2528 nfs_pub.np_valid = 1; 2529 return (0); 2530} 2531 2532struct netcred * 2533vfs_export_lookup(mp, nep, nam) 2534 register struct mount mp; 2535* struct netexport nep; 2536* struct sockaddr nam; 2537{ 2538* register struct netcred np; 2539* register struct radix_node_head rnh; 2540* struct sockaddr saddr; 2541* 2542 np = NULL; 2543 if (mp->mnt_flag & MNT_EXPORTED) { 2544 /* 2545 * Lookup in the export list first. 2546 / 2547* if (nam != NULL) { 2548 saddr = nam; 2549 rnh = nep->ne_rtable[saddr->sa_family]; 2550 if (rnh != NULL) { 2551 np = (struct netcred ) 2552* (rnh->rnh_matchaddr)((caddr_t)saddr, 2553* rnh); 2554 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2555 np = NULL; 2556 } 2557 } 2558 /* 2559 * If no address match, use the default if it exists. 2560 / 2561* if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2562 np = &nep->ne_defexported; 2563 } 2564 return (np); 2565} 2566 2567/* 2568 * perform msync on all vnodes under a mount point 2569 * the mount point must be locked. 2570 / 2571void 2572vfs_msync(struct mount mp, int flags) { 2573 struct vnode vp, nvp; 2574 struct vm_object obj; 2575* int anyio, tries; 2576 2577 tries = 5; 2578loop: 2579 anyio = 0; 2580 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2581 2582 nvp = vp->v_mntvnodes.le_next; 2583 2584 if (vp->v_mount != mp) { 2585 goto loop; 2586 } 2587 2588 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? / 2589* continue; 2590 2591 if (flags != MNT_WAIT) { 2592 obj = vp->v_object; 2593 if (obj == NULL \|\| (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2594 continue; 2595 if (VOP_ISLOCKED(vp)) 2596 continue; 2597 } 2598 2599 simple_lock(&vp->v_interlock); 2600 if (vp->v_object && 2601 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2602 if (!vget(vp, 2603 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_RETRY \| LK_NOOBJ, curproc)) { 2604 if (vp->v_object) { 2605 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2606 anyio = 1; 2607 } 2608 vput(vp); 2609 } 2610 } else { 2611 simple_unlock(&vp->v_interlock); 2612 } 2613 } 2614 if (anyio && (--tries > 0)) 2615 goto loop; 2616} 2617 2618/* 2619 * Create the VM object needed for VMIO and mmap support. This 2620 * is done for all VREG files in the system. Some filesystems might 2621 * afford the additional metadata buffering capability of the 2622 * VMIO code by making the device node be VMIO mode also. 2623 * 2624 * vp must be locked when vfs_object_create is called. 2625 / 2626int 2627vfs_object_create(vp, p, cred) 2628* struct vnode vp; 2629* struct proc p; 2630* struct ucred cred; 2631{ 2632* struct vattr vat; 2633 vm_object_t object; 2634 int error = 0; 2635 2636 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) 2637 return 0; 2638 2639retry: 2640 if ((object = vp->v_object) == NULL) { 2641 if (vp->v_type == VREG) { 2642 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2643 goto retn; 2644 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2645 } else if (bdevsw(vp->v_rdev) != NULL) { 2646 /* 2647 * This simply allocates the biggest object possible 2648 * for a VBLK vnode. This should be fixed, but doesn't 2649 * cause any problems (yet). 2650 / 2651* object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2652 } else { 2653 goto retn; 2654 } 2655 /* 2656 * Dereference the reference we just created. This assumes 2657 * that the object is associated with the vp. 2658 / 2659* object->ref_count--; 2660 vp->v_usecount--; 2661 } else { 2662 if (object->flags & OBJ_DEAD) { 2663 VOP_UNLOCK(vp, 0, p); 2664 tsleep(object, PVM, "vodead", 0); 2665 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, p); 2666 goto retry; 2667 } 2668 } 2669 2670 KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); 2671 vp->v_flag \|= VOBJBUF; 2672 2673retn: 2674 return error; 2675} 2676 2677static void 2678vfree(vp) 2679 struct vnode vp; 2680{ 2681* int s; 2682 2683 s = splbio(); 2684 simple_lock(&vnode_free_list_slock); 2685 if (vp->v_flag & VTBFREE) { 2686 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2687 vp->v_flag &= ~VTBFREE; 2688 } 2689 if (vp->v_flag & VAGE) { 2690 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2691 } else { 2692 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2693 } 2694 freevnodes++; 2695 simple_unlock(&vnode_free_list_slock); 2696 vp->v_flag &= ~VAGE; 2697 vp->v_flag \|= VFREE; 2698 splx(s); 2699} 2700 2701void 2702vbusy(vp) 2703 struct vnode vp; 2704{ 2705* int s; 2706 2707 s = splbio(); 2708 simple_lock(&vnode_free_list_slock); 2709 if (vp->v_flag & VTBFREE) { 2710 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2711 vp->v_flag &= ~VTBFREE; 2712 } else { 2713 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2714 freevnodes--; 2715 } 2716 simple_unlock(&vnode_free_list_slock); 2717 vp->v_flag &= ~(VFREE\|VAGE); 2718 splx(s); 2719} 2720 2721/* 2722 * Record a process's interest in events which might happen to 2723 * a vnode. Because poll uses the historic select-style interface 2724 * internally, this routine serves as both the ``check for any 2725 * pending events'' and the ``record my interest in future events'' 2726 * functions. (These are done together, while the lock is held, 2727 * to avoid race conditions.) 2728 / 2729int 2730vn_pollrecord(vp, p, events) 2731* struct vnode vp; 2732* struct proc p; 2733* short events; 2734{ 2735 simple_lock(&vp->v_pollinfo.vpi_lock); 2736 if (vp->v_pollinfo.vpi_revents & events) { 2737 /* 2738 * This leaves events we are not interested 2739 * in available for the other process which 2740 * which presumably had requested them 2741 * (otherwise they would never have been 2742 * recorded). 2743 / 2744* events &= vp->v_pollinfo.vpi_revents; 2745 vp->v_pollinfo.vpi_revents &= ~events; 2746 2747 simple_unlock(&vp->v_pollinfo.vpi_lock); 2748 return events; 2749 } 2750 vp->v_pollinfo.vpi_events \|= events; 2751 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2752 simple_unlock(&vp->v_pollinfo.vpi_lock); 2753 return 0; 2754} 2755 2756/* 2757 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2758 * it is possible for us to miss an event due to race conditions, but 2759 * that condition is expected to be rare, so for the moment it is the 2760 * preferred interface. 2761 / 2762void 2763vn_pollevent(vp, events) 2764* struct vnode vp; 2765* short events; 2766{ 2767 simple_lock(&vp->v_pollinfo.vpi_lock); 2768 if (vp->v_pollinfo.vpi_events & events) { 2769 /* 2770 * We clear vpi_events so that we don't 2771 * call selwakeup() twice if two events are 2772 * posted before the polling process(es) is 2773 * awakened. This also ensures that we take at 2774 * most one selwakeup() if the polling process 2775 * is no longer interested. However, it does 2776 * mean that only one event can be noticed at 2777 * a time. (Perhaps we should only clear those 2778 * event bits which we note?) XXX 2779 / 2780* vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? / 2781* vp->v_pollinfo.vpi_revents \|= events; 2782 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2783 } 2784 simple_unlock(&vp->v_pollinfo.vpi_lock); 2785} 2786 2787/* 2788 * Wake up anyone polling on vp because it is being revoked. 2789 * This depends on dead_poll() returning POLLHUP for correct 2790 * behavior. 2791 / 2792void 2793vn_pollgone(vp) 2794* struct vnode vp; 2795{ 2796* simple_lock(&vp->v_pollinfo.vpi_lock); 2797 if (vp->v_pollinfo.vpi_events) { 2798 vp->v_pollinfo.vpi_events = 0; 2799 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2800 } 2801 simple_unlock(&vp->v_pollinfo.vpi_lock); 2802} 2803 2804 2805 2806/* 2807 * Routine to create and manage a filesystem syncer vnode. 2808 / 2809#define sync_close ((int () __P((struct vop_close_args )))nullop) 2810static int sync_fsync __P((struct vop_fsync_args )); 2811static int sync_inactive __P((struct vop_inactive_args )); 2812static int sync_reclaim __P((struct vop_reclaim_args )); 2813#define sync_lock ((int () __P((struct vop_lock_args )))vop_nolock) 2814#define sync_unlock ((int () __P((struct vop_unlock_args )))vop_nounlock) 2815static int sync_print __P((struct vop_print_args )); 2816#define sync_islocked ((int() __P((struct vop_islocked_args )))vop_noislocked) 2817* 2818static vop_t *sync_vnodeop_p; 2819static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2820* { &vop_default_desc, (vop_t ) vop_eopnotsupp }, 2821* { &vop_close_desc, (vop_t ) sync_close }, / close / 2822* { &vop_fsync_desc, (vop_t ) sync_fsync }, / fsync / 2823* { &vop_inactive_desc, (vop_t ) sync_inactive }, / inactive / 2824* { &vop_reclaim_desc, (vop_t ) sync_reclaim }, / reclaim / 2825* { &vop_lock_desc, (vop_t ) sync_lock }, / lock / 2826* { &vop_unlock_desc, (vop_t ) sync_unlock }, / unlock / 2827* { &vop_print_desc, (vop_t ) sync_print }, / print / 2828* { &vop_islocked_desc, (vop_t ) sync_islocked }, / islocked / 2829* { NULL, NULL } 2830}; 2831static struct vnodeopv_desc sync_vnodeop_opv_desc = 2832 { &sync_vnodeop_p, sync_vnodeop_entries }; 2833 2834VNODEOP_SET(sync_vnodeop_opv_desc); 2835 2836/* 2837 * Create a new filesystem syncer vnode for the specified mount point. 2838 / 2839int 2840vfs_allocate_syncvnode(mp) 2841* struct mount mp; 2842{ 2843* struct vnode vp; 2844* static long start, incr, next; 2845 int error; 2846 2847 /* Allocate a new vnode / 2848* if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2849 mp->mnt_syncer = NULL; 2850 return (error); 2851 } 2852 vp->v_type = VNON; 2853 /* 2854 * Place the vnode onto the syncer worklist. We attempt to 2855 * scatter them about on the list so that they will go off 2856 * at evenly distributed times even if all the filesystems 2857 * are mounted at once. 2858 / 2859* next += incr; 2860 if (next == 0 \|\| next > syncer_maxdelay) { 2861 start /= 2; 2862 incr /= 2; 2863 if (start == 0) { 2864 start = syncer_maxdelay / 2; 2865 incr = syncer_maxdelay; 2866 } 2867 next = start; 2868 } 2869 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2870 mp->mnt_syncer = vp; 2871 return (0); 2872} 2873 2874/* 2875 * Do a lazy sync of the filesystem. 2876 / 2877static int 2878sync_fsync(ap) 2879* struct vop_fsync_args /* { 2880 struct vnode a_vp; 2881* struct ucred a_cred; 2882* int a_waitfor; 2883 struct proc a_p; 2884* } / ap; 2885{ 2886 struct vnode syncvp = ap->a_vp; 2887* struct mount mp = syncvp->v_mount; 2888* struct proc p = ap->a_p; 2889* int asyncflag; 2890 2891 /* 2892 * We only need to do something if this is a lazy evaluation. 2893 / 2894* if (ap->a_waitfor != MNT_LAZY) 2895 return (0); 2896 2897 /* 2898 * Move ourselves to the back of the sync list. 2899 / 2900* vn_syncer_add_to_worklist(syncvp, syncdelay); 2901 2902 /* 2903 * Walk the list of vnodes pushing all that are dirty and 2904 * not already on the sync list. 2905 / 2906* simple_lock(&mountlist_slock); 2907 if (vfs_busy(mp, LK_EXCLUSIVE \| LK_NOWAIT, &mountlist_slock, p) != 0) { 2908 simple_unlock(&mountlist_slock); 2909 return (0); 2910 } 2911 asyncflag = mp->mnt_flag & MNT_ASYNC; 2912 mp->mnt_flag &= ~MNT_ASYNC; 2913 vfs_msync(mp, MNT_NOWAIT); 2914 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2915 if (asyncflag) 2916 mp->mnt_flag \|= MNT_ASYNC; 2917 vfs_unbusy(mp, p); 2918 return (0); 2919} 2920 2921/* 2922 * The syncer vnode is no referenced. 2923 / 2924static int 2925sync_inactive(ap) 2926* struct vop_inactive_args /* { 2927 struct vnode a_vp; 2928* struct proc a_p; 2929* } / ap; 2930{ 2931 2932 vgone(ap->a_vp); 2933 return (0); 2934} 2935 2936/* 2937 * The syncer vnode is no longer needed and is being decommissioned. 2938 * 2939 * Modifications to the worklist must be protected at splbio(). 2940 / 2941static int 2942sync_reclaim(ap) 2943* struct vop_reclaim_args /* { 2944 struct vnode a_vp; 2945* } / ap; 2946{ 2947 struct vnode vp = ap->a_vp; 2948* int s; 2949 2950 s = splbio(); 2951 vp->v_mount->mnt_syncer = NULL; 2952 if (vp->v_flag & VONWORKLST) { 2953 LIST_REMOVE(vp, v_synclist); 2954 vp->v_flag &= ~VONWORKLST; 2955 } 2956 splx(s); 2957 2958 return (0); 2959} 2960 2961/* 2962 * Print out a syncer vnode. 2963 / 2964static int 2965sync_print(ap) 2966* struct vop_print_args /* { 2967 struct vnode a_vp; 2968* } / ap; 2969{ 2970 struct vnode vp = ap->a_vp; 2971* 2972 printf("syncer vnode"); 2973 if (vp->v_vnlock != NULL) 2974 lockmgr_printinfo(vp->v_vnlock); 2975 printf("\n"); 2976 return (0); 2977}	357 mtype + (xxxfs_mntid << 16)); 358 } 359 } 360 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 361 simple_unlock(&mntid_slock); 362} 363 364/* 365 * Set vnode attributes to VNOVAL 366 / 367void 368vattr_null(vap) 369* register struct vattr vap; 370{ 371* 372 vap->va_type = VNON; 373 vap->va_size = VNOVAL; 374 vap->va_bytes = VNOVAL; 375 vap->va_mode = VNOVAL; 376 vap->va_nlink = VNOVAL; 377 vap->va_uid = VNOVAL; 378 vap->va_gid = VNOVAL; 379 vap->va_fsid = VNOVAL; 380 vap->va_fileid = VNOVAL; 381 vap->va_blocksize = VNOVAL; 382 vap->va_rdev = VNOVAL; 383 vap->va_atime.tv_sec = VNOVAL; 384 vap->va_atime.tv_nsec = VNOVAL; 385 vap->va_mtime.tv_sec = VNOVAL; 386 vap->va_mtime.tv_nsec = VNOVAL; 387 vap->va_ctime.tv_sec = VNOVAL; 388 vap->va_ctime.tv_nsec = VNOVAL; 389 vap->va_flags = VNOVAL; 390 vap->va_gen = VNOVAL; 391 vap->va_vaflags = 0; 392} 393 394/* 395 * Routines having to do with the management of the vnode table. 396 / 397extern vop_t dead_vnodeop_p; 398* 399/* 400 * Return the next vnode from the free list. 401 / 402int 403getnewvnode(tag, mp, vops, vpp) 404* enum vtagtype tag; 405 struct mount mp; 406* vop_t *vops; 407* struct vnode *vpp; 408{ 409* int s; 410 struct proc p = curproc; / XXX / 411* struct vnode vp, tvp, nvp; 412* vm_object_t object; 413 TAILQ_HEAD(freelst, vnode) vnode_tmp_list; 414 415 /* 416 * We take the least recently used vnode from the freelist 417 * if we can get it and it has no cached pages, and no 418 * namecache entries are relative to it. 419 * Otherwise we allocate a new vnode 420 / 421* 422 s = splbio(); 423 simple_lock(&vnode_free_list_slock); 424 TAILQ_INIT(&vnode_tmp_list); 425 426 for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { 427 nvp = TAILQ_NEXT(vp, v_freelist); 428 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 429 if (vp->v_flag & VAGE) { 430 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 431 } else { 432 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 433 } 434 vp->v_flag &= ~(VTBFREE\|VAGE); 435 vp->v_flag \|= VFREE; 436 if (vp->v_usecount) 437 panic("tobe free vnode isn't"); 438 freevnodes++; 439 } 440 441 if (wantfreevnodes && freevnodes < wantfreevnodes) { 442 vp = NULL; 443 } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { 444 /* 445 * XXX: this is only here to be backwards compatible 446 / 447* vp = NULL; 448 } else { 449 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { 450 nvp = TAILQ_NEXT(vp, v_freelist); 451 if (!simple_lock_try(&vp->v_interlock)) 452 continue; 453 if (vp->v_usecount) 454 panic("free vnode isn't"); 455 456 object = vp->v_object; 457 if (object && (object->resident_page_count \|\| object->ref_count)) { 458 printf("object inconsistant state: RPC: %d, RC: %d\n", 459 object->resident_page_count, object->ref_count); 460 /* Don't recycle if it's caching some pages / 461* TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 462 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); 463 continue; 464 } else if (LIST_FIRST(&vp->v_cache_src)) { 465 /* Don't recycle if active in the namecache / 466* simple_unlock(&vp->v_interlock); 467 continue; 468 } else { 469 break; 470 } 471 } 472 } 473 474 for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { 475 nvp = TAILQ_NEXT(tvp, v_freelist); 476 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); 477 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); 478 simple_unlock(&tvp->v_interlock); 479 } 480 481 if (vp) { 482 vp->v_flag \|= VDOOMED; 483 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 484 freevnodes--; 485 simple_unlock(&vnode_free_list_slock); 486 cache_purge(vp); 487 vp->v_lease = NULL; 488 if (vp->v_type != VBAD) { 489 vgonel(vp, p); 490 } else { 491 simple_unlock(&vp->v_interlock); 492 } 493 494#ifdef INVARIANTS 495 { 496 int s; 497 498 if (vp->v_data) 499 panic("cleaned vnode isn't"); 500 s = splbio(); 501 if (vp->v_numoutput) 502 panic("Clean vnode has pending I/O's"); 503 splx(s); 504 } 505#endif 506 vp->v_flag = 0; 507 vp->v_lastr = 0; 508 vp->v_lastw = 0; 509 vp->v_lasta = 0; 510 vp->v_cstart = 0; 511 vp->v_clen = 0; 512 vp->v_socket = 0; 513 vp->v_writecount = 0; /* XXX / 514* vp->v_maxio = 0; 515 } else { 516 simple_unlock(&vnode_free_list_slock); 517 vp = (struct vnode ) zalloc(vnode_zone); 518* bzero((char ) vp, sizeof vp); 519 simple_lock_init(&vp->v_interlock); 520 vp->v_dd = vp; 521 cache_purge(vp); 522 LIST_INIT(&vp->v_cache_src); 523 TAILQ_INIT(&vp->v_cache_dst); 524 numvnodes++; 525 } 526 527 TAILQ_INIT(&vp->v_cleanblkhd); 528 TAILQ_INIT(&vp->v_dirtyblkhd); 529 vp->v_type = VNON; 530 vp->v_tag = tag; 531 vp->v_op = vops; 532 insmntque(vp, mp); 533 vpp = vp; 534* vp->v_usecount = 1; 535 vp->v_data = 0; 536 splx(s); 537 538 vfs_object_create(vp, p, p->p_ucred); 539 return (0); 540} 541 542/* 543 * Move a vnode from one mount queue to another. 544 / 545static void 546insmntque(vp, mp) 547* register struct vnode vp; 548* register struct mount mp; 549{ 550* 551 simple_lock(&mntvnode_slock); 552 /* 553 * Delete from old mount point vnode list, if on one. 554 / 555* if (vp->v_mount != NULL) 556 LIST_REMOVE(vp, v_mntvnodes); 557 /* 558 * Insert into list of vnodes for the new mount point, if available. 559 / 560* if ((vp->v_mount = mp) == NULL) { 561 simple_unlock(&mntvnode_slock); 562 return; 563 } 564 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 565 simple_unlock(&mntvnode_slock); 566} 567 568/* 569 * Update outstanding I/O count and do wakeup if requested. 570 / 571void 572vwakeup(bp) 573* register struct buf bp; 574{ 575* register struct vnode vp; 576* 577 bp->b_flags &= ~B_WRITEINPROG; 578 if ((vp = bp->b_vp)) { 579 vp->v_numoutput--; 580 if (vp->v_numoutput < 0) 581 panic("vwakeup: neg numoutput"); 582 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { 583 vp->v_flag &= ~VBWAIT; 584 wakeup((caddr_t) &vp->v_numoutput); 585 } 586 } 587} 588 589/* 590 * Flush out and invalidate all buffers associated with a vnode. 591 * Called with the underlying object locked. 592 / 593int 594vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 595* register struct vnode vp; 596* int flags; 597 struct ucred cred; 598* struct proc p; 599* int slpflag, slptimeo; 600{ 601 register struct buf bp; 602* struct buf nbp, blist; 603 int s, error; 604 vm_object_t object; 605 606 if (flags & V_SAVE) { 607 s = splbio(); 608 while (vp->v_numoutput) { 609 vp->v_flag \|= VBWAIT; 610 error = tsleep((caddr_t)&vp->v_numoutput, 611 slpflag \| (PRIBIO + 1), "vinvlbuf", slptimeo); 612 if (error) { 613 splx(s); 614 return (error); 615 } 616 } 617 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 618 splx(s); 619 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 620 return (error); 621 s = splbio(); 622 if (vp->v_numoutput > 0 \|\| 623 !TAILQ_EMPTY(&vp->v_dirtyblkhd)) 624 panic("vinvalbuf: dirty bufs"); 625 } 626 splx(s); 627 } 628 s = splbio(); 629 for (;;) { 630 blist = TAILQ_FIRST(&vp->v_cleanblkhd); 631 if (!blist) 632 blist = TAILQ_FIRST(&vp->v_dirtyblkhd); 633 if (!blist) 634 break; 635 636 for (bp = blist; bp; bp = nbp) { 637 nbp = TAILQ_NEXT(bp, b_vnbufs); 638 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 639 error = BUF_TIMELOCK(bp, 640 LK_EXCLUSIVE \| LK_SLEEPFAIL, 641 "vinvalbuf", slpflag, slptimeo); 642 if (error == ENOLCK) 643 break; 644 splx(s); 645 return (error); 646 } 647 /* 648 * XXX Since there are no node locks for NFS, I 649 * believe there is a slight chance that a delayed 650 * write will occur while sleeping just above, so 651 * check for it. Note that vfs_bio_awrite expects 652 * buffers to reside on a queue, while VOP_BWRITE and 653 * brelse do not. 654 / 655* if (((bp->b_flags & (B_DELWRI \| B_INVAL)) == B_DELWRI) && 656 (flags & V_SAVE)) { 657 658 if (bp->b_vp == vp) { 659 if (bp->b_flags & B_CLUSTEROK) { 660 BUF_UNLOCK(bp); 661 vfs_bio_awrite(bp); 662 } else { 663 bremfree(bp); 664 bp->b_flags \|= B_ASYNC; 665 VOP_BWRITE(bp->b_vp, bp); 666 } 667 } else { 668 bremfree(bp); 669 (void) VOP_BWRITE(bp->b_vp, bp); 670 } 671 break; 672 } 673 bremfree(bp); 674 bp->b_flags \|= (B_INVAL \| B_NOCACHE \| B_RELBUF); 675 bp->b_flags &= ~B_ASYNC; 676 brelse(bp); 677 } 678 } 679 680 while (vp->v_numoutput > 0) { 681 vp->v_flag \|= VBWAIT; 682 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); 683 } 684 685 splx(s); 686 687 /* 688 * Destroy the copy in the VM cache, too. 689 / 690* simple_lock(&vp->v_interlock); 691 object = vp->v_object; 692 if (object != NULL) { 693 vm_object_page_remove(object, 0, 0, 694 (flags & V_SAVE) ? TRUE : FALSE); 695 } 696 simple_unlock(&vp->v_interlock); 697 698 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) \|\| !TAILQ_EMPTY(&vp->v_cleanblkhd)) 699 panic("vinvalbuf: flush failed"); 700 return (0); 701} 702 703/* 704 * Truncate a file's buffer and pages to a specified length. This 705 * is in lieu of the old vinvalbuf mechanism, which performed unneeded 706 * sync activity. 707 / 708int 709vtruncbuf(vp, cred, p, length, blksize) 710* register struct vnode vp; 711* struct ucred cred; 712* struct proc p; 713* off_t length; 714 int blksize; 715{ 716 register struct buf bp; 717* struct buf nbp; 718* int s, anyfreed; 719 int trunclbn; 720 721 /* 722 * Round up to the next lbn. 723 / 724* trunclbn = (length + blksize - 1) / blksize; 725 726 s = splbio(); 727restart: 728 anyfreed = 1; 729 for (;anyfreed;) { 730 anyfreed = 0; 731 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 732 nbp = TAILQ_NEXT(bp, b_vnbufs); 733 if (bp->b_lblkno >= trunclbn) { 734 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 735 BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL); 736 goto restart; 737 } else { 738 bremfree(bp); 739 bp->b_flags \|= (B_INVAL \| B_RELBUF); 740 bp->b_flags &= ~B_ASYNC; 741 brelse(bp); 742 anyfreed = 1; 743 } 744 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)\|\| 745 (nbp->b_vp != vp) \|\| 746 (nbp->b_flags & B_DELWRI))) { 747 goto restart; 748 } 749 } 750 } 751 752 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 753 nbp = TAILQ_NEXT(bp, b_vnbufs); 754 if (bp->b_lblkno >= trunclbn) { 755 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 756 BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL); 757 goto restart; 758 } else { 759 bremfree(bp); 760 bp->b_flags \|= (B_INVAL \| B_RELBUF); 761 bp->b_flags &= ~B_ASYNC; 762 brelse(bp); 763 anyfreed = 1; 764 } 765 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)\|\| 766 (nbp->b_vp != vp) \|\| 767 (nbp->b_flags & B_DELWRI) == 0)) { 768 goto restart; 769 } 770 } 771 } 772 } 773 774 if (length > 0) { 775restartsync: 776 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 777 nbp = TAILQ_NEXT(bp, b_vnbufs); 778 if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { 779 if (BUF_LOCK(bp, LK_EXCLUSIVE \| LK_NOWAIT)) { 780 BUF_LOCK(bp, LK_EXCLUSIVE\|LK_SLEEPFAIL); 781 goto restart; 782 } else { 783 bremfree(bp); 784 if (bp->b_vp == vp) { 785 bp->b_flags \|= B_ASYNC; 786 } else { 787 bp->b_flags &= ~B_ASYNC; 788 } 789 VOP_BWRITE(bp->b_vp, bp); 790 } 791 goto restartsync; 792 } 793 794 } 795 } 796 797 while (vp->v_numoutput > 0) { 798 vp->v_flag \|= VBWAIT; 799 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); 800 } 801 802 splx(s); 803 804 vnode_pager_setsize(vp, length); 805 806 return (0); 807} 808 809/* 810 * Associate a buffer with a vnode. 811 / 812void 813bgetvp(vp, bp) 814* register struct vnode vp; 815* register struct buf bp; 816{ 817* int s; 818 819 KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); 820 821 vhold(vp); 822 bp->b_vp = vp; 823 if (vp->v_type == VBLK \|\| vp->v_type == VCHR) 824 bp->b_dev = vp->v_rdev; 825 else 826 bp->b_dev = NODEV; 827 /* 828 * Insert onto list for new vnode. 829 / 830* s = splbio(); 831 bp->b_xflags \|= B_VNCLEAN; 832 bp->b_xflags &= ~B_VNDIRTY; 833 TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); 834 splx(s); 835} 836 837/* 838 * Disassociate a buffer from a vnode. 839 / 840void 841brelvp(bp) 842* register struct buf bp; 843{ 844* struct vnode vp; 845* struct buflists listheadp; 846* int s; 847 848 KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); 849 850 /* 851 * Delete from old vnode list, if on one. 852 / 853* vp = bp->b_vp; 854 s = splbio(); 855 if (bp->b_xflags & (B_VNDIRTY\|B_VNCLEAN)) { 856 if (bp->b_xflags & B_VNDIRTY) 857 listheadp = &vp->v_dirtyblkhd; 858 else 859 listheadp = &vp->v_cleanblkhd; 860 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 861 bp->b_xflags &= ~(B_VNDIRTY\|B_VNCLEAN); 862 } 863 if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { 864 vp->v_flag &= ~VONWORKLST; 865 LIST_REMOVE(vp, v_synclist); 866 } 867 splx(s); 868 bp->b_vp = (struct vnode ) 0; 869* vdrop(vp); 870} 871 872/* 873 * The workitem queue. 874 * 875 * It is useful to delay writes of file data and filesystem metadata 876 * for tens of seconds so that quickly created and deleted files need 877 * not waste disk bandwidth being created and removed. To realize this, 878 * we append vnodes to a "workitem" queue. When running with a soft 879 * updates implementation, most pending metadata dependencies should 880 * not wait for more than a few seconds. Thus, mounted on block devices 881 * are delayed only about a half the time that file data is delayed. 882 * Similarly, directory updates are more critical, so are only delayed 883 * about a third the time that file data is delayed. Thus, there are 884 * SYNCER_MAXDELAY queues that are processed round-robin at a rate of 885 * one each second (driven off the filesystem syncer process). The 886 * syncer_delayno variable indicates the next queue that is to be processed. 887 * Items that need to be processed soon are placed in this queue: 888 * 889 * syncer_workitem_pending[syncer_delayno] 890 * 891 * A delay of fifteen seconds is done by placing the request fifteen 892 * entries later in the queue: 893 * 894 * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] 895 * 896 / 897* 898/* 899 * Add an item to the syncer work queue. 900 / 901static void 902vn_syncer_add_to_worklist(struct vnode vp, int delay) 903{ 904 int s, slot; 905 906 s = splbio(); 907 908 if (vp->v_flag & VONWORKLST) { 909 LIST_REMOVE(vp, v_synclist); 910 } 911 912 if (delay > syncer_maxdelay - 2) 913 delay = syncer_maxdelay - 2; 914 slot = (syncer_delayno + delay) & syncer_mask; 915 916 LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); 917 vp->v_flag \|= VONWORKLST; 918 splx(s); 919} 920 921struct proc updateproc; 922static void sched_sync __P((void)); 923static struct kproc_desc up_kp = { 924* "syncer", 925 sched_sync, 926 &updateproc 927}; 928SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) 929 930/* 931 * System filesystem synchronizer daemon. 932 / 933void 934sched_sync(void) 935{ 936* struct synclist slp; 937* struct vnode vp; 938* long starttime; 939 int s; 940 struct proc p = updateproc; 941* 942 p->p_flag \|= P_BUFEXHAUST; 943 944 for (;;) { 945 starttime = time_second; 946 947 /* 948 * Push files whose dirty time has expired. Be careful 949 * of interrupt race on slp queue. 950 / 951* s = splbio(); 952 slp = &syncer_workitem_pending[syncer_delayno]; 953 syncer_delayno += 1; 954 if (syncer_delayno == syncer_maxdelay) 955 syncer_delayno = 0; 956 splx(s); 957 958 while ((vp = LIST_FIRST(slp)) != NULL) { 959 if (VOP_ISLOCKED(vp) == 0) { 960 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, p); 961 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); 962 VOP_UNLOCK(vp, 0, p); 963 } 964 s = splbio(); 965 if (LIST_FIRST(slp) == vp) { 966 /* 967 * Note: v_tag VT_VFS vps can remain on the 968 * worklist too with no dirty blocks, but 969 * since sync_fsync() moves it to a different 970 * slot we are safe. 971 / 972* if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && 973 vp->v_type != VBLK) 974 panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); 975 /* 976 * Put us back on the worklist. The worklist 977 * routine will remove us from our current 978 * position and then add us back in at a later 979 * position. 980 / 981* vn_syncer_add_to_worklist(vp, syncdelay); 982 } 983 splx(s); 984 } 985 986 /* 987 * Do soft update processing. 988 / 989* if (bioops.io_sync) 990 (bioops.io_sync)(NULL); 991* 992 /* 993 * The variable rushjob allows the kernel to speed up the 994 * processing of the filesystem syncer process. A rushjob 995 * value of N tells the filesystem syncer to process the next 996 * N seconds worth of work on its queue ASAP. Currently rushjob 997 * is used by the soft update code to speed up the filesystem 998 * syncer process when the incore state is getting so far 999 * ahead of the disk that the kernel memory pool is being 1000 * threatened with exhaustion. 1001 / 1002* if (rushjob > 0) { 1003 rushjob -= 1; 1004 continue; 1005 } 1006 /* 1007 * If it has taken us less than a second to process the 1008 * current work, then wait. Otherwise start right over 1009 * again. We can still lose time if any single round 1010 * takes more than two seconds, but it does not really 1011 * matter as we are just trying to generally pace the 1012 * filesystem activity. 1013 / 1014* if (time_second == starttime) 1015 tsleep(&lbolt, PPAUSE, "syncer", 0); 1016 } 1017} 1018 1019/* 1020 * Request the syncer daemon to speed up its work. 1021 * We never push it to speed up more than half of its 1022 * normal turn time, otherwise it could take over the cpu. 1023 / 1024int 1025speedup_syncer() 1026{ 1027* int s; 1028 1029 s = splhigh(); 1030 if (updateproc->p_wchan == &lbolt) 1031 setrunnable(updateproc); 1032 splx(s); 1033 if (rushjob < syncdelay / 2) { 1034 rushjob += 1; 1035 stat_rush_requests += 1; 1036 return (1); 1037 } 1038 return(0); 1039} 1040 1041/* 1042 * Associate a p-buffer with a vnode. 1043 * 1044 * Also sets B_PAGING flag to indicate that vnode is not fully associated 1045 * with the buffer. i.e. the bp has not been linked into the vnode or 1046 * ref-counted. 1047 / 1048void 1049pbgetvp(vp, bp) 1050* register struct vnode vp; 1051* register struct buf bp; 1052{ 1053* 1054 KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); 1055 1056 bp->b_vp = vp; 1057 bp->b_flags \|= B_PAGING; 1058 if (vp->v_type == VBLK \|\| vp->v_type == VCHR) 1059 bp->b_dev = vp->v_rdev; 1060 else 1061 bp->b_dev = NODEV; 1062} 1063 1064/* 1065 * Disassociate a p-buffer from a vnode. 1066 / 1067void 1068pbrelvp(bp) 1069* register struct buf bp; 1070{ 1071* 1072 KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); 1073 1074#if !defined(MAX_PERF) 1075 /* XXX REMOVE ME / 1076* if (bp->b_vnbufs.tqe_next != NULL) { 1077 panic( 1078 "relpbuf(): b_vp was probably reassignbuf()d %p %x", 1079 bp, 1080 (int)bp->b_flags 1081 ); 1082 } 1083#endif 1084 bp->b_vp = (struct vnode ) 0; 1085* bp->b_flags &= ~B_PAGING; 1086} 1087 1088void 1089pbreassignbuf(bp, newvp) 1090 struct buf bp; 1091* struct vnode newvp; 1092{ 1093#if !defined(MAX_PERF) 1094* if ((bp->b_flags & B_PAGING) == 0) { 1095 panic( 1096 "pbreassignbuf() on non phys bp %p", 1097 bp 1098 ); 1099 } 1100#endif 1101 bp->b_vp = newvp; 1102} 1103 1104/* 1105 * Reassign a buffer from one vnode to another. 1106 * Used to assign file specific control information 1107 * (indirect blocks) to the vnode to which they belong. 1108 / 1109void 1110reassignbuf(bp, newvp) 1111* register struct buf bp; 1112* register struct vnode newvp; 1113{ 1114* struct buflists listheadp; 1115* int delay; 1116 int s; 1117 1118 if (newvp == NULL) { 1119 printf("reassignbuf: NULL"); 1120 return; 1121 } 1122 ++reassignbufcalls; 1123 1124#if !defined(MAX_PERF) 1125 /* 1126 * B_PAGING flagged buffers cannot be reassigned because their vp 1127 * is not fully linked in. 1128 / 1129* if (bp->b_flags & B_PAGING) 1130 panic("cannot reassign paging buffer"); 1131#endif 1132 1133 s = splbio(); 1134 /* 1135 * Delete from old vnode list, if on one. 1136 / 1137* if (bp->b_xflags & (B_VNDIRTY\|B_VNCLEAN)) { 1138 if (bp->b_xflags & B_VNDIRTY) 1139 listheadp = &bp->b_vp->v_dirtyblkhd; 1140 else 1141 listheadp = &bp->b_vp->v_cleanblkhd; 1142 TAILQ_REMOVE(listheadp, bp, b_vnbufs); 1143 bp->b_xflags &= ~(B_VNDIRTY\|B_VNCLEAN); 1144 if (bp->b_vp != newvp) { 1145 vdrop(bp->b_vp); 1146 bp->b_vp = NULL; /* for clarification / 1147* } 1148 } 1149 /* 1150 * If dirty, put on list of dirty buffers; otherwise insert onto list 1151 * of clean buffers. 1152 / 1153* if (bp->b_flags & B_DELWRI) { 1154 struct buf tbp; 1155* 1156 listheadp = &newvp->v_dirtyblkhd; 1157 if ((newvp->v_flag & VONWORKLST) == 0) { 1158 switch (newvp->v_type) { 1159 case VDIR: 1160 delay = dirdelay; 1161 break; 1162 case VBLK: 1163 if (newvp->v_specmountpoint != NULL) { 1164 delay = metadelay; 1165 break; 1166 } 1167 /* fall through / 1168* default: 1169 delay = filedelay; 1170 } 1171 vn_syncer_add_to_worklist(newvp, delay); 1172 } 1173 bp->b_xflags \|= B_VNDIRTY; 1174 tbp = TAILQ_FIRST(listheadp); 1175 if (tbp == NULL \|\| 1176 bp->b_lblkno == 0 \|\| 1177 (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { 1178 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1179 ++reassignbufsortgood; 1180 } else if (bp->b_lblkno < 0) { 1181 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); 1182 ++reassignbufsortgood; 1183 } else if (reassignbufmethod == 1) { 1184 /* 1185 * New sorting algorithm, only handle sequential case, 1186 * otherwise guess. 1187 / 1188* if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && 1189 (tbp->b_xflags & B_VNDIRTY)) { 1190 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1191 ++reassignbufsortgood; 1192 } else { 1193 TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); 1194 ++reassignbufsortbad; 1195 } 1196 } else { 1197 /* 1198 * Old sorting algorithm, scan queue and insert 1199 / 1200* struct buf ttbp; 1201* while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && 1202 (ttbp->b_lblkno < bp->b_lblkno)) { 1203 ++reassignbufloops; 1204 tbp = ttbp; 1205 } 1206 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); 1207 } 1208 } else { 1209 bp->b_xflags \|= B_VNCLEAN; 1210 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); 1211 if ((newvp->v_flag & VONWORKLST) && 1212 TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { 1213 newvp->v_flag &= ~VONWORKLST; 1214 LIST_REMOVE(newvp, v_synclist); 1215 } 1216 } 1217 if (bp->b_vp != newvp) { 1218 bp->b_vp = newvp; 1219 vhold(bp->b_vp); 1220 } 1221 splx(s); 1222} 1223 1224/* 1225 * Create a vnode for a block device. 1226 * Used for mounting the root file system. 1227 / 1228int 1229bdevvp(dev, vpp) 1230* dev_t dev; 1231 struct vnode *vpp; 1232{ 1233* register struct vnode vp; 1234* struct vnode nvp; 1235* int error; 1236 1237 if (dev == NODEV) { 1238 vpp = NULLVP; 1239* return (ENXIO); 1240 } 1241 error = getnewvnode(VT_NON, (struct mount )0, spec_vnodeop_p, &nvp); 1242* if (error) { 1243 vpp = NULLVP; 1244* return (error); 1245 } 1246 vp = nvp; 1247 vp->v_type = VBLK; 1248 if ((nvp = checkalias(vp, dev2udev(dev), (struct mount )0)) != NULL) { 1249* vput(vp); 1250 vp = nvp; 1251 } 1252 vpp = vp; 1253* return (0); 1254} 1255 1256/* 1257 * Check to see if the new vnode represents a special device 1258 * for which we already have a vnode (either because of 1259 * bdevvp() or because of a different vnode representing 1260 * the same block device). If such an alias exists, deallocate 1261 * the existing contents and return the aliased vnode. The 1262 * caller is responsible for filling it with its new contents. 1263 / 1264struct vnode 1265checkalias(nvp, nvp_rdev, mp) 1266 register struct vnode nvp; 1267* udev_t nvp_rdev; 1268 struct mount mp; 1269{ 1270* struct proc p = curproc; / XXX / 1271* struct vnode vp; 1272* struct vnode *vpp; 1273* dev_t dev; 1274 1275 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1276 return (NULLVP); 1277 1278 dev = udev2dev(nvp_rdev, 2); 1279 1280 vpp = &speclisth[SPECHASH(dev)]; 1281loop: 1282 simple_lock(&spechash_slock); 1283 for (vp = vpp; vp; vp = vp->v_specnext) { 1284* if (dev != vp->v_rdev \|\| nvp->v_type != vp->v_type) 1285 continue; 1286 /* 1287 * Alias, but not in use, so flush it out. 1288 * Only alias active device nodes. 1289 * Not sure why we don't re-use this like we do below. 1290 / 1291* simple_lock(&vp->v_interlock); 1292 if (vp->v_usecount == 0) { 1293 simple_unlock(&spechash_slock); 1294 vgonel(vp, p); 1295 goto loop; 1296 } 1297 if (vget(vp, LK_EXCLUSIVE \| LK_INTERLOCK, p)) { 1298 /* 1299 * It dissappeared, and we may have slept. 1300 * Restart from the beginning 1301 / 1302* simple_unlock(&spechash_slock); 1303 goto loop; 1304 } 1305 break; 1306 } 1307 /* 1308 * It would be a lot clearer what is going on here if 1309 * this had been expressed as: 1310 * if ( vp && (vp->v_tag == VT_NULL)) 1311 * and the clauses had been swapped. 1312 / 1313* if (vp == NULL \|\| vp->v_tag != VT_NON) { 1314 struct specinfo sinfo; 1315* 1316 /* 1317 * Put the new vnode into the hash chain. 1318 * and if there was an alias, connect them. 1319 / 1320* MALLOC(sinfo, struct specinfo , 1321* sizeof(struct specinfo), M_VNODE, M_WAITOK); 1322 bzero(sinfo, sizeof(struct specinfo)); 1323 nvp->v_specinfo = sinfo; 1324 sinfo->si_rdev = dev; 1325 sinfo->si_hashchain = vpp; 1326 sinfo->si_specnext = vpp; 1327* sinfo->si_bsize_phys = DEV_BSIZE; 1328 sinfo->si_bsize_best = BLKDEV_IOSIZE; 1329 sinfo->si_bsize_max = MAXBSIZE; 1330 1331 /* 1332 * Ask the device to fix up specinfo. Typically the 1333 * si_bsize_* parameters may need fixing up. 1334 / 1335* 1336 if (nvp->v_type == VBLK) { 1337 if (bdevsw(dev) && bdevsw(dev)->d_parms) 1338 (bdevsw(dev)->d_parms)(dev, sinfo, DPARM_GET); 1339* } else if (nvp->v_type == VCHR) { 1340 if (devsw(dev) && devsw(dev)->d_parms) 1341 (devsw(dev)->d_parms)(dev, sinfo, DPARM_GET); 1342* } 1343 1344 simple_unlock(&spechash_slock); 1345 vpp = nvp; 1346* if (vp != NULLVP) { 1347 nvp->v_flag \|= VALIASED; 1348 vp->v_flag \|= VALIASED; 1349 vput(vp); 1350 } 1351 return (NULLVP); 1352 } 1353 /* 1354 * if ( vp && (vp->v_tag == VT_NULL)) 1355 * We have a vnode alias, but it is a trashed. 1356 * Make it look like it's newley allocated. (by getnewvnode()) 1357 * The caller should use this instead. 1358 / 1359* simple_unlock(&spechash_slock); 1360 VOP_UNLOCK(vp, 0, p); 1361 simple_lock(&vp->v_interlock); 1362 vclean(vp, 0, p); 1363 vp->v_op = nvp->v_op; 1364 vp->v_tag = nvp->v_tag; 1365 nvp->v_type = VNON; 1366 insmntque(vp, mp); 1367 return (vp); 1368} 1369 1370/* 1371 * Grab a particular vnode from the free list, increment its 1372 * reference count and lock it. The vnode lock bit is set if the 1373 * vnode is being eliminated in vgone. The process is awakened 1374 * when the transition is completed, and an error returned to 1375 * indicate that the vnode is no longer usable (possibly having 1376 * been changed to a new file system type). 1377 / 1378int 1379vget(vp, flags, p) 1380* register struct vnode vp; 1381* int flags; 1382 struct proc p; 1383{ 1384* int error; 1385 1386 /* 1387 * If the vnode is in the process of being cleaned out for 1388 * another use, we wait for the cleaning to finish and then 1389 * return failure. Cleaning is determined by checking that 1390 * the VXLOCK flag is set. 1391 / 1392* if ((flags & LK_INTERLOCK) == 0) { 1393 simple_lock(&vp->v_interlock); 1394 } 1395 if (vp->v_flag & VXLOCK) { 1396 vp->v_flag \|= VXWANT; 1397 simple_unlock(&vp->v_interlock); 1398 tsleep((caddr_t)vp, PINOD, "vget", 0); 1399 return (ENOENT); 1400 } 1401 1402 vp->v_usecount++; 1403 1404 if (VSHOULDBUSY(vp)) 1405 vbusy(vp); 1406 if (flags & LK_TYPE_MASK) { 1407 if ((error = vn_lock(vp, flags \| LK_INTERLOCK, p)) != 0) { 1408 /* 1409 * must expand vrele here because we do not want 1410 * to call VOP_INACTIVE if the reference count 1411 * drops back to zero since it was never really 1412 * active. We must remove it from the free list 1413 * before sleeping so that multiple processes do 1414 * not try to recycle it. 1415 / 1416* simple_lock(&vp->v_interlock); 1417 vp->v_usecount--; 1418 if (VSHOULDFREE(vp)) 1419 vfree(vp); 1420 simple_unlock(&vp->v_interlock); 1421 } 1422 return (error); 1423 } 1424 simple_unlock(&vp->v_interlock); 1425 return (0); 1426} 1427 1428void 1429vref(struct vnode vp) 1430{ 1431* simple_lock(&vp->v_interlock); 1432 vp->v_usecount++; 1433 simple_unlock(&vp->v_interlock); 1434} 1435 1436/* 1437 * Vnode put/release. 1438 * If count drops to zero, call inactive routine and return to freelist. 1439 / 1440void 1441vrele(vp) 1442* struct vnode vp; 1443{ 1444* struct proc p = curproc; / XXX / 1445* 1446 KASSERT(vp != NULL, ("vrele: null vp")); 1447 1448 simple_lock(&vp->v_interlock); 1449 1450 if (vp->v_usecount > 1) { 1451 1452 vp->v_usecount--; 1453 simple_unlock(&vp->v_interlock); 1454 1455 return; 1456 } 1457 1458 if (vp->v_usecount == 1) { 1459 1460 vp->v_usecount--; 1461 if (VSHOULDFREE(vp)) 1462 vfree(vp); 1463 /* 1464 * If we are doing a vput, the node is already locked, and we must 1465 * call VOP_INACTIVE with the node locked. So, in the case of 1466 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1467 / 1468* if (vn_lock(vp, LK_EXCLUSIVE \| LK_INTERLOCK, p) == 0) { 1469 VOP_INACTIVE(vp, p); 1470 } 1471 1472 } else { 1473#ifdef DIAGNOSTIC 1474 vprint("vrele: negative ref count", vp); 1475 simple_unlock(&vp->v_interlock); 1476#endif 1477 panic("vrele: negative ref cnt"); 1478 } 1479} 1480 1481void 1482vput(vp) 1483 struct vnode vp; 1484{ 1485* struct proc p = curproc; / XXX / 1486* 1487 KASSERT(vp != NULL, ("vput: null vp")); 1488 1489 simple_lock(&vp->v_interlock); 1490 1491 if (vp->v_usecount > 1) { 1492 1493 vp->v_usecount--; 1494 VOP_UNLOCK(vp, LK_INTERLOCK, p); 1495 return; 1496 1497 } 1498 1499 if (vp->v_usecount == 1) { 1500 1501 vp->v_usecount--; 1502 if (VSHOULDFREE(vp)) 1503 vfree(vp); 1504 /* 1505 * If we are doing a vput, the node is already locked, and we must 1506 * call VOP_INACTIVE with the node locked. So, in the case of 1507 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. 1508 / 1509* simple_unlock(&vp->v_interlock); 1510 VOP_INACTIVE(vp, p); 1511 1512 } else { 1513#ifdef DIAGNOSTIC 1514 vprint("vput: negative ref count", vp); 1515#endif 1516 panic("vput: negative ref cnt"); 1517 } 1518} 1519 1520/* 1521 * Somebody doesn't want the vnode recycled. 1522 / 1523void 1524vhold(vp) 1525* register struct vnode vp; 1526{ 1527* int s; 1528 1529 s = splbio(); 1530 vp->v_holdcnt++; 1531 if (VSHOULDBUSY(vp)) 1532 vbusy(vp); 1533 splx(s); 1534} 1535 1536/* 1537 * One less who cares about this vnode. 1538 / 1539void 1540vdrop(vp) 1541* register struct vnode vp; 1542{ 1543* int s; 1544 1545 s = splbio(); 1546 if (vp->v_holdcnt <= 0) 1547 panic("vdrop: holdcnt"); 1548 vp->v_holdcnt--; 1549 if (VSHOULDFREE(vp)) 1550 vfree(vp); 1551 splx(s); 1552} 1553 1554/* 1555 * Remove any vnodes in the vnode table belonging to mount point mp. 1556 * 1557 * If MNT_NOFORCE is specified, there should not be any active ones, 1558 * return error if any are found (nb: this is a user error, not a 1559 * system error). If MNT_FORCE is specified, detach any active vnodes 1560 * that are found. 1561 / 1562#ifdef DIAGNOSTIC 1563static int busyprt = 0; / print out busy vnodes / 1564SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); 1565#endif 1566* 1567int 1568vflush(mp, skipvp, flags) 1569 struct mount mp; 1570* struct vnode skipvp; 1571* int flags; 1572{ 1573 struct proc p = curproc; / XXX / 1574* struct vnode vp, nvp; 1575 int busy = 0; 1576 1577 simple_lock(&mntvnode_slock); 1578loop: 1579 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 1580 /* 1581 * Make sure this vnode wasn't reclaimed in getnewvnode(). 1582 * Start over if it has (it won't be on the list anymore). 1583 / 1584* if (vp->v_mount != mp) 1585 goto loop; 1586 nvp = vp->v_mntvnodes.le_next; 1587 /* 1588 * Skip over a selected vnode. 1589 / 1590* if (vp == skipvp) 1591 continue; 1592 1593 simple_lock(&vp->v_interlock); 1594 /* 1595 * Skip over a vnodes marked VSYSTEM. 1596 / 1597* if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1598 simple_unlock(&vp->v_interlock); 1599 continue; 1600 } 1601 /* 1602 * If WRITECLOSE is set, only flush out regular file vnodes 1603 * open for writing. 1604 / 1605* if ((flags & WRITECLOSE) && 1606 (vp->v_writecount == 0 \|\| vp->v_type != VREG)) { 1607 simple_unlock(&vp->v_interlock); 1608 continue; 1609 } 1610 1611 /* 1612 * With v_usecount == 0, all we need to do is clear out the 1613 * vnode data structures and we are done. 1614 / 1615* if (vp->v_usecount == 0) { 1616 simple_unlock(&mntvnode_slock); 1617 vgonel(vp, p); 1618 simple_lock(&mntvnode_slock); 1619 continue; 1620 } 1621 1622 /* 1623 * If FORCECLOSE is set, forcibly close the vnode. For block 1624 * or character devices, revert to an anonymous device. For 1625 * all other files, just kill them. 1626 / 1627* if (flags & FORCECLOSE) { 1628 simple_unlock(&mntvnode_slock); 1629 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1630 vgonel(vp, p); 1631 } else { 1632 vclean(vp, 0, p); 1633 vp->v_op = spec_vnodeop_p; 1634 insmntque(vp, (struct mount ) 0); 1635* } 1636 simple_lock(&mntvnode_slock); 1637 continue; 1638 } 1639#ifdef DIAGNOSTIC 1640 if (busyprt) 1641 vprint("vflush: busy vnode", vp); 1642#endif 1643 simple_unlock(&vp->v_interlock); 1644 busy++; 1645 } 1646 simple_unlock(&mntvnode_slock); 1647 if (busy) 1648 return (EBUSY); 1649 return (0); 1650} 1651 1652/* 1653 * Disassociate the underlying file system from a vnode. 1654 / 1655static void 1656vclean(vp, flags, p) 1657* struct vnode vp; 1658* int flags; 1659 struct proc p; 1660{ 1661* int active; 1662 vm_object_t obj; 1663 1664 /* 1665 * Check to see if the vnode is in use. If so we have to reference it 1666 * before we clean it out so that its count cannot fall to zero and 1667 * generate a race against ourselves to recycle it. 1668 / 1669* if ((active = vp->v_usecount)) 1670 vp->v_usecount++; 1671 1672 /* 1673 * Prevent the vnode from being recycled or brought into use while we 1674 * clean it out. 1675 / 1676* if (vp->v_flag & VXLOCK) 1677 panic("vclean: deadlock"); 1678 vp->v_flag \|= VXLOCK; 1679 /* 1680 * Even if the count is zero, the VOP_INACTIVE routine may still 1681 * have the object locked while it cleans it out. The VOP_LOCK 1682 * ensures that the VOP_INACTIVE routine is done with its work. 1683 * For active vnodes, it ensures that no other activity can 1684 * occur while the underlying object is being cleaned out. 1685 / 1686* VOP_LOCK(vp, LK_DRAIN \| LK_INTERLOCK, p); 1687 1688 /* 1689 * Clean out any buffers associated with the vnode. 1690 / 1691* vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1692 if ((obj = vp->v_object) != NULL) { 1693 if (obj->ref_count == 0) { 1694 /* 1695 * This is a normal way of shutting down the object/vnode 1696 * association. 1697 / 1698* vm_object_terminate(obj); 1699 } else { 1700 /* 1701 * Woe to the process that tries to page now :-). 1702 / 1703* vm_pager_deallocate(obj); 1704 } 1705 } 1706 1707 /* 1708 * If purging an active vnode, it must be closed and 1709 * deactivated before being reclaimed. Note that the 1710 * VOP_INACTIVE will unlock the vnode. 1711 / 1712* if (active) { 1713 if (flags & DOCLOSE) 1714 VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); 1715 VOP_INACTIVE(vp, p); 1716 } else { 1717 /* 1718 * Any other processes trying to obtain this lock must first 1719 * wait for VXLOCK to clear, then call the new lock operation. 1720 / 1721* VOP_UNLOCK(vp, 0, p); 1722 } 1723 /* 1724 * Reclaim the vnode. 1725 / 1726* if (VOP_RECLAIM(vp, p)) 1727 panic("vclean: cannot reclaim"); 1728 1729 if (active) 1730 vrele(vp); 1731 1732 cache_purge(vp); 1733 if (vp->v_vnlock) { 1734#if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? / 1735#ifdef DIAGNOSTIC 1736* if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) 1737 vprint("vclean: lock not drained", vp); 1738#endif 1739#endif 1740 FREE(vp->v_vnlock, M_VNODE); 1741 vp->v_vnlock = NULL; 1742 } 1743 1744 if (VSHOULDFREE(vp)) 1745 vfree(vp); 1746 1747 /* 1748 * Done with purge, notify sleepers of the grim news. 1749 / 1750* vp->v_op = dead_vnodeop_p; 1751 vn_pollgone(vp); 1752 vp->v_tag = VT_NON; 1753 vp->v_flag &= ~VXLOCK; 1754 if (vp->v_flag & VXWANT) { 1755 vp->v_flag &= ~VXWANT; 1756 wakeup((caddr_t) vp); 1757 } 1758} 1759 1760/* 1761 * Eliminate all activity associated with the requested vnode 1762 * and with all vnodes aliased to the requested vnode. 1763 / 1764int 1765vop_revoke(ap) 1766* struct vop_revoke_args /* { 1767 struct vnode a_vp; 1768* int a_flags; 1769 } / ap; 1770{ 1771 struct vnode vp, vq; 1772 struct proc p = curproc; / XXX / 1773* 1774 KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); 1775 1776 vp = ap->a_vp; 1777 simple_lock(&vp->v_interlock); 1778 1779 if (vp->v_flag & VALIASED) { 1780 /* 1781 * If a vgone (or vclean) is already in progress, 1782 * wait until it is done and return. 1783 / 1784* if (vp->v_flag & VXLOCK) { 1785 vp->v_flag \|= VXWANT; 1786 simple_unlock(&vp->v_interlock); 1787 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1788 return (0); 1789 } 1790 /* 1791 * Ensure that vp will not be vgone'd while we 1792 * are eliminating its aliases. 1793 / 1794* vp->v_flag \|= VXLOCK; 1795 simple_unlock(&vp->v_interlock); 1796 while (vp->v_flag & VALIASED) { 1797 simple_lock(&spechash_slock); 1798 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 1799* if (vq->v_rdev != vp->v_rdev \|\| 1800 vq->v_type != vp->v_type \|\| vp == vq) 1801 continue; 1802 simple_unlock(&spechash_slock); 1803 vgone(vq); 1804 break; 1805 } 1806 if (vq == NULLVP) { 1807 simple_unlock(&spechash_slock); 1808 } 1809 } 1810 /* 1811 * Remove the lock so that vgone below will 1812 * really eliminate the vnode after which time 1813 * vgone will awaken any sleepers. 1814 / 1815* simple_lock(&vp->v_interlock); 1816 vp->v_flag &= ~VXLOCK; 1817 if (vp->v_flag & VXWANT) { 1818 vp->v_flag &= ~VXWANT; 1819 wakeup(vp); 1820 } 1821 } 1822 vgonel(vp, p); 1823 return (0); 1824} 1825 1826/* 1827 * Recycle an unused vnode to the front of the free list. 1828 * Release the passed interlock if the vnode will be recycled. 1829 / 1830int 1831vrecycle(vp, inter_lkp, p) 1832* struct vnode vp; 1833* struct simplelock inter_lkp; 1834* struct proc p; 1835{ 1836* 1837 simple_lock(&vp->v_interlock); 1838 if (vp->v_usecount == 0) { 1839 if (inter_lkp) { 1840 simple_unlock(inter_lkp); 1841 } 1842 vgonel(vp, p); 1843 return (1); 1844 } 1845 simple_unlock(&vp->v_interlock); 1846 return (0); 1847} 1848 1849/* 1850 * Eliminate all activity associated with a vnode 1851 * in preparation for reuse. 1852 / 1853void 1854vgone(vp) 1855* register struct vnode vp; 1856{ 1857* struct proc p = curproc; / XXX / 1858* 1859 simple_lock(&vp->v_interlock); 1860 vgonel(vp, p); 1861} 1862 1863/* 1864 * vgone, with the vp interlock held. 1865 / 1866static void 1867vgonel(vp, p) 1868* struct vnode vp; 1869* struct proc p; 1870{ 1871* int s; 1872 struct vnode vq; 1873* struct vnode vx; 1874* 1875 /* 1876 * If a vgone (or vclean) is already in progress, 1877 * wait until it is done and return. 1878 / 1879* if (vp->v_flag & VXLOCK) { 1880 vp->v_flag \|= VXWANT; 1881 simple_unlock(&vp->v_interlock); 1882 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1883 return; 1884 } 1885 1886 /* 1887 * Clean out the filesystem specific data. 1888 / 1889* vclean(vp, DOCLOSE, p); 1890 simple_lock(&vp->v_interlock); 1891 1892 /* 1893 * Delete from old mount point vnode list, if on one. 1894 / 1895* if (vp->v_mount != NULL) 1896 insmntque(vp, (struct mount )0); 1897* /* 1898 * If special device, remove it from special device alias list 1899 * if it is on one. 1900 / 1901* if ((vp->v_type == VBLK \|\| vp->v_type == VCHR) && vp->v_specinfo != 0) { 1902 simple_lock(&spechash_slock); 1903 if (vp->v_hashchain == vp) { 1904* vp->v_hashchain = vp->v_specnext; 1905* } else { 1906 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 1907* if (vq->v_specnext != vp) 1908 continue; 1909 vq->v_specnext = vp->v_specnext; 1910 break; 1911 } 1912 if (vq == NULL) 1913 panic("missing bdev"); 1914 } 1915 if (vp->v_flag & VALIASED) { 1916 vx = NULL; 1917 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 1918* if (vq->v_rdev != vp->v_rdev \|\| 1919 vq->v_type != vp->v_type) 1920 continue; 1921 if (vx) 1922 break; 1923 vx = vq; 1924 } 1925 if (vx == NULL) 1926 panic("missing alias"); 1927 if (vq == NULL) 1928 vx->v_flag &= ~VALIASED; 1929 vp->v_flag &= ~VALIASED; 1930 } 1931 simple_unlock(&spechash_slock); 1932 FREE(vp->v_specinfo, M_VNODE); 1933 vp->v_specinfo = NULL; 1934 } 1935 1936 /* 1937 * If it is on the freelist and not already at the head, 1938 * move it to the head of the list. The test of the back 1939 * pointer and the reference count of zero is because 1940 * it will be removed from the free list by getnewvnode, 1941 * but will not have its reference count incremented until 1942 * after calling vgone. If the reference count were 1943 * incremented first, vgone would (incorrectly) try to 1944 * close the previous instance of the underlying object. 1945 / 1946* if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { 1947 s = splbio(); 1948 simple_lock(&vnode_free_list_slock); 1949 if (vp->v_flag & VFREE) { 1950 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1951 } else if (vp->v_flag & VTBFREE) { 1952 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 1953 vp->v_flag &= ~VTBFREE; 1954 freevnodes++; 1955 } else 1956 freevnodes++; 1957 vp->v_flag \|= VFREE; 1958 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1959 simple_unlock(&vnode_free_list_slock); 1960 splx(s); 1961 } 1962 1963 vp->v_type = VBAD; 1964 simple_unlock(&vp->v_interlock); 1965} 1966 1967/* 1968 * Lookup a vnode by device number. 1969 / 1970int 1971vfinddev(dev, type, vpp) 1972* dev_t dev; 1973 enum vtype type; 1974 struct vnode *vpp; 1975{ 1976* register struct vnode vp; 1977* int rc = 0; 1978 1979 simple_lock(&spechash_slock); 1980 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1981 if (dev != vp->v_rdev \|\| type != vp->v_type) 1982 continue; 1983 vpp = vp; 1984* rc = 1; 1985 break; 1986 } 1987 simple_unlock(&spechash_slock); 1988 return (rc); 1989} 1990 1991/* 1992 * Calculate the total number of references to a special device. 1993 / 1994int 1995vcount(vp) 1996* register struct vnode vp; 1997{ 1998* struct vnode vq, vnext; 1999 int count; 2000 2001loop: 2002 if ((vp->v_flag & VALIASED) == 0) 2003 return (vp->v_usecount); 2004 simple_lock(&spechash_slock); 2005 for (count = 0, vq = vp->v_hashchain; vq; vq = vnext) { 2006* vnext = vq->v_specnext; 2007 if (vq->v_rdev != vp->v_rdev \|\| vq->v_type != vp->v_type) 2008 continue; 2009 /* 2010 * Alias, but not in use, so flush it out. 2011 / 2012* if (vq->v_usecount == 0 && vq != vp) { 2013 simple_unlock(&spechash_slock); 2014 vgone(vq); 2015 goto loop; 2016 } 2017 count += vq->v_usecount; 2018 } 2019 simple_unlock(&spechash_slock); 2020 return (count); 2021} 2022/* 2023 * Print out a description of a vnode. 2024 / 2025static char typename[] = 2026{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; 2027 2028void 2029vprint(label, vp) 2030 char label; 2031* register struct vnode vp; 2032{ 2033* char buf[96]; 2034 2035 if (label != NULL) 2036 printf("%s: %p: ", label, (void )vp); 2037* else 2038 printf("%p: ", (void )vp); 2039* printf("type %s, usecount %d, writecount %d, refcount %d,", 2040 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 2041 vp->v_holdcnt); 2042 buf[0] = '\0'; 2043 if (vp->v_flag & VROOT) 2044 strcat(buf, "\|VROOT"); 2045 if (vp->v_flag & VTEXT) 2046 strcat(buf, "\|VTEXT"); 2047 if (vp->v_flag & VSYSTEM) 2048 strcat(buf, "\|VSYSTEM"); 2049 if (vp->v_flag & VXLOCK) 2050 strcat(buf, "\|VXLOCK"); 2051 if (vp->v_flag & VXWANT) 2052 strcat(buf, "\|VXWANT"); 2053 if (vp->v_flag & VBWAIT) 2054 strcat(buf, "\|VBWAIT"); 2055 if (vp->v_flag & VALIASED) 2056 strcat(buf, "\|VALIASED"); 2057 if (vp->v_flag & VDOOMED) 2058 strcat(buf, "\|VDOOMED"); 2059 if (vp->v_flag & VFREE) 2060 strcat(buf, "\|VFREE"); 2061 if (vp->v_flag & VOBJBUF) 2062 strcat(buf, "\|VOBJBUF"); 2063 if (buf[0] != '\0') 2064 printf(" flags (%s)", &buf[1]); 2065 if (vp->v_data == NULL) { 2066 printf("\n"); 2067 } else { 2068 printf("\n\t"); 2069 VOP_PRINT(vp); 2070 } 2071} 2072 2073#ifdef DDB 2074#include <ddb/ddb.h> 2075/* 2076 * List all of the locked vnodes in the system. 2077 * Called when debugging the kernel. 2078 / 2079DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) 2080{ 2081* struct proc p = curproc; / XXX / 2082* struct mount mp, nmp; 2083 struct vnode vp; 2084* 2085 printf("Locked vnodes\n"); 2086 simple_lock(&mountlist_slock); 2087 for (mp = mountlist.cqh_first; mp != (void )&mountlist; mp = nmp) { 2088* if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2089 nmp = mp->mnt_list.cqe_next; 2090 continue; 2091 } 2092 for (vp = mp->mnt_vnodelist.lh_first; 2093 vp != NULL; 2094 vp = vp->v_mntvnodes.le_next) { 2095 if (VOP_ISLOCKED(vp)) 2096 vprint((char )0, vp); 2097* } 2098 simple_lock(&mountlist_slock); 2099 nmp = mp->mnt_list.cqe_next; 2100 vfs_unbusy(mp, p); 2101 } 2102 simple_unlock(&mountlist_slock); 2103} 2104#endif 2105 2106/* 2107 * Top level filesystem related information gathering. 2108 / 2109static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); 2110* 2111static int 2112vfs_sysctl SYSCTL_HANDLER_ARGS 2113{ 2114 int name = (int )arg1 - 1; /* XXX / 2115* u_int namelen = arg2 + 1; /* XXX / 2116* struct vfsconf vfsp; 2117* 2118#if 1 \|\| defined(COMPAT_PRELITE2) 2119 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. / 2120* if (namelen == 1) 2121 return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); 2122#endif 2123 2124#ifdef notyet 2125 /* all sysctl names at this level are at least name and field / 2126* if (namelen < 2) 2127 return (ENOTDIR); /* overloaded / 2128* if (name[0] != VFS_GENERIC) { 2129 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2130 if (vfsp->vfc_typenum == name[0]) 2131 break; 2132 if (vfsp == NULL) 2133 return (EOPNOTSUPP); 2134 return ((vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 2135* oldp, oldlenp, newp, newlen, p)); 2136 } 2137#endif 2138 switch (name[1]) { 2139 case VFS_MAXTYPENUM: 2140 if (namelen != 2) 2141 return (ENOTDIR); 2142 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); 2143 case VFS_CONF: 2144 if (namelen != 3) 2145 return (ENOTDIR); /* overloaded / 2146* for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 2147 if (vfsp->vfc_typenum == name[2]) 2148 break; 2149 if (vfsp == NULL) 2150 return (EOPNOTSUPP); 2151 return (SYSCTL_OUT(req, vfsp, sizeof vfsp)); 2152* } 2153 return (EOPNOTSUPP); 2154} 2155 2156SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, 2157 "Generic filesystem"); 2158 2159#if 1 \|\| defined(COMPAT_PRELITE2) 2160 2161static int 2162sysctl_ovfs_conf SYSCTL_HANDLER_ARGS 2163{ 2164 int error; 2165 struct vfsconf vfsp; 2166* struct ovfsconf ovfs; 2167 2168 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 2169 ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag / 2170* strcpy(ovfs.vfc_name, vfsp->vfc_name); 2171 ovfs.vfc_index = vfsp->vfc_typenum; 2172 ovfs.vfc_refcount = vfsp->vfc_refcount; 2173 ovfs.vfc_flags = vfsp->vfc_flags; 2174 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); 2175 if (error) 2176 return error; 2177 } 2178 return 0; 2179} 2180 2181#endif /* 1 \|\| COMPAT_PRELITE2 / 2182* 2183#if 0 2184#define KINFO_VNODESLOP 10 2185/* 2186 * Dump vnode list (via sysctl). 2187 * Copyout address of vnode followed by vnode. 2188 / 2189/ ARGSUSED / 2190static int 2191sysctl_vnode SYSCTL_HANDLER_ARGS 2192{ 2193* struct proc p = curproc; / XXX / 2194* struct mount mp, nmp; 2195 struct vnode nvp, vp; 2196 int error; 2197 2198#define VPTRSZ sizeof (struct vnode ) 2199#define VNODESZ sizeof (struct vnode) 2200* 2201 req->lock = 0; 2202 if (!req->oldptr) /* Make an estimate / 2203* return (SYSCTL_OUT(req, 0, 2204 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); 2205 2206 simple_lock(&mountlist_slock); 2207 for (mp = mountlist.cqh_first; mp != (void )&mountlist; mp = nmp) { 2208* if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 2209 nmp = mp->mnt_list.cqe_next; 2210 continue; 2211 } 2212again: 2213 simple_lock(&mntvnode_slock); 2214 for (vp = mp->mnt_vnodelist.lh_first; 2215 vp != NULL; 2216 vp = nvp) { 2217 /* 2218 * Check that the vp is still associated with 2219 * this filesystem. RACE: could have been 2220 * recycled onto the same filesystem. 2221 / 2222* if (vp->v_mount != mp) { 2223 simple_unlock(&mntvnode_slock); 2224 goto again; 2225 } 2226 nvp = vp->v_mntvnodes.le_next; 2227 simple_unlock(&mntvnode_slock); 2228 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) \|\| 2229 (error = SYSCTL_OUT(req, vp, VNODESZ))) 2230 return (error); 2231 simple_lock(&mntvnode_slock); 2232 } 2233 simple_unlock(&mntvnode_slock); 2234 simple_lock(&mountlist_slock); 2235 nmp = mp->mnt_list.cqe_next; 2236 vfs_unbusy(mp, p); 2237 } 2238 simple_unlock(&mountlist_slock); 2239 2240 return (0); 2241} 2242#endif 2243 2244/* 2245 * XXX 2246 * Exporting the vnode list on large systems causes them to crash. 2247 * Exporting the vnode list on medium systems causes sysctl to coredump. 2248 / 2249#if 0 2250SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE\|CTLFLAG_RD, 2251* 0, 0, sysctl_vnode, "S,vnode", ""); 2252#endif 2253 2254/* 2255 * Check to see if a filesystem is mounted on a block device. 2256 / 2257int 2258vfs_mountedon(vp) 2259* struct vnode vp; 2260{ 2261* struct vnode vq; 2262* int error = 0; 2263 2264 if (vp->v_specmountpoint != NULL) 2265 return (EBUSY); 2266 if (vp->v_flag & VALIASED) { 2267 simple_lock(&spechash_slock); 2268 for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { 2269* if (vq->v_rdev != vp->v_rdev \|\| 2270 vq->v_type != vp->v_type) 2271 continue; 2272 if (vq->v_specmountpoint != NULL) { 2273 error = EBUSY; 2274 break; 2275 } 2276 } 2277 simple_unlock(&spechash_slock); 2278 } 2279 return (error); 2280} 2281 2282/* 2283 * Unmount all filesystems. The list is traversed in reverse order 2284 * of mounting to avoid dependencies. 2285 / 2286void 2287vfs_unmountall() 2288{ 2289* struct mount mp, nmp; 2290 struct proc p; 2291* int error; 2292 2293 if (curproc != NULL) 2294 p = curproc; 2295 else 2296 p = initproc; /* XXX XXX should this be proc0? / 2297* /* 2298 * Since this only runs when rebooting, it is not interlocked. 2299 / 2300* for (mp = mountlist.cqh_last; mp != (void )&mountlist; mp = nmp) { 2301* nmp = mp->mnt_list.cqe_prev; 2302 error = dounmount(mp, MNT_FORCE, p); 2303 if (error) { 2304 printf("unmount of %s failed (", 2305 mp->mnt_stat.f_mntonname); 2306 if (error == EBUSY) 2307 printf("BUSY)\n"); 2308 else 2309 printf("%d)\n", error); 2310 } 2311 } 2312} 2313 2314/* 2315 * Build hash lists of net addresses and hang them off the mount point. 2316 * Called by ufs_mount() to set up the lists of export addresses. 2317 / 2318static int 2319vfs_hang_addrlist(mp, nep, argp) 2320* struct mount mp; 2321* struct netexport nep; 2322* struct export_args argp; 2323{ 2324* register struct netcred np; 2325* register struct radix_node_head rnh; 2326* register int i; 2327 struct radix_node rn; 2328* struct sockaddr saddr, smask = 0; 2329 struct domain dom; 2330* int error; 2331 2332 if (argp->ex_addrlen == 0) { 2333 if (mp->mnt_flag & MNT_DEFEXPORTED) 2334 return (EPERM); 2335 np = &nep->ne_defexported; 2336 np->netc_exflags = argp->ex_flags; 2337 np->netc_anon = argp->ex_anon; 2338 np->netc_anon.cr_ref = 1; 2339 mp->mnt_flag \|= MNT_DEFEXPORTED; 2340 return (0); 2341 } 2342 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2343 np = (struct netcred ) malloc(i, M_NETADDR, M_WAITOK); 2344* bzero((caddr_t) np, i); 2345 saddr = (struct sockaddr ) (np + 1); 2346* if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) 2347 goto out; 2348 if (saddr->sa_len > argp->ex_addrlen) 2349 saddr->sa_len = argp->ex_addrlen; 2350 if (argp->ex_masklen) { 2351 smask = (struct sockaddr ) ((caddr_t) saddr + argp->ex_addrlen); 2352* error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); 2353 if (error) 2354 goto out; 2355 if (smask->sa_len > argp->ex_masklen) 2356 smask->sa_len = argp->ex_masklen; 2357 } 2358 i = saddr->sa_family; 2359 if ((rnh = nep->ne_rtable[i]) == 0) { 2360 /* 2361 * Seems silly to initialize every AF when most are not used, 2362 * do so on demand here 2363 / 2364* for (dom = domains; dom; dom = dom->dom_next) 2365 if (dom->dom_family == i && dom->dom_rtattach) { 2366 dom->dom_rtattach((void *) &nep->ne_rtable[i], 2367* dom->dom_rtoffset); 2368 break; 2369 } 2370 if ((rnh = nep->ne_rtable[i]) == 0) { 2371 error = ENOBUFS; 2372 goto out; 2373 } 2374 } 2375 rn = (rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, 2376* np->netc_rnodes); 2377 if (rn == 0 \|\| np != (struct netcred ) rn) { / already exists / 2378* error = EPERM; 2379 goto out; 2380 } 2381 np->netc_exflags = argp->ex_flags; 2382 np->netc_anon = argp->ex_anon; 2383 np->netc_anon.cr_ref = 1; 2384 return (0); 2385out: 2386 free(np, M_NETADDR); 2387 return (error); 2388} 2389 2390/* ARGSUSED / 2391static int 2392vfs_free_netcred(rn, w) 2393* struct radix_node rn; 2394* void w; 2395{ 2396* register struct radix_node_head rnh = (struct radix_node_head ) w; 2397 2398 (rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); 2399* free((caddr_t) rn, M_NETADDR); 2400 return (0); 2401} 2402 2403/* 2404 * Free the net address hash lists that are hanging off the mount points. 2405 / 2406static void 2407vfs_free_addrlist(nep) 2408* struct netexport nep; 2409{ 2410* register int i; 2411 register struct radix_node_head rnh; 2412* 2413 for (i = 0; i <= AF_MAX; i++) 2414 if ((rnh = nep->ne_rtable[i])) { 2415 (rnh->rnh_walktree) (rnh, vfs_free_netcred, 2416* (caddr_t) rnh); 2417 free((caddr_t) rnh, M_RTABLE); 2418 nep->ne_rtable[i] = 0; 2419 } 2420} 2421 2422int 2423vfs_export(mp, nep, argp) 2424 struct mount mp; 2425* struct netexport nep; 2426* struct export_args argp; 2427{ 2428* int error; 2429 2430 if (argp->ex_flags & MNT_DELEXPORT) { 2431 if (mp->mnt_flag & MNT_EXPUBLIC) { 2432 vfs_setpublicfs(NULL, NULL, NULL); 2433 mp->mnt_flag &= ~MNT_EXPUBLIC; 2434 } 2435 vfs_free_addrlist(nep); 2436 mp->mnt_flag &= ~(MNT_EXPORTED \| MNT_DEFEXPORTED); 2437 } 2438 if (argp->ex_flags & MNT_EXPORTED) { 2439 if (argp->ex_flags & MNT_EXPUBLIC) { 2440 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2441 return (error); 2442 mp->mnt_flag \|= MNT_EXPUBLIC; 2443 } 2444 if ((error = vfs_hang_addrlist(mp, nep, argp))) 2445 return (error); 2446 mp->mnt_flag \|= MNT_EXPORTED; 2447 } 2448 return (0); 2449} 2450 2451 2452/* 2453 * Set the publicly exported filesystem (WebNFS). Currently, only 2454 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2455 / 2456int 2457vfs_setpublicfs(mp, nep, argp) 2458* struct mount mp; 2459* struct netexport nep; 2460* struct export_args argp; 2461{ 2462* int error; 2463 struct vnode rvp; 2464* char cp; 2465* 2466 /* 2467 * mp == NULL -> invalidate the current info, the FS is 2468 * no longer exported. May be called from either vfs_export 2469 * or unmount, so check if it hasn't already been done. 2470 / 2471* if (mp == NULL) { 2472 if (nfs_pub.np_valid) { 2473 nfs_pub.np_valid = 0; 2474 if (nfs_pub.np_index != NULL) { 2475 FREE(nfs_pub.np_index, M_TEMP); 2476 nfs_pub.np_index = NULL; 2477 } 2478 } 2479 return (0); 2480 } 2481 2482 /* 2483 * Only one allowed at a time. 2484 / 2485* if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2486 return (EBUSY); 2487 2488 /* 2489 * Get real filehandle for root of exported FS. 2490 / 2491* bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 2492 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2493 2494 if ((error = VFS_ROOT(mp, &rvp))) 2495 return (error); 2496 2497 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2498 return (error); 2499 2500 vput(rvp); 2501 2502 /* 2503 * If an indexfile was specified, pull it in. 2504 / 2505* if (argp->ex_indexfile != NULL) { 2506 MALLOC(nfs_pub.np_index, char , MAXNAMLEN + 1, M_TEMP, 2507* M_WAITOK); 2508 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2509 MAXNAMLEN, (size_t )0); 2510* if (!error) { 2511 /* 2512 * Check for illegal filenames. 2513 / 2514* for (cp = nfs_pub.np_index; cp; cp++) { 2515* if (cp == '/') { 2516* error = EINVAL; 2517 break; 2518 } 2519 } 2520 } 2521 if (error) { 2522 FREE(nfs_pub.np_index, M_TEMP); 2523 return (error); 2524 } 2525 } 2526 2527 nfs_pub.np_mount = mp; 2528 nfs_pub.np_valid = 1; 2529 return (0); 2530} 2531 2532struct netcred * 2533vfs_export_lookup(mp, nep, nam) 2534 register struct mount mp; 2535* struct netexport nep; 2536* struct sockaddr nam; 2537{ 2538* register struct netcred np; 2539* register struct radix_node_head rnh; 2540* struct sockaddr saddr; 2541* 2542 np = NULL; 2543 if (mp->mnt_flag & MNT_EXPORTED) { 2544 /* 2545 * Lookup in the export list first. 2546 / 2547* if (nam != NULL) { 2548 saddr = nam; 2549 rnh = nep->ne_rtable[saddr->sa_family]; 2550 if (rnh != NULL) { 2551 np = (struct netcred ) 2552* (rnh->rnh_matchaddr)((caddr_t)saddr, 2553* rnh); 2554 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2555 np = NULL; 2556 } 2557 } 2558 /* 2559 * If no address match, use the default if it exists. 2560 / 2561* if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2562 np = &nep->ne_defexported; 2563 } 2564 return (np); 2565} 2566 2567/* 2568 * perform msync on all vnodes under a mount point 2569 * the mount point must be locked. 2570 / 2571void 2572vfs_msync(struct mount mp, int flags) { 2573 struct vnode vp, nvp; 2574 struct vm_object obj; 2575* int anyio, tries; 2576 2577 tries = 5; 2578loop: 2579 anyio = 0; 2580 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 2581 2582 nvp = vp->v_mntvnodes.le_next; 2583 2584 if (vp->v_mount != mp) { 2585 goto loop; 2586 } 2587 2588 if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? / 2589* continue; 2590 2591 if (flags != MNT_WAIT) { 2592 obj = vp->v_object; 2593 if (obj == NULL \|\| (obj->flags & OBJ_MIGHTBEDIRTY) == 0) 2594 continue; 2595 if (VOP_ISLOCKED(vp)) 2596 continue; 2597 } 2598 2599 simple_lock(&vp->v_interlock); 2600 if (vp->v_object && 2601 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { 2602 if (!vget(vp, 2603 LK_INTERLOCK \| LK_EXCLUSIVE \| LK_RETRY \| LK_NOOBJ, curproc)) { 2604 if (vp->v_object) { 2605 vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); 2606 anyio = 1; 2607 } 2608 vput(vp); 2609 } 2610 } else { 2611 simple_unlock(&vp->v_interlock); 2612 } 2613 } 2614 if (anyio && (--tries > 0)) 2615 goto loop; 2616} 2617 2618/* 2619 * Create the VM object needed for VMIO and mmap support. This 2620 * is done for all VREG files in the system. Some filesystems might 2621 * afford the additional metadata buffering capability of the 2622 * VMIO code by making the device node be VMIO mode also. 2623 * 2624 * vp must be locked when vfs_object_create is called. 2625 / 2626int 2627vfs_object_create(vp, p, cred) 2628* struct vnode vp; 2629* struct proc p; 2630* struct ucred cred; 2631{ 2632* struct vattr vat; 2633 vm_object_t object; 2634 int error = 0; 2635 2636 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) 2637 return 0; 2638 2639retry: 2640 if ((object = vp->v_object) == NULL) { 2641 if (vp->v_type == VREG) { 2642 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) 2643 goto retn; 2644 object = vnode_pager_alloc(vp, vat.va_size, 0, 0); 2645 } else if (bdevsw(vp->v_rdev) != NULL) { 2646 /* 2647 * This simply allocates the biggest object possible 2648 * for a VBLK vnode. This should be fixed, but doesn't 2649 * cause any problems (yet). 2650 / 2651* object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); 2652 } else { 2653 goto retn; 2654 } 2655 /* 2656 * Dereference the reference we just created. This assumes 2657 * that the object is associated with the vp. 2658 / 2659* object->ref_count--; 2660 vp->v_usecount--; 2661 } else { 2662 if (object->flags & OBJ_DEAD) { 2663 VOP_UNLOCK(vp, 0, p); 2664 tsleep(object, PVM, "vodead", 0); 2665 vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, p); 2666 goto retry; 2667 } 2668 } 2669 2670 KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); 2671 vp->v_flag \|= VOBJBUF; 2672 2673retn: 2674 return error; 2675} 2676 2677static void 2678vfree(vp) 2679 struct vnode vp; 2680{ 2681* int s; 2682 2683 s = splbio(); 2684 simple_lock(&vnode_free_list_slock); 2685 if (vp->v_flag & VTBFREE) { 2686 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2687 vp->v_flag &= ~VTBFREE; 2688 } 2689 if (vp->v_flag & VAGE) { 2690 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 2691 } else { 2692 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 2693 } 2694 freevnodes++; 2695 simple_unlock(&vnode_free_list_slock); 2696 vp->v_flag &= ~VAGE; 2697 vp->v_flag \|= VFREE; 2698 splx(s); 2699} 2700 2701void 2702vbusy(vp) 2703 struct vnode vp; 2704{ 2705* int s; 2706 2707 s = splbio(); 2708 simple_lock(&vnode_free_list_slock); 2709 if (vp->v_flag & VTBFREE) { 2710 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); 2711 vp->v_flag &= ~VTBFREE; 2712 } else { 2713 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 2714 freevnodes--; 2715 } 2716 simple_unlock(&vnode_free_list_slock); 2717 vp->v_flag &= ~(VFREE\|VAGE); 2718 splx(s); 2719} 2720 2721/* 2722 * Record a process's interest in events which might happen to 2723 * a vnode. Because poll uses the historic select-style interface 2724 * internally, this routine serves as both the ``check for any 2725 * pending events'' and the ``record my interest in future events'' 2726 * functions. (These are done together, while the lock is held, 2727 * to avoid race conditions.) 2728 / 2729int 2730vn_pollrecord(vp, p, events) 2731* struct vnode vp; 2732* struct proc p; 2733* short events; 2734{ 2735 simple_lock(&vp->v_pollinfo.vpi_lock); 2736 if (vp->v_pollinfo.vpi_revents & events) { 2737 /* 2738 * This leaves events we are not interested 2739 * in available for the other process which 2740 * which presumably had requested them 2741 * (otherwise they would never have been 2742 * recorded). 2743 / 2744* events &= vp->v_pollinfo.vpi_revents; 2745 vp->v_pollinfo.vpi_revents &= ~events; 2746 2747 simple_unlock(&vp->v_pollinfo.vpi_lock); 2748 return events; 2749 } 2750 vp->v_pollinfo.vpi_events \|= events; 2751 selrecord(p, &vp->v_pollinfo.vpi_selinfo); 2752 simple_unlock(&vp->v_pollinfo.vpi_lock); 2753 return 0; 2754} 2755 2756/* 2757 * Note the occurrence of an event. If the VN_POLLEVENT macro is used, 2758 * it is possible for us to miss an event due to race conditions, but 2759 * that condition is expected to be rare, so for the moment it is the 2760 * preferred interface. 2761 / 2762void 2763vn_pollevent(vp, events) 2764* struct vnode vp; 2765* short events; 2766{ 2767 simple_lock(&vp->v_pollinfo.vpi_lock); 2768 if (vp->v_pollinfo.vpi_events & events) { 2769 /* 2770 * We clear vpi_events so that we don't 2771 * call selwakeup() twice if two events are 2772 * posted before the polling process(es) is 2773 * awakened. This also ensures that we take at 2774 * most one selwakeup() if the polling process 2775 * is no longer interested. However, it does 2776 * mean that only one event can be noticed at 2777 * a time. (Perhaps we should only clear those 2778 * event bits which we note?) XXX 2779 / 2780* vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? / 2781* vp->v_pollinfo.vpi_revents \|= events; 2782 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2783 } 2784 simple_unlock(&vp->v_pollinfo.vpi_lock); 2785} 2786 2787/* 2788 * Wake up anyone polling on vp because it is being revoked. 2789 * This depends on dead_poll() returning POLLHUP for correct 2790 * behavior. 2791 / 2792void 2793vn_pollgone(vp) 2794* struct vnode vp; 2795{ 2796* simple_lock(&vp->v_pollinfo.vpi_lock); 2797 if (vp->v_pollinfo.vpi_events) { 2798 vp->v_pollinfo.vpi_events = 0; 2799 selwakeup(&vp->v_pollinfo.vpi_selinfo); 2800 } 2801 simple_unlock(&vp->v_pollinfo.vpi_lock); 2802} 2803 2804 2805 2806/* 2807 * Routine to create and manage a filesystem syncer vnode. 2808 / 2809#define sync_close ((int () __P((struct vop_close_args )))nullop) 2810static int sync_fsync __P((struct vop_fsync_args )); 2811static int sync_inactive __P((struct vop_inactive_args )); 2812static int sync_reclaim __P((struct vop_reclaim_args )); 2813#define sync_lock ((int () __P((struct vop_lock_args )))vop_nolock) 2814#define sync_unlock ((int () __P((struct vop_unlock_args )))vop_nounlock) 2815static int sync_print __P((struct vop_print_args )); 2816#define sync_islocked ((int() __P((struct vop_islocked_args )))vop_noislocked) 2817* 2818static vop_t *sync_vnodeop_p; 2819static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { 2820* { &vop_default_desc, (vop_t ) vop_eopnotsupp }, 2821* { &vop_close_desc, (vop_t ) sync_close }, / close / 2822* { &vop_fsync_desc, (vop_t ) sync_fsync }, / fsync / 2823* { &vop_inactive_desc, (vop_t ) sync_inactive }, / inactive / 2824* { &vop_reclaim_desc, (vop_t ) sync_reclaim }, / reclaim / 2825* { &vop_lock_desc, (vop_t ) sync_lock }, / lock / 2826* { &vop_unlock_desc, (vop_t ) sync_unlock }, / unlock / 2827* { &vop_print_desc, (vop_t ) sync_print }, / print / 2828* { &vop_islocked_desc, (vop_t ) sync_islocked }, / islocked / 2829* { NULL, NULL } 2830}; 2831static struct vnodeopv_desc sync_vnodeop_opv_desc = 2832 { &sync_vnodeop_p, sync_vnodeop_entries }; 2833 2834VNODEOP_SET(sync_vnodeop_opv_desc); 2835 2836/* 2837 * Create a new filesystem syncer vnode for the specified mount point. 2838 / 2839int 2840vfs_allocate_syncvnode(mp) 2841* struct mount mp; 2842{ 2843* struct vnode vp; 2844* static long start, incr, next; 2845 int error; 2846 2847 /* Allocate a new vnode / 2848* if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { 2849 mp->mnt_syncer = NULL; 2850 return (error); 2851 } 2852 vp->v_type = VNON; 2853 /* 2854 * Place the vnode onto the syncer worklist. We attempt to 2855 * scatter them about on the list so that they will go off 2856 * at evenly distributed times even if all the filesystems 2857 * are mounted at once. 2858 / 2859* next += incr; 2860 if (next == 0 \|\| next > syncer_maxdelay) { 2861 start /= 2; 2862 incr /= 2; 2863 if (start == 0) { 2864 start = syncer_maxdelay / 2; 2865 incr = syncer_maxdelay; 2866 } 2867 next = start; 2868 } 2869 vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); 2870 mp->mnt_syncer = vp; 2871 return (0); 2872} 2873 2874/* 2875 * Do a lazy sync of the filesystem. 2876 / 2877static int 2878sync_fsync(ap) 2879* struct vop_fsync_args /* { 2880 struct vnode a_vp; 2881* struct ucred a_cred; 2882* int a_waitfor; 2883 struct proc a_p; 2884* } / ap; 2885{ 2886 struct vnode syncvp = ap->a_vp; 2887* struct mount mp = syncvp->v_mount; 2888* struct proc p = ap->a_p; 2889* int asyncflag; 2890 2891 /* 2892 * We only need to do something if this is a lazy evaluation. 2893 / 2894* if (ap->a_waitfor != MNT_LAZY) 2895 return (0); 2896 2897 /* 2898 * Move ourselves to the back of the sync list. 2899 / 2900* vn_syncer_add_to_worklist(syncvp, syncdelay); 2901 2902 /* 2903 * Walk the list of vnodes pushing all that are dirty and 2904 * not already on the sync list. 2905 / 2906* simple_lock(&mountlist_slock); 2907 if (vfs_busy(mp, LK_EXCLUSIVE \| LK_NOWAIT, &mountlist_slock, p) != 0) { 2908 simple_unlock(&mountlist_slock); 2909 return (0); 2910 } 2911 asyncflag = mp->mnt_flag & MNT_ASYNC; 2912 mp->mnt_flag &= ~MNT_ASYNC; 2913 vfs_msync(mp, MNT_NOWAIT); 2914 VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); 2915 if (asyncflag) 2916 mp->mnt_flag \|= MNT_ASYNC; 2917 vfs_unbusy(mp, p); 2918 return (0); 2919} 2920 2921/* 2922 * The syncer vnode is no referenced. 2923 / 2924static int 2925sync_inactive(ap) 2926* struct vop_inactive_args /* { 2927 struct vnode a_vp; 2928* struct proc a_p; 2929* } / ap; 2930{ 2931 2932 vgone(ap->a_vp); 2933 return (0); 2934} 2935 2936/* 2937 * The syncer vnode is no longer needed and is being decommissioned. 2938 * 2939 * Modifications to the worklist must be protected at splbio(). 2940 / 2941static int 2942sync_reclaim(ap) 2943* struct vop_reclaim_args /* { 2944 struct vnode a_vp; 2945* } / ap; 2946{ 2947 struct vnode vp = ap->a_vp; 2948* int s; 2949 2950 s = splbio(); 2951 vp->v_mount->mnt_syncer = NULL; 2952 if (vp->v_flag & VONWORKLST) { 2953 LIST_REMOVE(vp, v_synclist); 2954 vp->v_flag &= ~VONWORKLST; 2955 } 2956 splx(s); 2957 2958 return (0); 2959} 2960 2961/* 2962 * Print out a syncer vnode. 2963 / 2964static int 2965sync_print(ap) 2966* struct vop_print_args /* { 2967 struct vnode a_vp; 2968* } / ap; 2969{ 2970 struct vnode vp = ap->a_vp; 2971* 2972 printf("syncer vnode"); 2973 if (vp->v_vnlock != NULL) 2974 lockmgr_printinfo(vp->v_vnlock); 2975 printf("\n"); 2976 return (0); 2977}