vnode.h revision 30354
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)vnode.h	8.7 (Berkeley) 2/4/94
34 * $Id: vnode.h,v 1.49 1997/09/21 04:24:09 dyson Exp $
35 */
36
37#ifndef _SYS_VNODE_H_
38#define	_SYS_VNODE_H_
39
40#include <sys/lock.h>
41#include <sys/queue.h>
42
43#ifdef SMP
44#include <machine/smp.h>
45#endif
46
/*
 * The vnode is the focus of all file activity in UNIX.  There is a
 * unique vnode allocated for each active file, each current directory,
 * each mounted-on file, text file, and the root.
 */

/*
 * Vnode types.  VNON means no type.
 *
 * The enumerators are numbered consecutively from zero (VNON == 0 through
 * VBAD == 8).  VTTOIF() uses a vtype value directly as a table index, so
 * new types must not be inserted in the middle of the list.
 */
enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };
57
/*
 * Vnode tag types.
 * These are for the benefit of external programs only (e.g., pstat)
 * and should NEVER be inspected by the kernel.
 *
 * The values are implicit (VT_NON == 0 through VT_TFS == 18), so append
 * new tags at the end rather than inserting them.
 */
enum vtagtype	{
	VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC,
	VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
	VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS
};
68
69/*
70 * Each underlying filesystem allocates its own private area and hangs
71 * it from v_data.  If non-null, this area is freed in getnewvnode().
72 */
73LIST_HEAD(buflists, buf);
74
75typedef	int 	vop_t __P((void *));
76struct vm_object;
77struct namecache;
78
79/*
80 * Reading or writing any of these items requires holding the appropriate lock.
81 * v_freelist is locked by the global vnode_free_list simple lock.
82 * v_mntvnodes is locked by the global mntvnodes simple lock.
83 * v_flag, v_usecount, v_holdcount and v_writecount are
84 *    locked by the v_interlock simple lock.
85 */
86struct vnode {
87	u_long	v_flag;				/* vnode flags (see below) */
88	int	v_usecount;			/* reference count of users */
89	int	v_writecount;			/* reference count of writers */
90	int	v_holdcnt;			/* page & buffer references */
91	daddr_t	v_lastr;			/* last read (read-ahead) */
92	u_long	v_id;				/* capability identifier */
93	struct	mount *v_mount;			/* ptr to vfs we are in */
94	vop_t	**v_op;				/* vnode operations vector */
95	TAILQ_ENTRY(vnode) v_freelist;		/* vnode freelist */
96	LIST_ENTRY(vnode) v_mntvnodes;		/* vnodes for mount point */
97	struct	buflists v_cleanblkhd;		/* clean blocklist head */
98	struct	buflists v_dirtyblkhd;		/* dirty blocklist head */
99	long	v_numoutput;			/* num of writes in progress */
100	enum	vtype v_type;			/* vnode type */
101	union {
102		struct mount	*vu_mountedhere;/* ptr to mounted vfs (VDIR) */
103		struct socket	*vu_socket;	/* unix ipc (VSOCK) */
104		struct specinfo	*vu_specinfo;	/* device (VCHR, VBLK) */
105		struct fifoinfo	*vu_fifoinfo;	/* fifo (VFIFO) */
106	} v_un;
107	struct	nqlease *v_lease;		/* Soft reference to lease */
108	daddr_t	v_lastw;			/* last write (write cluster) */
109	daddr_t	v_cstart;			/* start block of cluster */
110	daddr_t	v_lasta;			/* last allocation */
111	int	v_clen;				/* length of current cluster */
112	struct vm_object *v_object;		/* Place to store VM object */
113	struct	simplelock v_interlock;		/* lock on usecount and flag */
114	struct	lock *v_vnlock;			/* used for non-locking fs's */
115	enum	vtagtype v_tag;			/* type of underlying data */
116	void 	*v_data;			/* private data for fs */
117	LIST_HEAD(, namecache) v_cache_src;	/* Cache entries from us */
118	TAILQ_HEAD(, namecache) v_cache_dst;	/* Cache entries to us */
119	struct	vnode *v_dd;			/* .. vnode */
120	u_long	v_ddid;				/* .. capability identifier */
121};
122#define	v_mountedhere	v_un.vu_mountedhere
123#define	v_socket	v_un.vu_socket
124#define	v_specinfo	v_un.vu_specinfo
125#define	v_fifoinfo	v_un.vu_fifoinfo
126
/*
 * Vnode flags (bits of v_flag in struct vnode).
 * Each flag is a distinct single bit; bits 0x00010 through 0x00080 are
 * not assigned in this header.
 */
#define	VROOT		0x00001	/* root of its file system */
#define	VTEXT		0x00002	/* vnode is a pure text prototype */
#define	VSYSTEM		0x00004	/* vnode being used by kernel */
#define	VISTTY		0x00008	/* vnode represents a tty */
#define	VXLOCK		0x00100	/* vnode is locked to change underlying type */
#define	VXWANT		0x00200	/* process is waiting for vnode */
#define	VBWAIT		0x00400	/* waiting for output to complete */
#define	VALIASED	0x00800	/* vnode has an alias */
#define	VDIROP		0x01000	/* LFS: vnode is involved in a directory op */
#define	VVMIO		0x02000	/* VMIO flag */
#define	VNINACT		0x04000	/* LFS: skip ufs_inactive() in lfs_vunref */
#define	VAGE		0x08000	/* Insert vnode at head of free list */
#define	VOLOCK		0x10000	/* vnode is locked waiting for an object */
#define	VOWANT		0x20000	/* a process is waiting for VOLOCK */
#define	VDOOMED		0x40000	/* This vnode is being recycled */
#define	VFREE		0x80000	/* This vnode is on the freelist */
146
147/*
148 * Vnode attributes.  A field value of VNOVAL represents a field whose value
149 * is unavailable (getattr) or which is not to be changed (setattr).
150 */
151struct vattr {
152	enum vtype	va_type;	/* vnode type (for create) */
153	u_short		va_mode;	/* files access mode and type */
154	short		va_nlink;	/* number of references to file */
155	uid_t		va_uid;		/* owner user id */
156	gid_t		va_gid;		/* owner group id */
157	long		va_fsid;	/* file system id (dev for now) */
158	long		va_fileid;	/* file id */
159	u_quad_t	va_size;	/* file size in bytes */
160	long		va_blocksize;	/* blocksize preferred for i/o */
161	struct timespec	va_atime;	/* time of last access */
162	struct timespec	va_mtime;	/* time of last modification */
163	struct timespec	va_ctime;	/* time file changed */
164	u_long		va_gen;		/* generation number of file */
165	u_long		va_flags;	/* flags defined for file */
166	dev_t		va_rdev;	/* device the special file represents */
167	u_quad_t	va_bytes;	/* bytes of disk space held by file */
168	u_quad_t	va_filerev;	/* file modification number */
169	u_int		va_vaflags;	/* operations flags, see below */
170	long		va_spare;	/* remain quad aligned */
171};
172
/*
 * Flags for va_vaflags (struct vattr).
 */
#define	VA_UTIMES_NULL	0x01		/* utimes argument was NULL */
#define	VA_EXCLUSIVE	0x02		/* exclusive create request */
178
/*
 * Flags for ioflag.  Each flag is a distinct single bit.
 */
#define	IO_UNIT		0x01		/* do I/O as atomic unit */
#define	IO_APPEND	0x02		/* append write to end */
#define	IO_SYNC		0x04		/* do I/O synchronously */
#define	IO_NODELOCKED	0x08		/* underlying node already locked */
#define	IO_NDELAY	0x10		/* FNDELAY flag set in file table */
#define	IO_VMIO		0x20		/* data already in VMIO space */
188
/*
 *  Modes.  Some values same as Ixxx entries from inode.h for now.
 *  All values are octal.
 */
#define	VSUID	04000		/* set user id on execution */
#define	VSGID	02000		/* set group id on execution */
#define	VSVTX	01000		/* save swapped text even after use */
#define	VREAD	00400		/* read, write, execute permissions */
#define	VWRITE	00200
#define	VEXEC	00100
198
/*
 * Token indicating no attribute value yet assigned.
 * Parenthesized so the minus sign cannot combine with neighbouring
 * operators at the point of use.
 */
#define	VNOVAL	(-1)
203
204#ifdef KERNEL
205
206#ifdef MALLOC_DECLARE
207MALLOC_DECLARE(M_VNODE);
208#endif
209
210/*
211 * Convert between vnode types and inode formats (since POSIX.1
212 * defines mode word of stat structure in terms of inode formats).
213 */
214extern enum vtype	iftovt_tab[];
215extern int		vttoif_tab[];
216#define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
217#define VTTOIF(indx)	(vttoif_tab[(int)(indx)])
218#define MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
219
/*
 * Flags to various vnode functions.
 * The values are only unique per function: the vflush/vclean flags, the
 * vinvalbuf flags and the vop_revoke flag each start again at 0x0001.
 */
#define	SKIPSYSTEM	0x0001		/* vflush: skip vnodes marked VSYSTEM */
#define	FORCECLOSE	0x0002		/* vflush: force file closure */
#define	WRITECLOSE	0x0004		/* vflush: only close writable files */
#define	DOCLOSE		0x0008		/* vclean: close active files */
#define	V_SAVE		0x0001		/* vinvalbuf: sync file first */
#define	V_SAVEMETA	0x0002		/* vinvalbuf: leave indirect blocks */
#define	REVOKEALL	0x0001		/* vop_revoke: revoke all aliases */
230
/* VREF(vp) is a plain alias for vref(vp). */
#define	VREF(vp)	vref(vp)

/*
 * VATTR_NULL(vap) resets *vap: with DIAGNOSTIC it calls vattr_null(),
 * otherwise it structure-assigns the predefined va_null.
 */
#ifdef DIAGNOSTIC
#define	VATTR_NULL(vap)	vattr_null(vap)
#else
#define	VATTR_NULL(vap)	(*(vap) = va_null)	/* initialize a vattr */
#endif /* DIAGNOSTIC */

/* A NULL pointer typed as struct vnode *. */
#define	NULLVP	((struct vnode *)NULL)

/*
 * VNODEOP_SET(f) registers f through DATA_SET(); the set is MODVNOPS
 * when built with VFS_LKM and vfs_opv_descs_ otherwise.
 */
#ifdef VFS_LKM
#define	VNODEOP_SET(f) DATA_SET(MODVNOPS,f)
#else
#define	VNODEOP_SET(f) DATA_SET(vfs_opv_descs_,f)
#endif
246
/*
 * Global vnode data.  These are declarations only; the objects are
 * defined elsewhere.
 */
extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
extern	int desiredvnodes;		/* number of vnodes desired */
extern	int prtactive;			/* nonzero to call vprint() */
extern	struct vattr va_null;		/* predefined null vattr structure */
254
/*
 * Macro/function to check for client cache inconsistency w.r.t. leasing.
 */
#define	LEASE_READ	0x1		/* Check lease for readers */
#define	LEASE_WRITE	0x2		/* Check lease for modifiers */

/* Hook called by LEASE_UPDATETIME(); tested for NULL in the non-NFS case. */
extern void	(*lease_updatetime) __P((int deltat));

/*
 * LEASE_UPDATETIME(dt):
 *   NFS and NQNFS defined:	calls lease_updatetime(dt) unconditionally.
 *   NFS without NQNFS:		expands to nothing.
 *   NFS not defined:		calls lease_updatetime(dt) only if the hook
 *				pointer is non-NULL.
 */
#ifdef NFS
#ifdef NQNFS
#define	LEASE_UPDATETIME(dt)		lease_updatetime(dt)
#else
#define	LEASE_UPDATETIME(dt)
#endif /* NQNFS */
#else
#define	LEASE_UPDATETIME(dt) \
	do { if (lease_updatetime) lease_updatetime(dt); } while (0)
#endif /* NFS */
274
/*
 * VSHOULDFREE(vp): true when vp has neither VFREE nor VDOOMED set and
 * both v_holdcnt and v_usecount are zero.
 *
 * VSHOULDBUSY(vp): true when vp has VFREE set although v_holdcnt or
 * v_usecount is non-zero.
 *
 * Both macros evaluate vp more than once, so the argument must not
 * have side effects.
 */
#define	VSHOULDFREE(vp)	\
	(!((vp)->v_flag & (VFREE|VDOOMED)) && \
	 !(vp)->v_holdcnt && !(vp)->v_usecount)

#define	VSHOULDBUSY(vp)	\
	(((vp)->v_flag & VFREE) && \
	 ((vp)->v_holdcnt || (vp)->v_usecount))
282
283
284#endif /* KERNEL */
285
286
/*
 * Mods for extensibility.
 */

/*
 * Flags for vdesc_flags:
 */
#define	VDESC_MAX_VPS		16
/*
 * Low order 16 flag bits are reserved for willrele flags for vp arguments.
 * NOTE(review): VDESC_NOMAP_VPP (0x0100) and VDESC_VPP_WILLRELE (0x0200)
 * also fall inside those low 16 bits; only VP0..VP3 are defined here, so
 * nothing collides today -- confirm before adding VDESC_VPn_WILLRELE
 * flags for n >= 8.
 */
#define	VDESC_VP0_WILLRELE	0x0001
#define	VDESC_VP1_WILLRELE	0x0002
#define	VDESC_VP2_WILLRELE	0x0004
#define	VDESC_VP3_WILLRELE	0x0008
#define	VDESC_NOMAP_VPP		0x0100
#define	VDESC_VPP_WILLRELE	0x0200

/*
 * VDESC_NO_OFFSET is used to identify the end of the offset list
 * and in places where no such field exists.
 * Parenthesized, like VNOVAL, so the minus sign cannot combine with
 * neighbouring operators at the point of use.
 */
#define	VDESC_NO_OFFSET	(-1)
308
309/*
310 * This structure describes the vnode operation taking place.
311 */
312struct vnodeop_desc {
313	int	vdesc_offset;		/* offset in vector--first for speed */
314	char    *vdesc_name;		/* a readable name for debugging */
315	int	vdesc_flags;		/* VDESC_* flags */
316
317	/*
318	 * These ops are used by bypass routines to map and locate arguments.
319	 * Creds and procs are not needed in bypass routines, but sometimes
320	 * they are useful to (for example) transport layers.
321	 * Nameidata is useful because it has a cred in it.
322	 */
323	int	*vdesc_vp_offsets;	/* list ended by VDESC_NO_OFFSET */
324	int	vdesc_vpp_offset;	/* return vpp location */
325	int	vdesc_cred_offset;	/* cred location, if any */
326	int	vdesc_proc_offset;	/* proc location, if any */
327	int	vdesc_componentname_offset; /* if any */
328	/*
329	 * Finally, we've got a list of private data (about each operation)
330	 * for each transport layer.  (Support to manage this list is not
331	 * yet part of BSD.)
332	 */
333	caddr_t	*vdesc_transports;
334};
335
336#ifdef KERNEL
/*
 * A list of all the operation descs.
 */
extern struct vnodeop_desc *vnodeop_descs[];

/*
 * Interlock for scanning list of vnodes attached to a mountpoint
 */
extern struct simplelock mntvnode_slock;
346
/*
 * This macro is very helpful in defining those offsets in the vdesc struct.
 *
 * This is stolen from X11R4.  I ignored all the fancy stuff for
 * Crays, so if you decide to port this to such a serious machine,
 * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
 *
 * VOPARG_OFFSET(p_type, field)
 *	byte offset, as an int, of field within the structure that the
 *	pointer type p_type points to.
 * VOPARG_OFFSETOF(s_type, field)
 *	the same, given the structure type itself.
 * VOPARG_OFFSETTO(S_TYPE, S_OFFSET, STRUCT_P)
 *	STRUCT_P advanced by S_OFFSET bytes and cast to S_TYPE.
 *
 * NOTE(review): the offset is computed by pointer arithmetic on a NULL
 * pointer, which ISO C does not define; it relies on the compiler
 * treating NULL as address zero.
 */
#define	VOPARG_OFFSET(p_type, field) \
	((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL)))
#define	VOPARG_OFFSETOF(s_type, field) \
	VOPARG_OFFSET(s_type *, field)
#define	VOPARG_OFFSETTO(S_TYPE, S_OFFSET, STRUCT_P) \
	((S_TYPE)(((char *)(STRUCT_P)) + (S_OFFSET)))
360
361
362/*
363 * This structure is used to configure the new vnodeops vector.
364 */
365struct vnodeopv_entry_desc {
366	struct vnodeop_desc *opve_op;   /* which operation this is */
367	vop_t *opve_impl;		/* code implementing this operation */
368};
369struct vnodeopv_desc {
370			/* ptr to the ptr to the vector where op should go */
371	vop_t ***opv_desc_vector_p;
372	struct vnodeopv_entry_desc *opv_desc_ops;   /* null terminated list */
373};
374
375/*
376 * A default routine which just returns an error.
377 */
378int vn_default_error __P((void));
379
/*
 * A generic structure.
 * This can be used by bypass routines to identify generic arguments:
 * only the leading descriptor pointer is declared here.
 */
struct vop_generic_args {
	struct vnodeop_desc *a_desc;
	/* other random data follows, presumably */
};
388
#ifdef DEBUG_VFS_LOCKS
/*
 * Macros to aid in tracing VFS locking problems.  Not totally
 * reliable since if the process sleeps between changing the lock
 * state and checking it with the assert, some other process could
 * change the state.  They are good enough for debugging a single
 * filesystem using a single-threaded test.  I find that 'cvs co src'
 * is a pretty good test.
 */

/*
 * [dfr] Kludge until I get around to fixing all the vfs locking.
 * True when vp's v_tag names one of the filesystems listed below.
 */
#define	IS_LOCKING_VFS(vp)	((vp)->v_tag == VT_UFS		\
				 || (vp)->v_tag == VT_MFS	\
				 || (vp)->v_tag == VT_NFS	\
				 || (vp)->v_tag == VT_LFS	\
				 || (vp)->v_tag == VT_ISOFS	\
				 || (vp)->v_tag == VT_MSDOSFS	\
				 || (vp)->v_tag == VT_DEVFS)

/*
 * ASSERT_VOP_LOCKED(vp, str) panics if vp is non-NULL, belongs to a
 * locking filesystem and VOP_ISLOCKED() reports it unlocked;
 * ASSERT_VOP_UNLOCKED(vp, str) panics in the opposite case.  str
 * prefixes the panic message.
 *
 * Both are wrapped in do { } while (0) so they act as one statement:
 * they need a trailing semicolon and cannot capture a following "else".
 * The vnode address is printed with %p, since %x takes an unsigned int
 * and not a pointer.  vp is evaluated more than once.
 */
#define	ASSERT_VOP_LOCKED(vp, str)					\
do {									\
	if ((vp) && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp))		\
		panic("%s: %p is not locked but should be",		\
		    (str), (void *)(vp));				\
} while (0)

#define	ASSERT_VOP_UNLOCKED(vp, str)					\
do {									\
	if ((vp) && IS_LOCKING_VFS(vp) && VOP_ISLOCKED(vp))		\
		panic("%s: %p is locked but shouldn't be",		\
		    (str), (void *)(vp));				\
} while (0)

#else

/* Without DEBUG_VFS_LOCKS the assertions expand to nothing. */
#define	ASSERT_VOP_LOCKED(vp, str)
#define	ASSERT_VOP_UNLOCKED(vp, str)

#endif
426
/*
 * VOCALL calls an op given an ops vector.  We break it out because BSD's
 * vclean changes the ops vector and then wants to call ops with the old
 * vector.
 *
 * OPSV is the vector, OFF the index of the op within it and AP the
 * single argument handed to the op; the result is the op's return value.
 */
#define	VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP))

/*
 * This call works for vnodes in the kernel.
 *
 * VCALL(VP, OFF, AP)	VOCALL through VP's own v_op vector.
 * VDESC(OP)		address of the object named OP with "_desc" appended.
 * VOFFSET(OP)		vdesc_offset member of that object.
 */
#define	VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP))
#define	VDESC(OP) (& __CONCAT(OP,_desc))
#define	VOFFSET(OP) (VDESC(OP)->vdesc_offset)
440
441/*
442 * Finally, include the default set of vnode operations.
443 */
444#include "vnode_if.h"
445
446/*
447 * Public vnode manipulation functions.
448 */
449struct componentname;
450struct file;
451struct mount;
452struct nameidata;
453struct ostat;
454struct proc;
455struct stat;
456struct ucred;
457struct uio;
458struct vattr;
459struct vnode;
460struct vop_bwrite_args;
461struct vm_zone;
462
463extern struct vm_zone *namei_zone;
464
465extern int	(*lease_check_hook) __P((struct vop_lease_args *));
466
467int 	bdevvp __P((dev_t dev, struct vnode **vpp));
468/* cache_* may belong in namei.h. */
469void	cache_enter __P((struct vnode *dvp, struct vnode *vp,
470	    struct componentname *cnp));
471int	cache_lookup __P((struct vnode *dvp, struct vnode **vpp,
472	    struct componentname *cnp));
473void	cache_purge __P((struct vnode *vp));
474void	cache_purgevfs __P((struct mount *mp));
475void	cvtstat __P((struct stat *st, struct ostat *ost));
476int 	getnewvnode __P((enum vtagtype tag,
477	    struct mount *mp, vop_t **vops, struct vnode **vpp));
478void	insmntque __P((struct vnode *vp, struct mount *mp));
479int	lease_check __P((struct vop_lease_args *ap));
480void 	vattr_null __P((struct vattr *vap));
481void	vbusy __P((struct vnode *));
482int 	vcount __P((struct vnode *vp));
483void	vdrop __P((struct vnode *));
484int	vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp));
485void	vfree __P((struct vnode *));
486void	vfs_opv_init __P((struct vnodeopv_desc **them));
487int	vflush __P((struct mount *mp, struct vnode *skipvp, int flags));
488int 	vget __P((struct vnode *vp, int lockflag, struct proc *p));
489void 	vgone __P((struct vnode *vp));
490void	vhold __P((struct vnode *));
491int	vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
492	    struct proc *p, int slpflag, int slptimeo));
493void	vprint __P((char *label, struct vnode *vp));
494int	vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp,
495	    struct proc *p));
496int	vn_bwrite __P((struct vop_bwrite_args *ap));
497int 	vn_close __P((struct vnode *vp,
498	    int flags, struct ucred *cred, struct proc *p));
499int	vn_lock __P((struct vnode *vp, int flags, struct proc *p));
500int 	vn_open __P((struct nameidata *ndp, int fmode, int cmode));
501int 	vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
502	    int len, off_t offset, enum uio_seg segflg, int ioflg,
503	    struct ucred *cred, int *aresid, struct proc *p));
504int	vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
505int	vfs_cache_lookup __P((struct vop_lookup_args *ap));
506int	vfs_object_create __P((struct vnode *vp, struct proc *p,
507                struct ucred *cred, int waslocked));
508int 	vn_writechk __P((struct vnode *vp));
509int	vop_noislocked __P((struct vop_islocked_args *));
510int	vop_nolock __P((struct vop_lock_args *));
511int	vop_nopoll __P((struct vop_poll_args *));
512int	vop_nounlock __P((struct vop_unlock_args *));
513int	vop_revoke __P((struct vop_revoke_args *));
514int	vop_sharedlock __P((struct vop_lock_args *));
515struct vnode *
516	checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp));
517void 	vput __P((struct vnode *vp));
518void 	vref __P((struct vnode *vp));
519void 	vrele __P((struct vnode *vp));
520#endif /* KERNEL */
521
522#endif /* !_SYS_VNODE_H_ */
523