vfs_vnops.c revision 1.45
1/*	$NetBSD: vfs_vnops.c,v 1.45 2000/11/27 08:39:44 chs Exp $	*/
2
3/*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 *    must display the following acknowledgement:
22 *	This product includes software developed by the University of
23 *	California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 *    may be used to endorse or promote products derived from this software
26 *    without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 *	@(#)vfs_vnops.c	8.14 (Berkeley) 6/15/95
41 */
42
43#include "fs_union.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/kernel.h>
48#include <sys/file.h>
49#include <sys/stat.h>
50#include <sys/buf.h>
51#include <sys/proc.h>
52#include <sys/mount.h>
53#include <sys/namei.h>
54#include <sys/vnode.h>
55#include <sys/ioctl.h>
56#include <sys/tty.h>
57#include <sys/poll.h>
58
59#include <uvm/uvm_extern.h>
60
61#ifdef UNION
62#include <miscfs/union/union.h>
63#endif
64
65struct 	fileops vnops =
66	{ vn_read, vn_write, vn_ioctl, vn_fcntl, vn_poll, vn_closefile };
67
68/*
69 * Common code for vnode open operations.
70 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
71 */
72int
73vn_open(ndp, fmode, cmode)
74	struct nameidata *ndp;
75	int fmode, cmode;
76{
77	struct vnode *vp;
78	struct proc *p = ndp->ni_cnd.cn_proc;
79	struct ucred *cred = p->p_ucred;
80	struct vattr va;
81	int error;
82
83	if (fmode & O_CREAT) {
84		ndp->ni_cnd.cn_nameiop = CREATE;
85		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
86		if ((fmode & O_EXCL) == 0 &&
87		    ((fmode & FNOSYMLINK) == 0))
88			ndp->ni_cnd.cn_flags |= FOLLOW;
89		if ((error = namei(ndp)) != 0)
90			return (error);
91		if (ndp->ni_vp == NULL) {
92			VATTR_NULL(&va);
93			va.va_type = VREG;
94			va.va_mode = cmode;
95			if (fmode & O_EXCL)
96				 va.va_vaflags |= VA_EXCLUSIVE;
97			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
98			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
99					   &ndp->ni_cnd, &va);
100			if (error)
101				return (error);
102			fmode &= ~O_TRUNC;
103			vp = ndp->ni_vp;
104		} else {
105			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
106			if (ndp->ni_dvp == ndp->ni_vp)
107				vrele(ndp->ni_dvp);
108			else
109				vput(ndp->ni_dvp);
110			ndp->ni_dvp = NULL;
111			vp = ndp->ni_vp;
112			if (fmode & O_EXCL) {
113				error = EEXIST;
114				goto bad;
115			}
116			if (ndp->ni_vp->v_type == VLNK) {
117				error = EFTYPE;
118				goto bad;
119			}
120			fmode &= ~O_CREAT;
121		}
122	} else {
123		ndp->ni_cnd.cn_nameiop = LOOKUP;
124		ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
125		if ((error = namei(ndp)) != 0)
126			return (error);
127		vp = ndp->ni_vp;
128	}
129	if (vp->v_type == VSOCK) {
130		error = EOPNOTSUPP;
131		goto bad;
132	}
133	if ((fmode & O_CREAT) == 0) {
134		if (fmode & FREAD) {
135			if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
136				goto bad;
137		}
138		if (fmode & (FWRITE | O_TRUNC)) {
139			if (vp->v_type == VDIR) {
140				error = EISDIR;
141				goto bad;
142			}
143			if ((error = vn_writechk(vp)) != 0 ||
144			    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
145				goto bad;
146		}
147	}
148	if (fmode & O_TRUNC) {
149		VOP_UNLOCK(vp, 0);			/* XXX */
150		VOP_LEASE(vp, p, cred, LEASE_WRITE);
151		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
152		VATTR_NULL(&va);
153		va.va_size = 0;
154		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
155			goto bad;
156	}
157	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
158		goto bad;
159	if (vp->v_type == VREG &&
160	    uvn_attach(vp, fmode & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
161		error = EIO;
162		goto bad;
163	}
164	if (fmode & FWRITE)
165		vp->v_writecount++;
166
167	return (0);
168bad:
169	vput(vp);
170	return (error);
171}
172
173/*
174 * Check for write permissions on the specified vnode.
175 * Prototype text segments cannot be written.
176 */
177int
178vn_writechk(vp)
179	struct vnode *vp;
180{
181
182	/*
183	 * If the vnode is in use as a process's text,
184	 * we can't allow writing.
185	 */
186	if (vp->v_flag & VTEXT)
187		return (ETXTBSY);
188	return (0);
189}
190
191/*
192 * Mark a vnode as being the text image of a running process.
193 */
194void
195vn_marktext(vp)
196	struct vnode *vp;
197{
198	vp->v_flag |= VTEXT;
199}
200
201/*
202 * Vnode close call
203 *
204 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node.
205 */
206int
207vn_close(vp, flags, cred, p)
208	struct vnode *vp;
209	int flags;
210	struct ucred *cred;
211	struct proc *p;
212{
213	int error;
214
215	if (flags & FWRITE)
216		vp->v_writecount--;
217	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
218	error = VOP_CLOSE(vp, flags, cred, p);
219	vput(vp);
220	return (error);
221}
222
223/*
224 * Package up an I/O request on a vnode into a uio and do it.
225 */
226int
227vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
228	enum uio_rw rw;
229	struct vnode *vp;
230	caddr_t base;
231	int len;
232	off_t offset;
233	enum uio_seg segflg;
234	int ioflg;
235	struct ucred *cred;
236	size_t *aresid;
237	struct proc *p;
238{
239	struct uio auio;
240	struct iovec aiov;
241	int error;
242
243	if ((ioflg & IO_NODELOCKED) == 0)
244		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
245	auio.uio_iov = &aiov;
246	auio.uio_iovcnt = 1;
247	aiov.iov_base = base;
248	aiov.iov_len = len;
249	auio.uio_resid = len;
250	auio.uio_offset = offset;
251	auio.uio_segflg = segflg;
252	auio.uio_rw = rw;
253	auio.uio_procp = p;
254	if (rw == UIO_READ) {
255		error = VOP_READ(vp, &auio, ioflg, cred);
256	} else {
257		error = VOP_WRITE(vp, &auio, ioflg, cred);
258	}
259	if (aresid)
260		*aresid = auio.uio_resid;
261	else
262		if (auio.uio_resid && error == 0)
263			error = EIO;
264	if ((ioflg & IO_NODELOCKED) == 0)
265		VOP_UNLOCK(vp, 0);
266	return (error);
267}
268
269int
270vn_readdir(fp, buf, segflg, count, done, p, cookies, ncookies)
271	struct file *fp;
272	char *buf;
273	int segflg, *done, *ncookies;
274	u_int count;
275	struct proc *p;
276	off_t **cookies;
277{
278	struct vnode *vp = (struct vnode *)fp->f_data;
279	struct iovec aiov;
280	struct uio auio;
281	int error, eofflag;
282
283unionread:
284	if (vp->v_type != VDIR)
285		return (EINVAL);
286	aiov.iov_base = buf;
287	aiov.iov_len = count;
288	auio.uio_iov = &aiov;
289	auio.uio_iovcnt = 1;
290	auio.uio_rw = UIO_READ;
291	auio.uio_segflg = segflg;
292	auio.uio_procp = p;
293	auio.uio_resid = count;
294	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
295	auio.uio_offset = fp->f_offset;
296	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies,
297		    ncookies);
298	fp->f_offset = auio.uio_offset;
299	VOP_UNLOCK(vp, 0);
300	if (error)
301		return (error);
302
303#ifdef UNION
304{
305	extern struct vnode *union_dircache __P((struct vnode *));
306
307	if (count == auio.uio_resid && (vp->v_op == union_vnodeop_p)) {
308		struct vnode *lvp;
309
310		lvp = union_dircache(vp);
311		if (lvp != NULLVP) {
312			struct vattr va;
313
314			/*
315			 * If the directory is opaque,
316			 * then don't show lower entries
317			 */
318			error = VOP_GETATTR(vp, &va, fp->f_cred, p);
319			if (va.va_flags & OPAQUE) {
320				vput(lvp);
321				lvp = NULL;
322			}
323		}
324
325		if (lvp != NULLVP) {
326			error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
327			if (error) {
328				vput(lvp);
329				return (error);
330			}
331			VOP_UNLOCK(lvp, 0);
332			fp->f_data = (caddr_t) lvp;
333			fp->f_offset = 0;
334			error = vn_close(vp, FREAD, fp->f_cred, p);
335			if (error)
336				return (error);
337			vp = lvp;
338			goto unionread;
339		}
340	}
341}
342#endif /* UNION */
343
344	if (count == auio.uio_resid && (vp->v_flag & VROOT) &&
345	    (vp->v_mount->mnt_flag & MNT_UNION)) {
346		struct vnode *tvp = vp;
347		vp = vp->v_mount->mnt_vnodecovered;
348		VREF(vp);
349		fp->f_data = (caddr_t) vp;
350		fp->f_offset = 0;
351		vrele(tvp);
352		goto unionread;
353	}
354	*done = count - auio.uio_resid;
355	return error;
356}
357
358/*
359 * File table vnode read routine.
360 */
361int
362vn_read(fp, offset, uio, cred, flags)
363	struct file *fp;
364	off_t *offset;
365	struct uio *uio;
366	struct ucred *cred;
367	int flags;
368{
369	struct vnode *vp = (struct vnode *)fp->f_data;
370	int count, error, ioflag = 0;
371
372	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_READ);
373	if (fp->f_flag & FNONBLOCK)
374		ioflag |= IO_NDELAY;
375	if ((fp->f_flag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC))
376		ioflag |= IO_SYNC;
377	if (fp->f_flag & FALTIO)
378		ioflag |= IO_ALTSEMANTICS;
379	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
380	uio->uio_offset = *offset;
381	count = uio->uio_resid;
382	error = VOP_READ(vp, uio, ioflag, cred);
383	if (flags & FOF_UPDATE_OFFSET)
384		*offset += count - uio->uio_resid;
385	VOP_UNLOCK(vp, 0);
386	return (error);
387}
388
389/*
390 * File table vnode write routine.
391 */
392int
393vn_write(fp, offset, uio, cred, flags)
394	struct file *fp;
395	off_t *offset;
396	struct uio *uio;
397	struct ucred *cred;
398	int flags;
399{
400	struct vnode *vp = (struct vnode *)fp->f_data;
401	int count, error, ioflag = IO_UNIT;
402
403	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
404		ioflag |= IO_APPEND;
405	if (fp->f_flag & FNONBLOCK)
406		ioflag |= IO_NDELAY;
407	if (fp->f_flag & FFSYNC ||
408	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
409		ioflag |= IO_SYNC;
410	else if (fp->f_flag & FDSYNC)
411		ioflag |= IO_DSYNC;
412	if (fp->f_flag & FALTIO)
413		ioflag |= IO_ALTSEMANTICS;
414	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_WRITE);
415	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
416	uio->uio_offset = *offset;
417	count = uio->uio_resid;
418	error = VOP_WRITE(vp, uio, ioflag, cred);
419	if (flags & FOF_UPDATE_OFFSET) {
420		if (ioflag & IO_APPEND)
421			*offset = uio->uio_offset;
422		else
423			*offset += count - uio->uio_resid;
424	}
425	VOP_UNLOCK(vp, 0);
426	return (error);
427}
428
429/*
430 * File table vnode stat routine.
431 */
432int
433vn_stat(vp, sb, p)
434	struct vnode *vp;
435	struct stat *sb;
436	struct proc *p;
437{
438	struct vattr va;
439	int error;
440	mode_t mode;
441
442	error = VOP_GETATTR(vp, &va, p->p_ucred, p);
443	if (error)
444		return (error);
445	/*
446	 * Copy from vattr table
447	 */
448	sb->st_dev = va.va_fsid;
449	sb->st_ino = va.va_fileid;
450	mode = va.va_mode;
451	switch (vp->v_type) {
452	case VREG:
453		mode |= S_IFREG;
454		break;
455	case VDIR:
456		mode |= S_IFDIR;
457		break;
458	case VBLK:
459		mode |= S_IFBLK;
460		break;
461	case VCHR:
462		mode |= S_IFCHR;
463		break;
464	case VLNK:
465		mode |= S_IFLNK;
466		break;
467	case VSOCK:
468		mode |= S_IFSOCK;
469		break;
470	case VFIFO:
471		mode |= S_IFIFO;
472		break;
473	default:
474		return (EBADF);
475	};
476	sb->st_mode = mode;
477	sb->st_nlink = va.va_nlink;
478	sb->st_uid = va.va_uid;
479	sb->st_gid = va.va_gid;
480	sb->st_rdev = va.va_rdev;
481	sb->st_size = va.va_size;
482	sb->st_atimespec = va.va_atime;
483	sb->st_mtimespec = va.va_mtime;
484	sb->st_ctimespec = va.va_ctime;
485	sb->st_blksize = va.va_blocksize;
486	sb->st_flags = va.va_flags;
487	sb->st_gen = 0;
488	sb->st_blocks = va.va_bytes / S_BLKSIZE;
489	return (0);
490}
491
492/*
493 * File table vnode fcntl routine.
494 */
495int
496vn_fcntl(fp, com, data, p)
497	struct file *fp;
498	u_int com;
499	caddr_t data;
500	struct proc *p;
501{
502	struct vnode *vp = ((struct vnode *)fp->f_data);
503	int error;
504
505	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
506	error = VOP_FCNTL(vp, com, data, fp->f_flag, p->p_ucred, p);
507	VOP_UNLOCK(vp, 0);
508	return (error);
509}
510
511/*
512 * File table vnode ioctl routine.
513 */
514int
515vn_ioctl(fp, com, data, p)
516	struct file *fp;
517	u_long com;
518	caddr_t data;
519	struct proc *p;
520{
521	struct vnode *vp = ((struct vnode *)fp->f_data);
522	struct vattr vattr;
523	int error;
524
525	switch (vp->v_type) {
526
527	case VREG:
528	case VDIR:
529		if (com == FIONREAD) {
530			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
531			if (error)
532				return (error);
533			*(int *)data = vattr.va_size - fp->f_offset;
534			return (0);
535		}
536		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
537			return (0);			/* XXX */
538		/* fall into ... */
539
540	default:
541		return (ENOTTY);
542
543	case VFIFO:
544	case VCHR:
545	case VBLK:
546		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
547		if (error == 0 && com == TIOCSCTTY) {
548			if (p->p_session->s_ttyvp)
549				vrele(p->p_session->s_ttyvp);
550			p->p_session->s_ttyvp = vp;
551			VREF(vp);
552		}
553		return (error);
554	}
555}
556
557/*
558 * File table vnode poll routine.
559 */
560int
561vn_poll(fp, events, p)
562	struct file *fp;
563	int events;
564	struct proc *p;
565{
566
567	return (VOP_POLL(((struct vnode *)fp->f_data), events, p));
568}
569
570/*
571 * Check that the vnode is still valid, and if so
572 * acquire requested lock.
573 */
574int
575vn_lock(vp, flags)
576	struct vnode *vp;
577	int flags;
578{
579	int error;
580
581	do {
582		if ((flags & LK_INTERLOCK) == 0)
583			simple_lock(&vp->v_interlock);
584		if (vp->v_flag & VXLOCK) {
585			vp->v_flag |= VXWANT;
586			ltsleep((caddr_t)vp, PINOD | PNORELOCK,
587			    "vn_lock", 0, &vp->v_interlock);
588			error = ENOENT;
589		} else {
590			error = VOP_LOCK(vp, flags | LK_INTERLOCK);
591			if (error == 0 || error == EDEADLK)
592				return (error);
593		}
594		flags &= ~LK_INTERLOCK;
595	} while (flags & LK_RETRY);
596	return (error);
597}
598
599/*
600 * File table vnode close routine.
601 */
602int
603vn_closefile(fp, p)
604	struct file *fp;
605	struct proc *p;
606{
607
608	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
609		fp->f_cred, p));
610}
611
612/*
613 * Enable LK_CANRECURSE on lock. Return prior status.
614 */
615u_int
616vn_setrecurse(vp)
617	struct vnode *vp;
618{
619	struct lock *lkp = &vp->v_lock;
620	u_int retval = lkp->lk_flags & LK_CANRECURSE;
621
622	lkp->lk_flags |= LK_CANRECURSE;
623	return retval;
624}
625
626/*
627 * Called when done with locksetrecurse.
628 */
629void
630vn_restorerecurse(vp, flags)
631	struct vnode *vp;
632	u_int flags;
633{
634	struct lock *lkp = &vp->v_lock;
635
636	lkp->lk_flags &= ~LK_CANRECURSE;
637	lkp->lk_flags |= flags;
638}
639