vfs_vnops.c revision 24206
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_vnops.c	8.2 (Berkeley) 1/21/94
39 * $Id: vfs_vnops.c,v 1.33 1997/03/23 03:36:38 bde Exp $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/kernel.h>
45#include <sys/fcntl.h>
46#include <sys/file.h>
47#include <sys/stat.h>
48#include <sys/buf.h>
49#include <sys/proc.h>
50#include <sys/mount.h>
51#include <sys/namei.h>
52#include <sys/vnode.h>
53#include <sys/filio.h>
54#include <sys/ttycom.h>
55
56#include <vm/vm.h>
57#include <vm/vm_param.h>
58#include <vm/vm_object.h>
59#include <vm/vnode_pager.h>
60
61static int vn_closefile __P((struct file *fp, struct proc *p));
62static int vn_ioctl __P((struct file *fp, int com, caddr_t data,
63		struct proc *p));
64static int vn_read __P((struct file *fp, struct uio *uio,
65		struct ucred *cred));
66static int vn_select __P((struct file *fp, int which, struct proc *p));
67static int vn_write __P((struct file *fp, struct uio *uio,
68		struct ucred *cred));
69
70struct 	fileops vnops =
71	{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };
72
73/*
74 * Common code for vnode open operations.
75 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
76 */
77int
78vn_open(ndp, fmode, cmode)
79	register struct nameidata *ndp;
80	int fmode, cmode;
81{
82	register struct vnode *vp;
83	register struct proc *p = ndp->ni_cnd.cn_proc;
84	register struct ucred *cred = p->p_ucred;
85	struct vattr vat;
86	struct vattr *vap = &vat;
87	int error;
88
89	if (fmode & O_CREAT) {
90		ndp->ni_cnd.cn_nameiop = CREATE;
91		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
92		if ((fmode & O_EXCL) == 0)
93			ndp->ni_cnd.cn_flags |= FOLLOW;
94		error = namei(ndp);
95		if (error)
96			return (error);
97		if (ndp->ni_vp == NULL) {
98			VATTR_NULL(vap);
99			vap->va_type = VREG;
100			vap->va_mode = cmode;
101			if (fmode & O_EXCL)
102				vap->va_vaflags |= VA_EXCLUSIVE;
103			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
104			if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
105			    &ndp->ni_cnd, vap))
106				return (error);
107			fmode &= ~O_TRUNC;
108			vp = ndp->ni_vp;
109		} else {
110			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
111			if (ndp->ni_dvp == ndp->ni_vp)
112				vrele(ndp->ni_dvp);
113			else
114				vput(ndp->ni_dvp);
115			ndp->ni_dvp = NULL;
116			vp = ndp->ni_vp;
117			if (fmode & O_EXCL) {
118				error = EEXIST;
119				goto bad;
120			}
121			fmode &= ~O_CREAT;
122		}
123	} else {
124		ndp->ni_cnd.cn_nameiop = LOOKUP;
125		ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
126		error = namei(ndp);
127		if (error)
128			return (error);
129		vp = ndp->ni_vp;
130	}
131	if (vp->v_type == VSOCK) {
132		error = EOPNOTSUPP;
133		goto bad;
134	}
135	if ((fmode & O_CREAT) == 0) {
136		if (fmode & FREAD) {
137			error = VOP_ACCESS(vp, VREAD, cred, p);
138			if (error)
139				goto bad;
140		}
141		if (fmode & (FWRITE | O_TRUNC)) {
142			if (vp->v_type == VDIR) {
143				error = EISDIR;
144				goto bad;
145			}
146			error = vn_writechk(vp);
147			if (error)
148				goto bad;
149		        error = VOP_ACCESS(vp, VWRITE, cred, p);
150			if (error)
151				goto bad;
152		}
153	}
154	if (fmode & O_TRUNC) {
155		VOP_UNLOCK(vp, 0, p);				/* XXX */
156		VOP_LEASE(vp, p, cred, LEASE_WRITE);
157		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
158		VATTR_NULL(vap);
159		vap->va_size = 0;
160		error = VOP_SETATTR(vp, vap, cred, p);
161		if (error)
162			goto bad;
163	}
164	error = VOP_OPEN(vp, fmode, cred, p);
165	if (error)
166		goto bad;
167	/*
168	 * Make sure that a VM object is created for VMIO support.
169	 */
170	if (vp->v_type == VREG) {
171		if ((error = vfs_object_create(vp, p, cred, 1)) != 0)
172			goto bad;
173	}
174
175	if (fmode & FWRITE)
176		vp->v_writecount++;
177	return (0);
178bad:
179	vput(vp);
180	return (error);
181}
182
183/*
184 * Check for write permissions on the specified vnode.
185 * Prototype text segments cannot be written.
186 */
187int
188vn_writechk(vp)
189	register struct vnode *vp;
190{
191
192	/*
193	 * If there's shared text associated with
194	 * the vnode, try to free it up once.  If
195	 * we fail, we can't allow writing.
196	 */
197	if (vp->v_flag & VTEXT)
198		return (ETXTBSY);
199	return (0);
200}
201
202/*
203 * Vnode close call
204 */
205int
206vn_close(vp, flags, cred, p)
207	register struct vnode *vp;
208	int flags;
209	struct ucred *cred;
210	struct proc *p;
211{
212	int error;
213
214	if (flags & FWRITE)
215		vp->v_writecount--;
216	error = VOP_CLOSE(vp, flags, cred, p);
217	vrele(vp);
218	return (error);
219}
220
221/*
222 * Package up an I/O request on a vnode into a uio and do it.
223 */
224int
225vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
226	enum uio_rw rw;
227	struct vnode *vp;
228	caddr_t base;
229	int len;
230	off_t offset;
231	enum uio_seg segflg;
232	int ioflg;
233	struct ucred *cred;
234	int *aresid;
235	struct proc *p;
236{
237	struct uio auio;
238	struct iovec aiov;
239	int error;
240
241	if ((ioflg & IO_NODELOCKED) == 0)
242		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
243	auio.uio_iov = &aiov;
244	auio.uio_iovcnt = 1;
245	aiov.iov_base = base;
246	aiov.iov_len = len;
247	auio.uio_resid = len;
248	auio.uio_offset = offset;
249	auio.uio_segflg = segflg;
250	auio.uio_rw = rw;
251	auio.uio_procp = p;
252	if (rw == UIO_READ) {
253		error = VOP_READ(vp, &auio, ioflg, cred);
254	} else {
255		error = VOP_WRITE(vp, &auio, ioflg, cred);
256	}
257	if (aresid)
258		*aresid = auio.uio_resid;
259	else
260		if (auio.uio_resid && error == 0)
261			error = EIO;
262	if ((ioflg & IO_NODELOCKED) == 0)
263		VOP_UNLOCK(vp, 0, p);
264	return (error);
265}
266
267/*
268 * File table vnode read routine.
269 */
270static int
271vn_read(fp, uio, cred)
272	struct file *fp;
273	struct uio *uio;
274	struct ucred *cred;
275{
276	struct vnode *vp = (struct vnode *)fp->f_data;
277	struct proc *p = uio->uio_procp;
278	int count, error;
279	int flag, seq;
280
281	VOP_LEASE(vp, p, cred, LEASE_READ);
282	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
283	uio->uio_offset = fp->f_offset;
284	count = uio->uio_resid;
285	flag = 0;
286	if (fp->f_flag & FNONBLOCK)
287		flag |= IO_NDELAY;
288
289	/*
290	 * Sequential read heuristic.
291	 * If we have been doing sequential input,
292	 * a rewind operation doesn't turn off
293	 * sequential input mode.
294	 */
295	if (((fp->f_offset == 0) && (fp->f_seqcount > 0)) ||
296		(fp->f_offset == fp->f_nextread)) {
297		int tmpseq = fp->f_seqcount;
298		/*
299		 * XXX we assume that the filesystem block size is
300		 * the default.  Not true, but still gives us a pretty
301		 * good indicator of how sequential the read operations
302		 * are.
303		 */
304		tmpseq += ((count + BKVASIZE - 1) / BKVASIZE);
305		if (tmpseq >= CHAR_MAX)
306			tmpseq = CHAR_MAX;
307		fp->f_seqcount = tmpseq;
308		flag |= (fp->f_seqcount << 16);
309	} else {
310		if (fp->f_seqcount > 1)
311			fp->f_seqcount = 1;
312		else
313			fp->f_seqcount = 0;
314	}
315
316	error = VOP_READ(vp, uio, flag, cred);
317	fp->f_offset += count - uio->uio_resid;
318	fp->f_nextread = fp->f_offset;
319	VOP_UNLOCK(vp, 0, p);
320	return (error);
321}
322
323/*
324 * File table vnode write routine.
325 */
326static int
327vn_write(fp, uio, cred)
328	struct file *fp;
329	struct uio *uio;
330	struct ucred *cred;
331{
332	struct vnode *vp = (struct vnode *)fp->f_data;
333	struct proc *p = uio->uio_procp;
334	int count, error, ioflag = IO_UNIT;
335
336	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
337		ioflag |= IO_APPEND;
338	if (fp->f_flag & FNONBLOCK)
339		ioflag |= IO_NDELAY;
340	if ((fp->f_flag & O_FSYNC) ||
341	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
342		ioflag |= IO_SYNC;
343	VOP_LEASE(vp, p, cred, LEASE_WRITE);
344	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
345	uio->uio_offset = fp->f_offset;
346	count = uio->uio_resid;
347	error = VOP_WRITE(vp, uio, ioflag, cred);
348	if (ioflag & IO_APPEND)
349		fp->f_offset = uio->uio_offset;
350	else
351		fp->f_offset += count - uio->uio_resid;
352	VOP_UNLOCK(vp, 0, p);
353	return (error);
354}
355
356/*
357 * File table vnode stat routine.
358 */
359int
360vn_stat(vp, sb, p)
361	struct vnode *vp;
362	register struct stat *sb;
363	struct proc *p;
364{
365	struct vattr vattr;
366	register struct vattr *vap;
367	int error;
368	u_short mode;
369
370	vap = &vattr;
371	error = VOP_GETATTR(vp, vap, p->p_ucred, p);
372	if (error)
373		return (error);
374	/*
375	 * Copy from vattr table
376	 */
377	sb->st_dev = vap->va_fsid;
378	sb->st_ino = vap->va_fileid;
379	mode = vap->va_mode;
380	switch (vp->v_type) {
381	case VREG:
382		mode |= S_IFREG;
383		break;
384	case VDIR:
385		mode |= S_IFDIR;
386		break;
387	case VBLK:
388		mode |= S_IFBLK;
389		break;
390	case VCHR:
391		mode |= S_IFCHR;
392		break;
393	case VLNK:
394		mode |= S_IFLNK;
395		break;
396	case VSOCK:
397		mode |= S_IFSOCK;
398		break;
399	case VFIFO:
400		mode |= S_IFIFO;
401		break;
402	default:
403		return (EBADF);
404	};
405	sb->st_mode = mode;
406	sb->st_nlink = vap->va_nlink;
407	sb->st_uid = vap->va_uid;
408	sb->st_gid = vap->va_gid;
409	sb->st_rdev = vap->va_rdev;
410	sb->st_size = vap->va_size;
411	sb->st_atimespec = vap->va_atime;
412	sb->st_mtimespec = vap->va_mtime;
413	sb->st_ctimespec = vap->va_ctime;
414	sb->st_blksize = vap->va_blocksize;
415	sb->st_flags = vap->va_flags;
416	if (p->p_ucred->cr_uid != 0)
417		sb->st_gen = 0;
418	else
419		sb->st_gen = vap->va_gen;
420
421#if (S_BLKSIZE == 512)
422	/* Optimize this case */
423	sb->st_blocks = vap->va_bytes >> 9;
424#else
425	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
426#endif
427	return (0);
428}
429
430/*
431 * File table vnode ioctl routine.
432 */
433static int
434vn_ioctl(fp, com, data, p)
435	struct file *fp;
436	int com;
437	caddr_t data;
438	struct proc *p;
439{
440	register struct vnode *vp = ((struct vnode *)fp->f_data);
441	struct vattr vattr;
442	int error;
443
444	switch (vp->v_type) {
445
446	case VREG:
447	case VDIR:
448		if (com == FIONREAD) {
449			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
450			if (error)
451				return (error);
452			*(int *)data = vattr.va_size - fp->f_offset;
453			return (0);
454		}
455		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
456			return (0);			/* XXX */
457		/* fall into ... */
458
459	default:
460		return (ENOTTY);
461
462	case VFIFO:
463	case VCHR:
464	case VBLK:
465		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
466		if (error == 0 && com == TIOCSCTTY) {
467
468			/* Do nothing if reassigning same control tty */
469			if (p->p_session->s_ttyvp == vp)
470				return (0);
471
472			/* Get rid of reference to old control tty */
473			if (p->p_session->s_ttyvp)
474				vrele(p->p_session->s_ttyvp);
475
476			p->p_session->s_ttyvp = vp;
477			VREF(vp);
478		}
479		return (error);
480	}
481}
482
483/*
484 * File table vnode select routine.
485 */
486static int
487vn_select(fp, which, p)
488	struct file *fp;
489	int which;
490	struct proc *p;
491{
492
493	return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag,
494		fp->f_cred, p));
495}
496
497/*
498 * File table vnode close routine.
499 */
500static int
501vn_closefile(fp, p)
502	struct file *fp;
503	struct proc *p;
504{
505
506	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
507		fp->f_cred, p));
508}
509
510/*
511 * Check that the vnode is still valid, and if so
512 * acquire requested lock.
513 */
514int
515vn_lock(vp, flags, p)
516	struct vnode *vp;
517	int flags;
518	struct proc *p;
519{
520	int error;
521
522	do {
523		if ((flags & LK_INTERLOCK) == 0) {
524			simple_lock(&vp->v_interlock);
525		}
526		if (vp->v_flag & VXLOCK) {
527			vp->v_flag |= VXWANT;
528			simple_unlock(&vp->v_interlock);
529			tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
530			error = ENOENT;
531		} else {
532			error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
533			if (error == 0)
534				return (error);
535		}
536		flags &= ~LK_INTERLOCK;
537	} while (flags & LK_RETRY);
538	return (error);
539}
540