vfs_vnops.c revision 1.188
1/*	$NetBSD: vfs_vnops.c,v 1.188 2014/01/23 10:13:57 hannken Exp $	*/
2
3/*-
4 * Copyright (c) 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1982, 1986, 1989, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)vfs_vnops.c	8.14 (Berkeley) 6/15/95
66 */
67
68#include <sys/cdefs.h>
69__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.188 2014/01/23 10:13:57 hannken Exp $");
70
71#include "veriexec.h"
72
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/kernel.h>
76#include <sys/file.h>
77#include <sys/stat.h>
78#include <sys/buf.h>
79#include <sys/proc.h>
80#include <sys/mount.h>
81#include <sys/namei.h>
82#include <sys/vnode.h>
83#include <sys/ioctl.h>
84#include <sys/tty.h>
85#include <sys/poll.h>
86#include <sys/kauth.h>
87#include <sys/syslog.h>
88#include <sys/fstrans.h>
89#include <sys/atomic.h>
90#include <sys/filedesc.h>
91#include <sys/wapbl.h>
92
93#include <miscfs/specfs/specdev.h>
94#include <miscfs/fifofs/fifo.h>
95
96#include <uvm/uvm_extern.h>
97#include <uvm/uvm_readahead.h>
98
99#ifdef UNION
100#include <fs/union/union.h>
101#endif
102
/*
 * Optional hook used by vn_readdir().  When non-NULL it is called after a
 * directory read that transferred nothing, and may replace the vnode that
 * vn_readdir() is reading from.
 */
int (*vn_union_readdir_hook)(struct vnode **, struct file *, struct lwp *);
104
105#include <sys/verified_exec.h>
106
107static int vn_read(file_t *fp, off_t *offset, struct uio *uio,
108	    kauth_cred_t cred, int flags);
109static int vn_write(file_t *fp, off_t *offset, struct uio *uio,
110	    kauth_cred_t cred, int flags);
111static int vn_closefile(file_t *fp);
112static int vn_poll(file_t *fp, int events);
113static int vn_fcntl(file_t *fp, u_int com, void *data);
114static int vn_statfile(file_t *fp, struct stat *sb);
115static int vn_ioctl(file_t *fp, u_long com, void *data);
116
117const struct fileops vnops = {
118	.fo_read = vn_read,
119	.fo_write = vn_write,
120	.fo_ioctl = vn_ioctl,
121	.fo_fcntl = vn_fcntl,
122	.fo_poll = vn_poll,
123	.fo_stat = vn_statfile,
124	.fo_close = vn_closefile,
125	.fo_kqfilter = vn_kqfilter,
126	.fo_restart = fnullop_restart,
127};
128
129/*
130 * Common code for vnode open operations.
131 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
132 */
133int
134vn_open(struct nameidata *ndp, int fmode, int cmode)
135{
136	struct vnode *vp;
137	struct lwp *l = curlwp;
138	kauth_cred_t cred = l->l_cred;
139	struct vattr va;
140	int error;
141	const char *pathstring;
142
143	if ((fmode & (O_CREAT | O_DIRECTORY)) == (O_CREAT | O_DIRECTORY))
144		return EINVAL;
145
146	ndp->ni_cnd.cn_flags &= TRYEMULROOT | NOCHROOT;
147
148	if (fmode & O_CREAT) {
149		ndp->ni_cnd.cn_nameiop = CREATE;
150		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF;
151		if ((fmode & O_EXCL) == 0 &&
152		    ((fmode & O_NOFOLLOW) == 0))
153			ndp->ni_cnd.cn_flags |= FOLLOW;
154	} else {
155		ndp->ni_cnd.cn_nameiop = LOOKUP;
156		ndp->ni_cnd.cn_flags |= LOCKLEAF;
157		if ((fmode & O_NOFOLLOW) == 0)
158			ndp->ni_cnd.cn_flags |= FOLLOW;
159	}
160
161	pathstring = pathbuf_stringcopy_get(ndp->ni_pathbuf);
162	if (pathstring == NULL) {
163		return ENOMEM;
164	}
165
166	error = namei(ndp);
167	if (error)
168		goto out;
169
170	vp = ndp->ni_vp;
171
172#if NVERIEXEC > 0
173	error = veriexec_openchk(l, ndp->ni_vp, pathstring, fmode);
174	if (error)
175		goto bad;
176#endif /* NVERIEXEC > 0 */
177
178	if (fmode & O_CREAT) {
179		if (ndp->ni_vp == NULL) {
180			vattr_null(&va);
181			va.va_type = VREG;
182			va.va_mode = cmode;
183			if (fmode & O_EXCL)
184				 va.va_vaflags |= VA_EXCLUSIVE;
185			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
186					   &ndp->ni_cnd, &va);
187			vput(ndp->ni_dvp);
188			if (error)
189				goto out;
190			fmode &= ~O_TRUNC;
191			vp = ndp->ni_vp;
192			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
193		} else {
194			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
195			if (ndp->ni_dvp == ndp->ni_vp)
196				vrele(ndp->ni_dvp);
197			else
198				vput(ndp->ni_dvp);
199			ndp->ni_dvp = NULL;
200			vp = ndp->ni_vp;
201			if (fmode & O_EXCL) {
202				error = EEXIST;
203				goto bad;
204			}
205			fmode &= ~O_CREAT;
206		}
207	} else {
208		vp = ndp->ni_vp;
209	}
210	if (vp->v_type == VSOCK) {
211		error = EOPNOTSUPP;
212		goto bad;
213	}
214	if (ndp->ni_vp->v_type == VLNK) {
215		error = EFTYPE;
216		goto bad;
217	}
218
219	if ((fmode & O_CREAT) == 0) {
220		error = vn_openchk(vp, cred, fmode);
221		if (error != 0)
222			goto bad;
223	}
224
225	if (fmode & O_TRUNC) {
226		vattr_null(&va);
227		va.va_size = 0;
228		error = VOP_SETATTR(vp, &va, cred);
229		if (error != 0)
230			goto bad;
231	}
232	if ((error = VOP_OPEN(vp, fmode, cred)) != 0)
233		goto bad;
234	if (fmode & FWRITE) {
235		mutex_enter(vp->v_interlock);
236		vp->v_writecount++;
237		mutex_exit(vp->v_interlock);
238	}
239
240bad:
241	if (error)
242		vput(vp);
243out:
244	pathbuf_stringcopy_put(ndp->ni_pathbuf, pathstring);
245	return (error);
246}
247
248/*
249 * Check for write permissions on the specified vnode.
250 * Prototype text segments cannot be written.
251 */
252int
253vn_writechk(struct vnode *vp)
254{
255
256	/*
257	 * If the vnode is in use as a process's text,
258	 * we can't allow writing.
259	 */
260	if (vp->v_iflag & VI_TEXT)
261		return (ETXTBSY);
262	return (0);
263}
264
265int
266vn_openchk(struct vnode *vp, kauth_cred_t cred, int fflags)
267{
268	int permbits = 0;
269	int error;
270
271	if ((fflags & O_DIRECTORY) != 0 && vp->v_type != VDIR)
272		return ENOTDIR;
273
274	if ((fflags & FREAD) != 0) {
275		permbits = VREAD;
276	}
277	if ((fflags & (FWRITE | O_TRUNC)) != 0) {
278		permbits |= VWRITE;
279		if (vp->v_type == VDIR) {
280			error = EISDIR;
281			goto bad;
282		}
283		error = vn_writechk(vp);
284		if (error != 0)
285			goto bad;
286	}
287	error = VOP_ACCESS(vp, permbits, cred);
288bad:
289	return error;
290}
291
292/*
293 * Mark a vnode as having executable mappings.
294 */
295void
296vn_markexec(struct vnode *vp)
297{
298
299	if ((vp->v_iflag & VI_EXECMAP) != 0) {
300		/* Safe unlocked, as long as caller holds a reference. */
301		return;
302	}
303
304	mutex_enter(vp->v_interlock);
305	if ((vp->v_iflag & VI_EXECMAP) == 0) {
306		atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages);
307		atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages);
308		vp->v_iflag |= VI_EXECMAP;
309	}
310	mutex_exit(vp->v_interlock);
311}
312
313/*
314 * Mark a vnode as being the text of a process.
315 * Fail if the vnode is currently writable.
316 */
317int
318vn_marktext(struct vnode *vp)
319{
320
321	if ((vp->v_iflag & (VI_TEXT|VI_EXECMAP)) == (VI_TEXT|VI_EXECMAP)) {
322		/* Safe unlocked, as long as caller holds a reference. */
323		return (0);
324	}
325
326	mutex_enter(vp->v_interlock);
327	if (vp->v_writecount != 0) {
328		KASSERT((vp->v_iflag & VI_TEXT) == 0);
329		mutex_exit(vp->v_interlock);
330		return (ETXTBSY);
331	}
332	if ((vp->v_iflag & VI_EXECMAP) == 0) {
333		atomic_add_int(&uvmexp.filepages, -vp->v_uobj.uo_npages);
334		atomic_add_int(&uvmexp.execpages, vp->v_uobj.uo_npages);
335	}
336	vp->v_iflag |= (VI_TEXT | VI_EXECMAP);
337	mutex_exit(vp->v_interlock);
338	return (0);
339}
340
341/*
342 * Vnode close call
343 *
344 * Note: takes an unlocked vnode, while VOP_CLOSE takes a locked node.
345 */
346int
347vn_close(struct vnode *vp, int flags, kauth_cred_t cred)
348{
349	int error;
350
351	if (flags & FWRITE) {
352		mutex_enter(vp->v_interlock);
353		KASSERT(vp->v_writecount > 0);
354		vp->v_writecount--;
355		mutex_exit(vp->v_interlock);
356	}
357	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
358	error = VOP_CLOSE(vp, flags, cred);
359	vput(vp);
360	return (error);
361}
362
363static int
364enforce_rlimit_fsize(struct vnode *vp, struct uio *uio, int ioflag)
365{
366	struct lwp *l = curlwp;
367	off_t testoff;
368
369	if (uio->uio_rw != UIO_WRITE || vp->v_type != VREG)
370		return 0;
371
372	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
373	if (ioflag & IO_APPEND)
374		testoff = vp->v_size;
375	else
376		testoff = uio->uio_offset;
377
378	if (testoff + uio->uio_resid >
379	    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
380		mutex_enter(proc_lock);
381		psignal(l->l_proc, SIGXFSZ);
382		mutex_exit(proc_lock);
383		return EFBIG;
384	}
385
386	return 0;
387}
388
389/*
390 * Package up an I/O request on a vnode into a uio and do it.
391 */
392int
393vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset,
394    enum uio_seg segflg, int ioflg, kauth_cred_t cred, size_t *aresid,
395    struct lwp *l)
396{
397	struct uio auio;
398	struct iovec aiov;
399	int error;
400
401	if ((ioflg & IO_NODELOCKED) == 0) {
402		if (rw == UIO_READ) {
403			vn_lock(vp, LK_SHARED | LK_RETRY);
404		} else /* UIO_WRITE */ {
405			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
406		}
407	}
408	auio.uio_iov = &aiov;
409	auio.uio_iovcnt = 1;
410	aiov.iov_base = base;
411	aiov.iov_len = len;
412	auio.uio_resid = len;
413	auio.uio_offset = offset;
414	auio.uio_rw = rw;
415	if (segflg == UIO_SYSSPACE) {
416		UIO_SETUP_SYSSPACE(&auio);
417	} else {
418		auio.uio_vmspace = l->l_proc->p_vmspace;
419	}
420
421	if ((error = enforce_rlimit_fsize(vp, &auio, ioflg)) != 0)
422		goto out;
423
424	if (rw == UIO_READ) {
425		error = VOP_READ(vp, &auio, ioflg, cred);
426	} else {
427		error = VOP_WRITE(vp, &auio, ioflg, cred);
428	}
429
430	if (aresid)
431		*aresid = auio.uio_resid;
432	else
433		if (auio.uio_resid && error == 0)
434			error = EIO;
435
436 out:
437	if ((ioflg & IO_NODELOCKED) == 0) {
438		VOP_UNLOCK(vp);
439	}
440	return (error);
441}
442
443int
444vn_readdir(file_t *fp, char *bf, int segflg, u_int count, int *done,
445    struct lwp *l, off_t **cookies, int *ncookies)
446{
447	struct vnode *vp = (struct vnode *)fp->f_data;
448	struct iovec aiov;
449	struct uio auio;
450	int error, eofflag;
451
452	/* Limit the size on any kernel buffers used by VOP_READDIR */
453	count = min(MAXBSIZE, count);
454
455unionread:
456	if (vp->v_type != VDIR)
457		return (EINVAL);
458	aiov.iov_base = bf;
459	aiov.iov_len = count;
460	auio.uio_iov = &aiov;
461	auio.uio_iovcnt = 1;
462	auio.uio_rw = UIO_READ;
463	if (segflg == UIO_SYSSPACE) {
464		UIO_SETUP_SYSSPACE(&auio);
465	} else {
466		KASSERT(l == curlwp);
467		auio.uio_vmspace = l->l_proc->p_vmspace;
468	}
469	auio.uio_resid = count;
470	vn_lock(vp, LK_SHARED | LK_RETRY);
471	auio.uio_offset = fp->f_offset;
472	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies,
473		    ncookies);
474	mutex_enter(&fp->f_lock);
475	fp->f_offset = auio.uio_offset;
476	mutex_exit(&fp->f_lock);
477	VOP_UNLOCK(vp);
478	if (error)
479		return (error);
480
481	if (count == auio.uio_resid && vn_union_readdir_hook) {
482		struct vnode *ovp = vp;
483
484		error = (*vn_union_readdir_hook)(&vp, fp, l);
485		if (error)
486			return (error);
487		if (vp != ovp)
488			goto unionread;
489	}
490
491	if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) &&
492	    (vp->v_mount->mnt_flag & MNT_UNION)) {
493		struct vnode *tvp = vp;
494		vp = vp->v_mount->mnt_vnodecovered;
495		vref(vp);
496		mutex_enter(&fp->f_lock);
497		fp->f_data = vp;
498		fp->f_offset = 0;
499		mutex_exit(&fp->f_lock);
500		vrele(tvp);
501		goto unionread;
502	}
503	*done = count - auio.uio_resid;
504	return error;
505}
506
507/*
508 * File table vnode read routine.
509 */
510static int
511vn_read(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
512    int flags)
513{
514	struct vnode *vp = (struct vnode *)fp->f_data;
515	int error, ioflag, fflag;
516	size_t count;
517
518	ioflag = IO_ADV_ENCODE(fp->f_advice);
519	fflag = fp->f_flag;
520	if (fflag & FNONBLOCK)
521		ioflag |= IO_NDELAY;
522	if ((fflag & (FFSYNC | FRSYNC)) == (FFSYNC | FRSYNC))
523		ioflag |= IO_SYNC;
524	if (fflag & FALTIO)
525		ioflag |= IO_ALTSEMANTICS;
526	if (fflag & FDIRECT)
527		ioflag |= IO_DIRECT;
528	vn_lock(vp, LK_SHARED | LK_RETRY);
529	uio->uio_offset = *offset;
530	count = uio->uio_resid;
531	error = VOP_READ(vp, uio, ioflag, cred);
532	if (flags & FOF_UPDATE_OFFSET)
533		*offset += count - uio->uio_resid;
534	VOP_UNLOCK(vp);
535	return (error);
536}
537
538/*
539 * File table vnode write routine.
540 */
541static int
542vn_write(file_t *fp, off_t *offset, struct uio *uio, kauth_cred_t cred,
543    int flags)
544{
545	struct vnode *vp = (struct vnode *)fp->f_data;
546	int error, ioflag, fflag;
547	size_t count;
548
549	ioflag = IO_ADV_ENCODE(fp->f_advice) | IO_UNIT;
550	fflag = fp->f_flag;
551	if (vp->v_type == VREG && (fflag & O_APPEND))
552		ioflag |= IO_APPEND;
553	if (fflag & FNONBLOCK)
554		ioflag |= IO_NDELAY;
555	if (fflag & FFSYNC ||
556	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
557		ioflag |= IO_SYNC;
558	else if (fflag & FDSYNC)
559		ioflag |= IO_DSYNC;
560	if (fflag & FALTIO)
561		ioflag |= IO_ALTSEMANTICS;
562	if (fflag & FDIRECT)
563		ioflag |= IO_DIRECT;
564	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
565	uio->uio_offset = *offset;
566	count = uio->uio_resid;
567
568	if ((error = enforce_rlimit_fsize(vp, uio, ioflag)) != 0)
569		goto out;
570
571	error = VOP_WRITE(vp, uio, ioflag, cred);
572
573	if (flags & FOF_UPDATE_OFFSET) {
574		if (ioflag & IO_APPEND) {
575			/*
576			 * SUSv3 describes behaviour for count = 0 as following:
577			 * "Before any action ... is taken, and if nbyte is zero
578			 * and the file is a regular file, the write() function
579			 * ... in the absence of errors ... shall return zero
580			 * and have no other results."
581			 */
582			if (count)
583				*offset = uio->uio_offset;
584		} else
585			*offset += count - uio->uio_resid;
586	}
587
588 out:
589	VOP_UNLOCK(vp);
590	return (error);
591}
592
593/*
594 * File table vnode stat routine.
595 */
596static int
597vn_statfile(file_t *fp, struct stat *sb)
598{
599	struct vnode *vp = fp->f_data;
600	int error;
601
602	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
603	error = vn_stat(vp, sb);
604	VOP_UNLOCK(vp);
605	return error;
606}
607
608int
609vn_stat(struct vnode *vp, struct stat *sb)
610{
611	struct vattr va;
612	int error;
613	mode_t mode;
614
615	memset(&va, 0, sizeof(va));
616	error = VOP_GETATTR(vp, &va, kauth_cred_get());
617	if (error)
618		return (error);
619	/*
620	 * Copy from vattr table
621	 */
622	memset(sb, 0, sizeof(*sb));
623	sb->st_dev = va.va_fsid;
624	sb->st_ino = va.va_fileid;
625	mode = va.va_mode;
626	switch (vp->v_type) {
627	case VREG:
628		mode |= S_IFREG;
629		break;
630	case VDIR:
631		mode |= S_IFDIR;
632		break;
633	case VBLK:
634		mode |= S_IFBLK;
635		break;
636	case VCHR:
637		mode |= S_IFCHR;
638		break;
639	case VLNK:
640		mode |= S_IFLNK;
641		break;
642	case VSOCK:
643		mode |= S_IFSOCK;
644		break;
645	case VFIFO:
646		mode |= S_IFIFO;
647		break;
648	default:
649		return (EBADF);
650	};
651	sb->st_mode = mode;
652	sb->st_nlink = va.va_nlink;
653	sb->st_uid = va.va_uid;
654	sb->st_gid = va.va_gid;
655	sb->st_rdev = va.va_rdev;
656	sb->st_size = va.va_size;
657	sb->st_atimespec = va.va_atime;
658	sb->st_mtimespec = va.va_mtime;
659	sb->st_ctimespec = va.va_ctime;
660	sb->st_birthtimespec = va.va_birthtime;
661	sb->st_blksize = va.va_blocksize;
662	sb->st_flags = va.va_flags;
663	sb->st_gen = 0;
664	sb->st_blocks = va.va_bytes / S_BLKSIZE;
665	return (0);
666}
667
668/*
669 * File table vnode fcntl routine.
670 */
671static int
672vn_fcntl(file_t *fp, u_int com, void *data)
673{
674	struct vnode *vp = fp->f_data;
675	int error;
676
677	error = VOP_FCNTL(vp, com, data, fp->f_flag, kauth_cred_get());
678	return (error);
679}
680
681/*
682 * File table vnode ioctl routine.
683 */
684static int
685vn_ioctl(file_t *fp, u_long com, void *data)
686{
687	struct vnode *vp = fp->f_data, *ovp;
688	struct vattr vattr;
689	int error;
690
691	switch (vp->v_type) {
692
693	case VREG:
694	case VDIR:
695		if (com == FIONREAD) {
696			vn_lock(vp, LK_SHARED | LK_RETRY);
697			error = VOP_GETATTR(vp, &vattr, kauth_cred_get());
698			VOP_UNLOCK(vp);
699			if (error)
700				return (error);
701			*(int *)data = vattr.va_size - fp->f_offset;
702			return (0);
703		}
704		if ((com == FIONWRITE) || (com == FIONSPACE)) {
705			/*
706			 * Files don't have send queues, so there never
707			 * are any bytes in them, nor is there any
708			 * open space in them.
709			 */
710			*(int *)data = 0;
711			return (0);
712		}
713		if (com == FIOGETBMAP) {
714			daddr_t *block;
715
716			if (*(daddr_t *)data < 0)
717				return (EINVAL);
718			block = (daddr_t *)data;
719			return (VOP_BMAP(vp, *block, NULL, block, NULL));
720		}
721		if (com == OFIOGETBMAP) {
722			daddr_t ibn, obn;
723
724			if (*(int32_t *)data < 0)
725				return (EINVAL);
726			ibn = (daddr_t)*(int32_t *)data;
727			error = VOP_BMAP(vp, ibn, NULL, &obn, NULL);
728			*(int32_t *)data = (int32_t)obn;
729			return error;
730		}
731		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
732			return (0);			/* XXX */
733		/* fall into ... */
734	case VFIFO:
735	case VCHR:
736	case VBLK:
737		error = VOP_IOCTL(vp, com, data, fp->f_flag,
738		    kauth_cred_get());
739		if (error == 0 && com == TIOCSCTTY) {
740			vref(vp);
741			mutex_enter(proc_lock);
742			ovp = curproc->p_session->s_ttyvp;
743			curproc->p_session->s_ttyvp = vp;
744			mutex_exit(proc_lock);
745			if (ovp != NULL)
746				vrele(ovp);
747		}
748		return (error);
749
750	default:
751		return (EPASSTHROUGH);
752	}
753}
754
755/*
756 * File table vnode poll routine.
757 */
758static int
759vn_poll(file_t *fp, int events)
760{
761
762	return (VOP_POLL(fp->f_data, events));
763}
764
765/*
766 * File table vnode kqfilter routine.
767 */
768int
769vn_kqfilter(file_t *fp, struct knote *kn)
770{
771
772	return (VOP_KQFILTER(fp->f_data, kn));
773}
774
775/*
776 * Check that the vnode is still valid, and if so
777 * acquire requested lock.
778 */
779int
780vn_lock(struct vnode *vp, int flags)
781{
782	int error;
783
784#if 0
785	KASSERT(vp->v_usecount > 0 || (vp->v_iflag & VI_ONWORKLST) != 0);
786#endif
787	KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY)) == 0);
788	KASSERT(!mutex_owned(vp->v_interlock));
789
790#ifdef DIAGNOSTIC
791	if (wapbl_vphaswapbl(vp))
792		WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp));
793#endif
794
795	do {
796		/*
797		 * XXX PR 37706 forced unmount of file systems is unsafe.
798		 * Race between vclean() and this the remaining problem.
799		 */
800		mutex_enter(vp->v_interlock);
801		if (vp->v_iflag & VI_XLOCK) {
802			if (flags & LK_NOWAIT) {
803				mutex_exit(vp->v_interlock);
804				return EBUSY;
805			}
806			vwait(vp, VI_XLOCK);
807			mutex_exit(vp->v_interlock);
808			error = ENOENT;
809		} else {
810			mutex_exit(vp->v_interlock);
811			error = VOP_LOCK(vp, (flags & ~LK_RETRY));
812			if (error == 0 || error == EDEADLK || error == EBUSY)
813				return (error);
814		}
815	} while (flags & LK_RETRY);
816	return (error);
817}
818
819/*
820 * File table vnode close routine.
821 */
822static int
823vn_closefile(file_t *fp)
824{
825
826	return vn_close(fp->f_data, fp->f_flag, fp->f_cred);
827}
828
829/*
830 * Simplified in-kernel wrapper calls for extended attribute access.
831 * Both calls pass in a NULL credential, authorizing a "kernel" access.
832 * Set IO_NODELOCKED in ioflg if the vnode is already locked.
833 */
834int
835vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
836    const char *attrname, size_t *buflen, void *bf, struct lwp *l)
837{
838	struct uio auio;
839	struct iovec aiov;
840	int error;
841
842	aiov.iov_len = *buflen;
843	aiov.iov_base = bf;
844
845	auio.uio_iov = &aiov;
846	auio.uio_iovcnt = 1;
847	auio.uio_rw = UIO_READ;
848	auio.uio_offset = 0;
849	auio.uio_resid = *buflen;
850	UIO_SETUP_SYSSPACE(&auio);
851
852	if ((ioflg & IO_NODELOCKED) == 0)
853		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
854
855	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio, NULL, NULL);
856
857	if ((ioflg & IO_NODELOCKED) == 0)
858		VOP_UNLOCK(vp);
859
860	if (error == 0)
861		*buflen = *buflen - auio.uio_resid;
862
863	return (error);
864}
865
866/*
867 * XXX Failure mode if partially written?
868 */
869int
870vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
871    const char *attrname, size_t buflen, const void *bf, struct lwp *l)
872{
873	struct uio auio;
874	struct iovec aiov;
875	int error;
876
877	aiov.iov_len = buflen;
878	aiov.iov_base = __UNCONST(bf);		/* XXXUNCONST kills const */
879
880	auio.uio_iov = &aiov;
881	auio.uio_iovcnt = 1;
882	auio.uio_rw = UIO_WRITE;
883	auio.uio_offset = 0;
884	auio.uio_resid = buflen;
885	UIO_SETUP_SYSSPACE(&auio);
886
887	if ((ioflg & IO_NODELOCKED) == 0) {
888		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
889	}
890
891	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, NULL);
892
893	if ((ioflg & IO_NODELOCKED) == 0) {
894		VOP_UNLOCK(vp);
895	}
896
897	return (error);
898}
899
900int
901vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
902    const char *attrname, struct lwp *l)
903{
904	int error;
905
906	if ((ioflg & IO_NODELOCKED) == 0) {
907		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
908	}
909
910	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, NULL);
911	if (error == EOPNOTSUPP)
912		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, NULL);
913
914	if ((ioflg & IO_NODELOCKED) == 0) {
915		VOP_UNLOCK(vp);
916	}
917
918	return (error);
919}
920
921void
922vn_ra_allocctx(struct vnode *vp)
923{
924	struct uvm_ractx *ra = NULL;
925
926	KASSERT(mutex_owned(vp->v_interlock));
927
928	if (vp->v_type != VREG) {
929		return;
930	}
931	if (vp->v_ractx != NULL) {
932		return;
933	}
934	if (vp->v_ractx == NULL) {
935		mutex_exit(vp->v_interlock);
936		ra = uvm_ra_allocctx();
937		mutex_enter(vp->v_interlock);
938		if (ra != NULL && vp->v_ractx == NULL) {
939			vp->v_ractx = ra;
940			ra = NULL;
941		}
942	}
943	if (ra != NULL) {
944		uvm_ra_freectx(ra);
945	}
946}
947
948int
949vn_fifo_bypass(void *v)
950{
951	struct vop_generic_args *ap = v;
952
953	return VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, v);
954}
955