1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 *    notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 *    notice, this list of conditions and the following disclaimer in the
45 *    documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 *    must display the following acknowledgement:
48 *	This product includes software developed by the University of
49 *	California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 *    may be used to endorse or promote products derived from this software
52 *    without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 *	@(#)vfs_vnops.c	8.14 (Berkeley) 6/15/95
67 *
68 */
69/*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections.  This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76#include <sys/param.h>
77#include <sys/types.h>
78#include <sys/systm.h>
79#include <sys/kernel.h>
80#include <sys/file_internal.h>
81#include <sys/stat.h>
82#include <sys/proc_internal.h>
83#include <sys/kauth.h>
84#include <sys/mount_internal.h>
85#include <sys/namei.h>
86#include <sys/vnode_internal.h>
87#include <sys/ioctl.h>
88#include <sys/tty.h>
89/* Temporary workaround for ubc.h until <rdar://4714366 is resolved */
90#define ubc_setcred ubc_setcred_deprecated
91#include <sys/ubc.h>
92#undef ubc_setcred
93int	ubc_setcred(struct vnode *, struct proc *);
94#include <sys/conf.h>
95#include <sys/disk.h>
96#include <sys/fsevents.h>
97#include <sys/kdebug.h>
98#include <sys/xattr.h>
99#include <sys/ubc_internal.h>
100#include <sys/uio_internal.h>
101#include <sys/resourcevar.h>
102#include <sys/signalvar.h>
103
104#include <vm/vm_kern.h>
105#include <vm/vm_map.h>
106
107#include <miscfs/specfs/specdev.h>
108#include <miscfs/fifofs/fifo.h>
109
110#if CONFIG_MACF
111#include <security/mac_framework.h>
112#endif
113
114#if CONFIG_PROTECT
115#include <sys/cprotect.h>
116#endif
117
118
119static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
120static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
121			vfs_context_t ctx);
122static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
123			vfs_context_t ctx);
124static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
125			vfs_context_t ctx);
126static int vn_select( struct fileproc *fp, int which, void * wql,
127			vfs_context_t ctx);
128static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
129			vfs_context_t ctx);
130static void filt_vndetach(struct knote *kn);
131static int filt_vnode(struct knote *kn, long hint);
132static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
133#if 0
134static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
135			vfs_context_t ctx);
136#endif
137
138struct 	fileops vnops =
139	{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
140
141struct  filterops vnode_filtops = {
142	.f_isfd = 1,
143	.f_attach = NULL,
144	.f_detach = filt_vndetach,
145	.f_event = filt_vnode
146};
147
/*
 * vn_open
 *
 * Simplest of the vnode open entry points: forwards to vn_open_modflags()
 * using a private copy of 'fmode', so any changes made to the open flags
 * while the open is processed are not reported back to the caller.
 *
 * Returns:	whatever vn_open_modflags() returns.
 *
 * XXX the profusion of interfaces here is probably a bad thing.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	int error;

	/* 'fmode' is our by-value copy; the callee may modify it freely. */
	error = vn_open_modflags(ndp, &fmode, cmode);

	return (error);
}
159
160int
161vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
162{
163	struct vnode_attr va;
164
165	VATTR_INIT(&va);
166	VATTR_SET(&va, va_mode, cmode);
167
168	return(vn_open_auth(ndp, fmodep, &va));
169}
170
171static int
172vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
173{
174	int error;
175
176	if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) {
177		goto bad;
178	}
179
180	/* Call out to allow 3rd party notification of open.
181	 * Ignore result of kauth_authorize_fileop call.
182	 */
183#if CONFIG_MACF
184	mac_vnode_notify_open(ctx, vp, fmode);
185#endif
186	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
187						   (uintptr_t)vp, 0);
188
189	return 0;
190
191bad:
192	return error;
193
194}
195
196/*
197 * May do nameidone() to allow safely adding an FSEvent.  Cue off of ni_dvp to
198 * determine whether that has happened.
199 */
200static int
201vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
202{
203	uint32_t status = 0;
204	vnode_t dvp = ndp->ni_dvp;
205	int batched;
206	int error;
207	vnode_t vp;
208
209	batched = vnode_compound_open_available(ndp->ni_dvp);
210	*did_open = FALSE;
211
212	VATTR_SET(vap, va_type, VREG);
213	if (fmode & O_EXCL)
214		vap->va_vaflags |= VA_EXCLUSIVE;
215
216#if NAMEDRSRCFORK
217	if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
218		if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
219			goto out;
220		if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
221			goto out;
222		*did_create = TRUE;
223	} else {
224#endif
225		if (!batched) {
226			if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
227				goto out;
228		}
229
230		error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
231		if (error != 0) {
232			if (batched) {
233				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
234			} else {
235				*did_create = FALSE;
236			}
237
238			if (error == EKEEPLOOKING) {
239				if (*did_create) {
240					panic("EKEEPLOOKING, but we did a create?");
241				}
242				if (!batched) {
243					panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
244				}
245				if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
246					panic("EKEEPLOOKING, but continue flag not set?");
247				}
248
249				/*
250				 * Do NOT drop the dvp: we need everything to continue the lookup.
251				 */
252				return error;
253			}
254		} else {
255			if (batched) {
256				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
257				*did_open = TRUE;
258			} else {
259				*did_create = TRUE;
260			}
261		}
262#if NAMEDRSRCFORK
263	}
264#endif
265
266	/*
267	* Unlock the fsnode (if locked) here so that we are free
268	* to drop the dvp iocount and prevent deadlock in build_path().
269	* nameidone() will still do the right thing later.
270	*/
271	vp = ndp->ni_vp;
272	namei_unlock_fsnode(ndp);
273
274	if (*did_create) {
275		int	update_flags = 0;
276
277		// Make sure the name & parent pointers are hooked up
278		if (vp->v_name == NULL)
279			update_flags |= VNODE_UPDATE_NAME;
280		if (vp->v_parent == NULLVP)
281			update_flags |= VNODE_UPDATE_PARENT;
282
283		if (update_flags)
284			vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
285
286		vnode_put(dvp);
287		ndp->ni_dvp = NULLVP;
288
289#if CONFIG_FSE
290		if (need_fsevent(FSE_CREATE_FILE, vp)) {
291			add_fsevent(FSE_CREATE_FILE, ctx,
292					FSE_ARG_VNODE, vp,
293					FSE_ARG_DONE);
294		}
295#endif
296	}
297out:
298	if (ndp->ni_dvp != NULLVP) {
299		vnode_put(dvp);
300		ndp->ni_dvp = NULLVP;
301	}
302
303	return error;
304}
305
306/*
307 * Open a file with authorization, updating the contents of the structures
308 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
309 * operation.  This function is used for both opens of existing files, and
310 * creation of new files.
311 *
 * Parameters:	ndp			The namei data pointer describing the
 *					file
 *		fmodep			A pointer to an int containing the mode
 *					information to be used for the open
316 *		vap			A pointer to the vnode attribute
317 *					descriptor to be used for the open
318 *
319 * Indirect:	*			Contents of the data structures pointed
320 *					to by the parameters are modified as
321 *					necessary to the requested operation.
322 *
323 * Returns:	0			Success
324 *		!0			errno value
325 *
326 * Notes:	The kauth_filesec_t in 'vap', if any, is in host byte order.
327 *
328 *		The contents of '*ndp' will be modified, based on the other
329 *		arguments to this function, and to return file and directory
330 *		data necessary to satisfy the requested operation.
331 *
332 *		If the file does not exist and we are creating it, then the
333 *		O_TRUNC flag will be cleared in '*fmodep' to indicate to the
334 *		caller that the file was not truncated.
335 *
336 *		If the file exists and the O_EXCL flag was not specified, then
337 *		the O_CREAT flag will be cleared in '*fmodep' to indicate to
338 *		the caller that the existing file was merely opened rather
339 *		than created.
340 *
341 *		The contents of '*vap' will be modified as necessary to
342 *		complete the operation, including setting of supported
343 *		attribute, clearing of fields containing unsupported attributes
344 *		in the request, if the request proceeds without them, etc..
345 *
 * XXX:		This function is too complicated in how it acts on its arguments
 *
 * XXX:		We should enumerate the possible errno values here, and where
 *		in the code they originated.
350 */
351int
352vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
353{
354	struct vnode *vp;
355	struct vnode *dvp;
356	vfs_context_t ctx = ndp->ni_cnd.cn_context;
357	int error;
358	int fmode;
359	uint32_t origcnflags;
360	boolean_t did_create;
361	boolean_t did_open;
362	boolean_t need_vnop_open;
363	boolean_t batched;
364	boolean_t ref_failed;
365
366again:
367	vp = NULL;
368	dvp = NULL;
369	batched = FALSE;
370	did_create = FALSE;
371	need_vnop_open = TRUE;
372	ref_failed = FALSE;
373	fmode = *fmodep;
374	origcnflags = ndp->ni_cnd.cn_flags;
375
376	/*
377	 * O_CREAT
378	 */
379	if (fmode & O_CREAT) {
380	        if ( (fmode & O_DIRECTORY) ) {
381		        error = EINVAL;
382			goto out;
383		}
384		ndp->ni_cnd.cn_nameiop = CREATE;
385#if CONFIG_TRIGGERS
386		ndp->ni_op = OP_LINK;
387#endif
388		/* Inherit USEDVP, vnode_open() supported flags only */
389		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
390		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
391		ndp->ni_flag = NAMEI_COMPOUNDOPEN;
392#if NAMEDRSRCFORK
393		/* open calls are allowed for resource forks. */
394		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
395#endif
396		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
397			ndp->ni_cnd.cn_flags |= FOLLOW;
398
399continue_create_lookup:
400		if ( (error = namei(ndp)) )
401			goto out;
402
403		dvp = ndp->ni_dvp;
404		vp = ndp->ni_vp;
405
406		batched = vnode_compound_open_available(dvp);
407
408		/* not found, create */
409		if (vp == NULL) {
410			/* must have attributes for a new file */
411			if (vap == NULL) {
412				error = EINVAL;
413				goto out;
414			}
415			/*
416			 * Attempt a create.   For a system supporting compound VNOPs, we may
417			 * find an existing file or create one; in either case, we will already
418			 * have the file open and no VNOP_OPEN() will be needed.
419			 */
420			error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);
421
422			dvp = ndp->ni_dvp;
423			vp = ndp->ni_vp;
424
425			/*
426			 * Detected a node that the filesystem couldn't handle.  Don't call
427			 * nameidone() yet, because we need that path buffer.
428			 */
429			if (error == EKEEPLOOKING) {
430				if (!batched) {
431					panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
432				}
433				goto continue_create_lookup;
434			}
435
436			nameidone(ndp);
437			if (dvp) {
438				panic("Shouldn't have a dvp here.");
439			}
440
441			if (error) {
442				/*
443				 * Check for a creation or unlink race.
444				 */
445				if (((error == EEXIST) && !(fmode & O_EXCL)) ||
446						((error == ENOENT) && (fmode & O_CREAT))){
447					if (vp)
448						vnode_put(vp);
449					goto again;
450				}
451				goto bad;
452			}
453
454			need_vnop_open = !did_open;
455		}
456		else {
457			if (fmode & O_EXCL)
458				error = EEXIST;
459
460			/*
461			 * We have a vnode.  Use compound open if available
462			 * or else fall through to "traditional" path.  Note: can't
463			 * do a compound open for root, because the parent belongs
464			 * to a different FS.
465			 */
466			if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
467				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
468
469				if (error == 0) {
470					vp = ndp->ni_vp;
471					need_vnop_open = FALSE;
472				} else if (error == EKEEPLOOKING) {
473					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
474						panic("EKEEPLOOKING, but continue flag not set?");
475					}
476					goto continue_create_lookup;
477				}
478			}
479			nameidone(ndp);
480			vnode_put(dvp);
481			ndp->ni_dvp = NULLVP;
482
483			if (error) {
484				goto bad;
485			}
486
487			fmode &= ~O_CREAT;
488
489			/* Fall through */
490		}
491	} else {
492		/*
493		 * Not O_CREAT
494		 */
495		ndp->ni_cnd.cn_nameiop = LOOKUP;
496		/* Inherit USEDVP, vnode_open() supported flags only */
497		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
498		ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
499#if NAMEDRSRCFORK
500		/* open calls are allowed for resource forks. */
501		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
502#endif
503		ndp->ni_flag = NAMEI_COMPOUNDOPEN;
504
505		/* preserve NOFOLLOW from vnode_open() */
506		if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
507			ndp->ni_cnd.cn_flags &= ~FOLLOW;
508		}
509
510		/* Do a lookup, possibly going directly to filesystem for compound operation */
511		do {
512			if ( (error = namei(ndp)) )
513				goto out;
514			vp = ndp->ni_vp;
515			dvp = ndp->ni_dvp;
516
517			/* Check for batched lookup-open */
518			batched = vnode_compound_open_available(dvp);
519			if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
520				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
521				vp = ndp->ni_vp;
522				if (error == 0) {
523					need_vnop_open = FALSE;
524				} else if (error == EKEEPLOOKING) {
525					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
526						panic("EKEEPLOOKING, but continue flag not set?");
527					}
528				}
529			}
530		} while (error == EKEEPLOOKING);
531
532		nameidone(ndp);
533		vnode_put(dvp);
534		ndp->ni_dvp = NULLVP;
535
536		if (error) {
537			goto bad;
538		}
539	}
540
541	/*
542	 * By this point, nameidone() is called, dvp iocount is dropped,
543	 * and dvp pointer is cleared.
544	 */
545	if (ndp->ni_dvp != NULLVP) {
546		panic("Haven't cleaned up adequately in vn_open_auth()");
547	}
548
549	/*
550	 * Expect to use this code for filesystems without compound VNOPs, for the root
551	 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
552	 * and for shadow files, which do not live on the same filesystems as their "parents."
553	 */
554	if (need_vnop_open) {
555		if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
556			panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
557		}
558
559		if (!did_create) {
560			error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
561			if (error) {
562				goto bad;
563			}
564		}
565
566#if CONFIG_PROTECT
567		/*
568		 * Perform any content protection access checks prior to calling
569		 * into the filesystem, if the raw encrypted mode was not
570		 * requested.
571		 *
572		 * If the va_dataprotect_flags are NOT active, or if they are,
573		 * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need
574		 * to perform the checks.
575		 */
576		if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) ||
577				((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
578			error = cp_handle_open (vp, fmode);
579			if (error) {
580				goto bad;
581			}
582		}
583#endif
584
585		error = VNOP_OPEN(vp, fmode, ctx);
586		if (error) {
587			goto bad;
588		}
589		need_vnop_open = FALSE;
590	}
591
592	// if the vnode is tagged VOPENEVT and the current process
593	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
594	// flag to the open mode so that this open won't count against
595	// the vnode when carbon delete() does a vnode_isinuse() to see
596	// if a file is currently in use.  this allows spotlight
597	// importers to not interfere with carbon apps that depend on
598	// the no-delete-if-busy semantics of carbon delete().
599	//
600	if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
601		fmode |= O_EVTONLY;
602	}
603
604	/*
605	 * Grab reference, etc.
606	 */
607	error = vn_open_auth_finish(vp, fmode, ctx);
608	if (error) {
609		ref_failed = TRUE;
610		goto bad;
611	}
612
613	/* Compound VNOP open is responsible for doing the truncate */
614	if (batched || did_create)
615		fmode &= ~O_TRUNC;
616
617	*fmodep = fmode;
618	return (0);
619
620bad:
621	/* Opened either explicitly or by a batched create */
622	if (!need_vnop_open) {
623		VNOP_CLOSE(vp, fmode, ctx);
624	}
625
626	ndp->ni_vp = NULL;
627	if (vp) {
628#if NAMEDRSRCFORK
629		/* Aggressively recycle shadow files if we error'd out during open() */
630		if ((vnode_isnamedstream(vp)) &&
631			(vp->v_parent != NULLVP) &&
632			(vnode_isshadow(vp))) {
633				vnode_recycle(vp);
634		}
635#endif
636		vnode_put(vp);
637		/*
638		 * Check for a race against unlink.  We had a vnode
639		 * but according to vnode_authorize or VNOP_OPEN it
640		 * no longer exists.
641		 *
642		 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
643		 */
644		if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
645			goto again;
646		}
647	}
648
649out:
650	return (error);
651}
652
#if vn_access_DEPRECATED
/*
 * vn_access
 *
 * Legacy authorization entry point.  Translates the VREAD/VWRITE/VEXEC
 * bits in 'mode' into the corresponding kauth vnode actions and asks
 * vnode_authorize() whether the credential referenced by 'context' holds
 * those rights on 'vp'.
 *
 * The VREAD/VWRITE/VEXEC encoding makes it very difficult to add support
 * for more rights, so this interface is slated for deprecation; callers
 * should use vnode_authorize() directly instead.
 *
 * Returns:	whatever vnode_authorize() returns.
 */
int
vn_access(vnode_t vp, int mode, vfs_context_t context)
{
	kauth_action_t	rights = 0;

	if (mode & VREAD) {
		rights |= KAUTH_VNODE_READ_DATA;
	}
	if (mode & VWRITE) {
		rights |= KAUTH_VNODE_WRITE_DATA;
	}
	if (mode & VEXEC) {
		rights |= KAUTH_VNODE_EXECUTE;
	}

	return (vnode_authorize(vp, NULL, rights, context));
}
#endif	/* vn_access_DEPRECATED */
679
680/*
681 * Vnode close call
682 */
683int
684vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
685{
686	int error;
687
688#if NAMEDRSRCFORK
689	/* Sync data from resource fork shadow file if needed. */
690	if ((vp->v_flag & VISNAMEDSTREAM) &&
691	    (vp->v_parent != NULLVP) &&
692	    vnode_isshadow(vp)) {
693		if (flags & FWASWRITTEN) {
694			(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
695		}
696	}
697#endif
698
699	/* work around for foxhound */
700	if (vnode_isspec(vp))
701		(void)vnode_rele_ext(vp, flags, 0);
702
703	error = VNOP_CLOSE(vp, flags, ctx);
704
705#if CONFIG_FSE
706	if (flags & FWASWRITTEN) {
707	        if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
708		        add_fsevent(FSE_CONTENT_MODIFIED, ctx,
709				    FSE_ARG_VNODE, vp,
710				    FSE_ARG_DONE);
711		}
712	}
713#endif
714
715	if (!vnode_isspec(vp))
716		(void)vnode_rele_ext(vp, flags, 0);
717
718	return (error);
719}
720
721static int
722vn_read_swapfile(
723	struct vnode	*vp,
724	uio_t		uio)
725{
726	int	error;
727	off_t	swap_count, this_count;
728	off_t	file_end, read_end;
729	off_t	prev_resid;
730	char 	*my_swap_page;
731
732	/*
733	 * Reading from a swap file will get you zeroes.
734	 */
735
736	my_swap_page = NULL;
737	error = 0;
738	swap_count = uio_resid(uio);
739
740	file_end = ubc_getsize(vp);
741	read_end = uio->uio_offset + uio_resid(uio);
742	if (uio->uio_offset >= file_end) {
743		/* uio starts after end of file: nothing to read */
744		swap_count = 0;
745	} else if (read_end > file_end) {
746		/* uio extends beyond end of file: stop before that */
747		swap_count -= (read_end - file_end);
748	}
749
750	while (swap_count > 0) {
751		if (my_swap_page == NULL) {
752			MALLOC(my_swap_page, char *, PAGE_SIZE,
753			       M_TEMP, M_WAITOK);
754			memset(my_swap_page, '\0', PAGE_SIZE);
755			/* add an end-of-line to keep line counters happy */
756			my_swap_page[PAGE_SIZE-1] = '\n';
757		}
758		this_count = swap_count;
759		if (this_count > PAGE_SIZE) {
760			this_count = PAGE_SIZE;
761		}
762
763		prev_resid = uio_resid(uio);
764		error = uiomove((caddr_t) my_swap_page,
765				this_count,
766				uio);
767		if (error) {
768			break;
769		}
770		swap_count -= (prev_resid - uio_resid(uio));
771	}
772	if (my_swap_page != NULL) {
773		FREE(my_swap_page, M_TEMP);
774		my_swap_page = NULL;
775	}
776
777	return error;
778}
779/*
780 * Package up an I/O request on a vnode into a uio and do it.
781 */
782int
783vn_rdwr(
784	enum uio_rw rw,
785	struct vnode *vp,
786	caddr_t base,
787	int len,
788	off_t offset,
789	enum uio_seg segflg,
790	int ioflg,
791	kauth_cred_t cred,
792	int *aresid,
793	proc_t p)
794{
795	int64_t resid;
796	int result;
797
798	result = vn_rdwr_64(rw,
799			vp,
800			(uint64_t)(uintptr_t)base,
801			(int64_t)len,
802			offset,
803			segflg,
804			ioflg,
805			cred,
806			&resid,
807			p);
808
809	/* "resid" should be bounded above by "len," which is an int */
810	if (aresid != NULL) {
811		*aresid = resid;
812	}
813
814	return result;
815}
816
817
818int
819vn_rdwr_64(
820	enum uio_rw rw,
821	struct vnode *vp,
822	uint64_t base,
823	int64_t len,
824	off_t offset,
825	enum uio_seg segflg,
826	int ioflg,
827	kauth_cred_t cred,
828	int64_t *aresid,
829	proc_t p)
830{
831	uio_t auio;
832	int spacetype;
833	struct vfs_context context;
834	int error=0;
835	char uio_buf[ UIO_SIZEOF(1) ];
836
837	context.vc_thread = current_thread();
838	context.vc_ucred = cred;
839
840	if (UIO_SEG_IS_USER_SPACE(segflg)) {
841		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
842	}
843	else {
844		spacetype = UIO_SYSSPACE;
845	}
846	auio = uio_createwithbuffer(1, offset, spacetype, rw,
847								  &uio_buf[0], sizeof(uio_buf));
848	uio_addiov(auio, base, len);
849
850#if CONFIG_MACF
851	/* XXXMAC
852	 * 	IO_NOAUTH should be re-examined.
853 	 *	Likely that mediation should be performed in caller.
854	 */
855	if ((ioflg & IO_NOAUTH) == 0) {
856	/* passed cred is fp->f_cred */
857		if (rw == UIO_READ)
858			error = mac_vnode_check_read(&context, cred, vp);
859		else
860			error = mac_vnode_check_write(&context, cred, vp);
861	}
862#endif
863
864	if (error == 0) {
865		if (rw == UIO_READ) {
866			if (vnode_isswap(vp)) {
867				error = vn_read_swapfile(vp, auio);
868			} else {
869				error = VNOP_READ(vp, auio, ioflg, &context);
870			}
871		} else {
872			error = VNOP_WRITE(vp, auio, ioflg, &context);
873		}
874	}
875
876	if (aresid)
877		*aresid = uio_resid(auio);
878	else
879		if (uio_resid(auio) && error == 0)
880			error = EIO;
881	return (error);
882}
883
884/*
885 * File table vnode read routine.
886 */
887static int
888vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
889{
890	struct vnode *vp;
891	int error, ioflag;
892	off_t count;
893
894	vp = (struct vnode *)fp->f_fglob->fg_data;
895	if ( (error = vnode_getwithref(vp)) ) {
896		return(error);
897	}
898
899#if CONFIG_MACF
900	error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
901	if (error) {
902		(void)vnode_put(vp);
903		return (error);
904	}
905#endif
906
907	/* This signals to VNOP handlers that this read came from a file table read */
908	ioflag = IO_SYSCALL_DISPATCH;
909
910	if (fp->f_fglob->fg_flag & FNONBLOCK)
911		ioflag |= IO_NDELAY;
912	if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
913	    ioflag |= IO_NOCACHE;
914	if (fp->f_fglob->fg_flag & FENCRYPTED) {
915		ioflag |= IO_ENCRYPTED;
916	}
917	if (fp->f_fglob->fg_flag & FNORDAHEAD)
918	    ioflag |= IO_RAOFF;
919
920	if ((flags & FOF_OFFSET) == 0)
921		uio->uio_offset = fp->f_fglob->fg_offset;
922	count = uio_resid(uio);
923
924	if (vnode_isswap(vp)) {
925		/* special case for swap files */
926		error = vn_read_swapfile(vp, uio);
927	} else {
928		error = VNOP_READ(vp, uio, ioflag, ctx);
929	}
930	if ((flags & FOF_OFFSET) == 0)
931		fp->f_fglob->fg_offset += count - uio_resid(uio);
932
933	(void)vnode_put(vp);
934	return (error);
935}
936
937
938/*
939 * File table vnode write routine.
940 */
941static int
942vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
943{
944	struct vnode *vp;
945	int error, ioflag;
946	off_t count;
947	int clippedsize = 0;
948	int partialwrite=0;
949	int residcount, oldcount;
950	proc_t p = vfs_context_proc(ctx);
951
952	count = 0;
953	vp = (struct vnode *)fp->f_fglob->fg_data;
954	if ( (error = vnode_getwithref(vp)) ) {
955		return(error);
956	}
957
958#if CONFIG_MACF
959	error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
960	if (error) {
961		(void)vnode_put(vp);
962		return (error);
963	}
964#endif
965
966	/*
967	 * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write originated
968	 * from a file table write.
969	 */
970	ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH);
971
972	if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
973		ioflag |= IO_APPEND;
974	if (fp->f_fglob->fg_flag & FNONBLOCK)
975		ioflag |= IO_NDELAY;
976	if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
977	        ioflag |= IO_NOCACHE;
978	if (fp->f_fglob->fg_flag & FNODIRECT)
979		ioflag |= IO_NODIRECT;
980	if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
981		ioflag |= IO_SINGLE_WRITER;
982
983	/*
984	 * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
985	 *
986	 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay
987	 * XXX the non-essential metadata without some additional VFS work;
988	 * XXX the intent at this point is to plumb the interface for it.
989	 */
990	if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) ||
991		(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) {
992		ioflag |= IO_SYNC;
993	}
994
995	if ((flags & FOF_OFFSET) == 0) {
996		uio->uio_offset = fp->f_fglob->fg_offset;
997		count = uio_resid(uio);
998	}
999	if (((flags & FOF_OFFSET) == 0) &&
1000	 	vfs_context_proc(ctx) && (vp->v_type == VREG) &&
1001            (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
1002             ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
1003	     	/*
1004		 * If the requested residual would cause us to go past the
1005		 * administrative limit, then we need to adjust the residual
1006		 * down to cause fewer bytes than requested to be written.  If
1007		 * we can't do that (e.g. the residual is already 1 byte),
1008		 * then we fail the write with EFBIG.
1009		 */
1010		residcount = uio_resid(uio);
1011            	if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
1012			clippedsize =  (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
1013		} else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
1014			clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
1015		}
1016		if (clippedsize >= residcount) {
1017			psignal(p, SIGXFSZ);
1018			vnode_put(vp);
1019			return (EFBIG);
1020		}
1021		partialwrite = 1;
1022		uio_setresid(uio, residcount-clippedsize);
1023	}
1024	if ((flags & FOF_OFFSET) != 0) {
1025		/* for pwrite, append should  be ignored */
1026		ioflag &= ~IO_APPEND;
1027		if (p && (vp->v_type == VREG) &&
1028            	((rlim_t)uio->uio_offset  >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
1029		psignal(p, SIGXFSZ);
1030		vnode_put(vp);
1031		return (EFBIG);
1032	}
1033		if (p && (vp->v_type == VREG) &&
1034			((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
1035			//Debugger("vn_bwrite:overstepping the bounds");
1036			residcount = uio_resid(uio);
1037			clippedsize =  (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
1038			partialwrite = 1;
1039			uio_setresid(uio, residcount-clippedsize);
1040		}
1041	}
1042
1043	error = VNOP_WRITE(vp, uio, ioflag, ctx);
1044
1045	if (partialwrite) {
1046		oldcount = uio_resid(uio);
1047		uio_setresid(uio, oldcount + clippedsize);
1048	}
1049
1050	if ((flags & FOF_OFFSET) == 0) {
1051		if (ioflag & IO_APPEND)
1052			fp->f_fglob->fg_offset = uio->uio_offset;
1053		else
1054			fp->f_fglob->fg_offset += count - uio_resid(uio);
1055	}
1056
1057	/*
1058	 * Set the credentials on successful writes
1059	 */
1060	if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
1061		/*
1062		 * When called from aio subsystem, we only have the proc from
1063		 * which to get the credential, at this point, so use that
1064		 * instead.  This means aio functions are incompatible with
1065		 * per-thread credentials (aio operations are proxied).  We
1066		 * can't easily correct the aio vs. settid race in this case
1067		 * anyway, so we disallow it.
1068		 */
1069		if ((flags & FOF_PCRED) == 0) {
1070			ubc_setthreadcred(vp, p, current_thread());
1071		} else {
1072			ubc_setcred(vp, p);
1073		}
1074	}
1075	(void)vnode_put(vp);
1076	return (error);
1077}
1078
1079/*
1080 * File table vnode stat routine.
1081 *
1082 * Returns:	0			Success
1083 *		EBADF
1084 *		ENOMEM
1085 *	vnode_getattr:???
1086 */
1087int
1088vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1089{
1090	struct vnode_attr va;
1091	int error;
1092	u_short mode;
1093	kauth_filesec_t fsec;
1094	struct stat *sb = (struct stat *)0;	/* warning avoidance ; protected by isstat64 */
1095	struct stat64 * sb64 = (struct stat64 *)0;  /* warning avoidance ; protected by isstat64 */
1096
1097	if (isstat64 != 0)
1098		sb64 = (struct stat64 *)sbptr;
1099	else
1100		sb = (struct stat *)sbptr;
1101	memset(&va, 0, sizeof(va));
1102	VATTR_INIT(&va);
1103	VATTR_WANTED(&va, va_fsid);
1104	VATTR_WANTED(&va, va_fileid);
1105	VATTR_WANTED(&va, va_mode);
1106	VATTR_WANTED(&va, va_type);
1107	VATTR_WANTED(&va, va_nlink);
1108	VATTR_WANTED(&va, va_uid);
1109	VATTR_WANTED(&va, va_gid);
1110	VATTR_WANTED(&va, va_rdev);
1111	VATTR_WANTED(&va, va_data_size);
1112	VATTR_WANTED(&va, va_access_time);
1113	VATTR_WANTED(&va, va_modify_time);
1114	VATTR_WANTED(&va, va_change_time);
1115	VATTR_WANTED(&va, va_create_time);
1116	VATTR_WANTED(&va, va_flags);
1117	VATTR_WANTED(&va, va_gen);
1118	VATTR_WANTED(&va, va_iosize);
1119	/* lower layers will synthesise va_total_alloc from va_data_size if required */
1120	VATTR_WANTED(&va, va_total_alloc);
1121	if (xsec != NULL) {
1122		VATTR_WANTED(&va, va_uuuid);
1123		VATTR_WANTED(&va, va_guuid);
1124		VATTR_WANTED(&va, va_acl);
1125	}
1126	error = vnode_getattr(vp, &va, ctx);
1127	if (error)
1128		goto out;
1129	/*
1130	 * Copy from vattr table
1131	 */
1132	if (isstat64 != 0) {
1133		sb64->st_dev = va.va_fsid;
1134		sb64->st_ino = (ino64_t)va.va_fileid;
1135
1136	} else {
1137		sb->st_dev = va.va_fsid;
1138		sb->st_ino = (ino_t)va.va_fileid;
1139	}
1140	mode = va.va_mode;
1141	switch (vp->v_type) {
1142	case VREG:
1143		mode |= S_IFREG;
1144		break;
1145	case VDIR:
1146		mode |= S_IFDIR;
1147		break;
1148	case VBLK:
1149		mode |= S_IFBLK;
1150		break;
1151	case VCHR:
1152		mode |= S_IFCHR;
1153		break;
1154	case VLNK:
1155		mode |= S_IFLNK;
1156		break;
1157	case VSOCK:
1158		mode |= S_IFSOCK;
1159		break;
1160	case VFIFO:
1161		mode |= S_IFIFO;
1162		break;
1163	default:
1164		error = EBADF;
1165		goto out;
1166	};
1167	if (isstat64 != 0) {
1168		sb64->st_mode = mode;
1169		sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1170		sb64->st_uid = va.va_uid;
1171		sb64->st_gid = va.va_gid;
1172		sb64->st_rdev = va.va_rdev;
1173		sb64->st_size = va.va_data_size;
1174		sb64->st_atimespec = va.va_access_time;
1175		sb64->st_mtimespec = va.va_modify_time;
1176		sb64->st_ctimespec = va.va_change_time;
1177		sb64->st_birthtimespec =
1178				VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
1179		sb64->st_blksize = va.va_iosize;
1180		sb64->st_flags = va.va_flags;
1181		sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1182	} else {
1183		sb->st_mode = mode;
1184		sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1185		sb->st_uid = va.va_uid;
1186		sb->st_gid = va.va_gid;
1187		sb->st_rdev = va.va_rdev;
1188		sb->st_size = va.va_data_size;
1189		sb->st_atimespec = va.va_access_time;
1190		sb->st_mtimespec = va.va_modify_time;
1191		sb->st_ctimespec = va.va_change_time;
1192		sb->st_blksize = va.va_iosize;
1193		sb->st_flags = va.va_flags;
1194		sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1195	}
1196
1197	/* if we're interested in extended security data and we got an ACL */
1198	if (xsec != NULL) {
1199		if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
1200		    !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
1201		    !VATTR_IS_SUPPORTED(&va, va_guuid)) {
1202			*xsec = KAUTH_FILESEC_NONE;
1203		} else {
1204
1205			if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1206				fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
1207			} else {
1208				fsec = kauth_filesec_alloc(0);
1209			}
1210			if (fsec == NULL) {
1211				error = ENOMEM;
1212				goto out;
1213			}
1214			fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
1215			if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
1216				fsec->fsec_owner = va.va_uuuid;
1217			} else {
1218				fsec->fsec_owner = kauth_null_guid;
1219			}
1220			if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
1221				fsec->fsec_group = va.va_guuid;
1222			} else {
1223				fsec->fsec_group = kauth_null_guid;
1224			}
1225			if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1226				bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
1227			} else {
1228				fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
1229			}
1230			*xsec = fsec;
1231		}
1232	}
1233
1234	/* Do not give the generation number out to unpriviledged users */
1235	if (va.va_gen && !vfs_context_issuser(ctx)) {
1236		if (isstat64 != 0)
1237			sb64->st_gen = 0;
1238		else
1239			sb->st_gen = 0;
1240	} else {
1241		if (isstat64 != 0)
1242			sb64->st_gen = va.va_gen;
1243		else
1244			sb->st_gen = va.va_gen;
1245	}
1246
1247	error = 0;
1248out:
1249	if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
1250		kauth_acl_free(va.va_acl);
1251	return (error);
1252}
1253
1254int
1255vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1256{
1257	int error;
1258
1259#if CONFIG_MACF
1260	error = mac_vnode_check_stat(ctx, NOCRED, vp);
1261	if (error)
1262		return (error);
1263#endif
1264
1265	/* authorize */
1266	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1267		return(error);
1268
1269	/* actual stat */
1270	return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1271}
1272
1273
1274/*
1275 * File table vnode ioctl routine.
1276 */
1277static int
1278vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
1279{
1280	struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
1281	off_t file_size;
1282	int error;
1283	struct vnode *ttyvp;
1284	int funnel_state;
1285	struct session * sessp;
1286
1287	if ( (error = vnode_getwithref(vp)) ) {
1288		return(error);
1289	}
1290
1291#if CONFIG_MACF
1292	error = mac_vnode_check_ioctl(ctx, vp, com);
1293	if (error)
1294		goto out;
1295#endif
1296
1297	switch (vp->v_type) {
1298	case VREG:
1299	case VDIR:
1300		if (com == FIONREAD) {
1301			if ((error = vnode_size(vp, &file_size, ctx)) != 0)
1302				goto out;
1303			*(int *)data = file_size - fp->f_fglob->fg_offset;
1304			goto out;
1305		}
1306		if (com == FIONBIO || com == FIOASYNC) {	/* XXX */
1307			goto out;
1308		}
1309		/* fall into ... */
1310
1311	default:
1312		error = ENOTTY;
1313		goto out;
1314
1315	case VFIFO:
1316	case VCHR:
1317	case VBLK:
1318
1319		/* Should not be able to set block size from user space */
1320		if (com == DKIOCSETBLOCKSIZE) {
1321			error = EPERM;
1322			goto out;
1323		}
1324
1325		if (com == FIODTYPE) {
1326			if (vp->v_type == VBLK) {
1327				if (major(vp->v_rdev) >= nblkdev) {
1328					error = ENXIO;
1329					goto out;
1330				}
1331				*(int *)data = D_TYPEMASK & bdevsw[major(vp->v_rdev)].d_type;
1332
1333			} else if (vp->v_type == VCHR) {
1334				if (major(vp->v_rdev) >= nchrdev) {
1335					error = ENXIO;
1336					goto out;
1337				}
1338				*(int *)data = D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type;
1339			} else {
1340				error = ENOTTY;
1341				goto out;
1342			}
1343			goto out;
1344		}
1345		error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
1346
1347		if (error == 0 && com == TIOCSCTTY) {
1348			error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
1349			if (error != 0) {
1350				panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
1351			}
1352
1353			funnel_state = thread_funnel_set(kernel_flock, TRUE);
1354			sessp = proc_session(vfs_context_proc(ctx));
1355
1356			session_lock(sessp);
1357			ttyvp = sessp->s_ttyvp;
1358			sessp->s_ttyvp = vp;
1359			sessp->s_ttyvid = vnode_vid(vp);
1360			session_unlock(sessp);
1361			session_rele(sessp);
1362			thread_funnel_set(kernel_flock, funnel_state);
1363
1364			if (ttyvp)
1365				vnode_rele(ttyvp);
1366		}
1367	}
1368out:
1369	(void)vnode_put(vp);
1370	return(error);
1371}
1372
1373/*
1374 * File table vnode select routine.
1375 */
1376static int
1377vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
1378{
1379	int error;
1380	struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
1381	struct vfs_context context;
1382
1383	if ( (error = vnode_getwithref(vp)) == 0 ) {
1384		context.vc_thread = current_thread();
1385		context.vc_ucred = fp->f_fglob->fg_cred;
1386
1387#if CONFIG_MACF
1388		/*
1389		 * XXX We should use a per thread credential here; minimally,
1390		 * XXX the process credential should have a persistent
1391		 * XXX reference on it before being passed in here.
1392		 */
1393		error = mac_vnode_check_select(ctx, vp, which);
1394		if (error == 0)
1395#endif
1396	        error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);
1397
1398		(void)vnode_put(vp);
1399	}
1400	return(error);
1401
1402}
1403
1404/*
1405 * File table vnode close routine.
1406 */
1407static int
1408vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1409{
1410	struct vnode *vp = (struct vnode *)fg->fg_data;
1411	int error;
1412	struct flock lf;
1413
1414	if ( (error = vnode_getwithref(vp)) == 0 ) {
1415
1416		if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1417			lf.l_whence = SEEK_SET;
1418			lf.l_start = 0;
1419			lf.l_len = 0;
1420			lf.l_type = F_UNLCK;
1421
1422			(void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1423		}
1424	        error = vn_close(vp, fg->fg_flag, ctx);
1425
1426		(void)vnode_put(vp);
1427	}
1428	return(error);
1429}
1430
1431/*
1432 * Returns:	0			Success
1433 *	VNOP_PATHCONF:???
1434 */
1435int
1436vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
1437{
1438	int	error = 0;
1439	struct vfs_attr vfa;
1440
1441	switch(name) {
1442	case _PC_EXTENDED_SECURITY_NP:
1443		*retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
1444		break;
1445	case _PC_AUTH_OPAQUE_NP:
1446		*retval = vfs_authopaque(vnode_mount(vp));
1447		break;
1448	case _PC_2_SYMLINKS:
1449		*retval = 1;	/* XXX NOTSUP on MSDOS, etc. */
1450		break;
1451	case _PC_ALLOC_SIZE_MIN:
1452		*retval = 1;	/* XXX lie: 1 byte */
1453		break;
1454	case _PC_ASYNC_IO:	/* unistd.h: _POSIX_ASYNCHRONUS_IO */
1455		*retval = 1;	/* [AIO] option is supported */
1456		break;
1457	case _PC_PRIO_IO:	/* unistd.h: _POSIX_PRIORITIZED_IO */
1458		*retval = 0;	/* [PIO] option is not supported */
1459		break;
1460	case _PC_REC_INCR_XFER_SIZE:
1461		*retval = 4096;	/* XXX go from MIN to MAX 4K at a time */
1462		break;
1463	case _PC_REC_MIN_XFER_SIZE:
1464		*retval = 4096;	/* XXX recommend 4K minimum reads/writes */
1465		break;
1466	case _PC_REC_MAX_XFER_SIZE:
1467		*retval = 65536; /* XXX recommend 64K maximum reads/writes */
1468		break;
1469	case _PC_REC_XFER_ALIGN:
1470		*retval = 4096;	/* XXX recommend page aligned buffers */
1471		break;
1472	case _PC_SYMLINK_MAX:
1473		*retval = 255;	/* Minimum acceptable POSIX value */
1474		break;
1475	case _PC_SYNC_IO:	/* unistd.h: _POSIX_SYNCHRONIZED_IO */
1476		*retval = 0;	/* [SIO] option is not supported */
1477		break;
1478	case _PC_XATTR_SIZE_BITS:
1479		/* The number of bits used to store maximum extended
1480		 * attribute size in bytes.  For example, if the maximum
1481		 * attribute size supported by a file system is 128K, the
1482		 * value returned will be 18.  However a value 18 can mean
1483		 * that the maximum attribute size can be anywhere from
1484		 * (256KB - 1) to 128KB.  As a special case, the resource
1485		 * fork can have much larger size, and some file system
1486		 * specific extended attributes can have smaller and preset
1487		 * size; for example, Finder Info is always 32 bytes.
1488		 */
1489		memset(&vfa, 0, sizeof(vfa));
1490		VFSATTR_INIT(&vfa);
1491		VFSATTR_WANTED(&vfa, f_capabilities);
1492		if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
1493		    (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
1494		    (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1495		    (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1496			/* Supports native extended attributes */
1497			error = VNOP_PATHCONF(vp, name, retval, ctx);
1498		} else {
1499			/* Number of bits used to represent the maximum size of
1500			 * extended attribute stored in an Apple Double file.
1501			 */
1502			*retval = AD_XATTR_SIZE_BITS;
1503		}
1504		break;
1505	default:
1506		error = VNOP_PATHCONF(vp, name, retval, ctx);
1507		break;
1508	}
1509
1510	return (error);
1511}
1512
1513static int
1514vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
1515{
1516	int error;
1517	struct vnode *vp;
1518
1519	vp = (struct vnode *)fp->f_fglob->fg_data;
1520
1521	/*
1522	 * Don't attach a knote to a dead vnode.
1523	 */
1524	if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
1525		switch (kn->kn_filter) {
1526			case EVFILT_READ:
1527			case EVFILT_WRITE:
1528				if (vnode_isfifo(vp)) {
1529					/* We'll only watch FIFOs that use our fifofs */
1530					if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
1531						error = ENOTSUP;
1532					}
1533
1534				} else if (!vnode_isreg(vp)) {
1535					if (vnode_ischr(vp) &&
1536							(error = spec_kqfilter(vp, kn)) == 0) {
1537						/* claimed by a special device */
1538						vnode_put(vp);
1539						return 0;
1540					}
1541
1542					error = EINVAL;
1543				}
1544				break;
1545			case EVFILT_VNODE:
1546				break;
1547			default:
1548				error = EINVAL;
1549		}
1550
1551		if (error) {
1552			vnode_put(vp);
1553			return error;
1554		}
1555
1556#if CONFIG_MACF
1557		error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
1558		if (error) {
1559			vnode_put(vp);
1560			return error;
1561		}
1562#endif
1563
1564		kn->kn_hook = (void*)vp;
1565		kn->kn_hookid = vnode_vid(vp);
1566		kn->kn_fop = &vnode_filtops;
1567
1568		vnode_lock(vp);
1569		KNOTE_ATTACH(&vp->v_knotes, kn);
1570		vnode_unlock(vp);
1571
1572		/* Ask the filesystem to provide remove notifications, but ignore failure */
1573		VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn,  ctx);
1574
1575		vnode_put(vp);
1576	}
1577
1578	return (error);
1579}
1580
1581static void
1582filt_vndetach(struct knote *kn)
1583{
1584	vfs_context_t ctx = vfs_context_current();
1585	struct vnode *vp;
1586	vp = (struct vnode *)kn->kn_hook;
1587	if (vnode_getwithvid(vp, kn->kn_hookid))
1588		return;
1589
1590	vnode_lock(vp);
1591	KNOTE_DETACH(&vp->v_knotes, kn);
1592	vnode_unlock(vp);
1593
1594	/*
1595	 * Tell a (generally networked) filesystem that we're no longer watching
1596	 * If the FS wants to track contexts, it should still be using the one from
1597	 * the VNODE_MONITOR_BEGIN.
1598	 */
1599	VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx);
1600	vnode_put(vp);
1601}
1602
1603
1604/*
1605 * Used for EVFILT_READ
1606 *
1607 * Takes only VFIFO or VREG. vnode is locked.  We handle the "poll" case
1608 * differently than the regular case for VREG files.  If not in poll(),
1609 * then we need to know current fileproc offset for VREG.
1610 */
1611static intptr_t
1612vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
1613{
1614	if (vnode_isfifo(vp)) {
1615		int cnt;
1616		int err = fifo_charcount(vp, &cnt);
1617		if (err == 0) {
1618			return (intptr_t)cnt;
1619		} else {
1620			return (intptr_t)0;
1621		}
1622	} else if (vnode_isreg(vp)) {
1623		if (ispoll) {
1624			return (intptr_t)1;
1625		}
1626
1627		off_t amount;
1628		amount = vp->v_un.vu_ubcinfo->ui_size - current_offset;
1629		if (amount > (off_t)INTPTR_MAX) {
1630			return INTPTR_MAX;
1631		} else if (amount < (off_t)INTPTR_MIN) {
1632			return INTPTR_MIN;
1633		} else {
1634			return (intptr_t)amount;
1635		}
1636	} else {
1637		panic("Should never have an EVFILT_READ except for reg or fifo.");
1638		return 0;
1639	}
1640}
1641
1642/*
1643 * Used for EVFILT_WRITE.
1644 *
1645 * For regular vnodes, we can always write (1).  For named pipes,
1646 * see how much space there is in the buffer.  Nothing else is covered.
1647 */
1648static intptr_t
1649vnode_writable_space_count(vnode_t vp)
1650{
1651	if (vnode_isfifo(vp)) {
1652		long spc;
1653		int err = fifo_freespace(vp, &spc);
1654		if (err == 0) {
1655			return (intptr_t)spc;
1656		} else {
1657			return (intptr_t)0;
1658		}
1659	} else if (vnode_isreg(vp)) {
1660		return (intptr_t)1;
1661	} else {
1662		panic("Should never have an EVFILT_READ except for reg or fifo.");
1663		return 0;
1664	}
1665}
1666
1667/*
1668 * Determine whether this knote should be active
1669 *
1670 * This is kind of subtle.
1671 * 	--First, notice if the vnode has been revoked: in so, override hint
1672 * 	--EVFILT_READ knotes are checked no matter what the hint is
1673 * 	--Other knotes activate based on hint.
1674 * 	--If hint is revoke, set special flags and activate
1675 */
1676static int
1677filt_vnode(struct knote *kn, long hint)
1678{
1679	vnode_t vp = (struct vnode *)kn->kn_hook;
1680	int activate = 0;
1681	long orig_hint = hint;
1682
1683	if (0 == hint) {
1684		vnode_lock(vp);
1685
1686		if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) {
1687			/* Is recycled */
1688			hint = NOTE_REVOKE;
1689		}
1690	} else {
1691		lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
1692	}
1693
1694	/* Special handling for vnodes that are in recycle or already gone */
1695	if (NOTE_REVOKE == hint) {
1696		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1697		activate = 1;
1698
1699		if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) {
1700			kn->kn_fflags |= NOTE_REVOKE;
1701		}
1702	} else {
1703		switch(kn->kn_filter) {
1704			case EVFILT_READ:
1705				kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL));
1706
1707				if (kn->kn_data != 0) {
1708					activate = 1;
1709				}
1710				break;
1711			case EVFILT_WRITE:
1712				kn->kn_data = vnode_writable_space_count(vp);
1713
1714				if (kn->kn_data != 0) {
1715					activate = 1;
1716				}
1717				break;
1718			case EVFILT_VNODE:
1719				/* Check events this note matches against the hint */
1720				if (kn->kn_sfflags & hint) {
1721					kn->kn_fflags |= hint; /* Set which event occurred */
1722				}
1723				if (kn->kn_fflags != 0) {
1724					activate = 1;
1725				}
1726				break;
1727			default:
1728				panic("Invalid knote filter on a vnode!\n");
1729		}
1730	}
1731
1732	if (orig_hint == 0) {
1733		/*
1734		 * Definitely need to unlock, may need to put
1735		 */
1736		if (hint == 0) {
1737			vnode_put_locked(vp);
1738		}
1739		vnode_unlock(vp);
1740	}
1741
1742	return (activate);
1743}
1744