1/*
2 * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1989, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed by the University of
48 *	California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 *    may be used to endorse or promote products derived from this software
51 *    without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 *	@(#)vfs_syscalls.c	8.41 (Berkeley) 6/15/95
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections.  This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
79#include <sys/file_internal.h>
80#include <sys/stat.h>
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
86#include <sys/malloc.h>
87#include <sys/mman.h>
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
92#include <sys/quota.h>
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
95#include <sys/sysproto.h>
96#include <sys/xattr.h>
97#include <sys/ubc_internal.h>
98#include <sys/disk.h>
99#include <machine/cons.h>
100#include <machine/limits.h>
101#include <miscfs/specfs/specdev.h>
102#include <miscfs/union/union.h>
103
104#include <bsm/audit_kernel.h>
105#include <bsm/audit_kevents.h>
106
107#include <mach/mach_types.h>
108#include <kern/kern_types.h>
109#include <kern/kalloc.h>
110
111#include <vm/vm_pageout.h>
112
113#include <libkern/OSAtomic.h>
114
115#if CONFIG_MACF
116#include <security/mac.h>
117#include <security/mac_framework.h>
118#endif
119
/*
 * GET_PATH / RELEASE_PATH
 *
 * Obtain and give back a scratch path buffer.  When CONFIG_FSE is set the
 * buffer comes from get_pathbuff()/release_pathbuff(); otherwise a
 * MAXPATHLEN-byte buffer is taken from (and returned to) the M_NAMEI zone.
 *
 * Note: each expansion already ends with a semicolon, so the macro expands
 * to a complete statement on its own.
 */
#if CONFIG_FSE
#define GET_PATH(x)	(x) = get_pathbuff();
#define RELEASE_PATH(x)	release_pathbuff(x);
#else /* !CONFIG_FSE */
#define GET_PATH(x)	MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x)	FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
131
132/* struct for checkdirs iteration */
133struct cdirargs {
134	vnode_t olddp;
135	vnode_t newdp;
136};
/* callback for checkdirs iteration */
138static int checkdirs_callback(proc_t p, void * arg);
139
140static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
141static int checkdirs(vnode_t olddp, vfs_context_t ctx);
142void enablequotas(struct mount *mp, vfs_context_t ctx);
143static int getfsstat_callback(mount_t mp, void * arg);
144static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
145static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
146static int sync_callback(mount_t, void *);
147static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
148			user_addr_t bufp, int *sizep, boolean_t is_64_bit,
149						boolean_t partial_copy);
150static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp);
151int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
152
153__private_extern__
154int sync_internal(void);
155
156__private_extern__
157int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *);
158
159__private_extern__
160int unlink1(vfs_context_t, struct nameidata *, int);
161
162
163#ifdef __APPLE_API_OBSOLETE
164struct fstatv_args {
165       int fd;			/* file descriptor of the target file */
166       struct vstat *vsb;	/* vstat structure for returned info  */
167};
168struct lstatv_args {
169       const char *path;	/* pathname of the target file       */
170       struct vstat *vsb;	/* vstat structure for returned info */
171};
172struct mkcomplex_args {
173        const char *path;	/* pathname of the file to be created */
174		mode_t mode;		/* access mode for the newly created file */
175        u_long type;		/* format of the complex file */
176};
177struct statv_args {
178        const char *path;	/* pathname of the target file       */
179        struct vstat *vsb;	/* vstat structure for returned info */
180};
181
182int fstatv(proc_t p, struct fstatv_args *uap, register_t *retval);
183int lstatv(proc_t p, struct lstatv_args *uap, register_t *retval);
184int mkcomplex(proc_t p, struct mkcomplex_args *uap, register_t *retval);
185int statv(proc_t p, struct statv_args *uap, register_t *retval);
186
187#endif /* __APPLE_API_OBSOLETE */
188
189/*
190 * incremented each time a mount or unmount operation occurs
191 * used to invalidate the cached value of the rootvp in the
192 * mount structure utilized by cache_lookup_path
193 */
194int mount_generation = 0;
195
196/* counts number of mount and unmount operations */
197unsigned int vfs_nummntops=0;
198
199extern struct fileops vnops;
200extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
201
202
203/*
204 * Virtual File System System Calls
205 */
206
207/*
208 * Mount a file system.
209 */
210/* ARGSUSED */
211int
212mount(proc_t p, struct mount_args *uap, __unused register_t *retval)
213{
214	struct __mac_mount_args muap;
215
216	muap.type = uap->type;
217	muap.path = uap->path;
218	muap.flags = uap->flags;
219	muap.data = uap->data;
220	muap.mac_p = USER_ADDR_NULL;
221	return (__mac_mount(p, &muap, retval));
222}
223
224int
225__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused register_t *retval)
226{
227	struct vnode *vp;
228	struct vnode *devvp = NULLVP;
229	struct vnode *device_vnode = NULLVP;
230#if CONFIG_MACF
231	struct vnode *rvp;
232#endif
233	struct mount *mp;
234	struct vfstable *vfsp = (struct vfstable *)0;
235	int error, flag = 0;
236	struct vnode_attr va;
237	vfs_context_t ctx = vfs_context_current();
238	struct nameidata nd;
239	struct nameidata nd1;
240	char fstypename[MFSNAMELEN];
241	size_t dummy=0;
242	user_addr_t devpath = USER_ADDR_NULL;
243	user_addr_t fsmountargs =  uap->data;
244	int ronly = 0;
245	int mntalloc = 0;
246	mode_t accessmode;
247	boolean_t is_64bit;
248	boolean_t is_rwlock_locked = FALSE;
249
250	AUDIT_ARG(fflags, uap->flags);
251
252	is_64bit = proc_is64bit(p);
253
254	/*
255	 * Get vnode to be covered
256	 */
257	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
258		   UIO_USERSPACE, uap->path, ctx);
259	error = namei(&nd);
260	if (error)
261		return (error);
262	vp = nd.ni_vp;
263
264	if ((vp->v_flag & VROOT) &&
265		(vp->v_mount->mnt_flag & MNT_ROOTFS))
266			uap->flags |= MNT_UPDATE;
267
268	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
269	if (error)
270		goto out1;
271
272	if (uap->flags & MNT_UPDATE) {
273		if ((vp->v_flag & VROOT) == 0) {
274			error = EINVAL;
275			goto out1;
276		}
277		mp = vp->v_mount;
278
279		/* unmount in progress return error */
280		mount_lock(mp);
281		if (mp->mnt_lflag & MNT_LUNMOUNT) {
282			mount_unlock(mp);
283			error = EBUSY;
284			goto out1;
285		}
286		mount_unlock(mp);
287		lck_rw_lock_exclusive(&mp->mnt_rwlock);
288		is_rwlock_locked = TRUE;
289		/*
290		 * We only allow the filesystem to be reloaded if it
291		 * is currently mounted read-only.
292		 */
293		if ((uap->flags & MNT_RELOAD) &&
294		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
295			error = ENOTSUP;
296			goto out1;
297		}
298		/*
299		 * Only root, or the user that did the original mount is
300		 * permitted to update it.
301		 */
302		if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
303		    (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
304			goto out1;
305		}
306#if CONFIG_MACF
307		error = mac_mount_check_remount(ctx, mp);
308		if (error != 0) {
309			lck_rw_done(&mp->mnt_rwlock);
310			goto out1;
311		}
312#endif
313		/*
314		 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
315		 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
316		 */
317		if (suser(vfs_context_ucred(ctx), NULL)) {
318			uap->flags |= MNT_NOSUID | MNT_NODEV;
319			if (mp->mnt_flag & MNT_NOEXEC)
320				uap->flags |= MNT_NOEXEC;
321		}
322		flag = mp->mnt_flag;
323
324		mp->mnt_flag |=
325		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
326
327		vfsp = mp->mnt_vtable;
328		goto update;
329	}
330	/*
331	 * If the user is not root, ensure that they own the directory
332	 * onto which we are attempting to mount.
333	 */
334	VATTR_INIT(&va);
335	VATTR_WANTED(&va, va_uid);
336	if ((error = vnode_getattr(vp, &va, ctx)) ||
337	    (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
338	     (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
339		goto out1;
340	}
341	/*
342	 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
343	 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
344	 */
345	if (suser(vfs_context_ucred(ctx), NULL)) {
346		uap->flags |= MNT_NOSUID | MNT_NODEV;
347		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
348			uap->flags |= MNT_NOEXEC;
349	}
350	if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
351		goto out1;
352
353	if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
354		goto out1;
355
356	if (vp->v_type != VDIR) {
357		error = ENOTDIR;
358		goto out1;
359	}
360
361	/* XXXAUDIT: Should we capture the type on the error path as well? */
362	AUDIT_ARG(text, fstypename);
363	mount_list_lock();
364	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
365		if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN))
366			break;
367	mount_list_unlock();
368	if (vfsp == NULL) {
369		error = ENODEV;
370		goto out1;
371	}
372#if CONFIG_MACF
373	error = mac_mount_check_mount(ctx, vp,
374	    &nd.ni_cnd, vfsp->vfc_name);
375	if (error != 0)
376		goto out1;
377#endif
378	if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
379		error = EBUSY;
380		goto out1;
381	}
382	vnode_lock_spin(vp);
383	SET(vp->v_flag, VMOUNT);
384	vnode_unlock(vp);
385
386	/*
387	 * Allocate and initialize the filesystem.
388	 */
389	MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount),
390		M_MOUNT, M_WAITOK);
391	bzero((char *)mp, (u_long)sizeof(struct mount));
392	mntalloc = 1;
393
394	/* Initialize the default IO constraints */
395	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
396	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
397	mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
398	mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
399	mp->mnt_devblocksize = DEV_BSIZE;
400	mp->mnt_alignmentmask = PAGE_MASK;
401	mp->mnt_ioflags = 0;
402	mp->mnt_realrootvp = NULLVP;
403	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
404
405	TAILQ_INIT(&mp->mnt_vnodelist);
406	TAILQ_INIT(&mp->mnt_workerqueue);
407	TAILQ_INIT(&mp->mnt_newvnodes);
408	mount_lock_init(mp);
409	lck_rw_lock_exclusive(&mp->mnt_rwlock);
410	is_rwlock_locked = TRUE;
411	mp->mnt_op = vfsp->vfc_vfsops;
412	mp->mnt_vtable = vfsp;
413	mount_list_lock();
414	vfsp->vfc_refcount++;
415	mount_list_unlock();
416	//mp->mnt_stat.f_type = vfsp->vfc_typenum;
417	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
418	strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
419	strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
420	mp->mnt_vnodecovered = vp;
421	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
422	mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
423
424	/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
425	vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
426
427update:
428	/*
429	 * Set the mount level flags.
430	 */
431	if (uap->flags & MNT_RDONLY)
432		mp->mnt_flag |= MNT_RDONLY;
433	else if (mp->mnt_flag & MNT_RDONLY)
434		mp->mnt_kern_flag |= MNTK_WANTRDWR;
435	mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
436			  MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
437			  MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
438			  MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
439	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |	MNT_NODEV |
440				      MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
441				      MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
442					  MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
443
444#if CONFIG_MACF
445	if (uap->flags & MNT_MULTILABEL) {
446		if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
447			error = EINVAL;
448			goto out1;
449		}
450		mp->mnt_flag |= MNT_MULTILABEL;
451	}
452#endif
453
454	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
455		if (is_64bit) {
456			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
457				goto out1;
458			fsmountargs += sizeof(devpath);
459		} else {
460			char *tmp;
461			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
462				goto out1;
463			/* munge into LP64 addr */
464			devpath = CAST_USER_ADDR_T(tmp);
465			fsmountargs += sizeof(tmp);
466		}
467
468		/* if it is not update and device name needs to be parsed */
469		if ((devpath)) {
470			NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
471			if ( (error = namei(&nd1)) )
472				goto out1;
473
474			strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
475			devvp = nd1.ni_vp;
476
477			nameidone(&nd1);
478
479			if (devvp->v_type != VBLK) {
480				error = ENOTBLK;
481				goto out2;
482			}
483			if (major(devvp->v_rdev) >= nblkdev) {
484				error = ENXIO;
485				goto out2;
486			}
487			/*
488			* If mount by non-root, then verify that user has necessary
489			* permissions on the device.
490			*/
491			if (suser(vfs_context_ucred(ctx), NULL) != 0) {
492				accessmode = KAUTH_VNODE_READ_DATA;
493				if ((mp->mnt_flag & MNT_RDONLY) == 0)
494					accessmode |= KAUTH_VNODE_WRITE_DATA;
495				if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
496					goto out2;
497			}
498		}
499		if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
500			if ( (error = vnode_ref(devvp)) )
501				goto out2;
502			/*
503			* Disallow multiple mounts of the same device.
504			* Disallow mounting of a device that is currently in use
505			* (except for root, which might share swap device for miniroot).
506			* Flush out any old buffers remaining from a previous use.
507			*/
508			if ( (error = vfs_mountedon(devvp)) )
509				goto out3;
510
511			if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
512				error = EBUSY;
513				goto out3;
514			}
515			if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
516				error = ENOTBLK;
517				goto out3;
518			}
519			if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
520				goto out3;
521
522			ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
523#if CONFIG_MACF
524			error = mac_vnode_check_open(ctx,
525			    devvp,
526			    ronly ? FREAD : FREAD|FWRITE);
527			if (error)
528				goto out3;
529#endif /* MAC */
530			if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
531				goto out3;
532
533			mp->mnt_devvp = devvp;
534			device_vnode = devvp;
535		} else {
536			if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
537				/*
538				 * If upgrade to read-write by non-root, then verify
539				 * that user has necessary permissions on the device.
540				 */
541				device_vnode = mp->mnt_devvp;
542				if (device_vnode && suser(vfs_context_ucred(ctx), NULL)) {
543					if ((error = vnode_authorize(device_vnode, NULL,
544						 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)
545						goto out2;
546				}
547			}
548			device_vnode = NULLVP;
549		}
550	}
551#if CONFIG_MACF
552	if ((uap->flags & MNT_UPDATE) == 0) {
553		mac_mount_label_init(mp);
554		mac_mount_label_associate(ctx, mp);
555	}
556	if (uap->mac_p != USER_ADDR_NULL) {
557		struct user_mac mac;
558		char *labelstr = NULL;
559		size_t ulen = 0;
560
561		if ((uap->flags & MNT_UPDATE) != 0) {
562			error = mac_mount_check_label_update(
563			    ctx, mp);
564			if (error != 0)
565				goto out3;
566		}
567		if (is_64bit) {
568			error = copyin(uap->mac_p, &mac, sizeof(mac));
569		} else {
570			struct mac mac32;
571			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
572			mac.m_buflen = mac32.m_buflen;
573			mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
574		}
575		if (error != 0)
576			goto out3;
577		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
578		    (mac.m_buflen < 2)) {
579			error = EINVAL;
580			goto out3;
581		}
582		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
583		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
584		if (error != 0) {
585			FREE(labelstr, M_MACTEMP);
586			goto out3;
587		}
588		AUDIT_ARG(mac_string, labelstr);
589		error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
590		FREE(labelstr, M_MACTEMP);
591		if (error != 0)
592			goto out3;
593	}
594#endif
595	if (device_vnode != NULL) {
596		VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
597		mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
598	}
599
600	/*
601	 * Mount the filesystem.
602	 */
603	error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
604
605	if (uap->flags & MNT_UPDATE) {
606		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
607			mp->mnt_flag &= ~MNT_RDONLY;
608		mp->mnt_flag &=~
609		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
610		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
611		if (error)
612			mp->mnt_flag = flag;
613		vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
614		lck_rw_done(&mp->mnt_rwlock);
615		is_rwlock_locked = FALSE;
616		if (!error)
617			enablequotas(mp, ctx);
618		goto out2;
619	}
620	/*
621	 * Put the new filesystem on the mount list after root.
622	 */
623	if (error == 0) {
624		struct vfs_attr	vfsattr;
625#if CONFIG_MACF
626		if (vfs_flags(mp) & MNT_MULTILABEL) {
627			error = VFS_ROOT(mp, &rvp, ctx);
628			if (error) {
629				printf("%s() VFS_ROOT returned %d\n", __func__, error);
630				goto out3;
631			}
632
633			/* VFS_ROOT provides reference so needref = 0 */
634			error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
635			if (error)
636				goto out3;
637		}
638#endif	/* MAC */
639
640		vnode_lock_spin(vp);
641		CLR(vp->v_flag, VMOUNT);
642		vp->v_mountedhere = mp;
643		vnode_unlock(vp);
644
645		/*
646		 * taking the name_cache_lock exclusively will
647		 * insure that everyone is out of the fast path who
648		 * might be trying to use a now stale copy of
649		 * vp->v_mountedhere->mnt_realrootvp
650		 * bumping mount_generation causes the cached values
651		 * to be invalidated
652		 */
653		name_cache_lock();
654		mount_generation++;
655		name_cache_unlock();
656
657		vnode_ref(vp);
658
659		error = checkdirs(vp, ctx);
660		if (error != 0)  {
661			/* Unmount the filesystem as cdir/rdirs cannot be updated */
662			goto out4;
663		}
664		/*
665		 * there is no cleanup code here so I have made it void
666		 * we need to revisit this
667		 */
668		(void)VFS_START(mp, 0, ctx);
669
670		mount_list_add(mp);
671		lck_rw_done(&mp->mnt_rwlock);
672		is_rwlock_locked = FALSE;
673
674		/* Check if this mounted file system supports EAs or named streams. */
675		/* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
676		VFSATTR_INIT(&vfsattr);
677		VFSATTR_WANTED(&vfsattr, f_capabilities);
678		if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
679		    vfs_getattr(mp, &vfsattr, ctx) == 0 &&
680		    VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
681			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
682			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
683				mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
684			}
685#if NAMEDSTREAMS
686			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
687			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
688				mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
689			}
690#endif
691			/* Check if this file system supports path from id lookups. */
692			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
693			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
694				mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
695			} else if (mp->mnt_flag & MNT_DOVOLFS) {
696				/* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
697				mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
698			}
699		}
700		if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
701			mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
702		}
703		if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
704			mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
705		}
706		/* increment the operations count */
707		OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
708		enablequotas(mp, ctx);
709
710		if (device_vnode) {
711			device_vnode->v_specflags |= SI_MOUNTEDON;
712
713			/*
714			 *   cache the IO attributes for the underlying physical media...
715			 *   an error return indicates the underlying driver doesn't
716			 *   support all the queries necessary... however, reasonable
717			 *   defaults will have been set, so no reason to bail or care
718			 */
719			vfs_init_io_attributes(device_vnode, mp);
720		}
721
722		/* Now that mount is setup, notify the listeners */
723		vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
724	} else {
725		vnode_lock_spin(vp);
726		CLR(vp->v_flag, VMOUNT);
727		vnode_unlock(vp);
728		mount_list_lock();
729		mp->mnt_vtable->vfc_refcount--;
730		mount_list_unlock();
731
732		if (device_vnode ) {
733			VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
734			vnode_rele(device_vnode);
735		}
736		lck_rw_done(&mp->mnt_rwlock);
737		is_rwlock_locked = FALSE;
738		mount_lock_destroy(mp);
739#if CONFIG_MACF
740		mac_mount_label_destroy(mp);
741#endif
742		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
743	}
744	nameidone(&nd);
745
746	/*
747	 * drop I/O count on covered 'vp' and
748	 * on the device vp if there was one
749	 */
750	if (devpath && devvp)
751	        vnode_put(devvp);
752	vnode_put(vp);
753
754	return(error);
755out4:
756	(void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
757	if (device_vnode != NULLVP) {
758		VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
759                       ctx);
760
761	}
762	vnode_lock_spin(vp);
763	vp->v_mountedhere = (mount_t) 0;
764	vnode_unlock(vp);
765	vnode_rele(vp);
766out3:
767	if (devpath && ((uap->flags & MNT_UPDATE) == 0))
768		vnode_rele(devvp);
769out2:
770	if (devpath && devvp)
771	        vnode_put(devvp);
772out1:
773	/* Release mnt_rwlock only when it was taken */
774	if (is_rwlock_locked == TRUE) {
775		lck_rw_done(&mp->mnt_rwlock);
776	}
777	if (mntalloc) {
778#if CONFIG_MACF
779		mac_mount_label_destroy(mp);
780#endif
781		mount_list_lock();
782		vfsp->vfc_refcount--;
783		mount_list_unlock();
784		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
785	}
786	vnode_put(vp);
787	nameidone(&nd);
788
789	return(error);
790}
791
792void
793enablequotas(struct mount *mp, vfs_context_t ctx)
794{
795	struct nameidata qnd;
796	int type;
797	char qfpath[MAXPATHLEN];
798	const char *qfname = QUOTAFILENAME;
799	const char *qfopsname = QUOTAOPSNAME;
800	const char *qfextension[] = INITQFNAMES;
801
802	/* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
803	if ((strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 )
804                && (strncmp( mp->mnt_vfsstat.f_fstypename, "ufs", sizeof("ufs")) != 0))
805	  return;
806
807	/*
808	 * Enable filesystem disk quotas if necessary.
809	 * We ignore errors as this should not interfere with final mount
810	 */
811	for (type=0; type < MAXQUOTAS; type++) {
812		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
813		NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE32, CAST_USER_ADDR_T(qfpath), ctx);
814		if (namei(&qnd) != 0)
815			continue; 	    /* option file to trigger quotas is not present */
816		vnode_put(qnd.ni_vp);
817		nameidone(&qnd);
818		snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
819
820		(void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
821	}
822	return;
823}
824
825
826static int
827checkdirs_callback(proc_t p, void * arg)
828{
829	struct cdirargs * cdrp = (struct cdirargs * )arg;
830	vnode_t olddp = cdrp->olddp;
831	vnode_t newdp = cdrp->newdp;
832	struct filedesc *fdp;
833	vnode_t tvp;
834	vnode_t fdp_cvp;
835	vnode_t fdp_rvp;
836	int cdir_changed = 0;
837	int rdir_changed = 0;
838
839	/*
840	 * XXX Also needs to iterate each thread in the process to see if it
841	 * XXX is using a per-thread current working directory, and, if so,
842	 * XXX update that as well.
843	 */
844
845	proc_fdlock(p);
846	fdp = p->p_fd;
847	if (fdp == (struct filedesc *)0) {
848		proc_fdunlock(p);
849		return(PROC_RETURNED);
850	}
851	fdp_cvp = fdp->fd_cdir;
852	fdp_rvp = fdp->fd_rdir;
853	proc_fdunlock(p);
854
855	if (fdp_cvp == olddp) {
856		vnode_ref(newdp);
857		tvp = fdp->fd_cdir;
858		fdp_cvp = newdp;
859		cdir_changed = 1;
860		vnode_rele(tvp);
861	}
862	if (fdp_rvp == olddp) {
863		vnode_ref(newdp);
864		tvp = fdp->fd_rdir;
865		fdp_rvp = newdp;
866		rdir_changed = 1;
867		vnode_rele(tvp);
868	}
869	if (cdir_changed || rdir_changed) {
870		proc_fdlock(p);
871		fdp->fd_cdir = fdp_cvp;
872		fdp->fd_rdir = fdp_rvp;
873		proc_fdunlock(p);
874	}
875	return(PROC_RETURNED);
876}
877
878
879
880/*
881 * Scan all active processes to see if any of them have a current
882 * or root directory onto which the new filesystem has just been
883 * mounted. If so, replace them with the new mount point.
884 */
885static int
886checkdirs(vnode_t olddp, vfs_context_t ctx)
887{
888	vnode_t newdp;
889	vnode_t tvp;
890	int err;
891	struct cdirargs cdr;
892	struct uthread * uth = get_bsdthread_info(current_thread());
893
894	if (olddp->v_usecount == 1)
895		return(0);
896	if (uth != (struct uthread *)0)
897		uth->uu_notrigger = 1;
898	err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
899	if (uth != (struct uthread *)0)
900		uth->uu_notrigger = 0;
901
902	if (err != 0) {
903#if DIAGNOSTIC
904		panic("mount: lost mount: error %d", err);
905#endif
906		return(err);
907	}
908
909	cdr.olddp = olddp;
910	cdr.newdp = newdp;
911	/* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
912	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
913
914	if (rootvnode == olddp) {
915		vnode_ref(newdp);
916		tvp = rootvnode;
917		rootvnode = newdp;
918		vnode_rele(tvp);
919	}
920
921	vnode_put(newdp);
922	return(0);
923}
924
925/*
926 * Unmount a file system.
927 *
928 * Note: unmount takes a path to the vnode mounted on as argument,
929 * not special file (as before).
930 */
931/* ARGSUSED */
932int
933unmount(__unused proc_t p, struct unmount_args *uap, __unused register_t *retval)
934{
935	vnode_t vp;
936	struct mount *mp;
937	int error;
938	struct nameidata nd;
939	vfs_context_t ctx = vfs_context_current();
940
941	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
942		UIO_USERSPACE, uap->path, ctx);
943	error = namei(&nd);
944	if (error)
945		return (error);
946	vp = nd.ni_vp;
947	mp = vp->v_mount;
948	nameidone(&nd);
949
950#if CONFIG_MACF
951	error = mac_mount_check_umount(ctx, mp);
952	if (error != 0) {
953		vnode_put(vp);
954		return (error);
955	}
956#endif
957	/*
958	 * Must be the root of the filesystem
959	 */
960	if ((vp->v_flag & VROOT) == 0) {
961		vnode_put(vp);
962		return (EINVAL);
963	}
964	mount_ref(mp, 0);
965	vnode_put(vp);
966	/* safedounmount consumes the mount ref */
967	return (safedounmount(mp, uap->flags, ctx));
968}
969
970int
971vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
972{
973	mount_t mp;
974
975	mp = mount_list_lookupby_fsid(fsid, 0, 1);
976	if (mp == (mount_t)0) {
977		return(ENOENT);
978	}
979	mount_ref(mp, 0);
980	mount_iterdrop(mp);
981	/* safedounmount consumes the mount ref */
982	return(safedounmount(mp, flags, ctx));
983}
984
985
986/*
987 * The mount struct comes with a mount ref which will be consumed.
988 * Do the actual file system unmount, prevent some common foot shooting.
989 */
990int
991safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
992{
993	int error;
994	proc_t p = vfs_context_proc(ctx);
995
996	/*
997	 * Only root, or the user that did the original mount is
998	 * permitted to unmount this filesystem.
999	 */
1000	if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1001	    (error = suser(kauth_cred_get(), &p->p_acflag)))
1002		goto out;
1003
1004	/*
1005	 * Don't allow unmounting the root file system.
1006	 */
1007	if (mp->mnt_flag & MNT_ROOTFS) {
1008		error = EBUSY; /* the root is always busy */
1009		goto out;
1010	}
1011
1012	return (dounmount(mp, flags, 1, ctx));
1013
1014out:
1015	mount_drop(mp, 0);
1016	return(error);
1017}
1018
1019	/*
1020	 * Do the actual file system unmount.
 *
 * Parameters:	mp		Mount to tear down
 *		flags		Unmount flags, passed on to VFS_UNMOUNT();
 *				MNT_FORCE selects a forced unmount, in
 *				which case the VFS_SYNC() step is skipped
 *				and a vflush() error does not abort
 *		withref		Non-zero if the caller holds a mount ref on
 *				mp; that ref is dropped by this routine
 *		ctx		Context passed to the VFS_* and VNOP_* calls
 *
 * Returns:	0		Success
 *		EBUSY		Another unmount of mp was already in progress
 *	VFS_SYNC:???		(non-forced unmount only)
 *	vflush:???		(non-forced unmount only)
 *	VFS_UNMOUNT:???
 *
 * When one of the three calls above fails, MNTK_UNMOUNT, MNT_LUNMOUNT and
 * MNT_LFORCE are cleared again before returning.
1021	 */
1022	int
1023	dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1024	{
1025		vnode_t coveredvp = (vnode_t)0;
1026		int error;
1027		int needwakeup = 0;
1028		int forcedunmount = 0;
1029		int lflags = 0;
1030		struct vnode *devvp = NULLVP;
1031
1032		if (flags & MNT_FORCE)
1033			forcedunmount = 1;
1034		mount_lock(mp);
1035		/* XXX post jaguar fix LK_DRAIN - then clean this up */
1036		if ((flags & MNT_FORCE)) {
1037			mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1038			mp->mnt_lflag |= MNT_LFORCE;
1039		}
	/*
	 * Someone else is already unmounting mp: drop the caller's ref
	 * (if any), ask for a wakeup via MNT_LWAIT and sleep on mp.
	 */
1040		if (mp->mnt_lflag & MNT_LUNMOUNT) {
1041			mp->mnt_lflag |= MNT_LWAIT;
1042			if(withref != 0)
1043				mount_drop(mp, 1);
1044			msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1045			/*
1046			 * The prior unmount attempt has probably succeeded.
1047			 * Do not dereference mp here - returning EBUSY is safest.
1048			 */
1049			return (EBUSY);
1050		}
	/* we are now the thread performing the unmount */
1051		mp->mnt_kern_flag |= MNTK_UNMOUNT;
1052		mp->mnt_lflag |= MNT_LUNMOUNT;
1053		mp->mnt_flag &=~ MNT_ASYNC;
1054		/*
1055		 * anyone currently in the fast path that
1056		 * trips over the cached rootvp will be
1057		 * dumped out and forced into the slow path
1058		 * to regenerate a new cached value
1059		 */
1060		mp->mnt_realrootvp = NULLVP;
1061		mount_unlock(mp);
1062
1063		/*
1064		 * taking the name_cache_lock exclusively will
1065		 * ensure that everyone is out of the fast path who
1066		 * might be trying to use a now stale copy of
1067		 * vp->v_mountedhere->mnt_realrootvp
1068		 * bumping mount_generation causes the cached values
1069		 * to be invalidated
1070		 */
1071		name_cache_lock();
1072		mount_generation++;
1073		name_cache_unlock();
1074
1075
1076		lck_rw_lock_exclusive(&mp->mnt_rwlock);
	/* the caller's mount ref is released once mnt_rwlock is held */
1077		if (withref != 0)
1078			mount_drop(mp, 0);
1079	#if CONFIG_FSE
1080		fsevent_unmount(mp);  /* has to come first! */
1081	#endif
1082		error = 0;
1083		if (forcedunmount == 0) {
1084			ubc_umount(mp);	/* release cached vnodes */
1085			if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1086				error = VFS_SYNC(mp, MNT_WAIT, ctx);
1087				if (error) {
				/*
				 * sync failed: undo the unmount flags; the
				 * mount lock taken here is released at out:
				 */
1088					mount_lock(mp);
1089					mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1090					mp->mnt_lflag &= ~MNT_LUNMOUNT;
1091					mp->mnt_lflag &= ~MNT_LFORCE;
1092					goto out;
1093				}
1094			}
1095		}
1096
1097		if (forcedunmount)
1098			lflags |= FORCECLOSE;
1099		error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
	/* a vflush failure only aborts a non-forced unmount */
1100		if ((forcedunmount == 0) && error) {
1101			mount_lock(mp);
1102			mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1103			mp->mnt_lflag &= ~MNT_LUNMOUNT;
1104			mp->mnt_lflag &= ~MNT_LFORCE;
1105			goto out;
1106		}
1107
1108		/* make sure there is no one in the mount iterations or lookup */
1109		mount_iterdrain(mp);
1110
1111		error = VFS_UNMOUNT(mp, flags, ctx);
1112		if (error) {
		/* file system refused: reset the iteration state and back out */
1113			mount_iterreset(mp);
1114			mount_lock(mp);
1115			mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1116			mp->mnt_lflag &= ~MNT_LUNMOUNT;
1117			mp->mnt_lflag &= ~MNT_LFORCE;
1118			goto out;
1119		}
1120
1121		/* increment the operations count */
1122		if (!error)
1123			OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
1124
1125		if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1126			/* hold an io reference and drop the usecount before close */
1127			devvp = mp->mnt_devvp;
1128			vnode_clearmountedon(devvp);
1129			vnode_getalways(devvp);
1130			vnode_rele(devvp);
1131			VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1132                       ctx);
1133			vnode_put(devvp);
1134		}
	/* mnt_rwlock is released around mount_list_remove() and then re-taken */
1135		lck_rw_done(&mp->mnt_rwlock);
1136		mount_list_remove(mp);
1137		lck_rw_lock_exclusive(&mp->mnt_rwlock);
1138
1139		/* mark the mount point hook in the vp but not drop the ref yet */
1140		if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1141				vnode_getwithref(coveredvp);
1142				vnode_lock_spin(coveredvp);
1143				coveredvp->v_mountedhere = (struct mount *)0;
1144				vnode_unlock(coveredvp);
1145				vnode_put(coveredvp);
1146		}
1147
1148		mount_list_lock();
1149		mp->mnt_vtable->vfc_refcount--;
1150		mount_list_unlock();
1151
1152		cache_purgevfs(mp);	/* remove cache entries for this file sys */
1153		vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1154		mount_lock(mp);
1155		mp->mnt_lflag |= MNT_LDEAD;
1156
1157		if (mp->mnt_lflag & MNT_LWAIT) {
1158	        /*
1159			 * do the wakeup here
1160			 * in case we block in mount_refdrain
1161			 * which will drop the mount lock
1162			 * and allow anyone blocked in vfs_busy
1163			 * to wakeup and see the LDEAD state
1164			 */
1165			mp->mnt_lflag &= ~MNT_LWAIT;
1166			wakeup((caddr_t)mp);
1167		}
1168		mount_refdrain(mp);
	/*
	 * Common exit, reached with the mount lock and mnt_rwlock held on
	 * both the success and the failure paths.
	 */
1169	out:
1170		if (mp->mnt_lflag & MNT_LWAIT) {
1171			mp->mnt_lflag &= ~MNT_LWAIT;
1172			needwakeup = 1;
1173		}
1174		mount_unlock(mp);
1175		lck_rw_done(&mp->mnt_rwlock);
1176
1177		if (needwakeup)
1178			wakeup((caddr_t)mp);
	/*
	 * On success, release the covered vnode's usecount and free mp,
	 * unless cross references remain; in that case the covered vnode
	 * is marked VL_MOUNTDEAD and mount_dropcrossref() frees mp when
	 * the last cross reference goes away.
	 */
1179		if (!error) {
1180			if ((coveredvp != NULLVP)) {
1181				vnode_getwithref(coveredvp);
1182				vnode_rele(coveredvp);
1183				vnode_lock_spin(coveredvp);
1184				if(mp->mnt_crossref == 0) {
1185					vnode_unlock(coveredvp);
1186					mount_lock_destroy(mp);
1187	#if CONFIG_MACF
1188					mac_mount_label_destroy(mp);
1189	#endif
1190					FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1191				}  else {
1192					coveredvp->v_lflag |= VL_MOUNTDEAD;
1193					vnode_unlock(coveredvp);
1194				}
1195				vnode_put(coveredvp);
1196			} else if (mp->mnt_flag & MNT_ROOTFS) {
				/* the root file system covers no vnode; free mp directly */
1197					mount_lock_destroy(mp);
1198	#if CONFIG_MACF
1199					mac_mount_label_destroy(mp);
1200	#endif
1201					FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1202			} else
1203				panic("dounmount: no coveredvp");
1204		}
1205		return (error);
1206	}
1207
1208void
1209mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1210{
1211		vnode_lock(dp);
1212		mp->mnt_crossref--;
1213		if (mp->mnt_crossref < 0)
1214			panic("mount cross refs -ve");
1215		if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1216			dp->v_lflag &= ~VL_MOUNTDEAD;
1217			if (need_put)
1218			        vnode_put_locked(dp);
1219			vnode_unlock(dp);
1220			mount_lock_destroy(mp);
1221#if CONFIG_MACF
1222			mac_mount_label_destroy(mp);
1223#endif
1224			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1225			return;
1226		}
1227		if (need_put)
1228		        vnode_put_locked(dp);
1229		vnode_unlock(dp);
1230}
1231
1232
1233/*
1234 * Sync each mounted filesystem.
1235 */
1236	#if DIAGNOSTIC
/* When non-zero, sync() calls vfs_bufstats() after starting the syncs. */
1237	int syncprt = 0;
1238	struct ctldebug debug0 = { "syncprt", &syncprt };
1239	#endif
1240
/* When non-zero, sync() prints the vp_* / dp_* paging counters. */
1241	int print_vmpage_stat=0;
1242
1243static int
1244sync_callback(mount_t mp, __unused void * arg)
1245{
1246	int asyncflag;
1247
1248	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1249			asyncflag = mp->mnt_flag & MNT_ASYNC;
1250			mp->mnt_flag &= ~MNT_ASYNC;
1251			VFS_SYNC(mp, MNT_NOWAIT, vfs_context_current());
1252			if (asyncflag)
1253				mp->mnt_flag |= MNT_ASYNC;
1254	}
1255	return(VFS_RETURNED);
1256}
1257
1258
1259extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean;
1260extern unsigned int dp_pgins, dp_pgouts;
1261
1262/* ARGSUSED */
1263int
1264sync(__unused proc_t p, __unused struct sync_args *uap, __unused register_t *retval)
1265{
1266
1267	vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1268	{
1269	if(print_vmpage_stat) {
1270		vm_countdirtypages();
1271		printf("VP: %d: %d: %d: %d: %d\n", vp_pgodirty, vp_pgoclean, vp_pagein,
1272			dp_pgins, dp_pgouts);
1273	}
1274	}
1275#if DIAGNOSTIC
1276	if (syncprt)
1277		vfs_bufstats();
1278#endif /* DIAGNOSTIC */
1279	return (0);
1280}
1281
1282/*
1283 * Change filesystem quotas.
1284 */
1285#if QUOTA
1286static int quotactl_funneled(proc_t p, struct quotactl_args *uap, register_t *retval);
1287
1288int
1289quotactl(proc_t p, struct quotactl_args *uap, register_t *retval)
1290{
1291	boolean_t funnel_state;
1292	int error;
1293
1294	funnel_state = thread_funnel_set(kernel_flock, TRUE);
1295	error = quotactl_funneled(p, uap, retval);
1296	thread_funnel_set(kernel_flock, funnel_state);
1297	return(error);
1298}
1299
1300static int
1301quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retval)
1302{
1303	struct mount *mp;
1304	int error, quota_cmd, quota_status;
1305	caddr_t datap;
1306	size_t fnamelen;
1307	struct nameidata nd;
1308	vfs_context_t ctx = vfs_context_current();
1309	struct dqblk my_dqblk;
1310
1311	AUDIT_ARG(uid, uap->uid, 0, 0, 0);
1312	AUDIT_ARG(cmd, uap->cmd);
1313	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1314		UIO_USERSPACE, uap->path, ctx);
1315	error = namei(&nd);
1316	if (error)
1317		return (error);
1318	mp = nd.ni_vp->v_mount;
1319	vnode_put(nd.ni_vp);
1320	nameidone(&nd);
1321
1322	/* copyin any data we will need for downstream code */
1323	quota_cmd = uap->cmd >> SUBCMDSHIFT;
1324
1325	switch (quota_cmd) {
1326	case Q_QUOTAON:
1327		/* uap->arg specifies a file from which to take the quotas */
1328		fnamelen = MAXPATHLEN;
1329		datap = kalloc(MAXPATHLEN);
1330		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1331		break;
1332	case Q_GETQUOTA:
1333		/* uap->arg is a pointer to a dqblk structure. */
1334		datap = (caddr_t) &my_dqblk;
1335		break;
1336	case Q_SETQUOTA:
1337	case Q_SETUSE:
1338		/* uap->arg is a pointer to a dqblk structure. */
1339		datap = (caddr_t) &my_dqblk;
1340		if (proc_is64bit(p)) {
1341			struct user_dqblk	my_dqblk64;
1342			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1343			if (error == 0) {
1344				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1345			}
1346		}
1347		else {
1348			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1349		}
1350		break;
1351	case Q_QUOTASTAT:
1352		/* uap->arg is a pointer to an integer */
1353		datap = (caddr_t) &quota_status;
1354		break;
1355	default:
1356		datap = NULL;
1357		break;
1358	} /* switch */
1359
1360	if (error == 0) {
1361		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1362	}
1363
1364	switch (quota_cmd) {
1365	case Q_QUOTAON:
1366		if (datap != NULL)
1367			kfree(datap, MAXPATHLEN);
1368		break;
1369	case Q_GETQUOTA:
1370		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
1371		if (error == 0) {
1372			if (proc_is64bit(p)) {
1373				struct user_dqblk	my_dqblk64;
1374				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1375				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1376			}
1377			else {
1378				error = copyout(datap, uap->arg, sizeof (struct dqblk));
1379			}
1380		}
1381		break;
1382	case Q_QUOTASTAT:
1383		/* uap->arg is a pointer to an integer */
1384		if (error == 0) {
1385			error = copyout(datap, uap->arg, sizeof(quota_status));
1386		}
1387		break;
1388	default:
1389		break;
1390	} /* switch */
1391
1392	return (error);
1393}
1394#else
1395int
1396quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused register_t *retval)
1397{
1398	return (EOPNOTSUPP);
1399}
1400#endif /* QUOTA */
1401
1402/*
1403 * Get filesystem statistics.
1404 *
1405 * Returns:	0			Success
1406 *	namei:???
1407 *	vfs_update_vfsstat:???
1408 *	munge_statfs:EFAULT
1409 */
1410/* ARGSUSED */
1411int
1412statfs(__unused proc_t p, struct statfs_args *uap, __unused register_t *retval)
1413{
1414	struct mount *mp;
1415	struct vfsstatfs *sp;
1416	int error;
1417	struct nameidata nd;
1418	vfs_context_t ctx = vfs_context_current();
1419	vnode_t vp;
1420
1421	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1422		UIO_USERSPACE, uap->path, ctx);
1423	error = namei(&nd);
1424	if (error)
1425		return (error);
1426	vp = nd.ni_vp;
1427	mp = vp->v_mount;
1428	sp = &mp->mnt_vfsstat;
1429	nameidone(&nd);
1430
1431	error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1432	vnode_put(vp);
1433	if (error != 0)
1434		return (error);
1435
1436	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1437	return (error);
1438}
1439
1440/*
1441 * Get filesystem statistics.
1442 */
1443/* ARGSUSED */
1444int
1445fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused register_t *retval)
1446{
1447	vnode_t vp;
1448	struct mount *mp;
1449	struct vfsstatfs *sp;
1450	int error;
1451
1452	AUDIT_ARG(fd, uap->fd);
1453
1454	if ( (error = file_vnode(uap->fd, &vp)) )
1455		return (error);
1456
1457	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1458
1459	mp = vp->v_mount;
1460	if (!mp) {
1461		file_drop(uap->fd);
1462		return (EBADF);
1463	}
1464	sp = &mp->mnt_vfsstat;
1465	if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1466		file_drop(uap->fd);
1467		return (error);
1468	}
1469	file_drop(uap->fd);
1470
1471	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1472
1473	return (error);
1474}
1475
1476/*
1477 * Common routine to handle copying of statfs64 data to user space
1478 */
1479static int
1480statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1481{
1482	int error;
1483	struct statfs64 sfs;
1484
1485	bzero(&sfs, sizeof(sfs));
1486
1487	sfs.f_bsize = sfsp->f_bsize;
1488	sfs.f_iosize = (int32_t)sfsp->f_iosize;
1489	sfs.f_blocks = sfsp->f_blocks;
1490	sfs.f_bfree = sfsp->f_bfree;
1491	sfs.f_bavail = sfsp->f_bavail;
1492	sfs.f_files = sfsp->f_files;
1493	sfs.f_ffree = sfsp->f_ffree;
1494	sfs.f_fsid = sfsp->f_fsid;
1495	sfs.f_owner = sfsp->f_owner;
1496	sfs.f_type = mp->mnt_vtable->vfc_typenum;
1497	sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1498	sfs.f_fssubtype = sfsp->f_fssubtype;
1499	strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
1500	strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
1501	strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
1502
1503	error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
1504
1505	return(error);
1506}
1507
1508/*
1509 * Get file system statistics in 64-bit mode
1510 */
1511int
1512statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused register_t *retval)
1513{
1514	struct mount *mp;
1515	struct vfsstatfs *sp;
1516	int error;
1517	struct nameidata nd;
1518	vfs_context_t ctxp = vfs_context_current();
1519	vnode_t vp;
1520
1521	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1522		UIO_USERSPACE, uap->path, ctxp);
1523	error = namei(&nd);
1524	if (error)
1525		return (error);
1526	vp = nd.ni_vp;
1527	mp = vp->v_mount;
1528	sp = &mp->mnt_vfsstat;
1529	nameidone(&nd);
1530
1531	error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
1532	vnode_put(vp);
1533	if (error != 0)
1534		return (error);
1535
1536	error = statfs64_common(mp, sp, uap->buf);
1537
1538	return (error);
1539}
1540
1541/*
1542 * Get file system statistics in 64-bit mode
1543 */
1544int
1545fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused register_t *retval)
1546{
1547	struct vnode *vp;
1548	struct mount *mp;
1549	struct vfsstatfs *sp;
1550	int error;
1551
1552	AUDIT_ARG(fd, uap->fd);
1553
1554	if ( (error = file_vnode(uap->fd, &vp)) )
1555		return (error);
1556
1557	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1558
1559	mp = vp->v_mount;
1560	if (!mp) {
1561		file_drop(uap->fd);
1562		return (EBADF);
1563	}
1564	sp = &mp->mnt_vfsstat;
1565	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
1566		file_drop(uap->fd);
1567		return (error);
1568	}
1569	file_drop(uap->fd);
1570
1571	error = statfs64_common(mp, sp, uap->buf);
1572
1573	return (error);
1574}
1575
/*
 * State shared between the getfsstat family of system calls and the
 * callbacks they pass to vfs_iterate().
 */
1576	struct getfsstat_struct {
1577		user_addr_t	sfsp;		/* user address of the next record to write; 0 = count only */
1578		user_addr_t	*mp;		/* next MAC label destination, or NULL; read only by getfsstat_callback */
1579		int		count;		/* mounts counted so far by the callback */
1580		int		maxcount;	/* records are written only while count < maxcount */
1581		int		flags;		/* caller's flags; MNT_NOWAIT / MNT_WAIT are examined */
1582		int		error;		/* set by the callback when it stops the iteration */
1583	};
1584
1585
1586static int
1587getfsstat_callback(mount_t mp, void * arg)
1588{
1589
1590	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1591	struct vfsstatfs *sp;
1592	int error, my_size;
1593	vfs_context_t ctx = vfs_context_current();
1594
1595	if (fstp->sfsp && fstp->count < fstp->maxcount) {
1596		sp = &mp->mnt_vfsstat;
1597		/*
1598		 * If MNT_NOWAIT is specified, do not refresh the
1599		 * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1600		 */
1601		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1602			(error = vfs_update_vfsstat(mp, ctx,
1603			    VFS_USER_EVENT))) {
1604			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1605			return(VFS_RETURNED);
1606		}
1607
1608		/*
1609		 * Need to handle LP64 version of struct statfs
1610		 */
1611		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
1612		if (error) {
1613			fstp->error = error;
1614			return(VFS_RETURNED_DONE);
1615		}
1616		fstp->sfsp += my_size;
1617
1618		if (fstp->mp) {
1619			error = mac_mount_label_get(mp, *fstp->mp);
1620			if (error) {
1621				fstp->error = error;
1622				return(VFS_RETURNED_DONE);
1623			}
1624			fstp->mp++;
1625		}
1626	}
1627	fstp->count++;
1628	return(VFS_RETURNED);
1629}
1630
1631/*
1632 * Get statistics on all filesystems.
1633 */
1634int
1635getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
1636{
1637	struct __mac_getfsstat_args muap;
1638
1639	muap.buf = uap->buf;
1640	muap.bufsize = uap->bufsize;
1641	muap.mac = USER_ADDR_NULL;
1642	muap.macsize = 0;
1643	muap.flags = uap->flags;
1644
1645	return (__mac_getfsstat(p, &muap, retval));
1646}
1647
1648int
1649__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
1650{
1651	user_addr_t sfsp;
1652	user_addr_t *mp;
1653	int count, maxcount;
1654	struct getfsstat_struct fst;
1655
1656	if (IS_64BIT_PROCESS(p)) {
1657		maxcount = uap->bufsize / sizeof(struct user_statfs);
1658	}
1659	else {
1660		maxcount = uap->bufsize / sizeof(struct statfs);
1661	}
1662	sfsp = uap->buf;
1663	count = 0;
1664
1665	mp = NULL;
1666
1667#if CONFIG_MACF
1668	if (uap->mac != USER_ADDR_NULL) {
1669		u_int32_t *mp0;
1670		int error;
1671		int i;
1672
1673		count = (int)(uap->macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
1674		if (count != maxcount)
1675			return (EINVAL);
1676
1677		/* Copy in the array */
1678		MALLOC(mp0, u_int32_t *, uap->macsize, M_MACTEMP, M_WAITOK);
1679		error = copyin(uap->mac, mp0, uap->macsize);
1680		if (error)
1681			return (error);
1682
1683		/* Normalize to an array of user_addr_t */
1684		MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
1685		for (i = 0; i < count; i++) {
1686			if (IS_64BIT_PROCESS(p))
1687				mp[i] = ((user_addr_t *)mp0)[i];
1688			else
1689				mp[i] = (user_addr_t)mp0[i];
1690		}
1691		FREE(mp0, M_MACTEMP);
1692	}
1693#endif
1694
1695
1696	fst.sfsp = sfsp;
1697	fst.mp = mp;
1698	fst.flags = uap->flags;
1699	fst.count = 0;
1700	fst.error = 0;
1701	fst.maxcount = maxcount;
1702
1703
1704	vfs_iterate(0, getfsstat_callback, &fst);
1705
1706	if (mp)
1707		FREE(mp, M_MACTEMP);
1708
1709	if (fst.error ) {
1710		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1711		return(fst.error);
1712	}
1713
1714	if (fst.sfsp && fst.count > fst.maxcount)
1715		*retval = fst.maxcount;
1716	else
1717		*retval = fst.count;
1718	return (0);
1719}
1720
1721static int
1722getfsstat64_callback(mount_t mp, void * arg)
1723{
1724	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1725	struct vfsstatfs *sp;
1726	int error;
1727
1728	if (fstp->sfsp && fstp->count < fstp->maxcount) {
1729		sp = &mp->mnt_vfsstat;
1730		/*
1731		 * If MNT_NOWAIT is specified, do not refresh the
1732		 * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1733		 */
1734		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1735		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
1736			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1737			return(VFS_RETURNED);
1738		}
1739
1740		error = statfs64_common(mp, sp, fstp->sfsp);
1741		if (error) {
1742			fstp->error = error;
1743			return(VFS_RETURNED_DONE);
1744		}
1745		fstp->sfsp += sizeof(struct statfs64);
1746	}
1747	fstp->count++;
1748	return(VFS_RETURNED);
1749}
1750
1751/*
1752 * Get statistics on all file systems in 64 bit mode.
1753 */
1754int
1755getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
1756{
1757	user_addr_t sfsp;
1758	int count, maxcount;
1759	struct getfsstat_struct fst;
1760
1761	maxcount = uap->bufsize / sizeof(struct statfs64);
1762
1763	sfsp = uap->buf;
1764	count = 0;
1765
1766	fst.sfsp = sfsp;
1767	fst.flags = uap->flags;
1768	fst.count = 0;
1769	fst.error = 0;
1770	fst.maxcount = maxcount;
1771
1772	vfs_iterate(0, getfsstat64_callback, &fst);
1773
1774	if (fst.error ) {
1775		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1776		return(fst.error);
1777	}
1778
1779	if (fst.sfsp && fst.count > fst.maxcount)
1780		*retval = fst.maxcount;
1781	else
1782		*retval = fst.count;
1783
1784	return (0);
1785}
1786
1787#if COMPAT_GETFSSTAT
1788ogetfsstat(proc_t p, struct getfsstat_args *uap, register_t *retval)
1789{
1790	return (ENOTSUP);
1791}
1792#endif
1793
1794/*
1795 * Change current working directory to a given file descriptor.
1796 */
1797/* ARGSUSED */
1798static int
1799common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1800{
1801	struct filedesc *fdp = p->p_fd;
1802	vnode_t vp;
1803	vnode_t tdp;
1804	vnode_t tvp;
1805	struct mount *mp;
1806	int error;
1807	vfs_context_t ctx = vfs_context_current();
1808
1809	if (per_thread && uap->fd == -1) {
1810		/*
1811		 * Switching back from per-thread to per process CWD; verify we
1812		 * in fact have one before proceeding.  The only success case
1813		 * for this code path is to return 0 preemptively after zapping
1814		 * the thread structure contents.
1815		 */
1816		thread_t th = vfs_context_thread(ctx);
1817		if (th) {
1818			uthread_t uth = get_bsdthread_info(th);
1819			tvp = uth->uu_cdir;
1820			uth->uu_cdir = NULLVP;
1821			if (tvp != NULLVP) {
1822				vnode_rele(tvp);
1823				return (0);
1824			}
1825		}
1826		return (EBADF);
1827	}
1828
1829	if ( (error = file_vnode(uap->fd, &vp)) )
1830		return(error);
1831	if ( (error = vnode_getwithref(vp)) ) {
1832	        file_drop(uap->fd);
1833		return(error);
1834	}
1835
1836	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1837
1838	if (vp->v_type != VDIR) {
1839		error = ENOTDIR;
1840		goto out;
1841	}
1842
1843#if CONFIG_MACF
1844	error = mac_vnode_check_chdir(ctx, vp);
1845	if (error)
1846		goto out;
1847#endif
1848	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
1849	if (error)
1850		goto out;
1851
1852	while (!error && (mp = vp->v_mountedhere) != NULL) {
1853		if (vfs_busy(mp, LK_NOWAIT)) {
1854			error = EACCES;
1855			goto out;
1856		}
1857		error = VFS_ROOT(mp, &tdp, ctx);
1858		vfs_unbusy(mp);
1859		if (error)
1860			break;
1861		vnode_put(vp);
1862		vp = tdp;
1863	}
1864	if (error)
1865		goto out;
1866	if ( (error = vnode_ref(vp)) )
1867	        goto out;
1868	vnode_put(vp);
1869
1870	if (per_thread) {
1871		thread_t th = vfs_context_thread(ctx);
1872		if (th) {
1873			uthread_t uth = get_bsdthread_info(th);
1874			tvp = uth->uu_cdir;
1875			uth->uu_cdir = vp;
1876			OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1877		} else {
1878			vnode_rele(vp);
1879			return (ENOENT);
1880		}
1881	} else {
1882		proc_fdlock(p);
1883		tvp = fdp->fd_cdir;
1884		fdp->fd_cdir = vp;
1885		proc_fdunlock(p);
1886	}
1887
1888	if (tvp)
1889	        vnode_rele(tvp);
1890	file_drop(uap->fd);
1891
1892	return (0);
1893out:
1894	vnode_put(vp);
1895	file_drop(uap->fd);
1896
1897	return(error);
1898}
1899
1900int
1901fchdir(proc_t p, struct fchdir_args *uap, __unused register_t *retval)
1902{
1903	return common_fchdir(p, uap, 0);
1904}
1905
1906int
1907__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused register_t *retval)
1908{
1909	return common_fchdir(p, (void *)uap, 1);
1910}
1911
1912/*
1913 * Change current working directory (``.'').
1914 *
1915 * Returns:	0			Success
1916 *	change_dir:ENOTDIR
1917 *	change_dir:???
1918 *	vnode_ref:ENOENT		No such file or directory
1919 */
1920/* ARGSUSED */
1921static int
1922common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1923{
1924	struct filedesc *fdp = p->p_fd;
1925	int error;
1926	struct nameidata nd;
1927	vnode_t tvp;
1928	vfs_context_t ctx = vfs_context_current();
1929
1930	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1931		UIO_USERSPACE, uap->path, ctx);
1932	error = change_dir(&nd, ctx);
1933	if (error)
1934		return (error);
1935	if ( (error = vnode_ref(nd.ni_vp)) ) {
1936	        vnode_put(nd.ni_vp);
1937		return (error);
1938	}
1939	/*
1940	 * drop the iocount we picked up in change_dir
1941	 */
1942	vnode_put(nd.ni_vp);
1943
1944	if (per_thread) {
1945		thread_t th = vfs_context_thread(ctx);
1946		if (th) {
1947			uthread_t uth = get_bsdthread_info(th);
1948			tvp = uth->uu_cdir;
1949			uth->uu_cdir = nd.ni_vp;
1950			OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1951		} else {
1952			vnode_rele(nd.ni_vp);
1953			return (ENOENT);
1954		}
1955	} else {
1956		proc_fdlock(p);
1957		tvp = fdp->fd_cdir;
1958		fdp->fd_cdir = nd.ni_vp;
1959		proc_fdunlock(p);
1960	}
1961
1962	if (tvp)
1963	        vnode_rele(tvp);
1964
1965	return (0);
1966}
1967
1968int
1969chdir(proc_t p, struct chdir_args *uap, __unused register_t *retval)
1970{
1971	return common_chdir(p, (void *)uap, 0);
1972}
1973
1974int
1975__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused register_t *retval)
1976{
1977	return common_chdir(p, (void *)uap, 1);
1978}
1979
1980
1981/*
1982 * Change notion of root (``/'') directory.
1983 */
1984/* ARGSUSED */
1985int
1986chroot(proc_t p, struct chroot_args *uap, __unused register_t *retval)
1987{
1988	struct filedesc *fdp = p->p_fd;
1989	int error;
1990	struct nameidata nd;
1991	vnode_t tvp;
1992	vfs_context_t ctx = vfs_context_current();
1993
1994	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1995		return (error);
1996
1997	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1998		UIO_USERSPACE, uap->path, ctx);
1999	error = change_dir(&nd, ctx);
2000	if (error)
2001		return (error);
2002
2003#if CONFIG_MACF
2004	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2005	    &nd.ni_cnd);
2006	if (error) {
2007		vnode_put(nd.ni_vp);
2008		return (error);
2009	}
2010#endif
2011
2012	if ( (error = vnode_ref(nd.ni_vp)) ) {
2013	        vnode_put(nd.ni_vp);
2014		return (error);
2015	}
2016	vnode_put(nd.ni_vp);
2017
2018	proc_fdlock(p);
2019	tvp = fdp->fd_rdir;
2020	fdp->fd_rdir = nd.ni_vp;
2021	fdp->fd_flags |= FD_CHROOT;
2022	proc_fdunlock(p);
2023
2024	if (tvp != NULL)
2025		vnode_rele(tvp);
2026
2027	return (0);
2028}
2029
2030/*
2031 * Common routine for chroot and chdir.
2032 *
2033 * Returns:	0			Success
2034 *		ENOTDIR			Not a directory
2035 *		namei:???		[anything namei can return]
2036 *		vnode_authorize:???	[anything vnode_authorize can return]
2037 */
2038static int
2039change_dir(struct nameidata *ndp, vfs_context_t ctx)
2040{
2041	vnode_t vp;
2042	int error;
2043
2044	if ((error = namei(ndp)))
2045		return (error);
2046	nameidone(ndp);
2047	vp = ndp->ni_vp;
2048
2049	if (vp->v_type != VDIR) {
2050		vnode_put(vp);
2051		return (ENOTDIR);
2052	}
2053
2054#if CONFIG_MACF
2055	error = mac_vnode_check_chdir(ctx, vp);
2056	if (error) {
2057		vnode_put(vp);
2058		return (error);
2059	}
2060#endif
2061
2062	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2063	if (error) {
2064		vnode_put(vp);
2065		return (error);
2066	}
2067
2068	return (error);
2069}
2070
2071	/*
2072	 * Check permissions, allocate an open file structure,
2073	 * and call the device open routine if any.
 *
 * Parameters:	ctx			Context of the caller; supplies the
 *					process and thread
 *		ndp			Initialized nameidata for the path to
 *					open; ni_vp is used after vn_open_auth
 *		uflags			Open flags as given by the caller
 *					(O_* form); converted with FFLAGS()
 *		vap			Attributes passed to vn_open_auth;
 *					va_mode is also audited
 *		retval			Receives the new descriptor index
2074	 *
2075	 * Returns:	0			Success
2076	 *		EINVAL
2077	 *		EINTR
2078	 *	falloc:ENFILE
2079	 *	falloc:EMFILE
2080	 *	falloc:ENOMEM
2081	 *	vn_open_auth:???
2082	 *	dupfdopen:???
2083	 *	VNOP_ADVLOCK:???
2084	 *	vnode_setsize:???
2085	 */
2086	#warning XXX implement uid, gid
2087	int
2088	open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, register_t *retval)
2089	{
2090		proc_t p = vfs_context_proc(ctx);
2091		uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2092		struct filedesc *fdp = p->p_fd;
2093		struct fileproc *fp;
2094		vnode_t vp;
2095		int flags, oflags;
2096		struct fileproc *nfp;
2097		int type, indx, error;
2098		struct flock lf;
2099		int no_controlling_tty = 0;
2100		int deny_controlling_tty = 0;
2101		struct session *sessp = SESSION_NULL;
2102		struct vfs_context context = *vfs_context_current();	/* local copy */
2103
2104		oflags = uflags;
2105
	/* all access-mode bits set at once is rejected */
2106		if ((oflags & O_ACCMODE) == O_ACCMODE)
2107			return(EINVAL);
2108		flags = FFLAGS(uflags);
2109
2110		AUDIT_ARG(fflags, oflags);
2111		AUDIT_ARG(mode, vap->va_mode);
2112
2113		if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2114			return (error);
2115		}
2116		fp = nfp;
	/*
	 * Preset to a negative value; the ENODEV/ENXIO handling below only
	 * applies if something stored a value >= 0 here during the open.
	 */
2117		uu->uu_dupfd = -indx - 1;
2118
2119		if (!(p->p_flag & P_CONTROLT)) {
		/* the session obtained here is released with session_rele on every exit path */
2120			sessp = proc_session(p);
2121			no_controlling_tty = 1;
2122			/*
2123			 * If conditions would warrant getting a controlling tty if
2124			 * the device being opened is a tty (see ttyopen in tty.c),
2125			 * but the open flags deny it, set a flag in the session to
2126			 * prevent it.
2127			 */
2128			if (SESS_LEADER(p, sessp) &&
2129			    sessp->s_ttyvp == NULL &&
2130			    (flags & O_NOCTTY)) {
2131				session_lock(sessp);
2132			    	sessp->s_flags |= S_NOCTTY;
2133				session_unlock(sessp);
2134				deny_controlling_tty = 1;
2135			}
2136		}
2137
2138		if ((error = vn_open_auth(ndp, &flags, vap))) {
2139			if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
2140				if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
2141					fp_drop(p, indx, NULL, 0);
2142				        *retval = indx;
2143					if (deny_controlling_tty) {
2144						session_lock(sessp);
2145						sessp->s_flags &= ~S_NOCTTY;
2146						session_unlock(sessp);
2147					}
2148					if (sessp != SESSION_NULL)
2149						session_rele(sessp);
2150					return (0);
2151				}
2152			}
		/* the open failed: report ERESTART as EINTR and free the descriptor */
2153			if (error == ERESTART)
2154			        error = EINTR;
2155			fp_free(p, indx, fp);
2156
2157			if (deny_controlling_tty) {
2158				session_lock(sessp);
2159				sessp->s_flags &= ~S_NOCTTY;
2160				session_unlock(sessp);
2161			}
2162			if (sessp != SESSION_NULL)
2163				session_rele(sessp);
2164			return (error);
2165		}
2166		uu->uu_dupfd = 0;
2167		vp = ndp->ni_vp;
2168
	/* attach the opened vnode to the new file structure */
2169		fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
2170		fp->f_fglob->fg_type = DTYPE_VNODE;
2171		fp->f_fglob->fg_ops = &vnops;
2172		fp->f_fglob->fg_data = (caddr_t)vp;
2173
	/* O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock as part of the open */
2174		if (flags & (O_EXLOCK | O_SHLOCK)) {
2175			lf.l_whence = SEEK_SET;
2176			lf.l_start = 0;
2177			lf.l_len = 0;
2178			if (flags & O_EXLOCK)
2179				lf.l_type = F_WRLCK;
2180			else
2181				lf.l_type = F_RDLCK;
2182			type = F_FLOCK;
2183			if ((flags & FNONBLOCK) == 0)
2184				type |= F_WAIT;
2185	#if CONFIG_MACF
2186			error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
2187			    F_SETLK, &lf);
2188			if (error)
2189				goto bad;
2190	#endif
2191			if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
2192				goto bad;
2193			fp->f_fglob->fg_flag |= FHASLOCK;
2194		}
2195
2196		/* try to truncate by setting the size attribute */
2197		if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
2198			goto bad;
2199
2200		/*
2201		 * If the open flags denied the acquisition of a controlling tty,
2202		 * clear the flag in the session structure that prevented the lower
2203		 * level code from assigning one.
2204		 */
2205		if (deny_controlling_tty) {
2206			session_lock(sessp);
2207			sessp->s_flags &= ~S_NOCTTY;
2208			session_unlock(sessp);
2209		}
2210
2211		/*
2212		 * If a controlling tty was set by the tty line discipline, then we
2213		 * want to set the vp of the tty into the session structure.  We have
2214		 * a race here because we can't get to the vp for the tp in ttyopen,
2215		 * because it's not passed as a parameter in the open path.
2216		 */
2217		if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
2218			vnode_t ttyvp;
		/* NOTE(review): the result of vnode_ref() is not checked here */
2219			vnode_ref(vp);
2220			session_lock(sessp);
2221			ttyvp = sessp->s_ttyvp;
2222			sessp->s_ttyvp = vp;
2223			sessp->s_ttyvid = vnode_vid(vp);
2224			session_unlock(sessp);
2225			if (ttyvp != NULLVP)
2226				vnode_rele(ttyvp);
2227		}
2228
2229		vnode_put(vp);
2230
2231		proc_fdlock(p);
2232		procfdtbl_releasefd(p, indx, NULL);
2233		fp_drop(p, indx, fp, 1);
2234		proc_fdunlock(p);
2235
2236		*retval = indx;
2237
2238		if (sessp != SESSION_NULL)
2239			session_rele(sessp);
2240		return (0);
	/*
	 * Failure after the vnode was opened: undo the S_NOCTTY marking,
	 * release the session, close and release the vnode, and free the
	 * descriptor.
	 */
2241	bad:
2242		if (deny_controlling_tty) {
2243			session_lock(sessp);
2244			sessp->s_flags &= ~S_NOCTTY;
2245			session_unlock(sessp);
2246		}
2247		if (sessp != SESSION_NULL)
2248			session_rele(sessp);
2249
2250		/* Modify local copy (to not damage thread copy) */
2251		context.vc_ucred = fp->f_fglob->fg_cred;
2252
2253		vn_close(vp, fp->f_fglob->fg_flag, &context);
2254		vnode_put(vp);
2255		fp_free(p, indx, fp);
2256
2257		return (error);
2258
2259	}
2260
2261/*
2262 * An open system call using an extended argument list compared to the regular
2263 * system call 'open'.
2264 *
2265 * Parameters:	p			Process requesting the open
2266 *		uap			User argument descriptor (see below)
2267 *		retval			Pointer to an area to receive the
2268 *					return calue from the system call
2269 *
2270 * Indirect:	uap->path		Path to open (same as 'open')
2271 *		uap->flags		Flags to open (same as 'open'
2272 *		uap->uid		UID to set, if creating
2273 *		uap->gid		GID to set, if creating
2274 *		uap->mode		File mode, if creating (same as 'open')
2275 *		uap->xsecurity		ACL to set, if creating
2276 *
2277 * Returns:	0			Success
2278 *		!0			errno value
2279 *
2280 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
2281 *
2282 * XXX:		We should enummerate the possible errno values here, and where
2283 *		in the code they originated.
2284 */
2285int
2286open_extended(proc_t p, struct open_extended_args *uap, register_t *retval)
2287{
2288	struct filedesc *fdp = p->p_fd;
2289	int ciferror;
2290	kauth_filesec_t xsecdst;
2291	struct vnode_attr va;
2292	struct nameidata nd;
2293	int cmode;
2294
2295	xsecdst = NULL;
2296	if ((uap->xsecurity != USER_ADDR_NULL) &&
2297	    ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2298		return ciferror;
2299
2300	VATTR_INIT(&va);
2301	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2302	VATTR_SET(&va, va_mode, cmode);
2303	if (uap->uid != KAUTH_UID_NONE)
2304		VATTR_SET(&va, va_uid, uap->uid);
2305	if (uap->gid != KAUTH_GID_NONE)
2306		VATTR_SET(&va, va_gid, uap->gid);
2307	if (xsecdst != NULL)
2308		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2309
2310	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2311
2312	ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2313	if (xsecdst != NULL)
2314		kauth_filesec_free(xsecdst);
2315
2316	return ciferror;
2317}
2318
2319int
2320open(proc_t p, struct open_args *uap, register_t *retval)
2321{
2322	__pthread_testcancel(1);
2323	return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2324}
2325
2326
2327int
2328open_nocancel(proc_t p, struct open_nocancel_args *uap, register_t *retval)
2329{
2330	struct filedesc *fdp = p->p_fd;
2331	struct vnode_attr va;
2332	struct nameidata nd;
2333	int cmode;
2334
2335	VATTR_INIT(&va);
2336	/* Mask off all but regular access permissions */
2337	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2338	VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2339
2340	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2341
2342	return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2343}
2344
2345
2346/*
2347 * Create a special file.
2348 */
2349static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2350
2351int
2352mknod(proc_t p, struct mknod_args *uap, __unused register_t *retval)
2353{
2354	struct vnode_attr va;
2355	vfs_context_t ctx = vfs_context_current();
2356	int error;
2357	int whiteout = 0;
2358	struct nameidata nd;
2359	vnode_t	vp, dvp;
2360
2361 	VATTR_INIT(&va);
2362 	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2363 	VATTR_SET(&va, va_rdev, uap->dev);
2364
2365	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
2366	if ((uap->mode & S_IFMT) == S_IFIFO)
2367 		return(mkfifo1(ctx, uap->path, &va));
2368
2369	AUDIT_ARG(mode, uap->mode);
2370	AUDIT_ARG(dev, uap->dev);
2371
2372	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2373		return (error);
2374	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2375		UIO_USERSPACE, uap->path, ctx);
2376	error = namei(&nd);
2377	if (error)
2378		return (error);
2379	dvp = nd.ni_dvp;
2380	vp = nd.ni_vp;
2381
2382	if (vp != NULL) {
2383		error = EEXIST;
2384		goto out;
2385	}
2386
2387	switch (uap->mode & S_IFMT) {
2388	case S_IFMT:	/* used by badsect to flag bad sectors */
2389		VATTR_SET(&va, va_type, VBAD);
2390		break;
2391	case S_IFCHR:
2392		VATTR_SET(&va, va_type, VCHR);
2393		break;
2394	case S_IFBLK:
2395		VATTR_SET(&va, va_type, VBLK);
2396		break;
2397	case S_IFWHT:
2398		whiteout = 1;
2399		break;
2400	default:
2401		error = EINVAL;
2402		goto out;
2403	}
2404
2405#if CONFIG_MACF
2406	if (!whiteout) {
2407		error = mac_vnode_check_create(ctx,
2408		    nd.ni_dvp, &nd.ni_cnd, &va);
2409		if (error)
2410			goto out;
2411	}
2412#endif
2413
2414 	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2415 		goto out;
2416
2417	if (whiteout) {
2418		error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2419	} else {
2420		error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
2421	}
2422	if (error)
2423		goto out;
2424
2425	if (vp) {
2426		int	update_flags = 0;
2427
2428	        // Make sure the name & parent pointers are hooked up
2429	        if (vp->v_name == NULL)
2430			update_flags |= VNODE_UPDATE_NAME;
2431		if (vp->v_parent == NULLVP)
2432		        update_flags |= VNODE_UPDATE_PARENT;
2433
2434		if (update_flags)
2435		        vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2436
2437#if CONFIG_FSE
2438		add_fsevent(FSE_CREATE_FILE, ctx,
2439		    FSE_ARG_VNODE, vp,
2440		    FSE_ARG_DONE);
2441#endif
2442	}
2443
2444out:
2445	/*
2446	 * nameidone has to happen before we vnode_put(dvp)
2447	 * since it may need to release the fs_nodelock on the dvp
2448	 */
2449	nameidone(&nd);
2450
2451	if (vp)
2452	        vnode_put(vp);
2453	vnode_put(dvp);
2454
2455	return (error);
2456}
2457
2458/*
2459 * Create a named pipe.
2460 *
2461 * Returns:	0			Success
2462 *		EEXIST
2463 *	namei:???
2464 *	vnode_authorize:???
2465 *	vn_create:???
2466 */
2467static int
2468mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
2469{
2470	vnode_t	vp, dvp;
2471	int error;
2472	struct nameidata nd;
2473
2474	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2475		UIO_USERSPACE, upath, ctx);
2476	error = namei(&nd);
2477	if (error)
2478		return (error);
2479	dvp = nd.ni_dvp;
2480	vp = nd.ni_vp;
2481
2482   	/* check that this is a new file and authorize addition */
2483   	if (vp != NULL) {
2484   		error = EEXIST;
2485   		goto out;
2486   	}
2487   	VATTR_SET(vap, va_type, VFIFO);
2488
2489#if CONFIG_MACF
2490	error = mac_vnode_check_create(ctx, nd.ni_dvp,
2491	    &nd.ni_cnd, vap);
2492	if (error)
2493		goto out;
2494#endif
2495
2496
2497   	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2498   		goto out;
2499
2500
2501  	error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
2502out:
2503	/*
2504	 * nameidone has to happen before we vnode_put(dvp)
2505	 * since it may need to release the fs_nodelock on the dvp
2506	 */
2507	nameidone(&nd);
2508
2509	if (vp)
2510	        vnode_put(vp);
2511	vnode_put(dvp);
2512
2513	return error;
2514}
2515
2516
2517/*
2518 * A mkfifo system call using an extended argument list compared to the regular
2519 * system call 'mkfifo'.
2520 *
2521 * Parameters:	p			Process requesting the open
2522 *		uap			User argument descriptor (see below)
2523 *		retval			(Ignored)
2524 *
2525 * Indirect:	uap->path		Path to fifo (same as 'mkfifo')
2526 *		uap->uid		UID to set
2527 *		uap->gid		GID to set
2528 *		uap->mode		File mode to set (same as 'mkfifo')
2529 *		uap->xsecurity		ACL to set, if creating
2530 *
2531 * Returns:	0			Success
2532 *		!0			errno value
2533 *
2534 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
2535 *
2536 * XXX:		We should enummerate the possible errno values here, and where
2537 *		in the code they originated.
2538 */
2539int
2540mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused register_t *retval)
2541{
2542	int ciferror;
2543	kauth_filesec_t xsecdst;
2544	struct vnode_attr va;
2545
2546	xsecdst = KAUTH_FILESEC_NONE;
2547	if (uap->xsecurity != USER_ADDR_NULL) {
2548		if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
2549			return ciferror;
2550	}
2551
2552	VATTR_INIT(&va);
2553   	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2554	if (uap->uid != KAUTH_UID_NONE)
2555		VATTR_SET(&va, va_uid, uap->uid);
2556	if (uap->gid != KAUTH_GID_NONE)
2557		VATTR_SET(&va, va_gid, uap->gid);
2558	if (xsecdst != KAUTH_FILESEC_NONE)
2559		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2560
2561	ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
2562
2563	if (xsecdst != KAUTH_FILESEC_NONE)
2564		kauth_filesec_free(xsecdst);
2565	return ciferror;
2566}
2567
2568/* ARGSUSED */
2569int
2570mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval)
2571{
2572	struct vnode_attr va;
2573
2574   	VATTR_INIT(&va);
2575   	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2576
2577	return(mkfifo1(vfs_context_current(), uap->path, &va));
2578}
2579
2580/*
2581 * Make a hard file link.
2582 *
2583 * Returns:	0			Success
2584 *		EPERM
2585 *		EEXIST
2586 *		EXDEV
2587 *	namei:???
2588 *	vnode_authorize:???
2589 *	VNOP_LINK:???
2590 */
2591/* ARGSUSED */
2592int
2593link(__unused proc_t p, struct link_args *uap, __unused register_t *retval)
2594{
2595	vnode_t	vp, dvp, lvp;
2596	struct nameidata nd;
2597	vfs_context_t ctx = vfs_context_current();
2598	int error;
2599	fse_info finfo;
2600	int need_event, has_listeners;
2601	char *target_path = NULL;
2602
2603	vp = dvp = lvp = NULLVP;
2604
2605	/* look up the object we are linking to */
2606	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2607		UIO_USERSPACE, uap->path, ctx);
2608	error = namei(&nd);
2609	if (error)
2610		return (error);
2611	vp = nd.ni_vp;
2612
2613	nameidone(&nd);
2614
2615	/*
2616	 * Normally, linking to directories is not supported.
2617	 * However, some file systems may have limited support.
2618	 */
2619	if (vp->v_type == VDIR) {
2620		if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2621			error = EPERM;   /* POSIX */
2622			goto out;
2623		}
2624		/* Linking to a directory requires ownership. */
2625		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
2626			struct vnode_attr dva;
2627
2628			VATTR_INIT(&dva);
2629			VATTR_WANTED(&dva, va_uid);
2630			if (vnode_getattr(vp, &dva, ctx) != 0 ||
2631			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
2632			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
2633				error = EACCES;
2634				goto out;
2635			}
2636		}
2637	}
2638
2639	/* lookup the target node */
2640	nd.ni_cnd.cn_nameiop = CREATE;
2641	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
2642	nd.ni_dirp = uap->link;
2643	error = namei(&nd);
2644	if (error != 0)
2645		goto out;
2646	dvp = nd.ni_dvp;
2647	lvp = nd.ni_vp;
2648
2649#if CONFIG_MACF
2650	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
2651		goto out2;
2652#endif
2653
2654  	/* or to anything that kauth doesn't want us to (eg. immutable items) */
2655  	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
2656 		goto out2;
2657
2658	/* target node must not exist */
2659	if (lvp != NULLVP) {
2660		error = EEXIST;
2661		goto out2;
2662	}
2663  	/* cannot link across mountpoints */
2664  	if (vnode_mount(vp) != vnode_mount(dvp)) {
2665  		error = EXDEV;
2666  		goto out2;
2667  	}
2668
2669  	/* authorize creation of the target note */
2670  	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2671  		goto out2;
2672
2673	/* and finally make the link */
2674	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
2675	if (error)
2676		goto out2;
2677
2678#if CONFIG_FSE
2679	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2680#else
2681	need_event = 0;
2682#endif
2683	has_listeners = kauth_authorize_fileop_has_listeners();
2684
2685	if (need_event || has_listeners) {
2686		char *link_to_path = NULL;
2687		int len, link_name_len;
2688
2689		/* build the path to the new link file */
2690		GET_PATH(target_path);
2691		if (target_path == NULL) {
2692			error = ENOMEM;
2693			goto out2;
2694		}
2695
2696		len = MAXPATHLEN;
2697		vn_getpath(dvp, target_path, &len);
2698		if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2699		    target_path[len-1] = '/';
2700		    strlcpy(&target_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2701		    len += nd.ni_cnd.cn_namelen;
2702		}
2703
2704		if (has_listeners) {
2705		        /* build the path to file we are linking to */
2706			GET_PATH(link_to_path);
2707			if (link_to_path == NULL) {
2708				error = ENOMEM;
2709				goto out2;
2710			}
2711
2712			link_name_len = MAXPATHLEN;
2713			vn_getpath(vp, link_to_path, &link_name_len);
2714
2715			/*
2716			 * Call out to allow 3rd party notification of rename.
2717			 * Ignore result of kauth_authorize_fileop call.
2718			 */
2719			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
2720					       (uintptr_t)link_to_path, (uintptr_t)target_path);
2721			if (link_to_path != NULL) {
2722				RELEASE_PATH(link_to_path);
2723			}
2724		}
2725#if CONFIG_FSE
2726		if (need_event) {
2727		        /* construct fsevent */
2728		        if (get_fse_info(vp, &finfo, ctx) == 0) {
2729			        // build the path to the destination of the link
2730			        add_fsevent(FSE_CREATE_FILE, ctx,
2731					    FSE_ARG_STRING, len, target_path,
2732					    FSE_ARG_FINFO, &finfo,
2733					    FSE_ARG_DONE);
2734			}
2735		}
2736#endif
2737	}
2738out2:
2739	/*
2740	 * nameidone has to happen before we vnode_put(dvp)
2741	 * since it may need to release the fs_nodelock on the dvp
2742	 */
2743	nameidone(&nd);
2744	if (target_path != NULL) {
2745		RELEASE_PATH(target_path);
2746	}
2747out:
2748	if (lvp)
2749		vnode_put(lvp);
2750	if (dvp)
2751		vnode_put(dvp);
2752	vnode_put(vp);
2753	return (error);
2754}
2755
2756/*
2757 * Make a symbolic link.
2758 *
2759 * We could add support for ACLs here too...
2760 */
2761/* ARGSUSED */
2762int
2763symlink(proc_t p, struct symlink_args *uap, __unused register_t *retval)
2764{
2765	struct vnode_attr va;
2766	char *path;
2767	int error;
2768	struct nameidata nd;
2769	vfs_context_t ctx = vfs_context_current();
2770	vnode_t	vp, dvp;
2771	size_t dummy=0;
2772
2773	MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2774	error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
2775	if (error)
2776		goto out;
2777	AUDIT_ARG(text, path);	/* This is the link string */
2778
2779	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2780		UIO_USERSPACE, uap->link, ctx);
2781	error = namei(&nd);
2782	if (error)
2783		goto out;
2784	dvp = nd.ni_dvp;
2785	vp = nd.ni_vp;
2786
2787	VATTR_INIT(&va);
2788	VATTR_SET(&va, va_type, VLNK);
2789	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
2790#if CONFIG_MACF
2791	error = mac_vnode_check_create(ctx,
2792			dvp, &nd.ni_cnd, &va);
2793#endif
2794	if (error != 0) {
2795	    goto skipit;
2796	}
2797
2798	if (vp != NULL) {
2799	    error = EEXIST;
2800	    goto skipit;
2801	}
2802
2803	/* authorize */
2804	if (error == 0)
2805		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
2806	/* get default ownership, etc. */
2807	if (error == 0)
2808		error = vnode_authattr_new(dvp, &va, 0, ctx);
2809	if (error == 0)
2810		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
2811
2812	/* do fallback attribute handling */
2813	if (error == 0)
2814		error = vnode_setattr_fallback(vp, &va, ctx);
2815
2816	if (error == 0) {
2817		int	update_flags = 0;
2818
2819		if (vp == NULL) {
2820			nd.ni_cnd.cn_nameiop = LOOKUP;
2821			nd.ni_cnd.cn_flags = 0;
2822			error = namei(&nd);
2823			vp = nd.ni_vp;
2824
2825			if (vp == NULL)
2826				goto skipit;
2827		}
2828
2829#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
2830		/* call out to allow 3rd party notification of rename.
2831		 * Ignore result of kauth_authorize_fileop call.
2832		 */
2833		if (kauth_authorize_fileop_has_listeners() &&
2834		    namei(&nd) == 0) {
2835			char *new_link_path = NULL;
2836			int		len;
2837
2838			/* build the path to the new link file */
2839			new_link_path = get_pathbuff();
2840			len = MAXPATHLEN;
2841			vn_getpath(dvp, new_link_path, &len);
2842			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2843				new_link_path[len - 1] = '/';
2844				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2845			}
2846
2847			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2848					   (uintptr_t)path, (uintptr_t)new_link_path);
2849			if (new_link_path != NULL)
2850				release_pathbuff(new_link_path);
2851		}
2852#endif
2853		// Make sure the name & parent pointers are hooked up
2854		if (vp->v_name == NULL)
2855			update_flags |= VNODE_UPDATE_NAME;
2856		if (vp->v_parent == NULLVP)
2857			update_flags |= VNODE_UPDATE_PARENT;
2858
2859		if (update_flags)
2860			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2861
2862#if CONFIG_FSE
2863		add_fsevent(FSE_CREATE_FILE, ctx,
2864			    FSE_ARG_VNODE, vp,
2865			    FSE_ARG_DONE);
2866#endif
2867	}
2868
2869skipit:
2870	/*
2871	 * nameidone has to happen before we vnode_put(dvp)
2872	 * since it may need to release the fs_nodelock on the dvp
2873	 */
2874	nameidone(&nd);
2875
2876	if (vp)
2877	        vnode_put(vp);
2878	vnode_put(dvp);
2879out:
2880	FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2881
2882	return (error);
2883}
2884
2885/*
2886 * Delete a whiteout from the filesystem.
2887 */
2888/* ARGSUSED */
2889#warning XXX authorization not implmented for whiteouts
2890int
2891undelete(__unused proc_t p, struct undelete_args *uap, __unused register_t *retval)
2892{
2893	int error;
2894	struct nameidata nd;
2895	vfs_context_t ctx = vfs_context_current();
2896	vnode_t	vp, dvp;
2897
2898	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
2899		UIO_USERSPACE, uap->path, ctx);
2900	error = namei(&nd);
2901	if (error)
2902		return (error);
2903	dvp = nd.ni_dvp;
2904	vp = nd.ni_vp;
2905
2906	if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2907		error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
2908	} else
2909	        error = EEXIST;
2910
2911	/*
2912	 * nameidone has to happen before we vnode_put(dvp)
2913	 * since it may need to release the fs_nodelock on the dvp
2914	 */
2915	nameidone(&nd);
2916
2917	if (vp)
2918	        vnode_put(vp);
2919	vnode_put(dvp);
2920
2921	return (error);
2922}
2923
2924/*
2925 * Delete a name from the filesystem.
2926 */
2927/* ARGSUSED */
2928int
2929unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
2930{
2931	vnode_t	vp, dvp;
2932	int error;
2933	struct componentname *cnp;
2934	char  *path = NULL;
2935	int  len;
2936	fse_info  finfo;
2937	int flags = 0;
2938	int need_event = 0;
2939	int has_listeners = 0;
2940
2941#if NAMEDRSRCFORK
2942	/* unlink or delete is allowed on rsrc forks and named streams */
2943	ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2944#endif
2945
2946	ndp->ni_cnd.cn_flags |= LOCKPARENT;
2947	cnp = &ndp->ni_cnd;
2948
2949	error = namei(ndp);
2950	if (error)
2951		return (error);
2952	dvp = ndp->ni_dvp;
2953	vp = ndp->ni_vp;
2954
2955	/* With Carbon delete semantics, busy files cannot be deleted */
2956	if (nodelbusy) {
2957		flags |= VNODE_REMOVE_NODELETEBUSY;
2958	}
2959
2960	/*
2961	 * Normally, unlinking of directories is not supported.
2962	 * However, some file systems may have limited support.
2963	 */
2964	if ((vp->v_type == VDIR) &&
2965	    !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2966		error = EPERM;	/* POSIX */
2967	}
2968
2969	/*
2970	 * The root of a mounted filesystem cannot be deleted.
2971	 */
2972	if (vp->v_flag & VROOT) {
2973		error = EBUSY;
2974	}
2975	if (error)
2976		goto out;
2977
2978
2979	/* authorize the delete operation */
2980#if CONFIG_MACF
2981	if (!error)
2982		error = mac_vnode_check_unlink(ctx,
2983		    dvp, vp, cnp);
2984#endif /* MAC */
2985	if (!error)
2986		error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
2987	if (error)
2988		goto out;
2989
2990#if CONFIG_FSE
2991	need_event = need_fsevent(FSE_DELETE, dvp);
2992	if (need_event) {
2993		if ((vp->v_flag & VISHARDLINK) == 0) {
2994			get_fse_info(vp, &finfo, ctx);
2995		}
2996	}
2997#endif
2998	has_listeners = kauth_authorize_fileop_has_listeners();
2999	if (need_event || has_listeners) {
3000		GET_PATH(path);
3001		if (path == NULL) {
3002			error = ENOMEM;
3003			goto out;
3004		}
3005		len = MAXPATHLEN;
3006		vn_getpath(vp, path, &len);
3007	}
3008
3009#if NAMEDRSRCFORK
3010	if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
3011		error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
3012	else
3013#endif
3014		error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
3015
3016	/*
3017	 * Call out to allow 3rd party notification of delete.
3018	 * Ignore result of kauth_authorize_fileop call.
3019	 */
3020	if (!error) {
3021		if (has_listeners) {
3022			kauth_authorize_fileop(vfs_context_ucred(ctx),
3023				KAUTH_FILEOP_DELETE,
3024				(uintptr_t)vp,
3025				(uintptr_t)path);
3026		}
3027
3028		if (vp->v_flag & VISHARDLINK) {
3029		    //
3030		    // if a hardlink gets deleted we want to blow away the
3031		    // v_parent link because the path that got us to this
3032		    // instance of the link is no longer valid.  this will
3033		    // force the next call to get the path to ask the file
3034		    // system instead of just following the v_parent link.
3035		    //
3036		    vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3037		}
3038
3039#if CONFIG_FSE
3040		if (need_event) {
3041			if (vp->v_flag & VISHARDLINK) {
3042				get_fse_info(vp, &finfo, ctx);
3043			}
3044			add_fsevent(FSE_DELETE, ctx,
3045						FSE_ARG_STRING, len, path,
3046						FSE_ARG_FINFO, &finfo,
3047						FSE_ARG_DONE);
3048		}
3049#endif
3050	}
3051	if (path != NULL)
3052		RELEASE_PATH(path);
3053
3054	/*
3055	 * nameidone has to happen before we vnode_put(dvp)
3056	 * since it may need to release the fs_nodelock on the dvp
3057	 */
3058out:
3059#if NAMEDRSRCFORK
3060	/* recycle deleted rsrc fork to force reclaim on shadow file if necessary */
3061	if ((vnode_isnamedstream(ndp->ni_vp)) &&
3062			(ndp->ni_vp->v_parent != NULLVP) &&
3063			(vnode_isshadow(ndp->ni_vp))) {
3064		vnode_recycle(ndp->ni_vp);
3065	}
3066#endif
3067
3068	nameidone(ndp);
3069	vnode_put(dvp);
3070	vnode_put(vp);
3071	return (error);
3072}
3073
3074/*
3075 * Delete a name from the filesystem using POSIX semantics.
3076 */
3077int
3078unlink(__unused proc_t p, struct unlink_args *uap, __unused register_t *retval)
3079{
3080	struct nameidata nd;
3081	vfs_context_t ctx = vfs_context_current();
3082
3083	NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3084	return unlink1(ctx, &nd, 0);
3085}
3086
3087/*
3088 * Delete a name from the filesystem using Carbon semantics.
3089 */
3090int
3091delete(__unused proc_t p, struct delete_args *uap, __unused register_t *retval)
3092{
3093	struct nameidata nd;
3094	vfs_context_t ctx = vfs_context_current();
3095
3096	NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3097	return unlink1(ctx, &nd, 1);
3098}
3099
3100/*
3101 * Reposition read/write file offset.
3102 */
3103int
3104lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3105{
3106	struct fileproc *fp;
3107	vnode_t vp;
3108	struct vfs_context *ctx;
3109	off_t offset = uap->offset, file_size;
3110	int error;
3111
3112	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
3113	        if (error == ENOTSUP)
3114		        return (ESPIPE);
3115		return (error);
3116	}
3117	if (vnode_isfifo(vp)) {
3118		file_drop(uap->fd);
3119		return(ESPIPE);
3120	}
3121
3122
3123	ctx = vfs_context_current();
3124#if CONFIG_MACF
3125	if (uap->whence == L_INCR && uap->offset == 0)
3126		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3127		    fp->f_fglob);
3128	else
3129		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3130		    fp->f_fglob);
3131	if (error) {
3132		file_drop(uap->fd);
3133		return (error);
3134	}
3135#endif
3136	if ( (error = vnode_getwithref(vp)) ) {
3137		file_drop(uap->fd);
3138		return(error);
3139	}
3140
3141	switch (uap->whence) {
3142	case L_INCR:
3143		offset += fp->f_fglob->fg_offset;
3144		break;
3145	case L_XTND:
3146		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
3147			break;
3148		offset += file_size;
3149		break;
3150	case L_SET:
3151		break;
3152	default:
3153		error = EINVAL;
3154	}
3155	if (error == 0) {
3156		if (uap->offset > 0 && offset < 0) {
3157			/* Incremented/relative move past max size */
3158			error = EOVERFLOW;
3159		} else {
3160			/*
3161			 * Allow negative offsets on character devices, per
3162			 * POSIX 1003.1-2001.  Most likely for writing disk
3163			 * labels.
3164			 */
3165			if (offset < 0 && vp->v_type != VCHR) {
3166				/* Decremented/relative move before start */
3167				error = EINVAL;
3168			} else {
3169				/* Success */
3170				fp->f_fglob->fg_offset = offset;
3171				*retval = fp->f_fglob->fg_offset;
3172			}
3173		}
3174	}
3175	(void)vnode_put(vp);
3176	file_drop(uap->fd);
3177	return (error);
3178}
3179
3180
3181/*
3182 * Check access permissions.
3183 *
3184 * Returns:	0			Success
3185 *		vnode_authorize:???
3186 */
3187static int
3188access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3189{
3190 	kauth_action_t action;
3191	int error;
3192
3193 	/*
3194 	 * If just the regular access bits, convert them to something
3195	 * that vnode_authorize will understand.
3196 	 */
3197 	if (!(uflags & _ACCESS_EXTENDED_MASK)) {
3198 		action = 0;
3199  		if (uflags & R_OK)
3200			action |= KAUTH_VNODE_READ_DATA;	/* aka KAUTH_VNODE_LIST_DIRECTORY */
3201  		if (uflags & W_OK) {
3202			if (vnode_isdir(vp)) {
3203				action |= KAUTH_VNODE_ADD_FILE |
3204				    KAUTH_VNODE_ADD_SUBDIRECTORY;
3205				/* might want delete rights here too */
3206			} else {
3207				action |= KAUTH_VNODE_WRITE_DATA;
3208			}
3209		}
3210  		if (uflags & X_OK) {
3211			if (vnode_isdir(vp)) {
3212				action |= KAUTH_VNODE_SEARCH;
3213			} else {
3214				action |= KAUTH_VNODE_EXECUTE;
3215			}
3216		}
3217  	} else {
3218		/* take advantage of definition of uflags */
3219		action = uflags >> 8;
3220	}
3221
3222#if CONFIG_MACF
3223	error = mac_vnode_check_access(ctx, vp, uflags);
3224	if (error)
3225		return (error);
3226#endif /* MAC */
3227
3228 	/* action == 0 means only check for existence */
3229 	if (action != 0) {
3230 		error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3231	} else {
3232		error = 0;
3233	}
3234
3235	return(error);
3236}
3237
3238
3239
3240/*
3241 * access_extended
3242 *
3243 * Description:	uap->entries			Pointer to argument descriptor
3244 *		uap->size			Size of the area pointed to by
3245 *						the descriptor
3246 *		uap->results			Pointer to the results array
3247 *
3248 * Returns:	0			Success
3249 *		ENOMEM			Insufficient memory
3250 *		EINVAL			Invalid arguments
3251 *		namei:EFAULT		Bad address
3252 *		namei:ENAMETOOLONG	Filename too long
3253 *		namei:ENOENT		No such file or directory
3254 *		namei:ELOOP		Too many levels of symbolic links
3255 *		namei:EBADF		Bad file descriptor
3256 *		namei:ENOTDIR		Not a directory
3257 *		namei:???
3258 *		access1:
3259 *
3260 * Implicit returns:
3261 *		uap->results		Array contents modified
3262 *
 * Notes:	The uap->entries are structured as an arbitrary length array
 *		of accessx descriptors, followed by one or more NUL terminated
 *		strings
3266 *
3267 *			struct accessx_descriptor[0]
3268 *			...
3269 *			struct accessx_descriptor[n]
3270 *			char name_data[0];
3271 *
 *		We determine the entry count by walking the buffer containing
 *		the uap->entries argument descriptor.  For each descriptor we
 *		see, the valid values for the offset ad_name_offset will be
 *		in the byte range:
3276 *
3277 *			[ uap->entries + sizeof(struct accessx_descriptor) ]
3278 *						to
3279 *				[ uap->entries + uap->size - 2 ]
3280 *
3281 *		since we must have at least one string, and the string must
3282 *		be at least one character plus the NUL terminator in length.
3283 *
3284 * XXX:		Need to support the check-as uid argument
3285 */
3286int
3287access_extended(__unused proc_t p, struct access_extended_args *uap, __unused register_t *retval)
3288{
3289	struct accessx_descriptor *input = NULL;
3290	errno_t *result = NULL;
3291	errno_t error = 0;
3292	int wantdelete = 0;
3293	unsigned int desc_max, desc_actual, i, j;
3294	struct vfs_context context;
3295	struct nameidata nd;
3296 	int niopts;
3297	vnode_t vp = NULL;
3298	vnode_t dvp = NULL;
3299#define ACCESSX_MAX_DESCR_ON_STACK 10
3300	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3301
3302	context.vc_ucred = NULL;
3303
3304	/*
3305	 * Validate parameters; if valid, copy the descriptor array and string
3306	 * arguments into local memory.  Before proceeding, the following
3307	 * conditions must have been met:
3308	 *
3309	 * o	The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3310	 * o	There must be sufficient room in the request for at least one
	 *	descriptor and a one byte NUL terminated string.
3312	 * o	The allocation of local storage must not fail.
3313	 */
3314	if (uap->size > ACCESSX_MAX_TABLESIZE)
3315		return(ENOMEM);
3316	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3317		return(EINVAL);
3318	if (uap->size <= sizeof (stack_input)) {
3319		input = stack_input;
3320	} else {
3321	MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
3322	if (input == NULL) {
3323		error = ENOMEM;
3324		goto out;
3325	}
3326	}
3327	error = copyin(uap->entries, input, uap->size);
3328	if (error)
3329		goto out;
3330
3331	/*
	 * Force NUL termination of the copyin buffer to avoid namei() running
3333	 * off the end.  If the caller passes us bogus data, they may get a
3334	 * bogus result.
3335	 */
3336	((char *)input)[uap->size - 1] = 0;
3337
3338	/*
3339	 * Access is defined as checking against the process' real identity,
3340 	 * even if operations are checking the effective identity.  This
3341	 * requires that we use a local vfs context.
3342 	 */
3343	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3344	context.vc_thread = current_thread();
3345
3346	/*
3347	 * Find out how many entries we have, so we can allocate the result
3348	 * array by walking the list and adjusting the count downward by the
3349	 * earliest string offset we see.
3350	 */
3351	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
3352	desc_actual = desc_max;
3353	for (i = 0; i < desc_actual; i++) {
3354		/*
3355		 * Take the offset to the name string for this entry and
3356		 * convert to an input array index, which would be one off
3357		 * the end of the array if this entry was the lowest-addressed
3358		 * name string.
3359		 */
3360		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
3361
3362		/*
3363		 * An offset greater than the max allowable offset is an error.
3364		 * It is also an error for any valid entry to point
3365		 * to a location prior to the end of the current entry, if
3366		 * it's not a reference to the string of the previous entry.
3367		 */
3368		if (j > desc_max || (j != 0 && j <= i)) {
3369			error = EINVAL;
3370			goto out;
3371		}
3372
3373		/*
3374		 * An offset of 0 means use the previous descriptor's offset;
3375		 * this is used to chain multiple requests for the same file
3376		 * to avoid multiple lookups.
3377		 */
3378		if (j == 0) {
3379			/* This is not valid for the first entry */
3380			if (i == 0) {
3381				error = EINVAL;
3382				goto out;
3383			}
3384			continue;
3385		}
3386
3387		/*
3388		 * If the offset of the string for this descriptor is before
3389		 * what we believe is the current actual last descriptor,
3390		 * then we need to adjust our estimate downward; this permits
3391		 * the string table following the last descriptor to be out
3392		 * of order relative to the descriptor list.
3393		 */
3394		if (j < desc_actual)
3395			desc_actual = j;
3396	}
3397
	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested exceeds this limit, we fail the request with ENOMEM;
	 * otherwise we allocate one result slot per descriptor.
	 */
3403	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
3404		error = ENOMEM;
3405		goto out;
3406	}
3407	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
3408	if (result == NULL) {
3409		error = ENOMEM;
3410		goto out;
3411	}
3412
3413	/*
3414	 * Do the work by iterating over the descriptor entries we know to
3415	 * at least appear to contain valid data.
3416	 */
3417	error = 0;
3418	for (i = 0; i < desc_actual; i++) {
3419		/*
3420		 * If the ad_name_offset is 0, then we use the previous
3421		 * results to make the check; otherwise, we are looking up
3422		 * a new file name.
3423		 */
3424		if (input[i].ad_name_offset != 0) {
3425			/* discard old vnodes */
3426			if (vp) {
3427				vnode_put(vp);
3428				vp = NULL;
3429			}
3430			if (dvp) {
3431				vnode_put(dvp);
3432				dvp = NULL;
3433			}
3434
3435			/*
3436			 * Scan forward in the descriptor list to see if we
3437			 * need the parent vnode.  We will need it if we are
3438			 * deleting, since we must have rights  to remove
3439			 * entries in the parent directory, as well as the
3440			 * rights to delete the object itself.
3441			 */
3442			wantdelete = input[i].ad_flags & _DELETE_OK;
3443			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
3444				if (input[j].ad_flags & _DELETE_OK)
3445					wantdelete = 1;
3446
3447			niopts = FOLLOW | AUDITVNPATH1;
3448
3449			/* need parent for vnode_authorize for deletion test */
3450			if (wantdelete)
3451				niopts |= WANTPARENT;
3452
3453			/* do the lookup */
3454			NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
3455			error = namei(&nd);
3456			if (!error) {
3457				vp = nd.ni_vp;
3458				if (wantdelete)
3459					dvp = nd.ni_dvp;
3460			}
3461			nameidone(&nd);
3462		}
3463
3464		/*
3465		 * Handle lookup errors.
3466		 */
3467		switch(error) {
3468		case ENOENT:
3469		case EACCES:
3470		case EPERM:
3471		case ENOTDIR:
3472			result[i] = error;
3473			break;
3474		case 0:
3475			/* run this access check */
3476			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
3477			break;
3478		default:
3479			/* fatal lookup error */
3480
3481			goto out;
3482		}
3483	}
3484
3485	/* copy out results */
3486	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
3487
3488out:
3489	if (input && input != stack_input)
3490		FREE(input, M_TEMP);
3491	if (result)
3492		FREE(result, M_TEMP);
3493	if (vp)
3494		vnode_put(vp);
3495	if (dvp)
3496		vnode_put(dvp);
3497	if (IS_VALID_CRED(context.vc_ucred))
3498 		kauth_cred_unref(&context.vc_ucred);
3499	return(error);
3500}
3501
3502
3503/*
3504 * Returns:	0			Success
3505 *		namei:EFAULT		Bad address
3506 *		namei:ENAMETOOLONG	Filename too long
3507 *		namei:ENOENT		No such file or directory
3508 *		namei:ELOOP		Too many levels of symbolic links
3509 *		namei:EBADF		Bad file descriptor
3510 *		namei:ENOTDIR		Not a directory
3511 *		namei:???
3512 *		access1:
3513 */
3514int
3515access(__unused proc_t p, struct access_args *uap, __unused register_t *retval)
3516{
3517	int error;
3518	struct nameidata nd;
3519 	int niopts;
3520	struct vfs_context context;
3521
3522#if NAMEDRSRCFORK
3523	int is_namedstream = 0;
3524#endif
3525
3526 	/*
3527 	 * Access is defined as checking against the process'
3528 	 * real identity, even if operations are checking the
3529 	 * effective identity.  So we need to tweak the credential
3530 	 * in the context.
3531 	 */
3532	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3533	context.vc_thread = current_thread();
3534
3535	niopts = FOLLOW | AUDITVNPATH1;
3536 	/* need parent for vnode_authorize for deletion test */
3537 	if (uap->flags & _DELETE_OK)
3538 		niopts |= WANTPARENT;
3539 	NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
3540
3541#if NAMEDRSRCFORK
3542	/* access(F_OK) calls are allowed for resource forks. */
3543	if (uap->flags == F_OK)
3544		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3545#endif
3546 	error = namei(&nd);
3547 	if (error)
3548 		goto out;
3549
3550#if NAMEDRSRCFORK
3551	/* Grab reference on the shadow stream file vnode to
3552	 * force an inactive on release which will mark it for
3553	 * recycle
3554	 */
3555	if (vnode_isnamedstream(nd.ni_vp) &&
3556			(nd.ni_vp->v_parent != NULLVP) &&
3557			(vnode_isshadow(nd.ni_vp))) {
3558		is_namedstream = 1;
3559		vnode_ref(nd.ni_vp);
3560	}
3561#endif
3562
3563	error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
3564
3565#if NAMEDRSRCFORK
3566	if (is_namedstream) {
3567		vnode_rele(nd.ni_vp);
3568	}
3569#endif
3570
3571 	vnode_put(nd.ni_vp);
3572 	if (uap->flags & _DELETE_OK)
3573 		vnode_put(nd.ni_dvp);
3574  	nameidone(&nd);
3575
3576out:
3577 	kauth_cred_unref(&context.vc_ucred);
3578 	return(error);
3579}
3580
3581
3582/*
3583 * Returns:	0			Success
3584 *		EFAULT
3585 *	copyout:EFAULT
3586 *	namei:???
3587 *	vn_stat:???
3588 */
3589static int
3590stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3591{
3592	struct stat sb;
3593	struct stat64 sb64;
3594	struct user_stat user_sb;
3595	struct user_stat64 user_sb64;
3596	caddr_t sbp;
3597	int error, my_size;
3598	kauth_filesec_t fsec;
3599	size_t xsecurity_bufsize;
3600	void * statptr;
3601
3602#if NAMEDRSRCFORK
3603	int is_namedstream = 0;
3604	/* stat calls are allowed for resource forks. */
3605	ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3606#endif
3607	error = namei(ndp);
3608	if (error)
3609		return (error);
3610	fsec = KAUTH_FILESEC_NONE;
3611	if (isstat64 != 0)
3612		statptr	 = (void *)&sb64;
3613	else
3614		statptr	 = (void *)&sb;
3615
3616#if NAMEDRSRCFORK
3617	/* Grab reference on the shadow stream file vnode to
3618	 * force an inactive on release which will mark it for
3619	 * recycle.
3620	 */
3621	if (vnode_isnamedstream(ndp->ni_vp) &&
3622			(ndp->ni_vp->v_parent != NULLVP) &&
3623			(vnode_isshadow(ndp->ni_vp))) {
3624		is_namedstream = 1;
3625		vnode_ref (ndp->ni_vp);
3626	}
3627#endif
3628
3629	error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
3630
3631#if NAMEDRSRCFORK
3632	if (is_namedstream) {
3633		vnode_rele (ndp->ni_vp);
3634	}
3635#endif
3636
3637	vnode_put(ndp->ni_vp);
3638	nameidone(ndp);
3639
3640	if (error)
3641		return (error);
3642	/* Zap spare fields */
3643	if (isstat64 != 0) {
3644		sb64.st_lspare = 0;
3645		sb64.st_qspare[0] = 0LL;
3646		sb64.st_qspare[1] = 0LL;
3647		if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3648			munge_stat64(&sb64, &user_sb64);
3649			my_size = sizeof(user_sb64);
3650			sbp = (caddr_t)&user_sb64;
3651		} else {
3652			my_size = sizeof(sb64);
3653			sbp = (caddr_t)&sb64;
3654		}
3655		/*
3656		 * Check if we raced (post lookup) against the last unlink of a file.
3657		 */
3658		if ((sb64.st_nlink == 0) && S_ISREG(sb64.st_mode)) {
3659			sb64.st_nlink = 1;
3660		}
3661	} else {
3662		sb.st_lspare = 0;
3663		sb.st_qspare[0] = 0LL;
3664		sb.st_qspare[1] = 0LL;
3665		if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3666			munge_stat(&sb, &user_sb);
3667			my_size = sizeof(user_sb);
3668			sbp = (caddr_t)&user_sb;
3669		} else {
3670			my_size = sizeof(sb);
3671			sbp = (caddr_t)&sb;
3672		}
3673
3674		/*
3675		 * Check if we raced (post lookup) against the last unlink of a file.
3676		 */
3677		if ((sb.st_nlink == 0) && S_ISREG(sb.st_mode)) {
3678			sb.st_nlink = 1;
3679		}
3680	}
3681	if ((error = copyout(sbp, ub, my_size)) != 0)
3682		goto out;
3683
3684	/* caller wants extended security information? */
3685	if (xsecurity != USER_ADDR_NULL) {
3686
3687		/* did we get any? */
3688		if (fsec == KAUTH_FILESEC_NONE) {
3689			if (susize(xsecurity_size, 0) != 0) {
3690				error = EFAULT;
3691				goto out;
3692			}
3693		} else {
3694			/* find the user buffer size */
3695			xsecurity_bufsize = fusize(xsecurity_size);
3696
3697			/* copy out the actual data size */
3698			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3699				error = EFAULT;
3700				goto out;
3701			}
3702
3703			/* if the caller supplied enough room, copy out to it */
3704			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3705				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3706		}
3707	}
3708out:
3709	if (fsec != KAUTH_FILESEC_NONE)
3710		kauth_filesec_free(fsec);
3711	return (error);
3712}
3713
3714/*
3715 * Get file status; this version follows links.
3716 *
3717 * Returns:	0			Success
3718 *	stat2:???			[see stat2() in this file]
3719 */
3720static int
3721stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3722{
3723	struct nameidata nd;
3724	vfs_context_t ctx = vfs_context_current();
3725
3726	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
3727	    UIO_USERSPACE, path, ctx);
3728	return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3729}
3730
3731int
3732stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused register_t *retval)
3733{
3734	return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3735}
3736
3737/*
3738 * Returns:	0			Success
3739 *	stat1:???			[see stat1() in this file]
3740 */
3741int
3742stat(__unused proc_t p, struct stat_args *uap, __unused register_t *retval)
3743{
3744	return(stat1(uap->path, uap->ub, 0, 0, 0));
3745}
3746
3747int
3748stat64(__unused proc_t p, struct stat64_args *uap, __unused register_t *retval)
3749{
3750	return(stat1(uap->path, uap->ub, 0, 0, 1));
3751}
3752
3753int
3754stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused register_t *retval)
3755{
3756	return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3757}
3758/*
3759 * Get file status; this version does not follow links.
3760 */
3761static int
3762lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3763{
3764	struct nameidata nd;
3765	vfs_context_t ctx = vfs_context_current();
3766
3767	NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
3768	    UIO_USERSPACE, path, ctx);
3769
3770	return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3771}
3772
3773int
3774lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused register_t *retval)
3775{
3776	return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3777}
3778
3779int
3780lstat(__unused proc_t p, struct lstat_args *uap, __unused register_t *retval)
3781{
3782	return(lstat1(uap->path, uap->ub, 0, 0, 0));
3783}
3784int
3785lstat64(__unused proc_t p, struct lstat64_args *uap, __unused register_t *retval)
3786{
3787	return(lstat1(uap->path, uap->ub, 0, 0, 1));
3788}
3789
3790int
3791lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused register_t *retval)
3792{
3793	return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3794}
3795
3796/*
3797 * Get configurable pathname variables.
3798 *
3799 * Returns:	0			Success
3800 *	namei:???
3801 *	vn_pathconf:???
3802 *
3803 * Notes:	Global implementation  constants are intended to be
3804 *		implemented in this function directly; all other constants
3805 *		are per-FS implementation, and therefore must be handled in
3806 *		each respective FS, instead.
3807 *
3808 * XXX We implement some things globally right now that should actually be
3809 * XXX per-FS; we will need to deal with this at some point.
3810 */
3811/* ARGSUSED */
3812int
3813pathconf(__unused proc_t p, struct pathconf_args *uap, register_t *retval)
3814{
3815	int error;
3816	struct nameidata nd;
3817	vfs_context_t ctx = vfs_context_current();
3818
3819	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3820		UIO_USERSPACE, uap->path, ctx);
3821	error = namei(&nd);
3822	if (error)
3823		return (error);
3824
3825	error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
3826
3827	vnode_put(nd.ni_vp);
3828	nameidone(&nd);
3829	return (error);
3830}
3831
3832/*
3833 * Return target name of a symbolic link.
3834 */
3835/* ARGSUSED */
3836int
3837readlink(proc_t p, struct readlink_args *uap, register_t *retval)
3838{
3839	vnode_t vp;
3840	uio_t auio;
3841	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3842	int error;
3843	struct nameidata nd;
3844	vfs_context_t ctx = vfs_context_current();
3845	char uio_buf[ UIO_SIZEOF(1) ];
3846
3847	NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
3848		UIO_USERSPACE, uap->path, ctx);
3849	error = namei(&nd);
3850	if (error)
3851		return (error);
3852	vp = nd.ni_vp;
3853
3854	nameidone(&nd);
3855
3856	auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
3857								  &uio_buf[0], sizeof(uio_buf));
3858	uio_addiov(auio, uap->buf, uap->count);
3859	if (vp->v_type != VLNK)
3860		error = EINVAL;
3861	else {
3862#if CONFIG_MACF
3863		error = mac_vnode_check_readlink(ctx,
3864		    vp);
3865#endif
3866		if (error == 0)
3867			error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
3868		if (error == 0)
3869			error = VNOP_READLINK(vp, auio, ctx);
3870	}
3871	vnode_put(vp);
3872	// LP64todo - fix this
3873	*retval = uap->count - (int)uio_resid(auio);
3874	return (error);
3875}
3876
3877/*
3878 * Change file flags.
3879 */
3880static int
3881chflags1(vnode_t vp, int flags, vfs_context_t ctx)
3882{
3883	struct vnode_attr va;
3884 	kauth_action_t action;
3885	int error;
3886
3887	VATTR_INIT(&va);
3888	VATTR_SET(&va, va_flags, flags);
3889
3890#if CONFIG_MACF
3891	error = mac_vnode_check_setflags(ctx, vp, flags);
3892	if (error)
3893		goto out;
3894#endif
3895
3896	/* request authorisation, disregard immutability */
3897 	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
3898		goto out;
3899	/*
3900	 * Request that the auth layer disregard those file flags it's allowed to when
3901	 * authorizing this operation; we need to do this in order to be able to
3902	 * clear immutable flags.
3903	 */
3904	if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
3905		goto out;
3906	error = vnode_setattr(vp, &va, ctx);
3907
3908	if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
3909		error = ENOTSUP;
3910	}
3911out:
3912	vnode_put(vp);
3913	return(error);
3914}
3915
3916/*
3917 * Change flags of a file given a path name.
3918 */
3919/* ARGSUSED */
3920int
3921chflags(__unused proc_t p, struct chflags_args *uap, __unused register_t *retval)
3922{
3923	vnode_t vp;
3924	vfs_context_t ctx = vfs_context_current();
3925	int error;
3926	struct nameidata nd;
3927
3928	AUDIT_ARG(fflags, uap->flags);
3929	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3930		UIO_USERSPACE, uap->path, ctx);
3931	error = namei(&nd);
3932	if (error)
3933		return (error);
3934	vp = nd.ni_vp;
3935	nameidone(&nd);
3936
3937	error = chflags1(vp, uap->flags, ctx);
3938
3939	return(error);
3940}
3941
3942/*
3943 * Change flags of a file given a file descriptor.
3944 */
3945/* ARGSUSED */
3946int
3947fchflags(__unused proc_t p, struct fchflags_args *uap, __unused register_t *retval)
3948{
3949	vnode_t vp;
3950	int error;
3951
3952	AUDIT_ARG(fd, uap->fd);
3953	AUDIT_ARG(fflags, uap->flags);
3954	if ( (error = file_vnode(uap->fd, &vp)) )
3955		return (error);
3956
3957	if ((error = vnode_getwithref(vp))) {
3958		file_drop(uap->fd);
3959		return(error);
3960	}
3961
3962	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3963
3964	error = chflags1(vp, uap->flags, vfs_context_current());
3965
3966	file_drop(uap->fd);
3967	return (error);
3968}
3969
3970/*
3971 * Change security information on a filesystem object.
3972 *
3973 * Returns:	0			Success
3974 *		EPERM			Operation not permitted
3975 *		vnode_authattr:???	[anything vnode_authattr can return]
3976 *		vnode_authorize:???	[anything vnode_authorize can return]
3977 *		vnode_setattr:???	[anything vnode_setattr can return]
3978 *
3979 * Notes:	If vnode_authattr or vnode_authorize return EACCES, it will be
3980 *		translated to EPERM before being returned.
3981 */
3982static int
3983chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
3984{
3985	kauth_action_t action;
3986	int error;
3987
3988	AUDIT_ARG(mode, (mode_t)vap->va_mode);
3989#warning XXX audit new args
3990
3991#if NAMEDSTREAMS
3992	/* chmod calls are not allowed for resource forks. */
3993	if (vp->v_flag & VISNAMEDSTREAM) {
3994		return (EPERM);
3995	}
3996#endif
3997
3998#if CONFIG_MACF
3999	error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
4000	if (error)
4001		return (error);
4002#endif
4003
4004 	/* make sure that the caller is allowed to set this security information */
4005	if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
4006	    ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4007		if (error == EACCES)
4008			error = EPERM;
4009		return(error);
4010	}
4011
4012	error = vnode_setattr(vp, vap, ctx);
4013
4014	return (error);
4015}
4016
4017
4018/*
4019 * Change mode of a file given path name.
4020 *
4021 * Returns:	0			Success
4022 *		namei:???		[anything namei can return]
4023 *		chmod2:???		[anything chmod2 can return]
4024 */
4025static int
4026chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
4027{
4028	struct nameidata nd;
4029	int error;
4030
4031	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4032		UIO_USERSPACE, path, ctx);
4033	if ((error = namei(&nd)))
4034		return (error);
4035	error = chmod2(ctx, nd.ni_vp, vap);
4036	vnode_put(nd.ni_vp);
4037	nameidone(&nd);
4038	return(error);
4039}
4040
4041/*
4042 * A chmod system call using an extended argument list compared to the regular
4043 * system call 'mkfifo'.
4044 *
4045 * Parameters:	p			Process requesting the open
4046 *		uap			User argument descriptor (see below)
4047 *		retval			(ignored)
4048 *
4049 * Indirect:	uap->path		Path to object (same as 'chmod')
4050 *		uap->uid		UID to set
4051 *		uap->gid		GID to set
4052 *		uap->mode		File mode to set (same as 'chmod')
4053 *		uap->xsecurity		ACL to set (or delete)
4054 *
4055 * Returns:	0			Success
4056 *		!0			errno value
4057 *
4058 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
4059 *
4060 * XXX:		We should enummerate the possible errno values here, and where
4061 *		in the code they originated.
4062 */
4063int
4064chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused register_t *retval)
4065{
4066	int error;
4067	struct vnode_attr va;
4068	kauth_filesec_t xsecdst;
4069
4070	VATTR_INIT(&va);
4071	if (uap->mode != -1)
4072		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4073	if (uap->uid != KAUTH_UID_NONE)
4074		VATTR_SET(&va, va_uid, uap->uid);
4075	if (uap->gid != KAUTH_GID_NONE)
4076		VATTR_SET(&va, va_gid, uap->gid);
4077
4078	xsecdst = NULL;
4079	switch(uap->xsecurity) {
4080		/* explicit remove request */
4081	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
4082		VATTR_SET(&va, va_acl, NULL);
4083		break;
4084		/* not being set */
4085	case USER_ADDR_NULL:
4086		break;
4087	default:
4088		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4089			return(error);
4090		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4091		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4092	}
4093
4094	error = chmod1(vfs_context_current(), uap->path, &va);
4095
4096	if (xsecdst != NULL)
4097		kauth_filesec_free(xsecdst);
4098	return(error);
4099}
4100
4101/*
4102 * Returns:	0			Success
4103 *		chmod1:???		[anything chmod1 can return]
4104 */
4105int
4106chmod(__unused proc_t p, struct chmod_args *uap, __unused register_t *retval)
4107{
4108	struct vnode_attr va;
4109
4110	VATTR_INIT(&va);
4111	VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4112
4113	return(chmod1(vfs_context_current(), uap->path, &va));
4114}
4115
4116/*
4117 * Change mode of a file given a file descriptor.
4118 */
4119static int
4120fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4121{
4122	vnode_t vp;
4123	int error;
4124
4125	AUDIT_ARG(fd, fd);
4126
4127	if ((error = file_vnode(fd, &vp)) != 0)
4128		return (error);
4129	if ((error = vnode_getwithref(vp)) != 0) {
4130		file_drop(fd);
4131		return(error);
4132	}
4133	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4134
4135	error = chmod2(vfs_context_current(), vp, vap);
4136	(void)vnode_put(vp);
4137	file_drop(fd);
4138
4139	return (error);
4140}
4141
4142int
4143fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused register_t *retval)
4144{
4145	int error;
4146	struct vnode_attr va;
4147	kauth_filesec_t xsecdst;
4148
4149	VATTR_INIT(&va);
4150	if (uap->mode != -1)
4151		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4152	if (uap->uid != KAUTH_UID_NONE)
4153		VATTR_SET(&va, va_uid, uap->uid);
4154	if (uap->gid != KAUTH_GID_NONE)
4155		VATTR_SET(&va, va_gid, uap->gid);
4156
4157	xsecdst = NULL;
4158	switch(uap->xsecurity) {
4159	case USER_ADDR_NULL:
4160		VATTR_SET(&va, va_acl, NULL);
4161		break;
4162	case CAST_USER_ADDR_T(-1):
4163		break;
4164	default:
4165		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4166			return(error);
4167		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4168	}
4169
4170	error = fchmod1(p, uap->fd, &va);
4171
4172
4173	switch(uap->xsecurity) {
4174	case USER_ADDR_NULL:
4175	case CAST_USER_ADDR_T(-1):
4176		break;
4177	default:
4178		if (xsecdst != NULL)
4179			kauth_filesec_free(xsecdst);
4180	}
4181	return(error);
4182}
4183
4184int
4185fchmod(proc_t p, struct fchmod_args *uap, __unused register_t *retval)
4186{
4187	struct vnode_attr va;
4188
4189	VATTR_INIT(&va);
4190	VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4191
4192	return(fchmod1(p, uap->fd, &va));
4193}
4194
4195
4196/*
4197 * Set ownership given a path name.
4198 */
4199/* ARGSUSED */
4200static int
4201chown1(vfs_context_t ctx, struct chown_args *uap, __unused register_t *retval, int follow)
4202{
4203	vnode_t vp;
4204	struct vnode_attr va;
4205	int error;
4206	struct nameidata nd;
4207	kauth_action_t action;
4208
4209	AUDIT_ARG(owner, uap->uid, uap->gid);
4210
4211	NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4212		UIO_USERSPACE, uap->path, ctx);
4213	error = namei(&nd);
4214	if (error)
4215		return (error);
4216	vp = nd.ni_vp;
4217
4218	nameidone(&nd);
4219
4220	VATTR_INIT(&va);
4221	if (uap->uid != VNOVAL)
4222		VATTR_SET(&va, va_uid, uap->uid);
4223	if (uap->gid != VNOVAL)
4224		VATTR_SET(&va, va_gid, uap->gid);
4225
4226#if CONFIG_MACF
4227	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4228	if (error)
4229		goto out;
4230#endif
4231
4232	/* preflight and authorize attribute changes */
4233	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4234		goto out;
4235	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4236		goto out;
4237	error = vnode_setattr(vp, &va, ctx);
4238
4239out:
4240	/*
4241	 * EACCES is only allowed from namei(); permissions failure should
4242	 * return EPERM, so we need to translate the error code.
4243	 */
4244	if (error == EACCES)
4245		error = EPERM;
4246
4247	vnode_put(vp);
4248	return (error);
4249}
4250
4251int
4252chown(__unused proc_t p, struct chown_args *uap, register_t *retval)
4253{
4254	return chown1(vfs_context_current(), uap, retval, 1);
4255}
4256
4257int
4258lchown(__unused proc_t p, struct lchown_args *uap, register_t *retval)
4259{
4260	/* Argument list identical, but machine generated; cast for chown1() */
4261	return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
4262}
4263
4264/*
4265 * Set ownership given a file descriptor.
4266 */
4267/* ARGSUSED */
4268int
4269fchown(__unused proc_t p, struct fchown_args *uap, __unused register_t *retval)
4270{
4271	struct vnode_attr va;
4272	vfs_context_t ctx = vfs_context_current();
4273	vnode_t vp;
4274	int error;
4275	kauth_action_t action;
4276
4277	AUDIT_ARG(owner, uap->uid, uap->gid);
4278	AUDIT_ARG(fd, uap->fd);
4279
4280	if ( (error = file_vnode(uap->fd, &vp)) )
4281		return (error);
4282
4283	if ( (error = vnode_getwithref(vp)) ) {
4284		file_drop(uap->fd);
4285		return(error);
4286	}
4287	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4288
4289	VATTR_INIT(&va);
4290	if (uap->uid != VNOVAL)
4291		VATTR_SET(&va, va_uid, uap->uid);
4292	if (uap->gid != VNOVAL)
4293		VATTR_SET(&va, va_gid, uap->gid);
4294
4295#if NAMEDSTREAMS
4296	/* chown calls are not allowed for resource forks. */
4297	if (vp->v_flag & VISNAMEDSTREAM) {
4298		error = EPERM;
4299		goto out;
4300	}
4301#endif
4302
4303#if CONFIG_MACF
4304	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4305	if (error)
4306		goto out;
4307#endif
4308
4309 	/* preflight and authorize attribute changes */
4310	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4311		goto out;
4312	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4313		if (error == EACCES)
4314			error = EPERM;
4315		goto out;
4316	}
4317	error = vnode_setattr(vp, &va, ctx);
4318
4319out:
4320	(void)vnode_put(vp);
4321	file_drop(uap->fd);
4322	return (error);
4323}
4324
4325static int
4326getutimes(user_addr_t usrtvp, struct timespec *tsp)
4327{
4328	struct user_timeval tv[2];
4329	int error;
4330
4331	if (usrtvp == USER_ADDR_NULL) {
4332		struct timeval old_tv;
4333		/* XXX Y2038 bug because of microtime argument */
4334		microtime(&old_tv);
4335		TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
4336		tsp[1] = tsp[0];
4337	} else {
4338		if (IS_64BIT_PROCESS(current_proc())) {
4339			error = copyin(usrtvp, (void *)tv, sizeof(tv));
4340		} else {
4341			struct timeval old_tv[2];
4342			error = copyin(usrtvp, (void *)old_tv, sizeof(old_tv));
4343			tv[0].tv_sec = old_tv[0].tv_sec;
4344			tv[0].tv_usec = old_tv[0].tv_usec;
4345			tv[1].tv_sec = old_tv[1].tv_sec;
4346			tv[1].tv_usec = old_tv[1].tv_usec;
4347		}
4348		if (error)
4349			return (error);
4350		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4351		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4352	}
4353	return 0;
4354}
4355
4356static int
4357setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
4358	int nullflag)
4359{
4360	int error;
4361	struct vnode_attr va;
4362	kauth_action_t action;
4363
4364	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4365
4366	VATTR_INIT(&va);
4367	VATTR_SET(&va, va_access_time, ts[0]);
4368	VATTR_SET(&va, va_modify_time, ts[1]);
4369	if (nullflag)
4370		va.va_vaflags |= VA_UTIMES_NULL;
4371
4372#if NAMEDSTREAMS
4373	/* utimes calls are not allowed for resource forks. */
4374	if (vp->v_flag & VISNAMEDSTREAM) {
4375		error = EPERM;
4376		goto out;
4377	}
4378#endif
4379
4380#if CONFIG_MACF
4381	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
4382	if (error)
4383		goto out;
4384#endif
4385	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
4386		if (!nullflag && error == EACCES)
4387			error = EPERM;
4388		goto out;
4389	}
4390
4391	/* since we may not need to auth anything, check here */
4392	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4393		if (!nullflag && error == EACCES)
4394			error = EPERM;
4395		goto out;
4396	}
4397	error = vnode_setattr(vp, &va, ctx);
4398
4399out:
4400	return error;
4401}
4402
4403/*
4404 * Set the access and modification times of a file.
4405 */
4406/* ARGSUSED */
4407int
4408utimes(__unused proc_t p, struct utimes_args *uap, __unused register_t *retval)
4409{
4410	struct timespec ts[2];
4411	user_addr_t usrtvp;
4412	int error;
4413	struct nameidata nd;
4414	vfs_context_t ctx = vfs_context_current();
4415
4416	/*
4417	 * AUDIT: Needed to change the order of operations to do the
4418	 * name lookup first because auditing wants the path.
4419	 */
4420	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4421		UIO_USERSPACE, uap->path, ctx);
4422	error = namei(&nd);
4423	if (error)
4424		return (error);
4425	nameidone(&nd);
4426
4427	/*
4428	 * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
4429	 * the current time instead.
4430	 */
4431	usrtvp = uap->tptr;
4432	if ((error = getutimes(usrtvp, ts)) != 0)
4433		goto out;
4434
4435	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
4436
4437out:
4438	vnode_put(nd.ni_vp);
4439	return (error);
4440}
4441
4442/*
4443 * Set the access and modification times of a file.
4444 */
4445/* ARGSUSED */
4446int
4447futimes(__unused proc_t p, struct futimes_args *uap, __unused register_t *retval)
4448{
4449	struct timespec ts[2];
4450	vnode_t vp;
4451	user_addr_t usrtvp;
4452	int error;
4453
4454	AUDIT_ARG(fd, uap->fd);
4455	usrtvp = uap->tptr;
4456	if ((error = getutimes(usrtvp, ts)) != 0)
4457		return (error);
4458	if ((error = file_vnode(uap->fd, &vp)) != 0)
4459		return (error);
4460	if((error = vnode_getwithref(vp))) {
4461		file_drop(uap->fd);
4462		return(error);
4463	}
4464
4465	error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
4466	vnode_put(vp);
4467	file_drop(uap->fd);
4468	return(error);
4469}
4470
4471/*
4472 * Truncate a file given its path name.
4473 */
4474/* ARGSUSED */
4475int
4476truncate(__unused proc_t p, struct truncate_args *uap, __unused register_t *retval)
4477{
4478	vnode_t vp;
4479	struct vnode_attr va;
4480	vfs_context_t ctx = vfs_context_current();
4481	int error;
4482	struct nameidata nd;
4483	kauth_action_t action;
4484
4485	if (uap->length < 0)
4486		return(EINVAL);
4487	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4488		UIO_USERSPACE, uap->path, ctx);
4489	if ((error = namei(&nd)))
4490		return (error);
4491	vp = nd.ni_vp;
4492
4493	nameidone(&nd);
4494
4495	VATTR_INIT(&va);
4496	VATTR_SET(&va, va_data_size, uap->length);
4497
4498#if CONFIG_MACF
4499	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
4500	if (error)
4501		goto out;
4502#endif
4503
4504	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4505		goto out;
4506	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4507		goto out;
4508	error = vnode_setattr(vp, &va, ctx);
4509out:
4510	vnode_put(vp);
4511	return (error);
4512}
4513
4514/*
4515 * Truncate a file given a file descriptor.
4516 */
4517/* ARGSUSED */
4518int
4519ftruncate(proc_t p, struct ftruncate_args *uap, register_t *retval)
4520{
4521	vfs_context_t ctx = vfs_context_current();
4522	struct vnode_attr va;
4523	vnode_t vp;
4524	struct fileproc *fp;
4525	int error ;
4526	int fd = uap->fd;
4527
4528	AUDIT_ARG(fd, uap->fd);
4529	if (uap->length < 0)
4530		return(EINVAL);
4531
4532	if ( (error = fp_lookup(p,fd,&fp,0)) ) {
4533		return(error);
4534	}
4535
4536	if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
4537		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
4538		goto out;
4539	}
4540	if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
4541		error = EINVAL;
4542		goto out;
4543	}
4544
4545	vp = (vnode_t)fp->f_fglob->fg_data;
4546
4547	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
4548		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
4549		error = EINVAL;
4550		goto out;
4551	}
4552
4553	if ((error = vnode_getwithref(vp)) != 0) {
4554		goto out;
4555	}
4556
4557	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4558
4559#if CONFIG_MACF
4560	error = mac_vnode_check_truncate(ctx,
4561	    fp->f_fglob->fg_cred, vp);
4562	if (error) {
4563		(void)vnode_put(vp);
4564		goto out;
4565	}
4566#endif
4567	VATTR_INIT(&va);
4568	VATTR_SET(&va, va_data_size, uap->length);
4569	error = vnode_setattr(vp, &va, ctx);
4570	(void)vnode_put(vp);
4571out:
4572	file_drop(fd);
4573	return (error);
4574}
4575
4576
4577/*
4578 * Sync an open file.
4579 */
4580/* ARGSUSED */
4581int
4582fsync(proc_t p, struct fsync_args *uap, register_t *retval)
4583{
4584	__pthread_testcancel(1);
4585	return(fsync_nocancel(p, (struct fsync_nocancel_args *)uap, retval));
4586}
4587
4588int
4589fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *retval)
4590{
4591	vnode_t vp;
4592	struct fileproc *fp;
4593	vfs_context_t ctx = vfs_context_current();
4594	int error;
4595
4596	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
4597		return (error);
4598	if ( (error = vnode_getwithref(vp)) ) {
4599		file_drop(uap->fd);
4600		return(error);
4601	}
4602
4603	error = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4604
4605#if NAMEDRSRCFORK
4606	/* Sync resource fork shadow file if necessary. */
4607	if ((error == 0) &&
4608	    (vp->v_flag & VISNAMEDSTREAM) &&
4609	    (vp->v_parent != NULLVP) &&
4610	    (vnode_isshadow(vp)) &&
4611	    (fp->f_flags & FP_WRITTEN)) {
4612		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
4613	}
4614#endif
4615
4616	(void)vnode_put(vp);
4617	file_drop(uap->fd);
4618	return (error);
4619}
4620
4621/*
4622 * Duplicate files.  Source must be a file, target must be a file or
4623 * must not exist.
4624 *
4625 * XXX Copyfile authorisation checking is woefully inadequate, and will not
4626 *     perform inheritance correctly.
4627 */
4628/* ARGSUSED */
4629int
4630copyfile(__unused proc_t p, struct copyfile_args *uap, __unused register_t *retval)
4631{
4632	vnode_t tvp, fvp, tdvp, sdvp;
4633	struct nameidata fromnd, tond;
4634	int error;
4635	vfs_context_t ctx = vfs_context_current();
4636
4637	/* Check that the flags are valid. */
4638
4639	if (uap->flags & ~CPF_MASK) {
4640		return(EINVAL);
4641	}
4642
4643	NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
4644		UIO_USERSPACE, uap->from, ctx);
4645	if ((error = namei(&fromnd)))
4646		return (error);
4647	fvp = fromnd.ni_vp;
4648
4649	NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
4650	    UIO_USERSPACE, uap->to, ctx);
4651	if ((error = namei(&tond))) {
4652		goto out1;
4653	}
4654	tdvp = tond.ni_dvp;
4655	tvp = tond.ni_vp;
4656
4657	if (tvp != NULL) {
4658		if (!(uap->flags & CPF_OVERWRITE)) {
4659			error = EEXIST;
4660			goto out;
4661		}
4662	}
4663	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
4664		error = EISDIR;
4665		goto out;
4666	}
4667
4668	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4669		goto out;
4670
4671	if (fvp == tdvp)
4672		error = EINVAL;
4673	/*
4674	 * If source is the same as the destination (that is the
4675	 * same inode number) then there is nothing to do.
4676	 * (fixed to have POSIX semantics - CSM 3/2/98)
4677	 */
4678	if (fvp == tvp)
4679		error = -1;
4680	if (!error)
4681	        error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
4682out:
4683	sdvp = tond.ni_startdir;
4684	/*
4685	 * nameidone has to happen before we vnode_put(tdvp)
4686	 * since it may need to release the fs_nodelock on the tdvp
4687	 */
4688	nameidone(&tond);
4689
4690	if (tvp)
4691		vnode_put(tvp);
4692	vnode_put(tdvp);
4693	vnode_put(sdvp);
4694out1:
4695	vnode_put(fvp);
4696
4697	if (fromnd.ni_startdir)
4698	        vnode_put(fromnd.ni_startdir);
4699	nameidone(&fromnd);
4700
4701	if (error == -1)
4702		return (0);
4703	return (error);
4704}
4705
4706
/*
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 *
 * Outline of what the code below does:
 *  - looks up uap->from (DELETE, WANTPARENT) and uap->to (RENAME,
 *    WANTPARENT), yielding fvp/fdvp and tvp/tdvp;
 *  - runs the MAC rename hooks (CONFIG_MACF) and the kauth authorization
 *    checks for deleting the old name and adding the new one;
 *  - rejects type mismatches (ENOTDIR / EISDIR), renaming onto the
 *    source's own parent slot, "." / "..", cross-mount renames (EXDEV)
 *    and moving a directory directly underneath itself (EINVAL);
 *  - for a directory changing parents, drops every iocount, takes the
 *    per-mount rename lock and restarts the lookups with the lock held;
 *  - calls VNOP_RENAME, then posts the kauth file-op notification and
 *    (CONFIG_FSE) an FSE_RENAME event, rewrites f_mntonname when a mount
 *    point was renamed, and refreshes fvp's cached name/parent.
 *
 * The whole operation is re-driven from "retry" when a vnode_authorize()
 * call involving the target reports ENOENT or when VNOP_RENAME reports
 * ERECYCLE.
 *
 * Returns 0 on success, otherwise an errno from the steps above
 * (locally generated values: EINVAL, ENOTDIR, EISDIR, EXDEV, ENOENT,
 * ENOMEM, EBUSY).
 */
/* ARGSUSED */
int
rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval)
{
	vnode_t tvp, tdvp;
	vnode_t fvp, fdvp;
	struct nameidata fromnd, tond;
	vfs_context_t ctx = vfs_context_current();
	int error;
	int do_retry;
	int mntrename;
	int need_event;
	const char *oname;
	char *from_name = NULL, *to_name = NULL;
	int from_len, to_len;
	int holding_mntlock;
	mount_t locked_mp = NULL;
	vnode_t oparent;
	fse_info from_finfo, to_finfo;

	holding_mntlock = 0;
    do_retry = 0;
	/*
	 * Everything from here down is repeated on each retry; all four
	 * vnode pointers are reset so the cleanup at out1 only releases
	 * what this pass actually acquired.
	 * NOTE(review): nothing in this function bounds the number of
	 * retries - confirm that is acceptable.
	 */
retry:
	fvp = tvp = NULL;
	fdvp = tdvp = NULL;
	mntrename = FALSE;

	NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);

	if ( (error = namei(&fromnd)) )
	        goto out1;
	fdvp = fromnd.ni_dvp;
	fvp  = fromnd.ni_vp;

#if CONFIG_MACF
	error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
	if (error)
		goto out1;
#endif

	NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
	if (fvp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;

	if ( (error = namei(&tond)) ) {
		/*
		 * Translate error code for rename("dir1", "dir2/.").
		 */
	        if (error == EISDIR && fvp->v_type == VDIR)
		        error = EINVAL;
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp  = tond.ni_vp;

#if CONFIG_MACF
	error = mac_vnode_check_rename_to(ctx,
	    tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
	if (error)
		goto out1;
#endif

	/* An existing target must be of the same kind (dir vs. non-dir). */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out1;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out1;
		}
	}
	/* The source may not be the directory that will contain the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out1;
	}
        /*
         * If the source and destination are the same (i.e. they're
         * links to the same vnode) and the target file system is
         * case sensitive, then there is nothing to do.
         */
	if (fvp == tvp) {
		int pathconf_val;

		/*
		 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
		 * then assume that this file system is case sensitive.
		 */
		if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
		    pathconf_val != 0) {
			/* error is still 0 here, so this path reports success */
			goto out1;
		}
	}

	/*
	 * Authorization.
	 *
	 * If tvp is a directory and not the same as fdvp, or tdvp is not
	 * the same as fdvp, the node is moving between directories and we
	 * need rights to remove from the old and add to the new.
	 *
	 * If tvp already exists and is not a directory, we need to be
	 * allowed to delete it.
	 *
	 * Note that we do not inherit when renaming.
	 *
	 * XXX This needs to be revisited to implement the deferred-inherit bit
	 */
	{
		int moving = 0;

		error = 0;
		if ((tvp != NULL) && vnode_isdir(tvp)) {
			if (tvp != fdvp)
				moving = 1;
		} else if (tdvp != fdvp) {
			moving = 1;
		}
		/*
		 * must have delete rights to remove the old name even in
		 * the simple case of fdvp == tdvp.
		 *
		 * If fvp is a directory, and we are changing its parent,
		 * then we also need rights to rewrite its ".." entry as well.
		 */
		if (vnode_isdir(fvp)) {
			if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
				goto auth_exit;
		} else {
		if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
			goto auth_exit;
		}
		if (moving) {
			/* moving into tdvp or tvp, must have rights to add */
			if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
				 NULL,
				 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
				 ctx)) != 0) {
                /*
                 * We could encounter a race where after doing the namei, tvp stops
                 * being valid. If so, simply re-drive the rename call from the
                 * top.
                 */
                 if (error == ENOENT) {
                     do_retry = 1;
                 }
				goto auth_exit;
			}
		} else {
			/* node staying in same directory, must be allowed to add new name */
			if ((error = vnode_authorize(fdvp, NULL,
				 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
				goto auth_exit;
		}
		/* overwriting tvp */
		if ((tvp != NULL) && !vnode_isdir(tvp) &&
		    ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
            /*
             * We could encounter a race where after doing the namei, tvp stops
             * being valid. If so, simply re-drive the rename call from the
             * top.
             */
            if (error == ENOENT) {
                do_retry = 1;
            }
			goto auth_exit;
		}

		/* XXX more checks? */

auth_exit:
		/* authorization denied */
		if (error != 0)
			goto out1;
	}
	/*
	 * Allow the renaming of mount points.
	 * - target must not exist
	 * - target must reside in the same directory as source
	 * - union mounts cannot be renamed
	 * - "/" cannot be renamed
	 */
	if ((fvp->v_flag & VROOT) &&
	    (fvp->v_type == VDIR) &&
	    (tvp == NULL)  &&
	    (fvp->v_mountedhere == NULL)  &&
	    (fdvp == tdvp)  &&
	    ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
	    (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
		vnode_t coveredvp;

		/* switch fvp to the covered vnode */
		coveredvp = fvp->v_mount->mnt_vnodecovered;
		if ( (vnode_getwithref(coveredvp)) ) {
		        error = ENOENT;
			goto out1;
		}
		/* trade the iocount on the mounted root for the one just taken */
		vnode_put(fvp);

		fvp = coveredvp;
		mntrename = TRUE;
	}
	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out1;
	}
	/*
	 * Avoid renaming "." and "..".
	 */
	if (fvp->v_type == VDIR &&
	    ((fdvp == fvp) ||
	     (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
	     ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
		error = EINVAL;
		goto out1;
	}
	/*
	 * The following edge case is caught here:
	 * (to cannot be a descendant of from)
	 *
	 *       o fdvp
	 *      /
	 *     /
	 *    o fvp
	 *     \
	 *      \
	 *       o tdvp
	 *      /
	 *     /
	 *    o tvp
	 *
	 * Only the immediate parent of tdvp is compared against fvp here.
	 */
	if (tdvp->v_parent == fvp) {
		error = EINVAL;
		goto out1;
	}

	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do...
	 * EXCEPT if the underlying file system supports case
	 * insensitivity and is case preserving.  In this case
	 * the file system needs to handle the special case of
	 * getting the same vnode as target (fvp) and source (tvp).
	 *
	 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
	 * and _PC_CASE_PRESERVING can have this exception, and they need to
	 * handle the special case of getting the same vnode as target and
	 * source.  NOTE: Then the target is unlocked going into vnop_rename,
	 * so not to cause locking problems. There is a single reference on tvp.
	 *
	 * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE
	 * that correct behaviour then is just to remove the source (link)
	 */
	if (fvp == tvp && fdvp == tdvp) {
		/* byte-identical names in the same directory: a no-op, error is 0 */
		if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
	       	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
			  fromnd.ni_cnd.cn_namelen)) {
			goto out1;
		}
	}

	if (holding_mntlock && fvp->v_mount != locked_mp) {
	        /*
		 * we're holding a reference and lock
		 * on locked_mp, but it no longer matches
		 * what we want to do... so drop our hold
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
	        holding_mntlock = 0;
	}
	if (tdvp != fdvp && fvp->v_type == VDIR) {
	        /*
		 * serialize renames that re-shape
		 * the tree... if holding_mntlock is
		 * set, then we're ready to go...
		 * otherwise we
		 * first need to drop the iocounts
		 * we picked up, second take the
		 * lock to serialize the access,
		 * then finally start the lookup
		 * process over with the lock held
		 */
	        if (!holding_mntlock) {
		        /*
			 * need to grab a reference on
			 * the mount point before we
			 * drop all the iocounts... once
			 * the iocounts are gone, the mount
			 * could follow
			 */
			locked_mp = fvp->v_mount;
			mount_ref(locked_mp, 0);

			/*
			 * nameidone has to happen before we vnode_put(tvp)
			 * since it may need to release the fs_nodelock on the tvp
			 */
			nameidone(&tond);

			if (tvp)
			        vnode_put(tvp);
			vnode_put(tdvp);

			/*
			 * nameidone has to happen before we vnode_put(fdvp)
			 * since it may need to release the fs_nodelock on the fvp
			 */
			nameidone(&fromnd);

			vnode_put(fvp);
			vnode_put(fdvp);

			mount_lock_renames(locked_mp);
			holding_mntlock = 1;

			goto retry;
		}
	} else {
	        /*
		 * when we dropped the iocounts to take
		 * the lock, we allowed the identity of
		 * the various vnodes to change... if they did,
		 * we may no longer be dealing with a rename
		 * that reshapes the tree... once we're holding
		 * the iocounts, the vnodes can't change type
		 * so we're free to drop the lock at this point
		 * and continue on
		 */
	        if (holding_mntlock) {
			mount_unlock_renames(locked_mp);
			mount_drop(locked_mp, 0);
		        holding_mntlock = 0;
		}
	}
	// save these off so we can later verify that fvp is the same
	oname   = fvp->v_name;
	oparent = fvp->v_parent;

#if CONFIG_FSE
	need_event = need_fsevent(FSE_RENAME, fvp);
	if (need_event) {
	        get_fse_info(fvp, &from_finfo, ctx);

		if (tvp) {
		        get_fse_info(tvp, &to_finfo, ctx);
		}
	}
#else
	need_event = 0;
#endif /* CONFIG_FSE */

	/*
	 * Build "<parent path>/<component>" strings for both names, but only
	 * when somebody will consume them (an fsevent or a kauth listener).
	 * NOTE(review): the return value of vn_getpath() is ignored for both
	 * paths; from_len/to_len are used as it left them - confirm that a
	 * failed vn_getpath() cannot leave them unusable.
	 */
	if (need_event || kauth_authorize_fileop_has_listeners()) {
		GET_PATH(from_name);
		if (from_name == NULL) {
			error = ENOMEM;
			goto out1;
		}
		from_len = MAXPATHLEN;
		vn_getpath(fdvp, from_name, &from_len);
		if ((from_len + 1 + fromnd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
		    if (from_len > 2) {
			from_name[from_len-1] = '/';
		    } else {
			from_len--;
		    }
		    strlcpy(&from_name[from_len], fromnd.ni_cnd.cn_nameptr, MAXPATHLEN-from_len);
		    from_len += fromnd.ni_cnd.cn_namelen + 1;
		    from_name[from_len] = '\0';
		}

		GET_PATH(to_name);
		if (to_name == NULL) {
			/* from_name is released at out1 */
			error = ENOMEM;
			goto out1;
		}

		to_len = MAXPATHLEN;
		vn_getpath(tdvp, to_name, &to_len);
		// if the path is not just "/", then append a "/"
		if ((to_len + 1 + tond.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
		    if (to_len > 2) {
			to_name[to_len-1] = '/';
		    } else {
			to_len--;
		    }
		    strlcpy(&to_name[to_len], tond.ni_cnd.cn_nameptr, MAXPATHLEN-to_len);
		    to_len += tond.ni_cnd.cn_namelen + 1;
		    to_name[to_len] = '\0';
		}
	}

	error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
			    tdvp, tvp, &tond.ni_cnd,
			    ctx);

	if (holding_mntlock) {
		/*
		 * we can drop our serialization
		 * lock now
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (error) {
        /*
         * We may encounter a race in the VNOP where the destination didn't
         * exist when we did the namei, but it does by the time we go and
		 * try to create the entry. In this case, we should re-drive this rename
		 * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
		 * but other filesystem susceptible to this race could return it, too.
		 */
        if (error == ERECYCLE) {
            do_retry = 1;
        }

		goto out1;
	}

	/* call out to allow 3rd party notification of rename.
	 * Ignore result of kauth_authorize_fileop call.
	 * Both name pointers are NULL here when no paths were built above.
	 */
	kauth_authorize_fileop(vfs_context_ucred(ctx),
			KAUTH_FILEOP_RENAME,
			(uintptr_t)from_name, (uintptr_t)to_name);

#if CONFIG_FSE
	/*
	 * NOTE(review): this is gated on the names only.  If the names were
	 * built solely because kauth listeners exist (need_event == 0),
	 * from_finfo/to_finfo were never filled in by get_fse_info() -
	 * confirm add_fsevent() tolerates that.
	 */
	if (from_name != NULL && to_name != NULL) {
	        if (tvp) {
		        add_fsevent(FSE_RENAME, ctx,
				    FSE_ARG_STRING, from_len, from_name,
				    FSE_ARG_FINFO, &from_finfo,
				    FSE_ARG_STRING, to_len, to_name,
				    FSE_ARG_FINFO, &to_finfo,
				    FSE_ARG_DONE);
		} else {
		        add_fsevent(FSE_RENAME, ctx,
				    FSE_ARG_STRING, from_len, from_name,
				    FSE_ARG_FINFO, &from_finfo,
				    FSE_ARG_STRING, to_len, to_name,
				    FSE_ARG_DONE);
		}
	}
#endif /* CONFIG_FSE */

	/*
	 * update filesystem's mount point data
	 */
	if (mntrename) {
	        char *cp, *pathend, *mpname;
		char * tobuf;
		struct mount *mp;
		int maxlen;
		size_t len = 0;

		/* fvp is the covered vnode at this point (switched above) */
		mp = fvp->v_mountedhere;

		/*
		 * NOTE(review): VNOP_RENAME has already succeeded here, yet
		 * EBUSY is returned to the caller and the identity update
		 * below is skipped - confirm this is intended.
		 */
		if (vfs_busy(mp, LK_NOWAIT)) {
		        error = EBUSY;
			goto out1;
		}
		MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);

		error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
		if (!error) {
		        /* find current mount point prefix */
		        pathend = &mp->mnt_vfsstat.f_mntonname[0];
			for (cp = pathend; *cp != '\0'; ++cp) {
			        if (*cp == '/')
				        pathend = cp + 1;
			}
			/* find last component of target name */
			for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
			        if (*cp == '/')
				        mpname = cp + 1;
			}
			/* append name to prefix */
			maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
			bzero(pathend, maxlen);
			strlcpy(pathend, mpname, maxlen);
		}
		FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);

		vfs_unbusy(mp);
	}
	/*
	 * fix up name & parent pointers.  note that we first
	 * check that fvp has the same name/parent pointers it
	 * had before the rename call... this is a 'weak' check
	 * at best...
	 */
	if (oname == fvp->v_name && oparent == fvp->v_parent) {
	        int update_flags;

	        update_flags = VNODE_UPDATE_NAME;

		if (fdvp != tdvp)
		        update_flags |= VNODE_UPDATE_PARENT;

	        vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
	}
	/*
	 * Common exit: release path buffers, the rename lock and every
	 * iocount this pass still holds, then either retry or return.
	 */
out1:
	if (to_name != NULL) {
		RELEASE_PATH(to_name);
		to_name = NULL;
	}
	if (from_name != NULL) {
		RELEASE_PATH(from_name);
		from_name = NULL;
	}
	if (holding_mntlock) {
	        mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (tdvp) {
		/*
		 * nameidone has to happen before we vnode_put(tdvp)
		 * since it may need to release the fs_nodelock on the tdvp
		 */
		nameidone(&tond);

		if (tvp)
		        vnode_put(tvp);
	        vnode_put(tdvp);
	}
	if (fdvp) {
		/*
		 * nameidone has to happen before we vnode_put(fdvp)
		 * since it may need to release the fs_nodelock on the fdvp
		 */
		nameidone(&fromnd);

		if (fvp)
		        vnode_put(fvp);
	        vnode_put(fdvp);
	}

    /*
     * If things changed after we did the namei, then we will re-drive
     * this rename call from the top.
     */
	if(do_retry) {
        do_retry = 0;
		goto retry;
	}

	return (error);
}
5264
5265/*
5266 * Make a directory file.
5267 *
5268 * Returns:	0			Success
5269 *		EEXIST
5270 *	namei:???
5271 *	vnode_authorize:???
5272 *	vn_create:???
5273 */
5274/* ARGSUSED */
5275static int
5276mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5277{
5278	vnode_t	vp, dvp;
5279	int error;
5280	int update_flags = 0;
5281	struct nameidata nd;
5282
5283	AUDIT_ARG(mode, vap->va_mode);
5284	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
5285		UIO_USERSPACE, path, ctx);
5286	nd.ni_cnd.cn_flags |= WILLBEDIR;
5287	error = namei(&nd);
5288	if (error)
5289		return (error);
5290	dvp = nd.ni_dvp;
5291	vp = nd.ni_vp;
5292
5293  	if (vp != NULL) {
5294  		error = EEXIST;
5295  		goto out;
5296  	}
5297
5298	VATTR_SET(vap, va_type, VDIR);
5299
5300#if CONFIG_MACF
5301	error = mac_vnode_check_create(ctx,
5302	    nd.ni_dvp, &nd.ni_cnd, vap);
5303	if (error)
5304		goto out;
5305#endif
5306
5307  	/* authorize addition of a directory to the parent */
5308  	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5309  		goto out;
5310
5311
5312	/* make the directory */
5313  	if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
5314  		goto out;
5315
5316	// Make sure the name & parent pointers are hooked up
5317	if (vp->v_name == NULL)
5318	        update_flags |= VNODE_UPDATE_NAME;
5319	if (vp->v_parent == NULLVP)
5320	        update_flags |= VNODE_UPDATE_PARENT;
5321
5322	if (update_flags)
5323	        vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5324
5325#if CONFIG_FSE
5326	add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
5327#endif
5328
5329out:
5330	/*
5331	 * nameidone has to happen before we vnode_put(dvp)
5332	 * since it may need to release the fs_nodelock on the dvp
5333	 */
5334	nameidone(&nd);
5335
5336	if (vp)
5337	        vnode_put(vp);
5338	vnode_put(dvp);
5339
5340	return (error);
5341}
5342
5343
5344int
5345mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused register_t *retval)
5346{
5347	int ciferror;
5348	kauth_filesec_t xsecdst;
5349	struct vnode_attr va;
5350
5351	xsecdst = NULL;
5352	if ((uap->xsecurity != USER_ADDR_NULL) &&
5353	    ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
5354		return ciferror;
5355
5356	VATTR_INIT(&va);
5357  	VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5358	if (xsecdst != NULL)
5359		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5360
5361	ciferror = mkdir1(vfs_context_current(), uap->path, &va);
5362	if (xsecdst != NULL)
5363		kauth_filesec_free(xsecdst);
5364	return ciferror;
5365}
5366
5367int
5368mkdir(proc_t p, struct mkdir_args *uap, __unused register_t *retval)
5369{
5370	struct vnode_attr va;
5371
5372	VATTR_INIT(&va);
5373  	VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5374
5375	return(mkdir1(vfs_context_current(), uap->path, &va));
5376}
5377
5378/*
5379 * Remove a directory file.
5380 */
5381/* ARGSUSED */
5382int
5383rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval)
5384{
5385	vnode_t vp, dvp;
5386	int error;
5387	struct nameidata nd;
5388	vfs_context_t ctx = vfs_context_current();
5389
5390	int restart_flag, oldvp_id = -1;
5391
5392	/*
5393	 * This loop exists to restart rmdir in the unlikely case that two
5394	 * processes are simultaneously trying to remove the same directory
5395	 * containing orphaned appleDouble files.
5396	 */
5397	do {
5398		restart_flag = 0;
5399
5400		NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
5401				UIO_USERSPACE, uap->path, ctx);
5402		error = namei(&nd);
5403		if (error)
5404			return (error);
5405
5406		dvp = nd.ni_dvp;
5407		vp = nd.ni_vp;
5408
5409
5410		/*
5411		 * If being restarted check if the new vp
5412		 * still has the same v_id.
5413		 */
5414		if (oldvp_id != -1 && oldvp_id != vp->v_id) {
5415			error = ENOENT;
5416			goto out;
5417		}
5418
5419		if (vp->v_type != VDIR) {
5420			/*
5421			 * rmdir only deals with directories
5422			 */
5423			error = ENOTDIR;
5424		} else if (dvp == vp) {
5425			/*
5426			 * No rmdir "." please.
5427			 */
5428			error = EINVAL;
5429		} else if (vp->v_flag & VROOT) {
5430			/*
5431			 * The root of a mounted filesystem cannot be deleted.
5432			 */
5433			error = EBUSY;
5434		} else {
5435#if CONFIG_MACF
5436			error = mac_vnode_check_unlink(ctx, dvp,
5437					vp, &nd.ni_cnd);
5438			if (!error)
5439#endif
5440				error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
5441		}
5442		if (!error) {
5443			char     *path = NULL;
5444			int       len;
5445			fse_info  finfo;
5446			int has_listeners = 0;
5447			int need_event = 0;
5448
5449#if CONFIG_FSE
5450			need_event = need_fsevent(FSE_DELETE, dvp);
5451			if (need_event) {
5452				get_fse_info(vp, &finfo, ctx);
5453			}
5454#endif
5455			has_listeners = kauth_authorize_fileop_has_listeners();
5456			if (need_event || has_listeners) {
5457				GET_PATH(path);
5458				if (path == NULL) {
5459					error = ENOMEM;
5460					goto out;
5461				}
5462				len = MAXPATHLEN;
5463				vn_getpath(vp, path, &len);
5464			}
5465
5466			error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5467
5468			/*
5469			 * Special case to remove orphaned AppleDouble
5470			 * files. I don't like putting this in the kernel,
5471			 * but carbon does not like putting this in carbon either,
5472			 * so here we are.
5473			 */
5474			if (error == ENOTEMPTY) {
5475				error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
5476				if (error == EBUSY) {
5477					oldvp_id = vp->v_id;
5478					goto out;
5479				}
5480
5481
5482				/*
5483				 * Assuming everything went well, we will try the RMDIR again
5484				 */
5485				if (!error)
5486					error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5487			}
5488
5489			/*
5490			 * Call out to allow 3rd party notification of delete.
5491			 * Ignore result of kauth_authorize_fileop call.
5492			 */
5493			if (!error) {
5494				if (has_listeners) {
5495					kauth_authorize_fileop(vfs_context_ucred(ctx),
5496							KAUTH_FILEOP_DELETE,
5497							(uintptr_t)vp,
5498							(uintptr_t)path);
5499				}
5500
5501				if (vp->v_flag & VISHARDLINK) {
5502				    // see the comment in unlink1() about why we update
5503				    // the parent of a hard link when it is removed
5504				    vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5505				}
5506
5507#if CONFIG_FSE
5508				if (need_event) {
5509					add_fsevent(FSE_DELETE, ctx,
5510							FSE_ARG_STRING, len, path,
5511							FSE_ARG_FINFO, &finfo,
5512							FSE_ARG_DONE);
5513				}
5514#endif
5515			}
5516			if (path != NULL)
5517				RELEASE_PATH(path);
5518		}
5519
5520out:
5521		/*
5522		 * nameidone has to happen before we vnode_put(dvp)
5523		 * since it may need to release the fs_nodelock on the dvp
5524		 */
5525		nameidone(&nd);
5526
5527		vnode_put(dvp);
5528		vnode_put(vp);
5529
5530		if (restart_flag == 0) {
5531			wakeup_one((caddr_t)vp);
5532			return (error);
5533		}
5534		tsleep(vp, PVFS, "rm AD", 1);
5535
5536	} while (restart_flag != 0);
5537
5538	return (error);
5539
5540}
5541
/*
 * Get direntry length padded to 8 byte alignment.
 *
 * Starts from sizeof(struct direntry), removes (MAXPATHLEN-1) bytes,
 * adds the actual name length and rounds the result up to the next
 * multiple of 8.
 * NOTE(review): presumably struct direntry ends in a MAXPATHLEN-byte
 * d_name array, so the result covers the fixed fields, the name and its
 * terminating NUL - confirm against the struct definition.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
5545
5546static errno_t
5547vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
5548                int *numdirent, vfs_context_t ctxp)
5549{
5550	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
5551	if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
5552		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
5553	} else {
5554		size_t bufsize;
5555		void * bufptr;
5556		uio_t auio;
5557		struct direntry entry64;
5558		struct dirent *dep;
5559		int bytesread;
5560		int error;
5561
5562		/*
5563		 * Our kernel buffer needs to be smaller since re-packing
5564		 * will expand each dirent.  The worse case (when the name
5565		 * length is 3) corresponds to a struct direntry size of 32
5566		 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
5567		 * (4-byte aligned).  So having a buffer that is 3/8 the size
5568		 * will prevent us from reading more than we can pack.
5569                 *
5570		 * Since this buffer is wired memory, we will limit the
5571		 * buffer size to a maximum of 32K. We would really like to
5572		 * use 32K in the MIN(), but we use magic number 87371 to
5573		 * prevent uio_resid() * 3 / 8 from overflowing.
5574		 */
5575		bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
5576		MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
5577
5578		auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
5579		uio_addiov(auio, (uintptr_t)bufptr, bufsize);
5580		auio->uio_offset = uio->uio_offset;
5581
5582		error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
5583
5584		dep = (struct dirent *)bufptr;
5585		bytesread = bufsize - uio_resid(auio);
5586
5587		/*
5588		 * Convert all the entries and copy them out to user's buffer.
5589		 */
5590		while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
5591			/* Convert a dirent to a dirent64. */
5592			entry64.d_ino = dep->d_ino;
5593			entry64.d_seekoff = 0;
5594			entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
5595			entry64.d_namlen = dep->d_namlen;
5596			entry64.d_type = dep->d_type;
5597			bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
5598
5599			/* Move to next entry. */
5600			dep = (struct dirent *)((char *)dep + dep->d_reclen);
5601
5602			/* Copy entry64 to user's buffer. */
5603			error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
5604		}
5605
5606		/* Update the real offset using the offset we got from VNOP_READDIR. */
5607		if (error == 0) {
5608			uio->uio_offset = auio->uio_offset;
5609		}
5610		uio_free(auio);
5611		FREE(bufptr, M_TEMP);
5612		return (error);
5613	}
5614}
5615
/*
 * Read a block of directory entries in a file system independent format.
 *
 * Common back end for getdirentries() and getdirentries64().
 *
 * Parameters:	fd		descriptor of the directory to read
 *		bufp		user-space destination buffer
 *		bufsize		size of that buffer in bytes
 *		bytesread	out: bufsize minus the uio residual
 *		offset		out (optional, may be NULL): the file offset
 *				that was in effect before the read
 *		flags		when VNODE_READDIR_EXTENDED is set the read
 *				goes through vnode_readdir64(), otherwise
 *				through VNOP_READDIR()
 *
 * Returns:	0		Success
 *		EBADF		descriptor was not opened with FREAD
 *		EINVAL		the vnode is not a directory
 *		other		error from fp_getfvp(), the MAC checks,
 *				vnode_getwithref(), the readdir call or
 *				union_dircheckp()
 *
 * The fileproc obtained from fp_getfvp() is released with file_drop()
 * at "out".
 */
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
                     off_t *offset, int flags)
{
	vnode_t vp;
	struct vfs_context context = *vfs_context_current();	/* local copy */
	struct fileproc *fp;
	uio_t auio;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	off_t loff;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if (error) {
		return (error);
	}
	/* The descriptor must have been opened for reading. */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}

#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if (error)
		goto out;
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		goto out;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* Re-entered whenever the union handling below switches vp. */
unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(&context, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* Remember the starting offset; it is what gets reported via *offset. */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);

	/* The file offset is updated from the uio even when the read fails. */
	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/* Nothing was transferred: see whether a union layer should be read instead. */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			error = union_dircheckp(&vp, fp, &context);
			/* -1 is the hook's request to retry the read with the (possibly replaced) vp. */
			if (error == -1)
				goto unionread;
			/*
			 * NOTE(review): unlike the other error exits, this one does
			 * not vnode_put(vp) -- confirm whether union_dircheckp
			 * releases the reference on failure.
			 */
			if (error)
				goto out;
		}

		/*
		 * vp is the root of a union mount: point the open file at the
		 * covered vnode, restart at offset 0 and read again.
		 */
		if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			/* NOTE(review): the return value of vnode_getwithref() is ignored here. */
			vnode_getwithref(vp);
			vnode_ref(vp);
			fp->f_fglob->fg_data = (caddr_t) vp;
			fp->f_fglob->fg_offset = 0;
			vnode_rele(tvp);
			vnode_put(tvp);
			goto unionread;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;
	}
	// LP64todo - fix this
	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}
5715
5716
5717int
5718getdirentries(__unused struct proc *p, struct getdirentries_args *uap, register_t *retval)
5719{
5720	off_t offset;
5721	long loff;
5722	ssize_t bytesread;
5723	int error;
5724
5725	AUDIT_ARG(fd, uap->fd);
5726	error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
5727
5728	if (error == 0) {
5729		loff = (long)offset;
5730		error = copyout((caddr_t)&loff, uap->basep, sizeof(long));
5731		*retval = bytesread;
5732	}
5733	return (error);
5734}
5735
5736int
5737getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
5738{
5739	off_t offset;
5740	ssize_t bytesread;
5741	int error;
5742
5743	AUDIT_ARG(fd, uap->fd);
5744	error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
5745
5746	if (error == 0) {
5747		*retval = bytesread;
5748		error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
5749	}
5750	return (error);
5751}
5752
5753
5754/*
5755 * Set the mode mask for creation of filesystem nodes.
5756 */
5757#warning XXX implement xsecurity
5758
5759#define UMASK_NOXSECURITY	 (void *)1	/* leave existing xsecurity alone */
5760static int
5761umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, register_t *retval)
5762{
5763	struct filedesc *fdp;
5764
5765	AUDIT_ARG(mask, newmask);
5766	proc_fdlock(p);
5767	fdp = p->p_fd;
5768	*retval = fdp->fd_cmask;
5769	fdp->fd_cmask = newmask & ALLPERMS;
5770	proc_fdunlock(p);
5771	return (0);
5772}
5773
5774
5775int
5776umask_extended(proc_t p, struct umask_extended_args *uap, register_t *retval)
5777{
5778	int ciferror;
5779	kauth_filesec_t xsecdst;
5780
5781	xsecdst = KAUTH_FILESEC_NONE;
5782	if (uap->xsecurity != USER_ADDR_NULL) {
5783		if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5784			return ciferror;
5785	} else {
5786		xsecdst = KAUTH_FILESEC_NONE;
5787	}
5788
5789	ciferror = umask1(p, uap->newmask, xsecdst, retval);
5790
5791	if (xsecdst != KAUTH_FILESEC_NONE)
5792		kauth_filesec_free(xsecdst);
5793	return ciferror;
5794}
5795
5796int
5797umask(proc_t p, struct umask_args *uap, register_t *retval)
5798{
5799	return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
5800}
5801
5802/*
5803 * Void all references to file by ripping underlying filesystem
5804 * away from vnode.
5805 */
5806/* ARGSUSED */
5807int
5808revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval)
5809{
5810	vnode_t vp;
5811	struct vnode_attr va;
5812	vfs_context_t ctx = vfs_context_current();
5813	int error;
5814	struct nameidata nd;
5815
5816	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5817		UIO_USERSPACE, uap->path, ctx);
5818	error = namei(&nd);
5819	if (error)
5820		return (error);
5821	vp = nd.ni_vp;
5822
5823	nameidone(&nd);
5824
5825#if CONFIG_MACF
5826	error = mac_vnode_check_revoke(ctx, vp);
5827	if (error)
5828		goto out;
5829#endif
5830
5831	VATTR_INIT(&va);
5832	VATTR_WANTED(&va, va_uid);
5833	if ((error = vnode_getattr(vp, &va, ctx)))
5834		goto out;
5835	if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
5836	    (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
5837		goto out;
5838	if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
5839		VNOP_REVOKE(vp, REVOKEALL, ctx);
5840out:
5841	vnode_put(vp);
5842	return (error);
5843}
5844
5845
5846/*
 *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
5848 *  The following system calls are designed to support features
5849 *  which are specific to the HFS & HFS Plus volume formats
5850 */
5851
5852#ifdef __APPLE_API_OBSOLETE
5853
5854/************************************************/
5855/* *** Following calls will be deleted soon *** */
5856/************************************************/
5857
5858/*
5859 * Make a complex file.  A complex file is one with multiple forks (data streams)
5860 */
5861/* ARGSUSED */
5862int
5863mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused register_t *retval)
5864{
5865	return (ENOTSUP);
5866}
5867
5868/*
5869 * Extended stat call which returns volumeid and vnodeid as well as other info
5870 */
5871/* ARGSUSED */
5872int
5873statv(__unused proc_t p,
5874	  __unused struct statv_args *uap,
5875	  __unused register_t *retval)
5876{
5877	return (ENOTSUP);	/*  We'll just return an error for now */
5878
5879} /* end of statv system call */
5880
5881/*
5882* Extended lstat call which returns volumeid and vnodeid as well as other info
5883*/
5884/* ARGSUSED */
5885int
5886lstatv(__unused proc_t p,
5887	   __unused struct lstatv_args *uap,
5888	   __unused register_t *retval)
5889{
5890       return (ENOTSUP);	/*  We'll just return an error for now */
5891} /* end of lstatv system call */
5892
5893/*
5894* Extended fstat call which returns volumeid and vnodeid as well as other info
5895*/
5896/* ARGSUSED */
5897int
5898fstatv(__unused proc_t p,
5899	   __unused struct fstatv_args *uap,
5900	   __unused register_t *retval)
5901{
5902       return (ENOTSUP);	/*  We'll just return an error for now */
5903} /* end of fstatv system call */
5904
5905
5906/************************************************/
5907/* *** Preceding calls will be deleted soon *** */
5908/************************************************/
5909
5910#endif /* __APPLE_API_OBSOLETE */
5911
5912/*
5913* Obtain attribute information on objects in a directory while enumerating
5914* the directory.  This call does not yet support union mounted directories.
5915* TO DO
5916*  1.union mounted directories.
5917*/
5918
5919/* ARGSUSED */
5920int
5921getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *retval)
5922{
5923	vnode_t vp;
5924	struct fileproc *fp;
5925	uio_t auio = NULL;
5926	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5927	uint32_t count;
5928	uint32_t newstate;
5929	int error, eofflag;
5930	uint32_t loff;
5931	struct attrlist attributelist;
5932	vfs_context_t ctx = vfs_context_current();
5933	int fd = uap->fd;
5934	char uio_buf[ UIO_SIZEOF(1) ];
5935	kauth_action_t action;
5936
5937	AUDIT_ARG(fd, fd);
5938
5939	/* Get the attributes into kernel space */
5940	if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
5941		return(error);
5942	}
5943	if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
5944		return(error);
5945	}
5946	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
5947		return (error);
5948	}
5949	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
5950		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5951		error = EBADF;
5952		goto out;
5953	}
5954
5955
5956#if CONFIG_MACF
5957	error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5958	    fp->f_fglob);
5959	if (error)
5960		goto out;
5961#endif
5962
5963
5964	if ( (error = vnode_getwithref(vp)) )
5965		goto out;
5966
5967	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5968
5969	if (vp->v_type != VDIR) {
5970		(void)vnode_put(vp);
5971		error = EINVAL;
5972		goto out;
5973	}
5974
5975#if CONFIG_MACF
5976	error = mac_vnode_check_readdir(ctx, vp);
5977	if (error != 0) {
5978		(void)vnode_put(vp);
5979		goto out;
5980	}
5981#endif /* MAC */
5982
5983	/* set up the uio structure which will contain the users return buffer */
5984	loff = fp->f_fglob->fg_offset;
5985	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
5986	    &uio_buf[0], sizeof(uio_buf));
5987	uio_addiov(auio, uap->buffer, uap->buffersize);
5988
5989	/*
5990	 * If the only item requested is file names, we can let that past with
5991	 * just LIST_DIRECTORY.  If they want any other attributes, that means
5992	 * they need SEARCH as well.
5993	 */
5994	action = KAUTH_VNODE_LIST_DIRECTORY;
5995	if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
5996	    attributelist.fileattr || attributelist.dirattr)
5997		action |= KAUTH_VNODE_SEARCH;
5998
5999	if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
6000		u_long ulcount = count;
6001
6002		error = VNOP_READDIRATTR(vp, &attributelist, auio,
6003					 count,
6004		                         uap->options, (unsigned long *)&newstate, &eofflag,
6005		                         &ulcount, ctx);
6006		if (!error)
6007			count = ulcount;
6008	}
6009	(void)vnode_put(vp);
6010
6011	if (error)
6012		goto out;
6013	fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
6014
6015	if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
6016		goto out;
6017	if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
6018		goto out;
6019	if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
6020		goto out;
6021
6022	*retval = eofflag;  /* similar to getdirentries */
6023	error = 0;
6024out:
6025	file_drop(fd);
6026	return (error); /* return error earlier, an retval of 0 or 1 now */
6027
6028} /* end of getdirentryattr system call */
6029
/*
 * Exchange data between two files
 *
 * Looks up uap->path1 and uap->path2 (following a trailing symlink unless
 * FSOPT_NOFOLLOW is set in uap->options), checks that they are distinct
 * vnodes on the same mount and that the caller may read and write both,
 * then asks the filesystem to swap them with VNOP_EXCHANGE.  On success
 * the cached names and parents of the two vnodes are swapped as well, and
 * kauth fileop listeners / fsevents are notified when paths were gathered.
 *
 * Returns:	0		Success
 *		EINVAL		both paths resolve to the same vnode
 *		EXDEV		the vnodes are on different mounts
 *		ENOMEM		a path buffer could not be obtained
 *		other		error from namei(), the MAC check,
 *				vnode_authorize() or VNOP_EXCHANGE()
 */

/* ARGSUSED */
int
exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused register_t *retval)
{

	struct nameidata fnd, snd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t fvp;
	vnode_t svp;
	int error;
	u_long nameiflags;
	char *fpath = NULL;
	char *spath = NULL;
	int   flen, slen;
	fse_info f_finfo, s_finfo;

	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;

    NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
        	UIO_USERSPACE, uap->path1, ctx);

    error = namei(&fnd);
    if (error)
        goto out2;

	nameidone(&fnd);
	fvp = fnd.ni_vp;

    NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
        	UIO_USERSPACE, uap->path2, ctx);

    error = namei(&snd);
    if (error) {
		/* Only the first vnode is held at this point. */
		vnode_put(fvp);
		goto out2;
    }
	nameidone(&snd);
	svp = snd.ni_vp;

	/*
	 * if the files are the same, return an inval error
	 */
	if (svp == fvp) {
		error = EINVAL;
		goto out;
	}

	/*
	 * if the files are on different volumes, return an error
	 */
	if (svp->v_mount != fvp->v_mount) {
	        error = EXDEV;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_exchangedata(ctx,
	    fvp, svp);
	if (error)
		goto out;
#endif
	/* The caller needs both read and write access to each file. */
	if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
	    ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
		goto out;

	/* Paths are gathered only when something will consume them. */
	if (
#if CONFIG_FSE
	need_fsevent(FSE_EXCHANGE, fvp) ||
#endif
	kauth_authorize_fileop_has_listeners()) {
		GET_PATH(fpath);
		GET_PATH(spath);
		if (fpath == NULL || spath == NULL) {
			error = ENOMEM;
			goto out;
		}
		flen = MAXPATHLEN;
		slen = MAXPATHLEN;
		/* A failed path lookup is only logged; the exchange still proceeds. */
		if (vn_getpath(fvp, fpath, &flen) != 0 || fpath[0] == '\0') {
		        printf("exchange: vn_getpath(fvp=%p) failed <<%s>>\n",
			       fvp, fpath);
		}
		if (vn_getpath(svp, spath, &slen) != 0 || spath[0] == '\0') {
		        printf("exchange: vn_getpath(svp=%p) failed <<%s>>\n",
			       svp, spath);
		}
#if CONFIG_FSE
		get_fse_info(fvp, &f_finfo, ctx);
		get_fse_info(svp, &s_finfo, ctx);
#endif
	}
	/* Ok, make the call */
	error = VNOP_EXCHANGE(fvp, svp, 0, ctx);

	if (error == 0) {
	    const char *tmpname;

	    if (fpath != NULL && spath != NULL) {
	            /* call out to allow 3rd party notification of exchangedata.
		     * Ignore result of kauth_authorize_fileop call.
		     */
	            kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
					   (uintptr_t)fpath, (uintptr_t)spath);
	    }
	    /* Swap the cached name and parent of the two vnodes under the name cache lock. */
	    name_cache_lock();

	    tmpname     = fvp->v_name;
	    fvp->v_name = svp->v_name;
	    svp->v_name = tmpname;

	    if (fvp->v_parent != svp->v_parent) {
		vnode_t tmp;

		tmp           = fvp->v_parent;
		fvp->v_parent = svp->v_parent;
		svp->v_parent = tmp;
	    }
	    name_cache_unlock();

#if CONFIG_FSE
	    if (fpath != NULL && spath != NULL) {
	            add_fsevent(FSE_EXCHANGE, ctx,
				FSE_ARG_STRING, flen, fpath,
				FSE_ARG_FINFO, &f_finfo,
				FSE_ARG_STRING, slen, spath,
				FSE_ARG_FINFO, &s_finfo,
				FSE_ARG_DONE);
	    }
#endif
	}

	/* out: both vnodes are held; out2: neither is. */
out:
	if (fpath != NULL)
	        RELEASE_PATH(fpath);
	if (spath != NULL)
	        RELEASE_PATH(spath);
	vnode_put(svp);
	vnode_put(fvp);
out2:
        return (error);
}
6176
6177
6178/* ARGSUSED */
6179
6180int
6181searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval)
6182{
6183	vnode_t vp;
6184	int error=0;
6185	int fserror = 0;
6186	struct nameidata nd;
6187	struct user_fssearchblock searchblock;
6188	struct searchstate *state;
6189	struct attrlist *returnattrs;
6190	void *searchparams1,*searchparams2;
6191	uio_t auio = NULL;
6192	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6193	u_long nummatches;
6194	int mallocsize;
6195	u_long nameiflags;
6196	vfs_context_t ctx = vfs_context_current();
6197	char uio_buf[ UIO_SIZEOF(1) ];
6198
6199	/* Start by copying in fsearchblock paramater list */
6200    if (IS_64BIT_PROCESS(p)) {
6201       error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
6202    }
6203    else {
6204        struct fssearchblock tmp_searchblock;
6205        error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
6206        // munge into 64-bit version
6207        searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
6208        searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
6209        searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
6210        searchblock.maxmatches = tmp_searchblock.maxmatches;
6211        searchblock.timelimit.tv_sec = tmp_searchblock.timelimit.tv_sec;
6212        searchblock.timelimit.tv_usec = tmp_searchblock.timelimit.tv_usec;
6213        searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
6214        searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
6215        searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
6216        searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
6217        searchblock.searchattrs = tmp_searchblock.searchattrs;
6218    }
6219	if (error)
6220		return(error);
6221
6222	/* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
6223	 */
6224	if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
6225		searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
6226		return(EINVAL);
6227
6228	/* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
6229	/* It all has to do into local memory and it's not that big so we might as well  put it all together. */
6230	/* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
6231	/* block.  											      */
6232
6233	mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
6234		      sizeof(struct attrlist) + sizeof(struct searchstate);
6235
6236	MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
6237
6238	/* Now set up the various pointers to the correct place in our newly allocated memory */
6239
6240	searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
6241	returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
6242	state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
6243
6244	/* Now copy in the stuff given our local variables. */
6245
6246	if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
6247		goto freeandexit;
6248
6249	if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
6250		goto freeandexit;
6251
6252	if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
6253		goto freeandexit;
6254
6255	if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
6256		goto freeandexit;
6257
6258	/* set up the uio structure which will contain the users return buffer */
6259
6260	auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
6261								  &uio_buf[0], sizeof(uio_buf));
6262    uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
6263
6264	nameiflags = 0;
6265	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6266	NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
6267		UIO_USERSPACE, uap->path, ctx);
6268
6269	error = namei(&nd);
6270	if (error)
6271		goto freeandexit;
6272
6273	nameidone(&nd);
6274	vp = nd.ni_vp;
6275
6276
6277	/*
6278	 * If searchblock.maxmatches == 0, then skip the search. This has happened
6279	 * before and sometimes the underlyning code doesnt deal with it well.
6280	 */
6281	 if (searchblock.maxmatches == 0) {
6282		nummatches = 0;
6283		goto saveandexit;
6284	 }
6285
6286	/*
6287	   Allright, we have everything we need, so lets make that call.
6288
6289	   We keep special track of the return value from the file system:
6290	   EAGAIN is an acceptable error condition that shouldn't keep us
6291	   from copying out any results...
6292	 */
6293
6294	fserror = VNOP_SEARCHFS(vp,
6295							searchparams1,
6296							searchparams2,
6297							&searchblock.searchattrs,
6298							searchblock.maxmatches,
6299							&searchblock.timelimit,
6300							returnattrs,
6301							&nummatches,
6302							uap->scriptcode,
6303							uap->options,
6304							auio,
6305							state,
6306							ctx);
6307
6308saveandexit:
6309
6310	vnode_put(vp);
6311
6312	/* Now copy out the stuff that needs copying out. That means the number of matches, the
6313	   search state.  Everything was already put into he return buffer by the vop call. */
6314
6315	if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
6316		goto freeandexit;
6317
6318    if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6319		goto freeandexit;
6320
6321	error = fserror;
6322
6323freeandexit:
6324
6325	FREE(searchparams1,M_TEMP);
6326
6327	return(error);
6328
6329
6330} /* end of searchfs system call */
6331
6332
6333/*
6334 * Make a filesystem-specific control call:
6335 */
6336/* ARGSUSED */
6337int
6338fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval)
6339{
6340	int error;
6341	boolean_t is64bit;
6342	struct nameidata nd;
6343	u_long nameiflags;
6344	u_long cmd = uap->cmd;
6345	u_int size;
6346#define STK_PARAMS 128
6347	char stkbuf[STK_PARAMS];
6348	caddr_t data, memp;
6349	vfs_context_t ctx = vfs_context_current();
6350
6351	size = IOCPARM_LEN(cmd);
6352	if (size > IOCPARM_MAX) return (EINVAL);
6353
6354    is64bit = proc_is64bit(p);
6355
6356	memp = NULL;
6357	if (size > sizeof (stkbuf)) {
6358		if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
6359		data = memp;
6360	} else {
6361		data = &stkbuf[0];
6362	};
6363
6364	if (cmd & IOC_IN) {
6365		if (size) {
6366			error = copyin(uap->data, data, size);
6367			if (error) goto FSCtl_Exit;
6368		} else {
6369		    if (is64bit) {
6370    			*(user_addr_t *)data = uap->data;
6371		    }
6372		    else {
6373    			*(uint32_t *)data = (uint32_t)uap->data;
6374		    }
6375		};
6376	} else if ((cmd & IOC_OUT) && size) {
6377		/*
6378		 * Zero the buffer so the user always
6379		 * gets back something deterministic.
6380		 */
6381		bzero(data, size);
6382	} else if (cmd & IOC_VOID) {
6383        if (is64bit) {
6384            *(user_addr_t *)data = uap->data;
6385        }
6386        else {
6387            *(uint32_t *)data = (uint32_t)uap->data;
6388        }
6389	}
6390
6391	/* Get the vnode for the file we are getting info on:  */
6392	nameiflags = 0;
6393	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6394	NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx);
6395	if ((error = namei(&nd))) goto FSCtl_Exit;
6396
6397#if CONFIG_MACF
6398	error = mac_mount_check_fsctl(ctx, vnode_mount(nd.ni_vp), cmd);
6399	if (error) {
6400		vnode_put(nd.ni_vp);
6401		nameidone(&nd);
6402		goto FSCtl_Exit;
6403	}
6404#endif
6405
6406	/* Invoke the filesystem-specific code */
6407	error = VNOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, ctx);
6408
6409	vnode_put(nd.ni_vp);
6410	nameidone(&nd);
6411
6412	/*
6413	 * Copy any data to user, size was
6414	 * already set and checked above.
6415	 */
6416	if (error == 0 && (cmd & IOC_OUT) && size)
6417		error = copyout(data, uap->data, size);
6418
6419FSCtl_Exit:
6420	if (memp) kfree(memp, size);
6421
6422	return error;
6423}
6424/* end of fsctl system call */
6425
6426/*
6427 * An in-kernel sync for power management to call.
6428 */
6429__private_extern__ int
6430sync_internal(void)
6431{
6432	int error;
6433
6434	struct sync_args data;
6435
6436	int retval[2];
6437
6438
6439	error = sync(current_proc(), &data, &retval[0]);
6440
6441
6442	return (error);
6443} /* end of sync_internal call */
6444
6445
6446/*
6447 *  Retrieve the data of an extended attribute.
6448 */
6449int
6450getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
6451{
6452	vnode_t vp;
6453	struct nameidata nd;
6454	char attrname[XATTR_MAXNAMELEN+1];
6455	vfs_context_t ctx = vfs_context_current();
6456	uio_t auio = NULL;
6457	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6458	size_t attrsize = 0;
6459	size_t namelen;
6460	u_long nameiflags;
6461	int error;
6462	char uio_buf[ UIO_SIZEOF(1) ];
6463
6464	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6465		return (EINVAL);
6466
6467	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6468	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6469	if ((error = namei(&nd))) {
6470		return (error);
6471	}
6472	vp = nd.ni_vp;
6473	nameidone(&nd);
6474
6475	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6476		goto out;
6477	}
6478	if (xattr_protected(attrname)) {
6479		error = EPERM;
6480		goto out;
6481	}
6482	if (uap->value && uap->size > 0) {
6483		auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6484		                            &uio_buf[0], sizeof(uio_buf));
6485		uio_addiov(auio, uap->value, uap->size);
6486	}
6487
6488	error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
6489out:
6490	vnode_put(vp);
6491
6492	if (auio) {
6493		*retval = uap->size - uio_resid(auio);
6494	} else {
6495		*retval = (user_ssize_t)attrsize;
6496	}
6497
6498	return (error);
6499}
6500
6501/*
6502 * Retrieve the data of an extended attribute.
6503 */
6504int
6505fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
6506{
6507	vnode_t vp;
6508	char attrname[XATTR_MAXNAMELEN+1];
6509	uio_t auio = NULL;
6510	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6511	size_t attrsize = 0;
6512	size_t namelen;
6513	int error;
6514	char uio_buf[ UIO_SIZEOF(1) ];
6515
6516	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6517		return (EINVAL);
6518
6519	if ( (error = file_vnode(uap->fd, &vp)) ) {
6520		return (error);
6521	}
6522	if ( (error = vnode_getwithref(vp)) ) {
6523		file_drop(uap->fd);
6524		return(error);
6525	}
6526	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6527		goto out;
6528	}
6529	if (xattr_protected(attrname)) {
6530		error = EPERM;
6531		goto out;
6532	}
6533	if (uap->value && uap->size > 0) {
6534		auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6535		                            &uio_buf[0], sizeof(uio_buf));
6536		uio_addiov(auio, uap->value, uap->size);
6537	}
6538
6539	error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
6540out:
6541	(void)vnode_put(vp);
6542	file_drop(uap->fd);
6543
6544	if (auio) {
6545		*retval = uap->size - uio_resid(auio);
6546	} else {
6547		*retval = (user_ssize_t)attrsize;
6548	}
6549	return (error);
6550}
6551
6552/*
6553 * Set the data of an extended attribute.
6554 */
6555int
6556setxattr(proc_t p, struct setxattr_args *uap, int *retval)
6557{
6558	vnode_t vp;
6559	struct nameidata nd;
6560	char attrname[XATTR_MAXNAMELEN+1];
6561	vfs_context_t ctx = vfs_context_current();
6562	uio_t auio = NULL;
6563	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6564	size_t namelen;
6565	u_long nameiflags;
6566	int error;
6567	char uio_buf[ UIO_SIZEOF(1) ];
6568
6569	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6570		return (EINVAL);
6571
6572	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6573		return (error);
6574	}
6575	if (xattr_protected(attrname))
6576		return(EPERM);
6577	if (uap->size != 0 && uap->value == 0) {
6578		return (EINVAL);
6579	}
6580
6581	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6582	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6583	if ((error = namei(&nd))) {
6584		return (error);
6585	}
6586	vp = nd.ni_vp;
6587	nameidone(&nd);
6588
6589	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6590	                            &uio_buf[0], sizeof(uio_buf));
6591	uio_addiov(auio, uap->value, uap->size);
6592
6593	error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
6594#if CONFIG_FSE
6595	if (error == 0) {
6596		add_fsevent(FSE_XATTR_MODIFIED, ctx,
6597		    FSE_ARG_VNODE, vp,
6598		    FSE_ARG_DONE);
6599	}
6600#endif
6601	vnode_put(vp);
6602	*retval = 0;
6603	return (error);
6604}
6605
6606/*
6607 * Set the data of an extended attribute.
6608 */
6609int
6610fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
6611{
6612	vnode_t vp;
6613	char attrname[XATTR_MAXNAMELEN+1];
6614	uio_t auio = NULL;
6615	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6616	size_t namelen;
6617	int error;
6618	char uio_buf[ UIO_SIZEOF(1) ];
6619	vfs_context_t ctx = vfs_context_current();
6620
6621	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6622		return (EINVAL);
6623
6624	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6625		return (error);
6626	}
6627	if (xattr_protected(attrname))
6628		return(EPERM);
6629	if (uap->size != 0 && uap->value == 0) {
6630		return (EINVAL);
6631	}
6632	if ( (error = file_vnode(uap->fd, &vp)) ) {
6633		return (error);
6634	}
6635	if ( (error = vnode_getwithref(vp)) ) {
6636		file_drop(uap->fd);
6637		return(error);
6638	}
6639	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6640	                            &uio_buf[0], sizeof(uio_buf));
6641	uio_addiov(auio, uap->value, uap->size);
6642
6643	error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
6644#if CONFIG_FSE
6645	if (error == 0) {
6646		add_fsevent(FSE_XATTR_MODIFIED, ctx,
6647		    FSE_ARG_VNODE, vp,
6648		    FSE_ARG_DONE);
6649	}
6650#endif
6651	vnode_put(vp);
6652	file_drop(uap->fd);
6653	*retval = 0;
6654	return (error);
6655}
6656
6657/*
6658 * Remove an extended attribute.
6659 */
6660#warning "code duplication"
6661int
6662removexattr(proc_t p, struct removexattr_args *uap, int *retval)
6663{
6664	vnode_t vp;
6665	struct nameidata nd;
6666	char attrname[XATTR_MAXNAMELEN+1];
6667	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6668	vfs_context_t ctx = vfs_context_current();
6669	size_t namelen;
6670	u_long nameiflags;
6671	int error;
6672
6673	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6674		return (EINVAL);
6675
6676	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6677	if (error != 0) {
6678		return (error);
6679	}
6680	if (xattr_protected(attrname))
6681		return(EPERM);
6682	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6683	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6684	if ((error = namei(&nd))) {
6685		return (error);
6686	}
6687	vp = nd.ni_vp;
6688	nameidone(&nd);
6689
6690	error = vn_removexattr(vp, attrname, uap->options, ctx);
6691#if CONFIG_FSE
6692	if (error == 0) {
6693		add_fsevent(FSE_XATTR_REMOVED, ctx,
6694		    FSE_ARG_VNODE, vp,
6695		    FSE_ARG_DONE);
6696	}
6697#endif
6698	vnode_put(vp);
6699	*retval = 0;
6700	return (error);
6701}
6702
6703/*
6704 * Remove an extended attribute.
6705 */
6706#warning "code duplication"
6707int
6708fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
6709{
6710	vnode_t vp;
6711	char attrname[XATTR_MAXNAMELEN+1];
6712	size_t namelen;
6713	int error;
6714	vfs_context_t ctx = vfs_context_current();
6715
6716	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6717		return (EINVAL);
6718
6719	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6720	if (error != 0) {
6721		return (error);
6722	}
6723	if (xattr_protected(attrname))
6724		return(EPERM);
6725	if ( (error = file_vnode(uap->fd, &vp)) ) {
6726		return (error);
6727	}
6728	if ( (error = vnode_getwithref(vp)) ) {
6729		file_drop(uap->fd);
6730		return(error);
6731	}
6732
6733	error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
6734#if CONFIG_FSE
6735	if (error == 0) {
6736		add_fsevent(FSE_XATTR_REMOVED, ctx,
6737		    FSE_ARG_VNODE, vp,
6738		    FSE_ARG_DONE);
6739	}
6740#endif
6741	vnode_put(vp);
6742	file_drop(uap->fd);
6743	*retval = 0;
6744	return (error);
6745}
6746
6747/*
6748 * Retrieve the list of extended attribute names.
6749 */
6750#warning "code duplication"
6751int
6752listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
6753{
6754	vnode_t vp;
6755	struct nameidata nd;
6756	vfs_context_t ctx = vfs_context_current();
6757	uio_t auio = NULL;
6758	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6759	size_t attrsize = 0;
6760	u_long nameiflags;
6761	int error;
6762	char uio_buf[ UIO_SIZEOF(1) ];
6763
6764	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6765		return (EINVAL);
6766
6767	nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
6768	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6769	if ((error = namei(&nd))) {
6770		return (error);
6771	}
6772	vp = nd.ni_vp;
6773	nameidone(&nd);
6774	if (uap->namebuf != 0 && uap->bufsize > 0) {
6775		// LP64todo - fix this!
6776		auio = uio_createwithbuffer(1, 0, spacetype,
6777								  	  UIO_READ, &uio_buf[0], sizeof(uio_buf));
6778		uio_addiov(auio, uap->namebuf, uap->bufsize);
6779	}
6780
6781	error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
6782
6783	vnode_put(vp);
6784	if (auio) {
6785		*retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6786	} else {
6787		*retval = (user_ssize_t)attrsize;
6788	}
6789	return (error);
6790}
6791
6792/*
6793 * Retrieve the list of extended attribute names.
6794 */
6795#warning "code duplication"
6796int
6797flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
6798{
6799	vnode_t vp;
6800	uio_t auio = NULL;
6801	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6802	size_t attrsize = 0;
6803	int error;
6804	char uio_buf[ UIO_SIZEOF(1) ];
6805
6806	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6807		return (EINVAL);
6808
6809	if ( (error = file_vnode(uap->fd, &vp)) ) {
6810		return (error);
6811	}
6812	if ( (error = vnode_getwithref(vp)) ) {
6813		file_drop(uap->fd);
6814		return(error);
6815	}
6816	if (uap->namebuf != 0 && uap->bufsize > 0) {
6817		// LP64todo - fix this!
6818		auio = uio_createwithbuffer(1, 0, spacetype,
6819								  	  UIO_READ, &uio_buf[0], sizeof(uio_buf));
6820		uio_addiov(auio, uap->namebuf, uap->bufsize);
6821	}
6822
6823	error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
6824
6825	vnode_put(vp);
6826	file_drop(uap->fd);
6827	if (auio) {
6828		*retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6829	} else {
6830		*retval = (user_ssize_t)attrsize;
6831	}
6832	return (error);
6833}
6834
6835/*
6836 * Common routine to handle various flavors of statfs data heading out
6837 *	to user space.
6838 *
6839 * Returns:	0			Success
6840 *		EFAULT
6841 */
6842static int
6843munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
6844    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
6845    boolean_t partial_copy)
6846{
6847	int		error;
6848	int		my_size, copy_size;
6849
6850	if (is_64_bit) {
6851		struct user_statfs sfs;
6852		my_size = copy_size = sizeof(sfs);
6853		bzero(&sfs, my_size);
6854		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6855		sfs.f_type = mp->mnt_vtable->vfc_typenum;
6856		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6857		sfs.f_bsize = (user_long_t)sfsp->f_bsize;
6858		sfs.f_iosize = (user_long_t)sfsp->f_iosize;
6859		sfs.f_blocks = (user_long_t)sfsp->f_blocks;
6860		sfs.f_bfree = (user_long_t)sfsp->f_bfree;
6861		sfs.f_bavail = (user_long_t)sfsp->f_bavail;
6862		sfs.f_files = (user_long_t)sfsp->f_files;
6863		sfs.f_ffree = (user_long_t)sfsp->f_ffree;
6864		sfs.f_fsid = sfsp->f_fsid;
6865		sfs.f_owner = sfsp->f_owner;
6866		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6867		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6868		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6869
6870		if (partial_copy) {
6871			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6872		}
6873		error = copyout((caddr_t)&sfs, bufp, copy_size);
6874	}
6875	else {
6876		struct statfs sfs;
6877		my_size = copy_size = sizeof(sfs);
6878		bzero(&sfs, my_size);
6879
6880		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6881		sfs.f_type = mp->mnt_vtable->vfc_typenum;
6882		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6883
6884		/*
6885		 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
6886		 * have to fudge the numbers here in that case.   We inflate the blocksize in order
6887		 * to reflect the filesystem size as best we can.
6888		 */
6889		if ((sfsp->f_blocks > LONG_MAX)
6890			/* Hack for 4061702 . I think the real fix is for Carbon to
6891			 * look for some volume capability and not depend on hidden
6892			 * semantics agreed between a FS and carbon.
6893			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
6894			 * for Carbon to set bNoVolumeSizes volume attribute.
6895			 * Without this the webdavfs files cannot be copied onto
6896			 * disk as they look huge. This change should not affect
6897			 * XSAN as they should not setting these to -1..
6898			 */
6899			 && (sfsp->f_blocks != 0xffffffffffffffffULL)
6900			 && (sfsp->f_bfree != 0xffffffffffffffffULL)
6901			 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
6902			int		shift;
6903
6904			/*
6905			 * Work out how far we have to shift the block count down to make it fit.
6906			 * Note that it's possible to have to shift so far that the resulting
6907			 * blocksize would be unreportably large.  At that point, we will clip
6908			 * any values that don't fit.
6909			 *
6910			 * For safety's sake, we also ensure that f_iosize is never reported as
6911			 * being smaller than f_bsize.
6912			 */
6913			for (shift = 0; shift < 32; shift++) {
6914				if ((sfsp->f_blocks >> shift) <= LONG_MAX)
6915					break;
6916				if ((sfsp->f_bsize << (shift + 1)) > LONG_MAX)
6917					break;
6918			}
6919#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s)))
6920			sfs.f_blocks = (long)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
6921			sfs.f_bfree = (long)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
6922			sfs.f_bavail = (long)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
6923#undef __SHIFT_OR_CLIP
6924			sfs.f_bsize = (long)(sfsp->f_bsize << shift);
6925			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
6926		} else {
6927			/* filesystem is small enough to be reported honestly */
6928			sfs.f_bsize = (long)sfsp->f_bsize;
6929			sfs.f_iosize = (long)sfsp->f_iosize;
6930			sfs.f_blocks = (long)sfsp->f_blocks;
6931			sfs.f_bfree = (long)sfsp->f_bfree;
6932			sfs.f_bavail = (long)sfsp->f_bavail;
6933		}
6934		sfs.f_files = (long)sfsp->f_files;
6935		sfs.f_ffree = (long)sfsp->f_ffree;
6936		sfs.f_fsid = sfsp->f_fsid;
6937		sfs.f_owner = sfsp->f_owner;
6938		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6939		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6940		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6941
6942		if (partial_copy) {
6943			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6944		}
6945		error = copyout((caddr_t)&sfs, bufp, copy_size);
6946	}
6947
6948	if (sizep != NULL) {
6949		*sizep = my_size;
6950	}
6951	return(error);
6952}
6953
6954/*
6955 * copy stat structure into user_stat structure.
6956 */
6957void munge_stat(struct stat *sbp, struct user_stat *usbp)
6958{
6959        bzero(usbp, sizeof(struct user_stat));
6960
6961	usbp->st_dev = sbp->st_dev;
6962	usbp->st_ino = sbp->st_ino;
6963	usbp->st_mode = sbp->st_mode;
6964	usbp->st_nlink = sbp->st_nlink;
6965	usbp->st_uid = sbp->st_uid;
6966	usbp->st_gid = sbp->st_gid;
6967	usbp->st_rdev = sbp->st_rdev;
6968#ifndef _POSIX_C_SOURCE
6969	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
6970	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
6971	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
6972	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
6973	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
6974	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
6975#else
6976	usbp->st_atime = sbp->st_atime;
6977	usbp->st_atimensec = sbp->st_atimensec;
6978	usbp->st_mtime = sbp->st_mtime;
6979	usbp->st_mtimensec = sbp->st_mtimensec;
6980	usbp->st_ctime = sbp->st_ctime;
6981	usbp->st_ctimensec = sbp->st_ctimensec;
6982#endif
6983	usbp->st_size = sbp->st_size;
6984	usbp->st_blocks = sbp->st_blocks;
6985	usbp->st_blksize = sbp->st_blksize;
6986	usbp->st_flags = sbp->st_flags;
6987	usbp->st_gen = sbp->st_gen;
6988	usbp->st_lspare = sbp->st_lspare;
6989	usbp->st_qspare[0] = sbp->st_qspare[0];
6990	usbp->st_qspare[1] = sbp->st_qspare[1];
6991}
6992
6993/*
6994 * copy stat64 structure into user_stat64 structure.
6995 */
6996void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp)
6997{
6998        bzero(usbp, sizeof(struct user_stat));
6999
7000	usbp->st_dev = sbp->st_dev;
7001	usbp->st_ino = sbp->st_ino;
7002	usbp->st_mode = sbp->st_mode;
7003	usbp->st_nlink = sbp->st_nlink;
7004	usbp->st_uid = sbp->st_uid;
7005	usbp->st_gid = sbp->st_gid;
7006	usbp->st_rdev = sbp->st_rdev;
7007#ifndef _POSIX_C_SOURCE
7008	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7009	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7010	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7011	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7012	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7013	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7014	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
7015	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
7016#else
7017	usbp->st_atime = sbp->st_atime;
7018	usbp->st_atimensec = sbp->st_atimensec;
7019	usbp->st_mtime = sbp->st_mtime;
7020	usbp->st_mtimensec = sbp->st_mtimensec;
7021	usbp->st_ctime = sbp->st_ctime;
7022	usbp->st_ctimensec = sbp->st_ctimensec;
7023	usbp->st_birthtime = sbp->st_birthtime;
7024	usbp->st_birthtimensec = sbp->st_birthtimensec;
7025#endif
7026	usbp->st_size = sbp->st_size;
7027	usbp->st_blocks = sbp->st_blocks;
7028	usbp->st_blksize = sbp->st_blksize;
7029	usbp->st_flags = sbp->st_flags;
7030	usbp->st_gen = sbp->st_gen;
7031	usbp->st_lspare = sbp->st_lspare;
7032	usbp->st_qspare[0] = sbp->st_qspare[0];
7033	usbp->st_qspare[1] = sbp->st_qspare[1];
7034}
7035