vfs_extattr.c revision 77845
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 * $FreeBSD: head/sys/kern/vfs_extattr.c 77845 2001-06-06 23:34:38Z tmm $
40 */
41
42/* For 4.3 integer FS ID compatibility */
43#include "opt_compat.h"
44#include "opt_ffs.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/sysent.h>
51#include <sys/malloc.h>
52#include <sys/mount.h>
53#include <sys/mutex.h>
54#include <sys/sysproto.h>
55#include <sys/namei.h>
56#include <sys/filedesc.h>
57#include <sys/kernel.h>
58#include <sys/fcntl.h>
59#include <sys/file.h>
60#include <sys/linker.h>
61#include <sys/stat.h>
62#include <sys/sx.h>
63#include <sys/unistd.h>
64#include <sys/vnode.h>
65#include <sys/proc.h>
66#include <sys/dirent.h>
67#include <sys/extattr.h>
68#include <sys/jail.h>
69
70#include <machine/limits.h>
71#include <sys/sysctl.h>
72#include <vm/vm.h>
73#include <vm/vm_object.h>
74#include <vm/vm_zone.h>
75#include <vm/vm_page.h>
76
/*
 * Forward declarations of helpers that are private to this file.
 */
77static int change_dir __P((struct nameidata *ndp, struct proc *p));
78static void checkdirs __P((struct vnode *olddp, struct vnode *newdp));
79static int chroot_refuse_vdir_fds __P((struct filedesc *fdp));
80static int getutimes __P((const struct timeval *, struct timespec *));
81static int setfown __P((struct proc *, struct vnode *, uid_t, gid_t));
82static int setfmode __P((struct proc *, struct vnode *, int));
83static int setfflags __P((struct proc *, struct vnode *, int));
84static int setutimes __P((struct proc *, struct vnode *,
85    const struct timespec *, int));
/*
 * Consulted by vfs_mount(): while this is 0 the caller has to pass suser()
 * before anything is mounted.  Exported read-write by the SYSCTL_INT()
 * further down.
 */
86static int	usermount = 0;	/* if 1, non-root can mount fs. */
87
/*
 * Optional hook, stored as a function pointer.
 * NOTE(review): presumably installed by the union filesystem; neither the
 * assignment nor a caller is visible in this part of the file -- confirm.
 */
88int (*union_dircheckp) __P((struct proc *, struct vnode **, struct file *));
89
90SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
91
92/*
93 * Virtual File System System Calls
94 */
95
96/*
97 * Mount a file system.
98 */
99#ifndef _SYS_SYSPROTO_H_
100struct mount_args {
101	char	*type;
102	char	*path;
103	int	flags;
104	caddr_t	data;
105};
106#endif
107/* ARGSUSED */
108int
109mount(p, uap)
110	struct proc *p;
111	struct mount_args /* {
112		syscallarg(char *) type;
113		syscallarg(char *) path;
114		syscallarg(int) flags;
115		syscallarg(caddr_t) data;
116	} */ *uap;
117{
118	char *fstype;
119	char *fspath;
120	int error;
121
122	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
123	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
124
125	/*
126	 * vfs_mount() actually takes a kernel string for `type' and
127	 * `path' now, so extract them.
128	 */
129	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
130	if (error)
131		goto finish;
132	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
133	if (error)
134		goto finish;
135	error = vfs_mount(p, fstype, fspath, SCARG(uap, flags),
136	    SCARG(uap, data));
137finish:
138	free(fstype, M_TEMP);
139	free(fspath, M_TEMP);
140	return (error);
141}
142
143/*
144 * vfs_mount(): actually attempt a filesystem mount.
145 *
146 * This routine is designed to be a "generic" entry point for routines
147 * that wish to mount a filesystem. All parameters except `fsdata' are
148 * pointers into kernel space. `fsdata' is currently still a pointer
149 * into userspace.
150 */
151int
152vfs_mount(p, fstype, fspath, fsflags, fsdata)
153	struct proc *p;
154	char *fstype;
155	char *fspath;
156	int fsflags;
157	void *fsdata;
158{
159	struct vnode *vp;
160	struct mount *mp;
161	struct vfsconf *vfsp;
162	int error, flag = 0, flag2 = 0;
163	struct vattr va;
164	struct nameidata nd;
165
166	/*
167	 * Be ultra-paranoid about making sure the type and fspath
168	 * variables will fit in our mp buffers, including the
169	 * terminating NUL.
170	 */
171	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
172	    (strlen(fspath) >= MNAMELEN - 1))
173		return (ENAMETOOLONG);
174
175	if (usermount == 0 && (error = suser(p)))
176		return (error);
177	/*
178	 * Do not allow NFS export by non-root users.
179	 */
180	if (fsflags & MNT_EXPORTED) {
181		error = suser(p);
182		if (error)
183			return (error);
184	}
185	/*
186	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
187	 */
188	if (suser_xxx(p->p_ucred, 0, 0))
189		fsflags |= MNT_NOSUID | MNT_NODEV;
190	/*
191	 * Get vnode to be covered
192	 */
193	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, p);
194	if ((error = namei(&nd)) != 0)
195		return (error);
196	NDFREE(&nd, NDF_ONLY_PNBUF);
197	vp = nd.ni_vp;
198	if (fsflags & MNT_UPDATE) {
199		if ((vp->v_flag & VROOT) == 0) {
200			vput(vp);
201			return (EINVAL);
202		}
203		mp = vp->v_mount;
204		flag = mp->mnt_flag;
205		flag2 = mp->mnt_kern_flag;
206		/*
207		 * We only allow the filesystem to be reloaded if it
208		 * is currently mounted read-only.
209		 */
210		if ((fsflags & MNT_RELOAD) &&
211		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
212			vput(vp);
213			return (EOPNOTSUPP);	/* Needs translation */
214		}
215		/*
216		 * Only root, or the user that did the original mount is
217		 * permitted to update it.
218		 */
219		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
220		    (error = suser(p))) {
221			vput(vp);
222			return (error);
223		}
224		if (vfs_busy(mp, LK_NOWAIT, 0, p)) {
225			vput(vp);
226			return (EBUSY);
227		}
228		mtx_lock(&vp->v_interlock);
229		if ((vp->v_flag & VMOUNT) != 0 ||
230		    vp->v_mountedhere != NULL) {
231			mtx_unlock(&vp->v_interlock);
232			vfs_unbusy(mp, p);
233			vput(vp);
234			return (EBUSY);
235		}
236		vp->v_flag |= VMOUNT;
237		mtx_unlock(&vp->v_interlock);
238		mp->mnt_flag |= fsflags &
239		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
240		VOP_UNLOCK(vp, 0, p);
241		goto update;
242	}
243	/*
244	 * If the user is not root, ensure that they own the directory
245	 * onto which we are attempting to mount.
246	 */
247	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) ||
248	    (va.va_uid != p->p_ucred->cr_uid &&
249	     (error = suser(p)))) {
250		vput(vp);
251		return (error);
252	}
253	if ((error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) {
254		vput(vp);
255		return (error);
256	}
257	if (vp->v_type != VDIR) {
258		vput(vp);
259		return (ENOTDIR);
260	}
261	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
262		if (!strcmp(vfsp->vfc_name, fstype))
263			break;
264	if (vfsp == NULL) {
265		linker_file_t lf;
266
267		/* Only load modules for root (very important!) */
268		if ((error = suser(p)) != 0) {
269			vput(vp);
270			return error;
271		}
272		error = linker_load_file(fstype, &lf);
273		if (error || lf == NULL) {
274			vput(vp);
275			if (lf == NULL)
276				error = ENODEV;
277			return error;
278		}
279		lf->userrefs++;
280		/* lookup again, see if the VFS was loaded */
281		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
282			if (!strcmp(vfsp->vfc_name, fstype))
283				break;
284		if (vfsp == NULL) {
285			lf->userrefs--;
286			linker_file_unload(lf);
287			vput(vp);
288			return (ENODEV);
289		}
290	}
291	mtx_lock(&vp->v_interlock);
292	if ((vp->v_flag & VMOUNT) != 0 ||
293	    vp->v_mountedhere != NULL) {
294		mtx_unlock(&vp->v_interlock);
295		vput(vp);
296		return (EBUSY);
297	}
298	vp->v_flag |= VMOUNT;
299	mtx_unlock(&vp->v_interlock);
300
301	/*
302	 * Allocate and initialize the filesystem.
303	 */
304	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
305	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
306	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
307	mp->mnt_op = vfsp->vfc_vfsops;
308	mp->mnt_vfc = vfsp;
309	vfsp->vfc_refcount++;
310	mp->mnt_stat.f_type = vfsp->vfc_typenum;
311	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
312	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
313	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
314	mp->mnt_vnodecovered = vp;
315	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
316	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
317	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
318	mp->mnt_iosize_max = DFLTPHYS;
319	VOP_UNLOCK(vp, 0, p);
320update:
321	/*
322	 * Set the mount level flags.
323	 */
324	if (fsflags & MNT_RDONLY)
325		mp->mnt_flag |= MNT_RDONLY;
326	else if (mp->mnt_flag & MNT_RDONLY)
327		mp->mnt_kern_flag |= MNTK_WANTRDWR;
328	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
329	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
330	    MNT_NOSYMFOLLOW | MNT_IGNORE |
331	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
332	mp->mnt_flag |= fsflags & (MNT_NOSUID | MNT_NOEXEC |
333	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
334	    MNT_NOSYMFOLLOW | MNT_IGNORE |
335	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
336	/*
337	 * Mount the filesystem.
338	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
339	 * get.  No freeing of cn_pnbuf.
340	 */
341	error = VFS_MOUNT(mp, fspath, fsdata, &nd, p);
342	if (mp->mnt_flag & MNT_UPDATE) {
343		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
344			mp->mnt_flag &= ~MNT_RDONLY;
345		mp->mnt_flag &=~
346		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
347		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
348		if (error) {
349			mp->mnt_flag = flag;
350			mp->mnt_kern_flag = flag2;
351		}
352		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
353			if (mp->mnt_syncer == NULL)
354				error = vfs_allocate_syncvnode(mp);
355		} else {
356			if (mp->mnt_syncer != NULL)
357				vrele(mp->mnt_syncer);
358			mp->mnt_syncer = NULL;
359		}
360		vfs_unbusy(mp, p);
361		mtx_lock(&vp->v_interlock);
362		vp->v_flag &= ~VMOUNT;
363		mtx_unlock(&vp->v_interlock);
364		vrele(vp);
365		return (error);
366	}
367	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
368	/*
369	 * Put the new filesystem on the mount list after root.
370	 */
371	cache_purge(vp);
372	if (!error) {
373		struct vnode *newdp;
374
375		mtx_lock(&vp->v_interlock);
376		vp->v_flag &= ~VMOUNT;
377		vp->v_mountedhere = mp;
378		mtx_unlock(&vp->v_interlock);
379		mtx_lock(&mountlist_mtx);
380		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
381		mtx_unlock(&mountlist_mtx);
382		if (VFS_ROOT(mp, &newdp))
383			panic("mount: lost mount");
384		checkdirs(vp, newdp);
385		vput(newdp);
386		VOP_UNLOCK(vp, 0, p);
387		if ((mp->mnt_flag & MNT_RDONLY) == 0)
388			error = vfs_allocate_syncvnode(mp);
389		vfs_unbusy(mp, p);
390		if ((error = VFS_START(mp, 0, p)) != 0)
391			vrele(vp);
392	} else {
393		mtx_lock(&vp->v_interlock);
394		vp->v_flag &= ~VMOUNT;
395		mtx_unlock(&vp->v_interlock);
396		mp->mnt_vfc->vfc_refcount--;
397		vfs_unbusy(mp, p);
398		free((caddr_t)mp, M_MOUNT);
399		vput(vp);
400	}
401	return (error);
402}
403
404/*
405 * Scan all active processes to see if any of them have a current
406 * or root directory of `olddp'. If so, replace them with the new
407 * mount point.
408 */
409static void
410checkdirs(olddp, newdp)
411	struct vnode *olddp, *newdp;
412{
413	struct filedesc *fdp;
414	struct proc *p;
415
416	if (olddp->v_usecount == 1)
417		return;
418	sx_slock(&allproc_lock);
419	LIST_FOREACH(p, &allproc, p_list) {
420		fdp = p->p_fd;
421		if (fdp == NULL)
422			continue;
423		if (fdp->fd_cdir == olddp) {
424			vrele(fdp->fd_cdir);
425			VREF(newdp);
426			fdp->fd_cdir = newdp;
427		}
428		if (fdp->fd_rdir == olddp) {
429			vrele(fdp->fd_rdir);
430			VREF(newdp);
431			fdp->fd_rdir = newdp;
432		}
433	}
434	sx_sunlock(&allproc_lock);
435	if (rootvnode == olddp) {
436		vrele(rootvnode);
437		VREF(newdp);
438		rootvnode = newdp;
439	}
440}
441
442/*
443 * Unmount a file system.
444 *
445 * Note: unmount takes a path to the vnode mounted on as argument,
446 * not special file (as before).
447 */
448#ifndef _SYS_SYSPROTO_H_
449struct unmount_args {
450	char	*path;
451	int	flags;
452};
453#endif
454/* ARGSUSED */
455int
456unmount(p, uap)
457	struct proc *p;
458	register struct unmount_args /* {
459		syscallarg(char *) path;
460		syscallarg(int) flags;
461	} */ *uap;
462{
463	register struct vnode *vp;
464	struct mount *mp;
465	int error;
466	struct nameidata nd;
467
468	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
469	    SCARG(uap, path), p);
470	if ((error = namei(&nd)) != 0)
471		return (error);
472	vp = nd.ni_vp;
473	NDFREE(&nd, NDF_ONLY_PNBUF);
474	mp = vp->v_mount;
475
476	/*
477	 * Only root, or the user that did the original mount is
478	 * permitted to unmount this filesystem.
479	 */
480	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
481	    (error = suser(p))) {
482		vput(vp);
483		return (error);
484	}
485
486	/*
487	 * Don't allow unmounting the root file system.
488	 */
489	if (mp->mnt_flag & MNT_ROOTFS) {
490		vput(vp);
491		return (EINVAL);
492	}
493
494	/*
495	 * Must be the root of the filesystem
496	 */
497	if ((vp->v_flag & VROOT) == 0) {
498		vput(vp);
499		return (EINVAL);
500	}
501	vput(vp);
502	return (dounmount(mp, SCARG(uap, flags), p));
503}
504
505/*
506 * Do the actual file system unmount.
 *
 * mp:    the mount to take down.
 * flags: unmount flags; MNT_FORCE makes VFS_UNMOUNT() run even when the
 *        preceding VFS_SYNC() failed.  The flags are passed to
 *        VFS_UNMOUNT() unchanged.
 * p:     the process on whose behalf the unmount is done.
 *
 * Returns 0 once `mp' has been removed from the mount list and freed.
 * On failure the error from VFS_SYNC() or VFS_UNMOUNT() is returned and
 * the mount is put back into service (MNTK_UNMOUNT cleared, MNT_ASYNC
 * restored, directory references moved back).
507 */
508int
509dounmount(mp, flags, p)
510	struct mount *mp;
511	int flags;
512	struct proc *p;
513{
514	struct vnode *coveredvp, *fsrootvp;
515	int error;
516	int async_flag;
517
	/*
	 * Flag the mount as going away and drain mnt_lock.
	 * mountlist_mtx is passed to lockmgr() as the interlock and is
	 * taken again explicitly after vn_finished_write() below.
	 */
518	mtx_lock(&mountlist_mtx);
519	mp->mnt_kern_flag |= MNTK_UNMOUNT;
520	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_mtx, p);
	/* Paired with the vn_finished_write() after the unmount attempt. */
521	vn_start_write(NULL, &mp, V_WAIT);
522
523	if (mp->mnt_flag & MNT_EXPUBLIC)
524		vfs_setpublicfs(NULL, NULL, NULL);
525
526	vfs_msync(mp, MNT_WAIT);
	/* MNT_ASYNC is cleared for the attempt; saved to restore on error. */
527	async_flag = mp->mnt_flag & MNT_ASYNC;
528	mp->mnt_flag &=~ MNT_ASYNC;
529	cache_purgevfs(mp);	/* remove cache entries for this file sys */
530	if (mp->mnt_syncer != NULL)
531		vrele(mp->mnt_syncer);
532	/* Move process cdir/rdir refs on fs root to underlying vnode. */
533	if (VFS_ROOT(mp, &fsrootvp) == 0) {
534		if (mp->mnt_vnodecovered != NULL)
535			checkdirs(fsrootvp, mp->mnt_vnodecovered);
536		if (fsrootvp == rootvnode) {
537			vrele(rootvnode);
538			rootvnode = NULL;
539		}
540		vput(fsrootvp);
541	}
	/*
	 * VFS_UNMOUNT() is attempted when the filesystem is read-only, when
	 * VFS_SYNC() succeeded, or when MNT_FORCE was given.  Otherwise
	 * `error' keeps the VFS_SYNC() failure.
	 */
542	if (((mp->mnt_flag & MNT_RDONLY) ||
543	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
544	    (flags & MNT_FORCE)) {
545		error = VFS_UNMOUNT(mp, flags, p);
546	}
547	vn_finished_write(mp);
548	mtx_lock(&mountlist_mtx);
549	if (error) {
550		/* Undo cdir/rdir and rootvnode changes made above. */
551		if (VFS_ROOT(mp, &fsrootvp) == 0) {
552			if (mp->mnt_vnodecovered != NULL)
553				checkdirs(mp->mnt_vnodecovered, fsrootvp);
554			if (rootvnode == NULL) {
555				rootvnode = fsrootvp;
556				vref(rootvnode);
557			}
558			vput(fsrootvp);
559		}
		/*
		 * NOTE(review): mnt_syncer is not cleared here after the
		 * vrele() above; presumably releasing the syncer vnode
		 * resets it elsewhere -- confirm.
		 */
560		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
561			(void) vfs_allocate_syncvnode(mp);
562		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
563		mp->mnt_flag |= async_flag;
564		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
565		    &mountlist_mtx, p);
		/* Wake anybody who flagged MNTK_MWAIT on this mount. */
566		if (mp->mnt_kern_flag & MNTK_MWAIT)
567			wakeup((caddr_t)mp);
568		return (error);
569	}
	/* Success: unlink the mount and uncover the vnode it sat on. */
570	TAILQ_REMOVE(&mountlist, mp, mnt_list);
571	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
572		coveredvp->v_mountedhere = (struct mount *)0;
573		vrele(coveredvp);
574	}
575	mp->mnt_vfc->vfc_refcount--;
	/* Every vnode of the filesystem must be gone by now. */
576	if (!LIST_EMPTY(&mp->mnt_vnodelist))
577		panic("unmount: dangling vnode");
578	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, p);
579	lockdestroy(&mp->mnt_lock);
580	if (mp->mnt_kern_flag & MNTK_MWAIT)
581		wakeup((caddr_t)mp);
582	free((caddr_t)mp, M_MOUNT);
583	return (0);
584}
585
586/*
587 * Sync each mounted filesystem.
588 */
589#ifndef _SYS_SYSPROTO_H_
590struct sync_args {
591        int     dummy;
592};
593#endif
594
595#ifdef DEBUG
596static int syncprt = 0;
597SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
598#endif
599
600/* ARGSUSED */
601int
602sync(p, uap)
603	struct proc *p;
604	struct sync_args *uap;
605{
606	struct mount *mp, *nmp;
607	int asyncflag;
608
609	mtx_lock(&mountlist_mtx);
610	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
611		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
612			nmp = TAILQ_NEXT(mp, mnt_list);
613			continue;
614		}
615		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
616		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
617			asyncflag = mp->mnt_flag & MNT_ASYNC;
618			mp->mnt_flag &= ~MNT_ASYNC;
619			vfs_msync(mp, MNT_NOWAIT);
620			VFS_SYNC(mp, MNT_NOWAIT,
621			    ((p != NULL) ? p->p_ucred : NOCRED), p);
622			mp->mnt_flag |= asyncflag;
623			vn_finished_write(mp);
624		}
625		mtx_lock(&mountlist_mtx);
626		nmp = TAILQ_NEXT(mp, mnt_list);
627		vfs_unbusy(mp, p);
628	}
629	mtx_unlock(&mountlist_mtx);
630#if 0
631/*
632 * XXX don't call vfs_bufstats() yet because that routine
633 * was not imported in the Lite2 merge.
634 */
635#ifdef DIAGNOSTIC
636	if (syncprt)
637		vfs_bufstats();
638#endif /* DIAGNOSTIC */
639#endif
640	return (0);
641}
642
643/* XXX PRISON: could be per prison flag */
644static int prison_quotas;
645#if 0
646SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
647#endif
648
649/*
650 * Change filesystem quotas.
651 */
652#ifndef _SYS_SYSPROTO_H_
653struct quotactl_args {
654	char *path;
655	int cmd;
656	int uid;
657	caddr_t arg;
658};
659#endif
660/* ARGSUSED */
661int
662quotactl(p, uap)
663	struct proc *p;
664	register struct quotactl_args /* {
665		syscallarg(char *) path;
666		syscallarg(int) cmd;
667		syscallarg(int) uid;
668		syscallarg(caddr_t) arg;
669	} */ *uap;
670{
671	struct mount *mp;
672	int error;
673	struct nameidata nd;
674
675	if (jailed(p->p_ucred) && !prison_quotas)
676		return (EPERM);
677	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
678	if ((error = namei(&nd)) != 0)
679		return (error);
680	NDFREE(&nd, NDF_ONLY_PNBUF);
681	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
682	vrele(nd.ni_vp);
683	if (error)
684		return (error);
685	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
686	    SCARG(uap, arg), p);
687	vn_finished_write(mp);
688	return (error);
689}
690
691/*
692 * Get filesystem statistics.
693 */
694#ifndef _SYS_SYSPROTO_H_
695struct statfs_args {
696	char *path;
697	struct statfs *buf;
698};
699#endif
700/* ARGSUSED */
701int
702statfs(p, uap)
703	struct proc *p;
704	register struct statfs_args /* {
705		syscallarg(char *) path;
706		syscallarg(struct statfs *) buf;
707	} */ *uap;
708{
709	register struct mount *mp;
710	register struct statfs *sp;
711	int error;
712	struct nameidata nd;
713	struct statfs sb;
714
715	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
716	if ((error = namei(&nd)) != 0)
717		return (error);
718	mp = nd.ni_vp->v_mount;
719	sp = &mp->mnt_stat;
720	NDFREE(&nd, NDF_ONLY_PNBUF);
721	vrele(nd.ni_vp);
722	error = VFS_STATFS(mp, sp, p);
723	if (error)
724		return (error);
725	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
726	if (suser_xxx(p->p_ucred, 0, 0)) {
727		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
728		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
729		sp = &sb;
730	}
731	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
732}
733
734/*
735 * Get filesystem statistics.
736 */
737#ifndef _SYS_SYSPROTO_H_
738struct fstatfs_args {
739	int fd;
740	struct statfs *buf;
741};
742#endif
743/* ARGSUSED */
744int
745fstatfs(p, uap)
746	struct proc *p;
747	register struct fstatfs_args /* {
748		syscallarg(int) fd;
749		syscallarg(struct statfs *) buf;
750	} */ *uap;
751{
752	struct file *fp;
753	struct mount *mp;
754	register struct statfs *sp;
755	int error;
756	struct statfs sb;
757
758	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
759		return (error);
760	mp = ((struct vnode *)fp->f_data)->v_mount;
761	sp = &mp->mnt_stat;
762	error = VFS_STATFS(mp, sp, p);
763	if (error)
764		return (error);
765	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
766	if (suser_xxx(p->p_ucred, 0, 0)) {
767		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
768		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
769		sp = &sb;
770	}
771	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
772}
773
774/*
775 * Get statistics on all filesystems.
776 */
777#ifndef _SYS_SYSPROTO_H_
778struct getfsstat_args {
779	struct statfs *buf;
780	long bufsize;
781	int flags;
782};
783#endif
784int
785getfsstat(p, uap)
786	struct proc *p;
787	register struct getfsstat_args /* {
788		syscallarg(struct statfs *) buf;
789		syscallarg(long) bufsize;
790		syscallarg(int) flags;
791	} */ *uap;
792{
793	register struct mount *mp, *nmp;
794	register struct statfs *sp;
795	caddr_t sfsp;
796	long count, maxcount, error;
797
798	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
799	sfsp = (caddr_t)SCARG(uap, buf);
800	count = 0;
801	mtx_lock(&mountlist_mtx);
802	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
803		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
804			nmp = TAILQ_NEXT(mp, mnt_list);
805			continue;
806		}
807		if (sfsp && count < maxcount) {
808			sp = &mp->mnt_stat;
809			/*
810			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
811			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
812			 * overrides MNT_WAIT.
813			 */
814			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
815			    (SCARG(uap, flags) & MNT_WAIT)) &&
816			    (error = VFS_STATFS(mp, sp, p))) {
817				mtx_lock(&mountlist_mtx);
818				nmp = TAILQ_NEXT(mp, mnt_list);
819				vfs_unbusy(mp, p);
820				continue;
821			}
822			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
823			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
824			if (error) {
825				vfs_unbusy(mp, p);
826				return (error);
827			}
828			sfsp += sizeof(*sp);
829		}
830		count++;
831		mtx_lock(&mountlist_mtx);
832		nmp = TAILQ_NEXT(mp, mnt_list);
833		vfs_unbusy(mp, p);
834	}
835	mtx_unlock(&mountlist_mtx);
836	if (sfsp && count > maxcount)
837		p->p_retval[0] = maxcount;
838	else
839		p->p_retval[0] = count;
840	return (0);
841}
842
843/*
844 * Change current working directory to a given file descriptor.
845 */
846#ifndef _SYS_SYSPROTO_H_
847struct fchdir_args {
848	int	fd;
849};
850#endif
851/* ARGSUSED */
852int
853fchdir(p, uap)
854	struct proc *p;
855	struct fchdir_args /* {
856		syscallarg(int) fd;
857	} */ *uap;
858{
859	register struct filedesc *fdp = p->p_fd;
860	struct vnode *vp, *tdp;
861	struct mount *mp;
862	struct file *fp;
863	int error;
864
865	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
866		return (error);
867	vp = (struct vnode *)fp->f_data;
868	VREF(vp);
869	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
870	if (vp->v_type != VDIR)
871		error = ENOTDIR;
872	else
873		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
874	while (!error && (mp = vp->v_mountedhere) != NULL) {
875		if (vfs_busy(mp, 0, 0, p))
876			continue;
877		error = VFS_ROOT(mp, &tdp);
878		vfs_unbusy(mp, p);
879		if (error)
880			break;
881		vput(vp);
882		vp = tdp;
883	}
884	if (error) {
885		vput(vp);
886		return (error);
887	}
888	VOP_UNLOCK(vp, 0, p);
889	vrele(fdp->fd_cdir);
890	fdp->fd_cdir = vp;
891	return (0);
892}
893
894/*
895 * Change current working directory (``.'').
896 */
897#ifndef _SYS_SYSPROTO_H_
898struct chdir_args {
899	char	*path;
900};
901#endif
902/* ARGSUSED */
903int
904chdir(p, uap)
905	struct proc *p;
906	struct chdir_args /* {
907		syscallarg(char *) path;
908	} */ *uap;
909{
910	register struct filedesc *fdp = p->p_fd;
911	int error;
912	struct nameidata nd;
913
914	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
915	    SCARG(uap, path), p);
916	if ((error = change_dir(&nd, p)) != 0)
917		return (error);
918	NDFREE(&nd, NDF_ONLY_PNBUF);
919	vrele(fdp->fd_cdir);
920	fdp->fd_cdir = nd.ni_vp;
921	return (0);
922}
923
924/*
925 * Helper function for raised chroot(2) security function:  Refuse if
926 * any filedescriptors are open directories.
927 */
928static int
929chroot_refuse_vdir_fds(fdp)
930	struct filedesc *fdp;
931{
932	struct vnode *vp;
933	struct file *fp;
934	int error;
935	int fd;
936
937	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
938		error = getvnode(fdp, fd, &fp);
939		if (error)
940			continue;
941		vp = (struct vnode *)fp->f_data;
942		if (vp->v_type != VDIR)
943			continue;
944		return(EPERM);
945	}
946	return (0);
947}
948
949/*
950 * This sysctl determines if we will allow a process to chroot(2) if it
951 * has a directory open:
952 *	0: disallowed for all processes.
953 *	1: allowed for processes that were not already chroot(2)'ed.
954 *	2: allowed for all processes.
955 */
956
957static int chroot_allow_open_directories = 1;
958
959SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
960     &chroot_allow_open_directories, 0, "");
961
962/*
963 * Change notion of root (``/'') directory.
964 */
965#ifndef _SYS_SYSPROTO_H_
966struct chroot_args {
967	char	*path;
968};
969#endif
970/* ARGSUSED */
971int
972chroot(p, uap)
973	struct proc *p;
974	struct chroot_args /* {
975		syscallarg(char *) path;
976	} */ *uap;
977{
978	register struct filedesc *fdp = p->p_fd;
979	int error;
980	struct nameidata nd;
981
982	error = suser_xxx(0, p, PRISON_ROOT);
983	if (error)
984		return (error);
985	if (chroot_allow_open_directories == 0 ||
986	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode))
987		error = chroot_refuse_vdir_fds(fdp);
988	if (error)
989		return (error);
990	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
991	    SCARG(uap, path), p);
992	if ((error = change_dir(&nd, p)) != 0)
993		return (error);
994	NDFREE(&nd, NDF_ONLY_PNBUF);
995	vrele(fdp->fd_rdir);
996	fdp->fd_rdir = nd.ni_vp;
997	if (!fdp->fd_jdir) {
998		fdp->fd_jdir = nd.ni_vp;
999                VREF(fdp->fd_jdir);
1000	}
1001	return (0);
1002}
1003
1004/*
1005 * Common routine for chroot and chdir.
1006 */
1007static int
1008change_dir(ndp, p)
1009	register struct nameidata *ndp;
1010	struct proc *p;
1011{
1012	struct vnode *vp;
1013	int error;
1014
1015	error = namei(ndp);
1016	if (error)
1017		return (error);
1018	vp = ndp->ni_vp;
1019	if (vp->v_type != VDIR)
1020		error = ENOTDIR;
1021	else
1022		error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p);
1023	if (error)
1024		vput(vp);
1025	else
1026		VOP_UNLOCK(vp, 0, p);
1027	return (error);
1028}
1029
1030/*
1031 * Check permissions, allocate an open file structure,
1032 * and call the device open routine if any.
 *
 * open(2) system call entry point.  A file structure and descriptor slot
 * are allocated with falloc(), the path is opened through vn_open(), and
 * the file structure is then filled in to refer to the resulting vnode.
 * O_EXLOCK/O_SHLOCK and O_TRUNC are applied after the vnode is attached.
 *
 * On success the new descriptor index is stored in p->p_retval[0] and 0
 * is returned.  On failure an errno value is returned; ERESTART coming
 * out of the vn_open() failure path is converted to EINTR.
1033 */
1034#ifndef _SYS_SYSPROTO_H_
1035struct open_args {
1036	char	*path;
1037	int	flags;
1038	int	mode;
1039};
1040#endif
1041int
1042open(p, uap)
1043	struct proc *p;
1044	register struct open_args /* {
1045		syscallarg(char *) path;
1046		syscallarg(int) flags;
1047		syscallarg(int) mode;
1048	} */ *uap;
1049{
1050	struct filedesc *fdp = p->p_fd;
1051	struct file *fp;
1052	struct vnode *vp;
1053	struct vattr vat;
1054	struct mount *mp;
1055	int cmode, flags, oflags;
1056	struct file *nfp;
1057	int type, indx, error;
1058	struct flock lf;
1059	struct nameidata nd;
1060
1061	oflags = SCARG(uap, flags);
	/* An access mode with every O_ACCMODE bit set is rejected. */
1062	if ((oflags & O_ACCMODE) == O_ACCMODE)
1063		return (EINVAL);
1064	flags = FFLAGS(oflags);
1065	error = falloc(p, &nfp, &indx);
1066	if (error)
1067		return (error);
1068	fp = nfp;
	/* Creation mode: apply the process mask, keep ALLPERMS, drop S_ISTXT. */
1069	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1070	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
	/*
	 * Negative marker derived from the new index.  The error path below
	 * looks for a value >= 0 here -- presumably stored by the fdopen
	 * device routine during vn_open() (assumption; confirm).
	 */
1071	p->p_dupfd = -indx - 1;			/* XXX check for fdopen */
1072	/*
1073	 * Bump the ref count to prevent another process from closing
1074	 * the descriptor while we are blocked in vn_open()
1075	 */
1076	fhold(fp);
1077	error = vn_open(&nd, &flags, cmode);
1078	if (error) {
1079		/*
1080		 * release our own reference
1081		 */
1082		fdrop(fp, p);
1083
1084		/*
1085		 * handle special fdopen() case.  bleh.  dupfdopen() is
1086		 * responsible for dropping the old contents of ofiles[indx]
1087		 * if it succeeds.
1088		 */
1089		if ((error == ENODEV || error == ENXIO) &&
1090		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
1091		    (error =
1092			dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) {
1093			p->p_retval[0] = indx;
1094			return (0);
1095		}
1096		/*
1097		 * Clean up the descriptor, but only if another thread hadn't
1098		 * replaced or closed it.
1099		 */
1100		if (fdp->fd_ofiles[indx] == fp) {
1101			fdp->fd_ofiles[indx] = NULL;
1102			fdrop(fp, p);
1103		}
1104
1105		if (error == ERESTART)
1106			error = EINTR;
1107		return (error);
1108	}
1109	p->p_dupfd = 0;
1110	NDFREE(&nd, NDF_ONLY_PNBUF);
1111	vp = nd.ni_vp;
1112
1113	/*
1114	 * There should be 2 references on the file, one from the descriptor
1115	 * table, and one for us.
1116	 *
1117	 * Handle the case where someone closed the file (via its file
1118	 * descriptor) while we were blocked.  The end result should look
1119	 * like opening the file succeeded but it was immediately closed.
1120	 */
1121	if (fp->f_count == 1) {
1122		KASSERT(fdp->fd_ofiles[indx] != fp,
1123		    ("Open file descriptor lost all refs"));
1124		VOP_UNLOCK(vp, 0, p);
1125		vn_close(vp, flags & FMASK, fp->f_cred, p);
1126		fdrop(fp, p);
1127		p->p_retval[0] = indx;
1128		return 0;
1129	}
1130
	/* Attach the vnode to the file structure. */
1131	fp->f_data = (caddr_t)vp;
1132	fp->f_flag = flags & FMASK;
1133	fp->f_ops = &vnops;
1134	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1135	VOP_UNLOCK(vp, 0, p);
	/* O_EXLOCK/O_SHLOCK: take a whole-file lock via VOP_ADVLOCK(). */
1136	if (flags & (O_EXLOCK | O_SHLOCK)) {
1137		lf.l_whence = SEEK_SET;
1138		lf.l_start = 0;
1139		lf.l_len = 0;
1140		if (flags & O_EXLOCK)
1141			lf.l_type = F_WRLCK;
1142		else
1143			lf.l_type = F_RDLCK;
1144		type = F_FLOCK;
		/* F_WAIT is requested unless the open is non-blocking. */
1145		if ((flags & FNONBLOCK) == 0)
1146			type |= F_WAIT;
1147		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1148			goto bad;
1149		fp->f_flag |= FHASLOCK;
1150	}
	/* O_TRUNC: set the size to zero with VOP_SETATTR(). */
1151	if (flags & O_TRUNC) {
1152		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1153			goto bad;
1154		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1155		VATTR_NULL(&vat);
1156		vat.va_size = 0;
1157		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1158		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
1159		VOP_UNLOCK(vp, 0, p);
1160		vn_finished_write(mp);
1161		if (error)
1162			goto bad;
1163	}
1164	/* assert that vn_open created a backing object if one is needed */
1165	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1166		("open: vmio vnode has no backing object after vn_open"));
1167	/*
1168	 * Release our private reference, leaving the one associated with
1169	 * the descriptor table intact.
1170	 */
1171	fdrop(fp, p);
1172	p->p_retval[0] = indx;
1173	return (0);
1174bad:
	/*
	 * Failure after the vnode was attached: clear the descriptor slot
	 * if it still refers to this file (dropping that reference), then
	 * drop our private reference as well.
	 */
1175	if (fdp->fd_ofiles[indx] == fp) {
1176		fdp->fd_ofiles[indx] = NULL;
1177		fdrop(fp, p);
1178	}
1179	fdrop(fp, p);
1180	return (error);
1181}
1182
1183#ifdef COMPAT_43
1184/*
1185 * Create a file.
1186 */
1187#ifndef _SYS_SYSPROTO_H_
1188struct ocreat_args {
1189	char	*path;
1190	int	mode;
1191};
1192#endif
1193int
1194ocreat(p, uap)
1195	struct proc *p;
1196	register struct ocreat_args /* {
1197		syscallarg(char *) path;
1198		syscallarg(int) mode;
1199	} */ *uap;
1200{
1201	struct open_args /* {
1202		syscallarg(char *) path;
1203		syscallarg(int) flags;
1204		syscallarg(int) mode;
1205	} */ nuap;
1206
1207	SCARG(&nuap, path) = SCARG(uap, path);
1208	SCARG(&nuap, mode) = SCARG(uap, mode);
1209	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1210	return (open(p, &nuap));
1211}
1212#endif /* COMPAT_43 */
1213
1214/*
1215 * Create a special file.
1216 */
1217#ifndef _SYS_SYSPROTO_H_
1218struct mknod_args {
1219	char	*path;
1220	int	mode;
1221	int	dev;
1222};
1223#endif
1224/* ARGSUSED */
1225int
1226mknod(p, uap)
1227	struct proc *p;
1228	register struct mknod_args /* {
1229		syscallarg(char *) path;
1230		syscallarg(int) mode;
1231		syscallarg(int) dev;
1232	} */ *uap;
1233{
1234	struct vnode *vp;
1235	struct mount *mp;
1236	struct vattr vattr;
1237	int error;
1238	int whiteout = 0;
1239	struct nameidata nd;
1240
1241	switch (SCARG(uap, mode) & S_IFMT) {
1242	case S_IFCHR:
1243	case S_IFBLK:
1244		error = suser(p);
1245		break;
1246	default:
1247		error = suser_xxx(0, p, PRISON_ROOT);
1248		break;
1249	}
1250	if (error)
1251		return (error);
1252restart:
1253	bwillwrite();
1254	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1255	if ((error = namei(&nd)) != 0)
1256		return (error);
1257	vp = nd.ni_vp;
1258	if (vp != NULL) {
1259		vrele(vp);
1260		error = EEXIST;
1261	} else {
1262		VATTR_NULL(&vattr);
1263		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1264		vattr.va_rdev = SCARG(uap, dev);
1265		whiteout = 0;
1266
1267		switch (SCARG(uap, mode) & S_IFMT) {
1268		case S_IFMT:	/* used by badsect to flag bad sectors */
1269			vattr.va_type = VBAD;
1270			break;
1271		case S_IFCHR:
1272			vattr.va_type = VCHR;
1273			break;
1274		case S_IFBLK:
1275			vattr.va_type = VBLK;
1276			break;
1277		case S_IFWHT:
1278			whiteout = 1;
1279			break;
1280		default:
1281			error = EINVAL;
1282			break;
1283		}
1284	}
1285	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1286		NDFREE(&nd, NDF_ONLY_PNBUF);
1287		vput(nd.ni_dvp);
1288		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1289			return (error);
1290		goto restart;
1291	}
1292	if (!error) {
1293		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1294		if (whiteout)
1295			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1296		else {
1297			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1298						&nd.ni_cnd, &vattr);
1299			if (error == 0)
1300				vput(nd.ni_vp);
1301		}
1302	}
1303	NDFREE(&nd, NDF_ONLY_PNBUF);
1304	vput(nd.ni_dvp);
1305	vn_finished_write(mp);
1306	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1307	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1308	return (error);
1309}
1310
1311/*
1312 * Create a named pipe.
1313 */
1314#ifndef _SYS_SYSPROTO_H_
1315struct mkfifo_args {
1316	char	*path;
1317	int	mode;
1318};
1319#endif
1320/* ARGSUSED */
1321int
1322mkfifo(p, uap)
1323	struct proc *p;
1324	register struct mkfifo_args /* {
1325		syscallarg(char *) path;
1326		syscallarg(int) mode;
1327	} */ *uap;
1328{
1329	struct mount *mp;
1330	struct vattr vattr;
1331	int error;
1332	struct nameidata nd;
1333
1334restart:
1335	bwillwrite();
1336	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1337	if ((error = namei(&nd)) != 0)
1338		return (error);
1339	if (nd.ni_vp != NULL) {
1340		NDFREE(&nd, NDF_ONLY_PNBUF);
1341		vrele(nd.ni_vp);
1342		vput(nd.ni_dvp);
1343		return (EEXIST);
1344	}
1345	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1346		NDFREE(&nd, NDF_ONLY_PNBUF);
1347		vput(nd.ni_dvp);
1348		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1349			return (error);
1350		goto restart;
1351	}
1352	VATTR_NULL(&vattr);
1353	vattr.va_type = VFIFO;
1354	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
1355	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1356	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1357	if (error == 0)
1358		vput(nd.ni_vp);
1359	NDFREE(&nd, NDF_ONLY_PNBUF);
1360	vput(nd.ni_dvp);
1361	vn_finished_write(mp);
1362	return (error);
1363}
1364
1365/*
1366 * Make a hard file link.
1367 */
1368#ifndef _SYS_SYSPROTO_H_
1369struct link_args {
1370	char	*path;
1371	char	*link;
1372};
1373#endif
1374/* ARGSUSED */
1375int
1376link(p, uap)
1377	struct proc *p;
1378	register struct link_args /* {
1379		syscallarg(char *) path;
1380		syscallarg(char *) link;
1381	} */ *uap;
1382{
1383	struct vnode *vp;
1384	struct mount *mp;
1385	struct nameidata nd;
1386	int error;
1387
1388	bwillwrite();
1389	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p);
1390	if ((error = namei(&nd)) != 0)
1391		return (error);
1392	NDFREE(&nd, NDF_ONLY_PNBUF);
1393	vp = nd.ni_vp;
1394	if (vp->v_type == VDIR) {
1395		vrele(vp);
1396		return (EPERM);		/* POSIX */
1397	}
1398	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1399		vrele(vp);
1400		return (error);
1401	}
1402	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1403	if ((error = namei(&nd)) == 0) {
1404		if (nd.ni_vp != NULL) {
1405			vrele(nd.ni_vp);
1406			error = EEXIST;
1407		} else {
1408			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1409			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1410			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1411		}
1412		NDFREE(&nd, NDF_ONLY_PNBUF);
1413		vput(nd.ni_dvp);
1414	}
1415	vrele(vp);
1416	vn_finished_write(mp);
1417	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1418	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1419	return (error);
1420}
1421
1422/*
1423 * Make a symbolic link.
1424 */
1425#ifndef _SYS_SYSPROTO_H_
1426struct symlink_args {
1427	char	*path;
1428	char	*link;
1429};
1430#endif
1431/* ARGSUSED */
1432int
1433symlink(p, uap)
1434	struct proc *p;
1435	register struct symlink_args /* {
1436		syscallarg(char *) path;
1437		syscallarg(char *) link;
1438	} */ *uap;
1439{
1440	struct mount *mp;
1441	struct vattr vattr;
1442	char *path;
1443	int error;
1444	struct nameidata nd;
1445
1446	path = zalloc(namei_zone);
1447	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1448		goto out;
1449restart:
1450	bwillwrite();
1451	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
1452	if ((error = namei(&nd)) != 0)
1453		goto out;
1454	if (nd.ni_vp) {
1455		NDFREE(&nd, NDF_ONLY_PNBUF);
1456		vrele(nd.ni_vp);
1457		vput(nd.ni_dvp);
1458		error = EEXIST;
1459		goto out;
1460	}
1461	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1462		NDFREE(&nd, NDF_ONLY_PNBUF);
1463		vput(nd.ni_dvp);
1464		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1465			return (error);
1466		goto restart;
1467	}
1468	VATTR_NULL(&vattr);
1469	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
1470	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1471	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1472	NDFREE(&nd, NDF_ONLY_PNBUF);
1473	if (error == 0)
1474		vput(nd.ni_vp);
1475	vput(nd.ni_dvp);
1476	vn_finished_write(mp);
1477	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1478	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1479out:
1480	zfree(namei_zone, path);
1481	return (error);
1482}
1483
1484/*
1485 * Delete a whiteout from the filesystem.
1486 */
1487/* ARGSUSED */
1488int
1489undelete(p, uap)
1490	struct proc *p;
1491	register struct undelete_args /* {
1492		syscallarg(char *) path;
1493	} */ *uap;
1494{
1495	int error;
1496	struct mount *mp;
1497	struct nameidata nd;
1498
1499restart:
1500	bwillwrite();
1501	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1502	    SCARG(uap, path), p);
1503	error = namei(&nd);
1504	if (error)
1505		return (error);
1506
1507	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1508		NDFREE(&nd, NDF_ONLY_PNBUF);
1509		if (nd.ni_vp)
1510			vrele(nd.ni_vp);
1511		vput(nd.ni_dvp);
1512		return (EEXIST);
1513	}
1514	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1515		NDFREE(&nd, NDF_ONLY_PNBUF);
1516		vput(nd.ni_dvp);
1517		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1518			return (error);
1519		goto restart;
1520	}
1521	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1522	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1523	NDFREE(&nd, NDF_ONLY_PNBUF);
1524	vput(nd.ni_dvp);
1525	vn_finished_write(mp);
1526	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1527	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1528	return (error);
1529}
1530
1531/*
1532 * Delete a name from the filesystem.
1533 */
1534#ifndef _SYS_SYSPROTO_H_
1535struct unlink_args {
1536	char	*path;
1537};
1538#endif
1539/* ARGSUSED */
1540int
1541unlink(p, uap)
1542	struct proc *p;
1543	struct unlink_args /* {
1544		syscallarg(char *) path;
1545	} */ *uap;
1546{
1547	struct mount *mp;
1548	struct vnode *vp;
1549	int error;
1550	struct nameidata nd;
1551
1552restart:
1553	bwillwrite();
1554	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
1555	if ((error = namei(&nd)) != 0)
1556		return (error);
1557	vp = nd.ni_vp;
1558	if (vp->v_type == VDIR)
1559		error = EPERM;		/* POSIX */
1560	else {
1561		/*
1562		 * The root of a mounted filesystem cannot be deleted.
1563		 *
1564		 * XXX: can this only be a VDIR case?
1565		 */
1566		if (vp->v_flag & VROOT)
1567			error = EBUSY;
1568	}
1569	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1570		NDFREE(&nd, NDF_ONLY_PNBUF);
1571		vrele(vp);
1572		vput(nd.ni_dvp);
1573		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1574			return (error);
1575		goto restart;
1576	}
1577	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
1578	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1579	if (!error) {
1580		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1581		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1582	}
1583	NDFREE(&nd, NDF_ONLY_PNBUF);
1584	vput(nd.ni_dvp);
1585	vput(vp);
1586	vn_finished_write(mp);
1587	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1588	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1589	return (error);
1590}
1591
1592/*
1593 * Reposition read/write file offset.
1594 */
1595#ifndef _SYS_SYSPROTO_H_
1596struct lseek_args {
1597	int	fd;
1598	int	pad;
1599	off_t	offset;
1600	int	whence;
1601};
1602#endif
1603int
1604lseek(p, uap)
1605	struct proc *p;
1606	register struct lseek_args /* {
1607		syscallarg(int) fd;
1608		syscallarg(int) pad;
1609		syscallarg(off_t) offset;
1610		syscallarg(int) whence;
1611	} */ *uap;
1612{
1613	struct ucred *cred = p->p_ucred;
1614	register struct filedesc *fdp = p->p_fd;
1615	register struct file *fp;
1616	struct vattr vattr;
1617	int error;
1618
1619	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
1620	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
1621		return (EBADF);
1622	if (fp->f_type != DTYPE_VNODE)
1623		return (ESPIPE);
1624	switch (SCARG(uap, whence)) {
1625	case L_INCR:
1626		fp->f_offset += SCARG(uap, offset);
1627		break;
1628	case L_XTND:
1629		error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p);
1630		if (error)
1631			return (error);
1632		fp->f_offset = SCARG(uap, offset) + vattr.va_size;
1633		break;
1634	case L_SET:
1635		fp->f_offset = SCARG(uap, offset);
1636		break;
1637	default:
1638		return (EINVAL);
1639	}
1640	*(off_t *)(p->p_retval) = fp->f_offset;
1641	return (0);
1642}
1643
1644#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1645/*
1646 * Reposition read/write file offset.
1647 */
1648#ifndef _SYS_SYSPROTO_H_
1649struct olseek_args {
1650	int	fd;
1651	long	offset;
1652	int	whence;
1653};
1654#endif
1655int
1656olseek(p, uap)
1657	struct proc *p;
1658	register struct olseek_args /* {
1659		syscallarg(int) fd;
1660		syscallarg(long) offset;
1661		syscallarg(int) whence;
1662	} */ *uap;
1663{
1664	struct lseek_args /* {
1665		syscallarg(int) fd;
1666		syscallarg(int) pad;
1667		syscallarg(off_t) offset;
1668		syscallarg(int) whence;
1669	} */ nuap;
1670	int error;
1671
1672	SCARG(&nuap, fd) = SCARG(uap, fd);
1673	SCARG(&nuap, offset) = SCARG(uap, offset);
1674	SCARG(&nuap, whence) = SCARG(uap, whence);
1675	error = lseek(p, &nuap);
1676	return (error);
1677}
1678#endif /* COMPAT_43 */
1679
1680/*
1681 * Check access permissions.
1682 */
1683#ifndef _SYS_SYSPROTO_H_
1684struct access_args {
1685	char	*path;
1686	int	flags;
1687};
1688#endif
1689int
1690access(p, uap)
1691	struct proc *p;
1692	register struct access_args /* {
1693		syscallarg(char *) path;
1694		syscallarg(int) flags;
1695	} */ *uap;
1696{
1697	struct ucred *cred, *tmpcred;
1698	register struct vnode *vp;
1699	int error, flags;
1700	struct nameidata nd;
1701
1702	cred = p->p_ucred;
1703	/*
1704	 * Create and modify a temporary credential instead of one that
1705	 * is potentially shared.  This could also mess up socket
1706	 * buffer accounting which can run in an interrupt context.
1707	 *
1708	 * XXX - Depending on how "threads" are finally implemented, it
1709	 * may be better to explicitly pass the credential to namei()
1710	 * rather than to modify the potentially shared process structure.
1711	 */
1712	tmpcred = crdup(cred);
1713	tmpcred->cr_uid = cred->cr_ruid;
1714	tmpcred->cr_groups[0] = cred->cr_rgid;
1715	p->p_ucred = tmpcred;
1716	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1717	    SCARG(uap, path), p);
1718	if ((error = namei(&nd)) != 0)
1719		goto out1;
1720	vp = nd.ni_vp;
1721
1722	/* Flags == 0 means only check for existence. */
1723	if (SCARG(uap, flags)) {
1724		flags = 0;
1725		if (SCARG(uap, flags) & R_OK)
1726			flags |= VREAD;
1727		if (SCARG(uap, flags) & W_OK)
1728			flags |= VWRITE;
1729		if (SCARG(uap, flags) & X_OK)
1730			flags |= VEXEC;
1731		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1732			error = VOP_ACCESS(vp, flags, tmpcred, p);
1733	}
1734	NDFREE(&nd, NDF_ONLY_PNBUF);
1735	vput(vp);
1736out1:
1737	p->p_ucred = cred;
1738	crfree(tmpcred);
1739	return (error);
1740}
1741
1742#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1743/*
1744 * Get file status; this version follows links.
1745 */
1746#ifndef _SYS_SYSPROTO_H_
1747struct ostat_args {
1748	char	*path;
1749	struct ostat *ub;
1750};
1751#endif
1752/* ARGSUSED */
1753int
1754ostat(p, uap)
1755	struct proc *p;
1756	register struct ostat_args /* {
1757		syscallarg(char *) path;
1758		syscallarg(struct ostat *) ub;
1759	} */ *uap;
1760{
1761	struct stat sb;
1762	struct ostat osb;
1763	int error;
1764	struct nameidata nd;
1765
1766	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1767	    SCARG(uap, path), p);
1768	if ((error = namei(&nd)) != 0)
1769		return (error);
1770	NDFREE(&nd, NDF_ONLY_PNBUF);
1771	error = vn_stat(nd.ni_vp, &sb, p);
1772	vput(nd.ni_vp);
1773	if (error)
1774		return (error);
1775	cvtstat(&sb, &osb);
1776	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1777	return (error);
1778}
1779
1780/*
1781 * Get file status; this version does not follow links.
1782 */
1783#ifndef _SYS_SYSPROTO_H_
1784struct olstat_args {
1785	char	*path;
1786	struct ostat *ub;
1787};
1788#endif
1789/* ARGSUSED */
1790int
1791olstat(p, uap)
1792	struct proc *p;
1793	register struct olstat_args /* {
1794		syscallarg(char *) path;
1795		syscallarg(struct ostat *) ub;
1796	} */ *uap;
1797{
1798	struct vnode *vp;
1799	struct stat sb;
1800	struct ostat osb;
1801	int error;
1802	struct nameidata nd;
1803
1804	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1805	    SCARG(uap, path), p);
1806	if ((error = namei(&nd)) != 0)
1807		return (error);
1808	vp = nd.ni_vp;
1809	error = vn_stat(vp, &sb, p);
1810	NDFREE(&nd, NDF_ONLY_PNBUF);
1811	vput(vp);
1812	if (error)
1813		return (error);
1814	cvtstat(&sb, &osb);
1815	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1816	return (error);
1817}
1818
1819/*
1820 * Convert from an old to a new stat structure.
1821 */
1822void
1823cvtstat(st, ost)
1824	struct stat *st;
1825	struct ostat *ost;
1826{
1827
1828	ost->st_dev = st->st_dev;
1829	ost->st_ino = st->st_ino;
1830	ost->st_mode = st->st_mode;
1831	ost->st_nlink = st->st_nlink;
1832	ost->st_uid = st->st_uid;
1833	ost->st_gid = st->st_gid;
1834	ost->st_rdev = st->st_rdev;
1835	if (st->st_size < (quad_t)1 << 32)
1836		ost->st_size = st->st_size;
1837	else
1838		ost->st_size = -2;
1839	ost->st_atime = st->st_atime;
1840	ost->st_mtime = st->st_mtime;
1841	ost->st_ctime = st->st_ctime;
1842	ost->st_blksize = st->st_blksize;
1843	ost->st_blocks = st->st_blocks;
1844	ost->st_flags = st->st_flags;
1845	ost->st_gen = st->st_gen;
1846}
1847#endif /* COMPAT_43 || COMPAT_SUNOS */
1848
1849/*
1850 * Get file status; this version follows links.
1851 */
1852#ifndef _SYS_SYSPROTO_H_
1853struct stat_args {
1854	char	*path;
1855	struct stat *ub;
1856};
1857#endif
1858/* ARGSUSED */
1859int
1860stat(p, uap)
1861	struct proc *p;
1862	register struct stat_args /* {
1863		syscallarg(char *) path;
1864		syscallarg(struct stat *) ub;
1865	} */ *uap;
1866{
1867	struct stat sb;
1868	int error;
1869	struct nameidata nd;
1870
1871	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1872	    SCARG(uap, path), p);
1873	if ((error = namei(&nd)) != 0)
1874		return (error);
1875	error = vn_stat(nd.ni_vp, &sb, p);
1876	NDFREE(&nd, NDF_ONLY_PNBUF);
1877	vput(nd.ni_vp);
1878	if (error)
1879		return (error);
1880	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1881	return (error);
1882}
1883
1884/*
1885 * Get file status; this version does not follow links.
1886 */
1887#ifndef _SYS_SYSPROTO_H_
1888struct lstat_args {
1889	char	*path;
1890	struct stat *ub;
1891};
1892#endif
1893/* ARGSUSED */
1894int
1895lstat(p, uap)
1896	struct proc *p;
1897	register struct lstat_args /* {
1898		syscallarg(char *) path;
1899		syscallarg(struct stat *) ub;
1900	} */ *uap;
1901{
1902	int error;
1903	struct vnode *vp;
1904	struct stat sb;
1905	struct nameidata nd;
1906
1907	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1908	    SCARG(uap, path), p);
1909	if ((error = namei(&nd)) != 0)
1910		return (error);
1911	vp = nd.ni_vp;
1912	error = vn_stat(vp, &sb, p);
1913	NDFREE(&nd, NDF_ONLY_PNBUF);
1914	vput(vp);
1915	if (error)
1916		return (error);
1917	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
1918	return (error);
1919}
1920
1921/*
1922 * Implementation of the NetBSD stat() function.
1923 * XXX This should probably be collapsed with the FreeBSD version,
1924 * as the differences are only due to vn_stat() clearing spares at
1925 * the end of the structures.  vn_stat could be split to avoid this,
1926 * and thus collapse the following to close to zero code.
1927 */
1928void
1929cvtnstat(sb, nsb)
1930	struct stat *sb;
1931	struct nstat *nsb;
1932{
1933	nsb->st_dev = sb->st_dev;
1934	nsb->st_ino = sb->st_ino;
1935	nsb->st_mode = sb->st_mode;
1936	nsb->st_nlink = sb->st_nlink;
1937	nsb->st_uid = sb->st_uid;
1938	nsb->st_gid = sb->st_gid;
1939	nsb->st_rdev = sb->st_rdev;
1940	nsb->st_atimespec = sb->st_atimespec;
1941	nsb->st_mtimespec = sb->st_mtimespec;
1942	nsb->st_ctimespec = sb->st_ctimespec;
1943	nsb->st_size = sb->st_size;
1944	nsb->st_blocks = sb->st_blocks;
1945	nsb->st_blksize = sb->st_blksize;
1946	nsb->st_flags = sb->st_flags;
1947	nsb->st_gen = sb->st_gen;
1948	nsb->st_qspare[0] = sb->st_qspare[0];
1949	nsb->st_qspare[1] = sb->st_qspare[1];
1950}
1951
1952#ifndef _SYS_SYSPROTO_H_
1953struct nstat_args {
1954	char	*path;
1955	struct nstat *ub;
1956};
1957#endif
1958/* ARGSUSED */
1959int
1960nstat(p, uap)
1961	struct proc *p;
1962	register struct nstat_args /* {
1963		syscallarg(char *) path;
1964		syscallarg(struct nstat *) ub;
1965	} */ *uap;
1966{
1967	struct stat sb;
1968	struct nstat nsb;
1969	int error;
1970	struct nameidata nd;
1971
1972	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1973	    SCARG(uap, path), p);
1974	if ((error = namei(&nd)) != 0)
1975		return (error);
1976	NDFREE(&nd, NDF_ONLY_PNBUF);
1977	error = vn_stat(nd.ni_vp, &sb, p);
1978	vput(nd.ni_vp);
1979	if (error)
1980		return (error);
1981	cvtnstat(&sb, &nsb);
1982	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
1983	return (error);
1984}
1985
1986/*
1987 * NetBSD lstat.  Get file status; this version does not follow links.
1988 */
1989#ifndef _SYS_SYSPROTO_H_
1990struct lstat_args {
1991	char	*path;
1992	struct stat *ub;
1993};
1994#endif
1995/* ARGSUSED */
1996int
1997nlstat(p, uap)
1998	struct proc *p;
1999	register struct nlstat_args /* {
2000		syscallarg(char *) path;
2001		syscallarg(struct nstat *) ub;
2002	} */ *uap;
2003{
2004	int error;
2005	struct vnode *vp;
2006	struct stat sb;
2007	struct nstat nsb;
2008	struct nameidata nd;
2009
2010	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2011	    SCARG(uap, path), p);
2012	if ((error = namei(&nd)) != 0)
2013		return (error);
2014	vp = nd.ni_vp;
2015	NDFREE(&nd, NDF_ONLY_PNBUF);
2016	error = vn_stat(vp, &sb, p);
2017	vput(vp);
2018	if (error)
2019		return (error);
2020	cvtnstat(&sb, &nsb);
2021	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2022	return (error);
2023}
2024
2025/*
2026 * Get configurable pathname variables.
2027 */
2028#ifndef _SYS_SYSPROTO_H_
2029struct pathconf_args {
2030	char	*path;
2031	int	name;
2032};
2033#endif
2034/* ARGSUSED */
2035int
2036pathconf(p, uap)
2037	struct proc *p;
2038	register struct pathconf_args /* {
2039		syscallarg(char *) path;
2040		syscallarg(int) name;
2041	} */ *uap;
2042{
2043	int error;
2044	struct nameidata nd;
2045
2046	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2047	    SCARG(uap, path), p);
2048	if ((error = namei(&nd)) != 0)
2049		return (error);
2050	NDFREE(&nd, NDF_ONLY_PNBUF);
2051	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval);
2052	vput(nd.ni_vp);
2053	return (error);
2054}
2055
2056/*
2057 * Return target name of a symbolic link.
2058 */
2059#ifndef _SYS_SYSPROTO_H_
2060struct readlink_args {
2061	char	*path;
2062	char	*buf;
2063	int	count;
2064};
2065#endif
2066/* ARGSUSED */
2067int
2068readlink(p, uap)
2069	struct proc *p;
2070	register struct readlink_args /* {
2071		syscallarg(char *) path;
2072		syscallarg(char *) buf;
2073		syscallarg(int) count;
2074	} */ *uap;
2075{
2076	register struct vnode *vp;
2077	struct iovec aiov;
2078	struct uio auio;
2079	int error;
2080	struct nameidata nd;
2081
2082	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2083	    SCARG(uap, path), p);
2084	if ((error = namei(&nd)) != 0)
2085		return (error);
2086	NDFREE(&nd, NDF_ONLY_PNBUF);
2087	vp = nd.ni_vp;
2088	if (vp->v_type != VLNK)
2089		error = EINVAL;
2090	else {
2091		aiov.iov_base = SCARG(uap, buf);
2092		aiov.iov_len = SCARG(uap, count);
2093		auio.uio_iov = &aiov;
2094		auio.uio_iovcnt = 1;
2095		auio.uio_offset = 0;
2096		auio.uio_rw = UIO_READ;
2097		auio.uio_segflg = UIO_USERSPACE;
2098		auio.uio_procp = p;
2099		auio.uio_resid = SCARG(uap, count);
2100		error = VOP_READLINK(vp, &auio, p->p_ucred);
2101	}
2102	vput(vp);
2103	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
2104	return (error);
2105}
2106
2107/*
2108 * Common implementation code for chflags() and fchflags().
2109 */
2110static int
2111setfflags(p, vp, flags)
2112	struct proc *p;
2113	struct vnode *vp;
2114	int flags;
2115{
2116	int error;
2117	struct mount *mp;
2118	struct vattr vattr;
2119
2120	/*
2121	 * Prevent non-root users from setting flags on devices.  When
2122	 * a device is reused, users can retain ownership of the device
2123	 * if they are allowed to set flags and programs assume that
2124	 * chown can't fail when done as root.
2125	 */
2126	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
2127	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
2128		return (error);
2129
2130	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2131		return (error);
2132	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2133	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2134	VATTR_NULL(&vattr);
2135	vattr.va_flags = flags;
2136	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2137	VOP_UNLOCK(vp, 0, p);
2138	vn_finished_write(mp);
2139	return (error);
2140}
2141
2142/*
2143 * Change flags of a file given a path name.
2144 */
2145#ifndef _SYS_SYSPROTO_H_
2146struct chflags_args {
2147	char	*path;
2148	int	flags;
2149};
2150#endif
2151/* ARGSUSED */
2152int
2153chflags(p, uap)
2154	struct proc *p;
2155	register struct chflags_args /* {
2156		syscallarg(char *) path;
2157		syscallarg(int) flags;
2158	} */ *uap;
2159{
2160	int error;
2161	struct nameidata nd;
2162
2163	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2164	if ((error = namei(&nd)) != 0)
2165		return (error);
2166	NDFREE(&nd, NDF_ONLY_PNBUF);
2167	error = setfflags(p, nd.ni_vp, SCARG(uap, flags));
2168	vrele(nd.ni_vp);
2169	return error;
2170}
2171
2172/*
2173 * Change flags of a file given a file descriptor.
2174 */
2175#ifndef _SYS_SYSPROTO_H_
2176struct fchflags_args {
2177	int	fd;
2178	int	flags;
2179};
2180#endif
2181/* ARGSUSED */
2182int
2183fchflags(p, uap)
2184	struct proc *p;
2185	register struct fchflags_args /* {
2186		syscallarg(int) fd;
2187		syscallarg(int) flags;
2188	} */ *uap;
2189{
2190	struct file *fp;
2191	int error;
2192
2193	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2194		return (error);
2195	return setfflags(p, (struct vnode *) fp->f_data, SCARG(uap, flags));
2196}
2197
2198/*
2199 * Common implementation code for chmod(), lchmod() and fchmod().
2200 */
2201static int
2202setfmode(p, vp, mode)
2203	struct proc *p;
2204	struct vnode *vp;
2205	int mode;
2206{
2207	int error;
2208	struct mount *mp;
2209	struct vattr vattr;
2210
2211	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2212		return (error);
2213	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2214	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2215	VATTR_NULL(&vattr);
2216	vattr.va_mode = mode & ALLPERMS;
2217	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2218	VOP_UNLOCK(vp, 0, p);
2219	vn_finished_write(mp);
2220	return error;
2221}
2222
2223/*
2224 * Change mode of a file given path name.
2225 */
2226#ifndef _SYS_SYSPROTO_H_
2227struct chmod_args {
2228	char	*path;
2229	int	mode;
2230};
2231#endif
2232/* ARGSUSED */
2233int
2234chmod(p, uap)
2235	struct proc *p;
2236	register struct chmod_args /* {
2237		syscallarg(char *) path;
2238		syscallarg(int) mode;
2239	} */ *uap;
2240{
2241	int error;
2242	struct nameidata nd;
2243
2244	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2245	if ((error = namei(&nd)) != 0)
2246		return (error);
2247	NDFREE(&nd, NDF_ONLY_PNBUF);
2248	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2249	vrele(nd.ni_vp);
2250	return error;
2251}
2252
2253/*
2254 * Change mode of a file given path name (don't follow links.)
2255 */
2256#ifndef _SYS_SYSPROTO_H_
2257struct lchmod_args {
2258	char	*path;
2259	int	mode;
2260};
2261#endif
2262/* ARGSUSED */
2263int
2264lchmod(p, uap)
2265	struct proc *p;
2266	register struct lchmod_args /* {
2267		syscallarg(char *) path;
2268		syscallarg(int) mode;
2269	} */ *uap;
2270{
2271	int error;
2272	struct nameidata nd;
2273
2274	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2275	if ((error = namei(&nd)) != 0)
2276		return (error);
2277	NDFREE(&nd, NDF_ONLY_PNBUF);
2278	error = setfmode(p, nd.ni_vp, SCARG(uap, mode));
2279	vrele(nd.ni_vp);
2280	return error;
2281}
2282
2283/*
2284 * Change mode of a file given a file descriptor.
2285 */
2286#ifndef _SYS_SYSPROTO_H_
2287struct fchmod_args {
2288	int	fd;
2289	int	mode;
2290};
2291#endif
2292/* ARGSUSED */
2293int
2294fchmod(p, uap)
2295	struct proc *p;
2296	register struct fchmod_args /* {
2297		syscallarg(int) fd;
2298		syscallarg(int) mode;
2299	} */ *uap;
2300{
2301	struct file *fp;
2302	int error;
2303
2304	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2305		return (error);
2306	return setfmode(p, (struct vnode *)fp->f_data, SCARG(uap, mode));
2307}
2308
2309/*
2310 * Common implementation for chown(), lchown(), and fchown()
2311 */
2312static int
2313setfown(p, vp, uid, gid)
2314	struct proc *p;
2315	struct vnode *vp;
2316	uid_t uid;
2317	gid_t gid;
2318{
2319	int error;
2320	struct mount *mp;
2321	struct vattr vattr;
2322
2323	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2324		return (error);
2325	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2326	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2327	VATTR_NULL(&vattr);
2328	vattr.va_uid = uid;
2329	vattr.va_gid = gid;
2330	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2331	VOP_UNLOCK(vp, 0, p);
2332	vn_finished_write(mp);
2333	return error;
2334}
2335
2336/*
2337 * Set ownership given a path name.
2338 */
2339#ifndef _SYS_SYSPROTO_H_
2340struct chown_args {
2341	char	*path;
2342	int	uid;
2343	int	gid;
2344};
2345#endif
2346/* ARGSUSED */
2347int
2348chown(p, uap)
2349	struct proc *p;
2350	register struct chown_args /* {
2351		syscallarg(char *) path;
2352		syscallarg(int) uid;
2353		syscallarg(int) gid;
2354	} */ *uap;
2355{
2356	int error;
2357	struct nameidata nd;
2358
2359	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2360	if ((error = namei(&nd)) != 0)
2361		return (error);
2362	NDFREE(&nd, NDF_ONLY_PNBUF);
2363	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2364	vrele(nd.ni_vp);
2365	return (error);
2366}
2367
2368/*
2369 * Set ownership given a path name, do not cross symlinks.
2370 */
2371#ifndef _SYS_SYSPROTO_H_
2372struct lchown_args {
2373	char	*path;
2374	int	uid;
2375	int	gid;
2376};
2377#endif
2378/* ARGSUSED */
2379int
2380lchown(p, uap)
2381	struct proc *p;
2382	register struct lchown_args /* {
2383		syscallarg(char *) path;
2384		syscallarg(int) uid;
2385		syscallarg(int) gid;
2386	} */ *uap;
2387{
2388	int error;
2389	struct nameidata nd;
2390
2391	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2392	if ((error = namei(&nd)) != 0)
2393		return (error);
2394	NDFREE(&nd, NDF_ONLY_PNBUF);
2395	error = setfown(p, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2396	vrele(nd.ni_vp);
2397	return (error);
2398}
2399
2400/*
2401 * Set ownership given a file descriptor.
2402 */
2403#ifndef _SYS_SYSPROTO_H_
2404struct fchown_args {
2405	int	fd;
2406	int	uid;
2407	int	gid;
2408};
2409#endif
2410/* ARGSUSED */
2411int
2412fchown(p, uap)
2413	struct proc *p;
2414	register struct fchown_args /* {
2415		syscallarg(int) fd;
2416		syscallarg(int) uid;
2417		syscallarg(int) gid;
2418	} */ *uap;
2419{
2420	struct file *fp;
2421	int error;
2422
2423	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2424		return (error);
2425	return setfown(p, (struct vnode *)fp->f_data,
2426		SCARG(uap, uid), SCARG(uap, gid));
2427}
2428
2429/*
2430 * Common implementation code for utimes(), lutimes(), and futimes().
2431 */
2432static int
2433getutimes(usrtvp, tsp)
2434	const struct timeval *usrtvp;
2435	struct timespec *tsp;
2436{
2437	struct timeval tv[2];
2438	int error;
2439
2440	if (usrtvp == NULL) {
2441		microtime(&tv[0]);
2442		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2443		tsp[1] = tsp[0];
2444	} else {
2445		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2446			return (error);
2447		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2448		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2449	}
2450	return 0;
2451}
2452
2453/*
2454 * Common implementation code for utimes(), lutimes(), and futimes().
2455 */
2456static int
2457setutimes(p, vp, ts, nullflag)
2458	struct proc *p;
2459	struct vnode *vp;
2460	const struct timespec *ts;
2461	int nullflag;
2462{
2463	int error;
2464	struct mount *mp;
2465	struct vattr vattr;
2466
2467	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2468		return (error);
2469	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2470	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2471	VATTR_NULL(&vattr);
2472	vattr.va_atime = ts[0];
2473	vattr.va_mtime = ts[1];
2474	if (nullflag)
2475		vattr.va_vaflags |= VA_UTIMES_NULL;
2476	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2477	VOP_UNLOCK(vp, 0, p);
2478	vn_finished_write(mp);
2479	return error;
2480}
2481
2482/*
2483 * Set the access and modification times of a file.
2484 */
2485#ifndef _SYS_SYSPROTO_H_
2486struct utimes_args {
2487	char	*path;
2488	struct	timeval *tptr;
2489};
2490#endif
2491/* ARGSUSED */
2492int
2493utimes(p, uap)
2494	struct proc *p;
2495	register struct utimes_args /* {
2496		syscallarg(char *) path;
2497		syscallarg(struct timeval *) tptr;
2498	} */ *uap;
2499{
2500	struct timespec ts[2];
2501	struct timeval *usrtvp;
2502	int error;
2503	struct nameidata nd;
2504
2505	usrtvp = SCARG(uap, tptr);
2506	if ((error = getutimes(usrtvp, ts)) != 0)
2507		return (error);
2508	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2509	if ((error = namei(&nd)) != 0)
2510		return (error);
2511	NDFREE(&nd, NDF_ONLY_PNBUF);
2512	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2513	vrele(nd.ni_vp);
2514	return (error);
2515}
2516
2517/*
2518 * Set the access and modification times of a file.
2519 */
2520#ifndef _SYS_SYSPROTO_H_
2521struct lutimes_args {
2522	char	*path;
2523	struct	timeval *tptr;
2524};
2525#endif
2526/* ARGSUSED */
2527int
2528lutimes(p, uap)
2529	struct proc *p;
2530	register struct lutimes_args /* {
2531		syscallarg(char *) path;
2532		syscallarg(struct timeval *) tptr;
2533	} */ *uap;
2534{
2535	struct timespec ts[2];
2536	struct timeval *usrtvp;
2537	int error;
2538	struct nameidata nd;
2539
2540	usrtvp = SCARG(uap, tptr);
2541	if ((error = getutimes(usrtvp, ts)) != 0)
2542		return (error);
2543	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2544	if ((error = namei(&nd)) != 0)
2545		return (error);
2546	NDFREE(&nd, NDF_ONLY_PNBUF);
2547	error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL);
2548	vrele(nd.ni_vp);
2549	return (error);
2550}
2551
2552/*
2553 * Set the access and modification times of a file.
2554 */
2555#ifndef _SYS_SYSPROTO_H_
2556struct futimes_args {
2557	int	fd;
2558	struct	timeval *tptr;
2559};
2560#endif
2561/* ARGSUSED */
2562int
2563futimes(p, uap)
2564	struct proc *p;
2565	register struct futimes_args /* {
2566		syscallarg(int ) fd;
2567		syscallarg(struct timeval *) tptr;
2568	} */ *uap;
2569{
2570	struct timespec ts[2];
2571	struct file *fp;
2572	struct timeval *usrtvp;
2573	int error;
2574
2575	usrtvp = SCARG(uap, tptr);
2576	if ((error = getutimes(usrtvp, ts)) != 0)
2577		return (error);
2578	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2579		return (error);
2580	return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2581}
2582
2583/*
2584 * Truncate a file given its path name.
2585 */
2586#ifndef _SYS_SYSPROTO_H_
2587struct truncate_args {
2588	char	*path;
2589	int	pad;
2590	off_t	length;
2591};
2592#endif
2593/* ARGSUSED */
2594int
2595truncate(p, uap)
2596	struct proc *p;
2597	register struct truncate_args /* {
2598		syscallarg(char *) path;
2599		syscallarg(int) pad;
2600		syscallarg(off_t) length;
2601	} */ *uap;
2602{
2603	struct mount *mp;
2604	struct vnode *vp;
2605	struct vattr vattr;
2606	int error;
2607	struct nameidata nd;
2608
2609	if (uap->length < 0)
2610		return(EINVAL);
2611	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
2612	if ((error = namei(&nd)) != 0)
2613		return (error);
2614	vp = nd.ni_vp;
2615	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2616		vrele(vp);
2617		return (error);
2618	}
2619	NDFREE(&nd, NDF_ONLY_PNBUF);
2620	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2621	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2622	if (vp->v_type == VDIR)
2623		error = EISDIR;
2624	else if ((error = vn_writechk(vp)) == 0 &&
2625	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
2626		VATTR_NULL(&vattr);
2627		vattr.va_size = SCARG(uap, length);
2628		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
2629	}
2630	vput(vp);
2631	vn_finished_write(mp);
2632	return (error);
2633}
2634
2635/*
2636 * Truncate a file given a file descriptor.
2637 */
2638#ifndef _SYS_SYSPROTO_H_
2639struct ftruncate_args {
2640	int	fd;
2641	int	pad;
2642	off_t	length;
2643};
2644#endif
2645/* ARGSUSED */
2646int
2647ftruncate(p, uap)
2648	struct proc *p;
2649	register struct ftruncate_args /* {
2650		syscallarg(int) fd;
2651		syscallarg(int) pad;
2652		syscallarg(off_t) length;
2653	} */ *uap;
2654{
2655	struct mount *mp;
2656	struct vattr vattr;
2657	struct vnode *vp;
2658	struct file *fp;
2659	int error;
2660
2661	if (uap->length < 0)
2662		return(EINVAL);
2663	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2664		return (error);
2665	if ((fp->f_flag & FWRITE) == 0)
2666		return (EINVAL);
2667	vp = (struct vnode *)fp->f_data;
2668	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2669		return (error);
2670	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
2671	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2672	if (vp->v_type == VDIR)
2673		error = EISDIR;
2674	else if ((error = vn_writechk(vp)) == 0) {
2675		VATTR_NULL(&vattr);
2676		vattr.va_size = SCARG(uap, length);
2677		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
2678	}
2679	VOP_UNLOCK(vp, 0, p);
2680	vn_finished_write(mp);
2681	return (error);
2682}
2683
2684#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2685/*
2686 * Truncate a file given its path name.
2687 */
2688#ifndef _SYS_SYSPROTO_H_
2689struct otruncate_args {
2690	char	*path;
2691	long	length;
2692};
2693#endif
2694/* ARGSUSED */
2695int
2696otruncate(p, uap)
2697	struct proc *p;
2698	register struct otruncate_args /* {
2699		syscallarg(char *) path;
2700		syscallarg(long) length;
2701	} */ *uap;
2702{
2703	struct truncate_args /* {
2704		syscallarg(char *) path;
2705		syscallarg(int) pad;
2706		syscallarg(off_t) length;
2707	} */ nuap;
2708
2709	SCARG(&nuap, path) = SCARG(uap, path);
2710	SCARG(&nuap, length) = SCARG(uap, length);
2711	return (truncate(p, &nuap));
2712}
2713
2714/*
2715 * Truncate a file given a file descriptor.
2716 */
2717#ifndef _SYS_SYSPROTO_H_
2718struct oftruncate_args {
2719	int	fd;
2720	long	length;
2721};
2722#endif
2723/* ARGSUSED */
2724int
2725oftruncate(p, uap)
2726	struct proc *p;
2727	register struct oftruncate_args /* {
2728		syscallarg(int) fd;
2729		syscallarg(long) length;
2730	} */ *uap;
2731{
2732	struct ftruncate_args /* {
2733		syscallarg(int) fd;
2734		syscallarg(int) pad;
2735		syscallarg(off_t) length;
2736	} */ nuap;
2737
2738	SCARG(&nuap, fd) = SCARG(uap, fd);
2739	SCARG(&nuap, length) = SCARG(uap, length);
2740	return (ftruncate(p, &nuap));
2741}
2742#endif /* COMPAT_43 || COMPAT_SUNOS */
2743
2744/*
2745 * Sync an open file.
2746 */
2747#ifndef _SYS_SYSPROTO_H_
2748struct fsync_args {
2749	int	fd;
2750};
2751#endif
2752/* ARGSUSED */
2753int
2754fsync(p, uap)
2755	struct proc *p;
2756	struct fsync_args /* {
2757		syscallarg(int) fd;
2758	} */ *uap;
2759{
2760	struct vnode *vp;
2761	struct mount *mp;
2762	struct file *fp;
2763	vm_object_t obj;
2764	int error;
2765
2766	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2767		return (error);
2768	vp = (struct vnode *)fp->f_data;
2769	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2770		return (error);
2771	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
2772	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2773		mtx_lock(&vm_mtx);
2774		vm_object_page_clean(obj, 0, 0, 0);
2775		mtx_unlock(&vm_mtx);
2776	}
2777	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
2778#ifdef SOFTUPDATES
2779	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2780	    error = softdep_fsync(vp);
2781#endif
2782
2783	VOP_UNLOCK(vp, 0, p);
2784	vn_finished_write(mp);
2785	return (error);
2786}
2787
2788/*
2789 * Rename files.  Source and destination must either both be directories,
2790 * or both not be directories.  If target is a directory, it must be empty.
2791 */
2792#ifndef _SYS_SYSPROTO_H_
2793struct rename_args {
2794	char	*from;
2795	char	*to;
2796};
2797#endif
2798/* ARGSUSED */
2799int
2800rename(p, uap)
2801	struct proc *p;
2802	register struct rename_args /* {
2803		syscallarg(char *) from;
2804		syscallarg(char *) to;
2805	} */ *uap;
2806{
2807	struct mount *mp;
2808	struct vnode *tvp, *fvp, *tdvp;
2809	struct nameidata fromnd, tond;
2810	int error;
2811
2812	bwillwrite();
2813	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2814	    SCARG(uap, from), p);
2815	if ((error = namei(&fromnd)) != 0)
2816		return (error);
2817	fvp = fromnd.ni_vp;
2818	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2819		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2820		vrele(fromnd.ni_dvp);
2821		vrele(fvp);
2822		goto out1;
2823	}
2824	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2825	    UIO_USERSPACE, SCARG(uap, to), p);
2826	if (fromnd.ni_vp->v_type == VDIR)
2827		tond.ni_cnd.cn_flags |= WILLBEDIR;
2828	if ((error = namei(&tond)) != 0) {
2829		/* Translate error code for rename("dir1", "dir2/."). */
2830		if (error == EISDIR && fvp->v_type == VDIR)
2831			error = EINVAL;
2832		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2833		vrele(fromnd.ni_dvp);
2834		vrele(fvp);
2835		goto out1;
2836	}
2837	tdvp = tond.ni_dvp;
2838	tvp = tond.ni_vp;
2839	if (tvp != NULL) {
2840		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2841			error = ENOTDIR;
2842			goto out;
2843		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2844			error = EISDIR;
2845			goto out;
2846		}
2847	}
2848	if (fvp == tdvp)
2849		error = EINVAL;
2850	/*
2851	 * If source is the same as the destination (that is the
2852	 * same inode number with the same name in the same directory),
2853	 * then there is nothing to do.
2854	 */
2855	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2856	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2857	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2858	      fromnd.ni_cnd.cn_namelen))
2859		error = -1;
2860out:
2861	if (!error) {
2862		VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE);
2863		if (fromnd.ni_dvp != tdvp) {
2864			VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2865		}
2866		if (tvp) {
2867			VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
2868		}
2869		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2870				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2871		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2872		NDFREE(&tond, NDF_ONLY_PNBUF);
2873	} else {
2874		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2875		NDFREE(&tond, NDF_ONLY_PNBUF);
2876		if (tdvp == tvp)
2877			vrele(tdvp);
2878		else
2879			vput(tdvp);
2880		if (tvp)
2881			vput(tvp);
2882		vrele(fromnd.ni_dvp);
2883		vrele(fvp);
2884	}
2885	vrele(tond.ni_startdir);
2886	vn_finished_write(mp);
2887	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2888	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2889	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2890	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2891out1:
2892	if (fromnd.ni_startdir)
2893		vrele(fromnd.ni_startdir);
2894	if (error == -1)
2895		return (0);
2896	return (error);
2897}
2898
2899/*
2900 * Make a directory file.
2901 */
2902#ifndef _SYS_SYSPROTO_H_
2903struct mkdir_args {
2904	char	*path;
2905	int	mode;
2906};
2907#endif
2908/* ARGSUSED */
2909int
2910mkdir(p, uap)
2911	struct proc *p;
2912	register struct mkdir_args /* {
2913		syscallarg(char *) path;
2914		syscallarg(int) mode;
2915	} */ *uap;
2916{
2917	struct mount *mp;
2918	struct vnode *vp;
2919	struct vattr vattr;
2920	int error;
2921	struct nameidata nd;
2922
2923restart:
2924	bwillwrite();
2925	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
2926	nd.ni_cnd.cn_flags |= WILLBEDIR;
2927	if ((error = namei(&nd)) != 0)
2928		return (error);
2929	vp = nd.ni_vp;
2930	if (vp != NULL) {
2931		NDFREE(&nd, NDF_ONLY_PNBUF);
2932		vrele(vp);
2933		vput(nd.ni_dvp);
2934		return (EEXIST);
2935	}
2936	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2937		NDFREE(&nd, NDF_ONLY_PNBUF);
2938		vput(nd.ni_dvp);
2939		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2940			return (error);
2941		goto restart;
2942	}
2943	VATTR_NULL(&vattr);
2944	vattr.va_type = VDIR;
2945	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
2946	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
2947	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2948	NDFREE(&nd, NDF_ONLY_PNBUF);
2949	vput(nd.ni_dvp);
2950	if (!error)
2951		vput(nd.ni_vp);
2952	vn_finished_write(mp);
2953	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2954	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2955	return (error);
2956}
2957
2958/*
2959 * Remove a directory file.
2960 */
2961#ifndef _SYS_SYSPROTO_H_
2962struct rmdir_args {
2963	char	*path;
2964};
2965#endif
2966/* ARGSUSED */
2967int
2968rmdir(p, uap)
2969	struct proc *p;
2970	struct rmdir_args /* {
2971		syscallarg(char *) path;
2972	} */ *uap;
2973{
2974	struct mount *mp;
2975	struct vnode *vp;
2976	int error;
2977	struct nameidata nd;
2978
2979restart:
2980	bwillwrite();
2981	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2982	    SCARG(uap, path), p);
2983	if ((error = namei(&nd)) != 0)
2984		return (error);
2985	vp = nd.ni_vp;
2986	if (vp->v_type != VDIR) {
2987		error = ENOTDIR;
2988		goto out;
2989	}
2990	/*
2991	 * No rmdir "." please.
2992	 */
2993	if (nd.ni_dvp == vp) {
2994		error = EINVAL;
2995		goto out;
2996	}
2997	/*
2998	 * The root of a mounted filesystem cannot be deleted.
2999	 */
3000	if (vp->v_flag & VROOT) {
3001		error = EBUSY;
3002		goto out;
3003	}
3004	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3005		NDFREE(&nd, NDF_ONLY_PNBUF);
3006		if (nd.ni_dvp == vp)
3007			vrele(nd.ni_dvp);
3008		else
3009			vput(nd.ni_dvp);
3010		vput(vp);
3011		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3012			return (error);
3013		goto restart;
3014	}
3015	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
3016	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3017	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3018	vn_finished_write(mp);
3019out:
3020	NDFREE(&nd, NDF_ONLY_PNBUF);
3021	if (nd.ni_dvp == vp)
3022		vrele(nd.ni_dvp);
3023	else
3024		vput(nd.ni_dvp);
3025	vput(vp);
3026	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3027	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3028	return (error);
3029}
3030
3031#ifdef COMPAT_43
3032/*
3033 * Read a block of directory entries in a file system independent format.
3034 */
3035#ifndef _SYS_SYSPROTO_H_
3036struct ogetdirentries_args {
3037	int	fd;
3038	char	*buf;
3039	u_int	count;
3040	long	*basep;
3041};
3042#endif
3043int
3044ogetdirentries(p, uap)
3045	struct proc *p;
3046	register struct ogetdirentries_args /* {
3047		syscallarg(int) fd;
3048		syscallarg(char *) buf;
3049		syscallarg(u_int) count;
3050		syscallarg(long *) basep;
3051	} */ *uap;
3052{
3053	struct vnode *vp;
3054	struct file *fp;
3055	struct uio auio, kuio;
3056	struct iovec aiov, kiov;
3057	struct dirent *dp, *edp;
3058	caddr_t dirbuf;
3059	int error, eofflag, readcnt;
3060	long loff;
3061
3062	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3063		return (error);
3064	if ((fp->f_flag & FREAD) == 0)
3065		return (EBADF);
3066	vp = (struct vnode *)fp->f_data;
3067unionread:
3068	if (vp->v_type != VDIR)
3069		return (EINVAL);
3070	aiov.iov_base = SCARG(uap, buf);
3071	aiov.iov_len = SCARG(uap, count);
3072	auio.uio_iov = &aiov;
3073	auio.uio_iovcnt = 1;
3074	auio.uio_rw = UIO_READ;
3075	auio.uio_segflg = UIO_USERSPACE;
3076	auio.uio_procp = p;
3077	auio.uio_resid = SCARG(uap, count);
3078	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3079	loff = auio.uio_offset = fp->f_offset;
3080#	if (BYTE_ORDER != LITTLE_ENDIAN)
3081		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3082			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3083			    NULL, NULL);
3084			fp->f_offset = auio.uio_offset;
3085		} else
3086#	endif
3087	{
3088		kuio = auio;
3089		kuio.uio_iov = &kiov;
3090		kuio.uio_segflg = UIO_SYSSPACE;
3091		kiov.iov_len = SCARG(uap, count);
3092		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3093		kiov.iov_base = dirbuf;
3094		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3095			    NULL, NULL);
3096		fp->f_offset = kuio.uio_offset;
3097		if (error == 0) {
3098			readcnt = SCARG(uap, count) - kuio.uio_resid;
3099			edp = (struct dirent *)&dirbuf[readcnt];
3100			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3101#				if (BYTE_ORDER == LITTLE_ENDIAN)
3102					/*
3103					 * The expected low byte of
3104					 * dp->d_namlen is our dp->d_type.
3105					 * The high MBZ byte of dp->d_namlen
3106					 * is our dp->d_namlen.
3107					 */
3108					dp->d_type = dp->d_namlen;
3109					dp->d_namlen = 0;
3110#				else
3111					/*
3112					 * The dp->d_type is the high byte
3113					 * of the expected dp->d_namlen,
3114					 * so must be zero'ed.
3115					 */
3116					dp->d_type = 0;
3117#				endif
3118				if (dp->d_reclen > 0) {
3119					dp = (struct dirent *)
3120					    ((char *)dp + dp->d_reclen);
3121				} else {
3122					error = EIO;
3123					break;
3124				}
3125			}
3126			if (dp >= edp)
3127				error = uiomove(dirbuf, readcnt, &auio);
3128		}
3129		FREE(dirbuf, M_TEMP);
3130	}
3131	VOP_UNLOCK(vp, 0, p);
3132	if (error)
3133		return (error);
3134	if (SCARG(uap, count) == auio.uio_resid) {
3135		if (union_dircheckp) {
3136			error = union_dircheckp(p, &vp, fp);
3137			if (error == -1)
3138				goto unionread;
3139			if (error)
3140				return (error);
3141		}
3142		if ((vp->v_flag & VROOT) &&
3143		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3144			struct vnode *tvp = vp;
3145			vp = vp->v_mount->mnt_vnodecovered;
3146			VREF(vp);
3147			fp->f_data = (caddr_t) vp;
3148			fp->f_offset = 0;
3149			vrele(tvp);
3150			goto unionread;
3151		}
3152	}
3153	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3154	    sizeof(long));
3155	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3156	return (error);
3157}
3158#endif /* COMPAT_43 */
3159
3160/*
3161 * Read a block of directory entries in a file system independent format.
3162 */
3163#ifndef _SYS_SYSPROTO_H_
3164struct getdirentries_args {
3165	int	fd;
3166	char	*buf;
3167	u_int	count;
3168	long	*basep;
3169};
3170#endif
3171int
3172getdirentries(p, uap)
3173	struct proc *p;
3174	register struct getdirentries_args /* {
3175		syscallarg(int) fd;
3176		syscallarg(char *) buf;
3177		syscallarg(u_int) count;
3178		syscallarg(long *) basep;
3179	} */ *uap;
3180{
3181	struct vnode *vp;
3182	struct file *fp;
3183	struct uio auio;
3184	struct iovec aiov;
3185	long loff;
3186	int error, eofflag;
3187
3188	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3189		return (error);
3190	if ((fp->f_flag & FREAD) == 0)
3191		return (EBADF);
3192	vp = (struct vnode *)fp->f_data;
3193unionread:
3194	if (vp->v_type != VDIR)
3195		return (EINVAL);
3196	aiov.iov_base = SCARG(uap, buf);
3197	aiov.iov_len = SCARG(uap, count);
3198	auio.uio_iov = &aiov;
3199	auio.uio_iovcnt = 1;
3200	auio.uio_rw = UIO_READ;
3201	auio.uio_segflg = UIO_USERSPACE;
3202	auio.uio_procp = p;
3203	auio.uio_resid = SCARG(uap, count);
3204	/* vn_lock(vp, LK_SHARED | LK_RETRY, p); */
3205	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3206	loff = auio.uio_offset = fp->f_offset;
3207	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3208	fp->f_offset = auio.uio_offset;
3209	VOP_UNLOCK(vp, 0, p);
3210	if (error)
3211		return (error);
3212	if (SCARG(uap, count) == auio.uio_resid) {
3213		if (union_dircheckp) {
3214			error = union_dircheckp(p, &vp, fp);
3215			if (error == -1)
3216				goto unionread;
3217			if (error)
3218				return (error);
3219		}
3220		if ((vp->v_flag & VROOT) &&
3221		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3222			struct vnode *tvp = vp;
3223			vp = vp->v_mount->mnt_vnodecovered;
3224			VREF(vp);
3225			fp->f_data = (caddr_t) vp;
3226			fp->f_offset = 0;
3227			vrele(tvp);
3228			goto unionread;
3229		}
3230	}
3231	if (SCARG(uap, basep) != NULL) {
3232		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3233		    sizeof(long));
3234	}
3235	p->p_retval[0] = SCARG(uap, count) - auio.uio_resid;
3236	return (error);
3237}
3238#ifndef _SYS_SYSPROTO_H_
3239struct getdents_args {
3240	int fd;
3241	char *buf;
3242	size_t count;
3243};
3244#endif
3245int
3246getdents(p, uap)
3247	struct proc *p;
3248	register struct getdents_args /* {
3249		syscallarg(int) fd;
3250		syscallarg(char *) buf;
3251		syscallarg(u_int) count;
3252	} */ *uap;
3253{
3254	struct getdirentries_args ap;
3255	ap.fd = uap->fd;
3256	ap.buf = uap->buf;
3257	ap.count = uap->count;
3258	ap.basep = NULL;
3259	return getdirentries(p, &ap);
3260}
3261
3262/*
3263 * Set the mode mask for creation of filesystem nodes.
3264 *
3265 * MP SAFE
3266 */
3267#ifndef _SYS_SYSPROTO_H_
3268struct umask_args {
3269	int	newmask;
3270};
3271#endif
3272int
3273umask(p, uap)
3274	struct proc *p;
3275	struct umask_args /* {
3276		syscallarg(int) newmask;
3277	} */ *uap;
3278{
3279	register struct filedesc *fdp;
3280
3281	fdp = p->p_fd;
3282	p->p_retval[0] = fdp->fd_cmask;
3283	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3284	return (0);
3285}
3286
3287/*
3288 * Void all references to file by ripping underlying filesystem
3289 * away from vnode.
3290 */
3291#ifndef _SYS_SYSPROTO_H_
3292struct revoke_args {
3293	char	*path;
3294};
3295#endif
3296/* ARGSUSED */
3297int
3298revoke(p, uap)
3299	struct proc *p;
3300	register struct revoke_args /* {
3301		syscallarg(char *) path;
3302	} */ *uap;
3303{
3304	struct mount *mp;
3305	struct vnode *vp;
3306	struct vattr vattr;
3307	int error;
3308	struct nameidata nd;
3309
3310	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3311	if ((error = namei(&nd)) != 0)
3312		return (error);
3313	vp = nd.ni_vp;
3314	NDFREE(&nd, NDF_ONLY_PNBUF);
3315	if (vp->v_type != VCHR) {
3316		error = EINVAL;
3317		goto out;
3318	}
3319	if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0)
3320		goto out;
3321	if (p->p_ucred->cr_uid != vattr.va_uid &&
3322	    (error = suser_xxx(0, p, PRISON_ROOT)))
3323		goto out;
3324	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3325		goto out;
3326	if (vcount(vp) > 1)
3327		VOP_REVOKE(vp, REVOKEALL);
3328	vn_finished_write(mp);
3329out:
3330	vrele(vp);
3331	return (error);
3332}
3333
3334/*
3335 * Convert a user file descriptor to a kernel file entry.
3336 */
3337int
3338getvnode(fdp, fd, fpp)
3339	struct filedesc *fdp;
3340	int fd;
3341	struct file **fpp;
3342{
3343	struct file *fp;
3344
3345	if ((u_int)fd >= fdp->fd_nfiles ||
3346	    (fp = fdp->fd_ofiles[fd]) == NULL)
3347		return (EBADF);
3348	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
3349		return (EINVAL);
3350	*fpp = fp;
3351	return (0);
3352}
3353/*
3354 * Get (NFS) file handle
3355 */
3356#ifndef _SYS_SYSPROTO_H_
3357struct getfh_args {
3358	char	*fname;
3359	fhandle_t *fhp;
3360};
3361#endif
3362int
3363getfh(p, uap)
3364	struct proc *p;
3365	register struct getfh_args *uap;
3366{
3367	struct nameidata nd;
3368	fhandle_t fh;
3369	register struct vnode *vp;
3370	int error;
3371
3372	/*
3373	 * Must be super user
3374	 */
3375	error = suser(p);
3376	if (error)
3377		return (error);
3378	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
3379	error = namei(&nd);
3380	if (error)
3381		return (error);
3382	NDFREE(&nd, NDF_ONLY_PNBUF);
3383	vp = nd.ni_vp;
3384	bzero(&fh, sizeof(fh));
3385	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3386	error = VFS_VPTOFH(vp, &fh.fh_fid);
3387	vput(vp);
3388	if (error)
3389		return (error);
3390	error = copyout(&fh, uap->fhp, sizeof (fh));
3391	return (error);
3392}
3393
3394/*
3395 * syscall for the rpc.lockd to use to translate a NFS file handle into
3396 * an open descriptor.
3397 *
3398 * warning: do not remove the suser() call or this becomes one giant
3399 * security hole.
3400 */
3401#ifndef _SYS_SYSPROTO_H_
3402struct fhopen_args {
3403	const struct fhandle *u_fhp;
3404	int flags;
3405};
3406#endif
3407int
3408fhopen(p, uap)
3409	struct proc *p;
3410	struct fhopen_args /* {
3411		syscallarg(const struct fhandle *) u_fhp;
3412		syscallarg(int) flags;
3413	} */ *uap;
3414{
3415	struct mount *mp;
3416	struct vnode *vp;
3417	struct fhandle fhp;
3418	struct vattr vat;
3419	struct vattr *vap = &vat;
3420	struct flock lf;
3421	struct file *fp;
3422	register struct filedesc *fdp = p->p_fd;
3423	int fmode, mode, error, type;
3424	struct file *nfp;
3425	int indx;
3426
3427	/*
3428	 * Must be super user
3429	 */
3430	error = suser(p);
3431	if (error)
3432		return (error);
3433
3434	fmode = FFLAGS(SCARG(uap, flags));
3435	/* why not allow a non-read/write open for our lockd? */
3436	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3437		return (EINVAL);
3438	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3439	if (error)
3440		return(error);
3441	/* find the mount point */
3442	mp = vfs_getvfs(&fhp.fh_fsid);
3443	if (mp == NULL)
3444		return (ESTALE);
3445	/* now give me my vnode, it gets returned to me locked */
3446	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3447	if (error)
3448		return (error);
3449 	/*
3450	 * from now on we have to make sure not
3451	 * to forget about the vnode
3452	 * any error that causes an abort must vput(vp)
3453	 * just set error = err and 'goto bad;'.
3454	 */
3455
3456	/*
3457	 * from vn_open
3458	 */
3459	if (vp->v_type == VLNK) {
3460		error = EMLINK;
3461		goto bad;
3462	}
3463	if (vp->v_type == VSOCK) {
3464		error = EOPNOTSUPP;
3465		goto bad;
3466	}
3467	mode = 0;
3468	if (fmode & (FWRITE | O_TRUNC)) {
3469		if (vp->v_type == VDIR) {
3470			error = EISDIR;
3471			goto bad;
3472		}
3473		error = vn_writechk(vp);
3474		if (error)
3475			goto bad;
3476		mode |= VWRITE;
3477	}
3478	if (fmode & FREAD)
3479		mode |= VREAD;
3480	if (mode) {
3481		error = VOP_ACCESS(vp, mode, p->p_ucred, p);
3482		if (error)
3483			goto bad;
3484	}
3485	if (fmode & O_TRUNC) {
3486		VOP_UNLOCK(vp, 0, p);				/* XXX */
3487		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3488			vrele(vp);
3489			return (error);
3490		}
3491		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3492		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
3493		VATTR_NULL(vap);
3494		vap->va_size = 0;
3495		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
3496		vn_finished_write(mp);
3497		if (error)
3498			goto bad;
3499	}
3500	error = VOP_OPEN(vp, fmode, p->p_ucred, p);
3501	if (error)
3502		goto bad;
3503	/*
3504	 * Make sure that a VM object is created for VMIO support.
3505	 */
3506	if (vn_canvmio(vp) == TRUE) {
3507		if ((error = vfs_object_create(vp, p, p->p_ucred)) != 0)
3508			goto bad;
3509	}
3510	if (fmode & FWRITE)
3511		vp->v_writecount++;
3512
3513	/*
3514	 * end of vn_open code
3515	 */
3516
3517	if ((error = falloc(p, &nfp, &indx)) != 0)
3518		goto bad;
3519	fp = nfp;
3520
3521	/*
3522	 * Hold an extra reference to avoid having fp ripped out
3523	 * from under us while we block in the lock op
3524	 */
3525	fhold(fp);
3526	nfp->f_data = (caddr_t)vp;
3527	nfp->f_flag = fmode & FMASK;
3528	nfp->f_ops = &vnops;
3529	nfp->f_type = DTYPE_VNODE;
3530	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3531		lf.l_whence = SEEK_SET;
3532		lf.l_start = 0;
3533		lf.l_len = 0;
3534		if (fmode & O_EXLOCK)
3535			lf.l_type = F_WRLCK;
3536		else
3537			lf.l_type = F_RDLCK;
3538		type = F_FLOCK;
3539		if ((fmode & FNONBLOCK) == 0)
3540			type |= F_WAIT;
3541		VOP_UNLOCK(vp, 0, p);
3542		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3543			/*
3544			 * The lock request failed.  Normally close the
3545			 * descriptor but handle the case where someone might
3546			 * have dup()d or close()d it when we weren't looking.
3547			 */
3548			if (fdp->fd_ofiles[indx] == fp) {
3549				fdp->fd_ofiles[indx] = NULL;
3550				fdrop(fp, p);
3551			}
3552			/*
3553			 * release our private reference
3554			 */
3555			fdrop(fp, p);
3556			return(error);
3557		}
3558		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3559		fp->f_flag |= FHASLOCK;
3560	}
3561	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3562		vfs_object_create(vp, p, p->p_ucred);
3563
3564	VOP_UNLOCK(vp, 0, p);
3565	fdrop(fp, p);
3566	p->p_retval[0] = indx;
3567	return (0);
3568
3569bad:
3570	vput(vp);
3571	return (error);
3572}
3573
3574/*
3575 * Stat an (NFS) file handle.
3576 */
3577#ifndef _SYS_SYSPROTO_H_
3578struct fhstat_args {
3579	struct fhandle *u_fhp;
3580	struct stat *sb;
3581};
3582#endif
3583int
3584fhstat(p, uap)
3585	struct proc *p;
3586	register struct fhstat_args /* {
3587		syscallarg(struct fhandle *) u_fhp;
3588		syscallarg(struct stat *) sb;
3589	} */ *uap;
3590{
3591	struct stat sb;
3592	fhandle_t fh;
3593	struct mount *mp;
3594	struct vnode *vp;
3595	int error;
3596
3597	/*
3598	 * Must be super user
3599	 */
3600	error = suser(p);
3601	if (error)
3602		return (error);
3603
3604	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3605	if (error)
3606		return (error);
3607
3608	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3609		return (ESTALE);
3610	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3611		return (error);
3612	error = vn_stat(vp, &sb, p);
3613	vput(vp);
3614	if (error)
3615		return (error);
3616	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3617	return (error);
3618}
3619
3620/*
3621 * Implement fstatfs() for (NFS) file handles.
3622 */
3623#ifndef _SYS_SYSPROTO_H_
3624struct fhstatfs_args {
3625	struct fhandle *u_fhp;
3626	struct statfs *buf;
3627};
3628#endif
3629int
3630fhstatfs(p, uap)
3631	struct proc *p;
3632	struct fhstatfs_args /* {
3633		syscallarg(struct fhandle) *u_fhp;
3634		syscallarg(struct statfs) *buf;
3635	} */ *uap;
3636{
3637	struct statfs *sp;
3638	struct mount *mp;
3639	struct vnode *vp;
3640	struct statfs sb;
3641	fhandle_t fh;
3642	int error;
3643
3644	/*
3645	 * Must be super user
3646	 */
3647	if ((error = suser(p)))
3648		return (error);
3649
3650	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3651		return (error);
3652
3653	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3654		return (ESTALE);
3655	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3656		return (error);
3657	mp = vp->v_mount;
3658	sp = &mp->mnt_stat;
3659	vput(vp);
3660	if ((error = VFS_STATFS(mp, sp, p)) != 0)
3661		return (error);
3662	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3663	if (suser_xxx(p->p_ucred, 0, 0)) {
3664		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3665		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3666		sp = &sb;
3667	}
3668	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3669}
3670
3671/*
3672 * Syscall to push extended attribute configuration information into the
3673 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3674 * a command (int cmd), and attribute name and misc data.  For now, the
3675 * attribute name is left in userspace for consumption by the VFS_op.
3676 * It will probably be changed to be copied into sysspace by the
3677 * syscall in the future, once issues with various consumers of the
3678 * attribute code have raised their hands.
3679 *
3680 * Currently this is used only by UFS Extended Attributes.
3681 */
3682int
3683extattrctl(p, uap)
3684	struct proc *p;
3685	struct extattrctl_args *uap;
3686{
3687	struct vnode *filename_vp;
3688	struct nameidata nd;
3689	struct mount *mp;
3690	char attrname[EXTATTR_MAXNAMELEN];
3691	int error;
3692
3693	/*
3694	 * SCARG(uap, attrname) not always defined.  We check again later
3695	 * when we invoke the VFS call so as to pass in NULL there if needed.
3696	 */
3697	if (SCARG(uap, attrname) != NULL) {
3698		error = copyinstr(SCARG(uap, attrname), attrname,
3699		    EXTATTR_MAXNAMELEN, NULL);
3700		if (error)
3701			return (error);
3702	}
3703
3704	/*
3705	 * SCARG(uap, filename) not always defined.  If it is, grab
3706	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3707	 */
3708	filename_vp = NULL;
3709	if (SCARG(uap, filename) != NULL) {
3710		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3711		    SCARG(uap, filename), p);
3712		if ((error = namei(&nd)) != 0)
3713			return (error);
3714		filename_vp = nd.ni_vp;
3715		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3716	}
3717
3718	/* SCARG(uap, path) always defined. */
3719	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3720	if ((error = namei(&nd)) != 0)
3721		return (error);
3722	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
3723	NDFREE(&nd, 0);
3724	if (error) {
3725		if (filename_vp)
3726			vrele(filename_vp);
3727		return (error);
3728	}
3729
3730	if (SCARG(uap, attrname) != NULL) {
3731		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3732		    SCARG(uap, attrnamespace), attrname, p);
3733	} else {
3734		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3735		    SCARG(uap, attrnamespace), NULL, p);
3736	}
3737
3738	vn_finished_write(mp);
3739	/*
3740	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3741	 * filename_vp, so vrele it if it is defined.
3742	 */
3743	if (filename_vp != NULL)
3744		vrele(filename_vp);
3745
3746	return (error);
3747}
3748
3749/*
3750 * extattr_set_vp(): Set a named extended attribute on a file or directory
3751 *
3752 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3753 *            kernelspace string pointer "attrname",
3754 *            userspace iovec array pointer "iovp", unsigned int iovcnt
3755 *            proc "p"
3756 * Returns: 0 on success, an error number otherwise
3757 * Locks: none
3758 * References: vp must be a valid reference for the duration of the call
3759 */
3760static int
3761extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3762    struct iovec *iovp, unsigned iovcnt, struct proc *p)
3763{
3764	struct mount *mp;
3765	struct uio auio;
3766	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3767	u_int iovlen, cnt;
3768	int error, i;
3769
3770	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3771		return (error);
3772	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3773	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3774
3775	iovlen = iovcnt * sizeof(struct iovec);
3776	if (iovcnt > UIO_SMALLIOV) {
3777		if (iovcnt > UIO_MAXIOV) {
3778			error = EINVAL;
3779			goto done;
3780		}
3781		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3782		needfree = iov;
3783	} else
3784		iov = aiov;
3785	auio.uio_iov = iov;
3786	auio.uio_iovcnt = iovcnt;
3787	auio.uio_rw = UIO_WRITE;
3788	auio.uio_segflg = UIO_USERSPACE;
3789	auio.uio_procp = p;
3790	auio.uio_offset = 0;
3791	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3792		goto done;
3793	auio.uio_resid = 0;
3794	for (i = 0; i < iovcnt; i++) {
3795		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3796			error = EINVAL;
3797			goto done;
3798		}
3799		auio.uio_resid += iov->iov_len;
3800		iov++;
3801	}
3802	cnt = auio.uio_resid;
3803	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3804	    p->p_ucred, p);
3805	cnt -= auio.uio_resid;
3806	p->p_retval[0] = cnt;
3807done:
3808	if (needfree)
3809		FREE(needfree, M_IOV);
3810	VOP_UNLOCK(vp, 0, p);
3811	vn_finished_write(mp);
3812	return (error);
3813}
3814
3815int
3816extattr_set_file(p, uap)
3817	struct proc *p;
3818	struct extattr_set_file_args *uap;
3819{
3820	struct nameidata nd;
3821	char attrname[EXTATTR_MAXNAMELEN];
3822	int error;
3823
3824	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3825	    NULL);
3826	if (error)
3827		return (error);
3828
3829	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3830	if ((error = namei(&nd)) != 0)
3831		return (error);
3832	NDFREE(&nd, NDF_ONLY_PNBUF);
3833
3834	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3835	    SCARG(uap, iovp), SCARG(uap, iovcnt), p);
3836
3837	vrele(nd.ni_vp);
3838	return (error);
3839}
3840
3841int
3842extattr_set_fd(p, uap)
3843	struct proc *p;
3844	struct extattr_set_fd_args *uap;
3845{
3846	struct file *fp;
3847	char attrname[EXTATTR_MAXNAMELEN];
3848	int error;
3849
3850	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3851	    NULL);
3852	if (error)
3853		return (error);
3854
3855	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3856		return (error);
3857
3858	error = extattr_set_vp((struct vnode *)fp->f_data,
3859	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3860	    SCARG(uap, iovcnt), p);
3861
3862	return (error);
3863}
3864
3865/*
3866 * extattr_get_vp(): Get a named extended attribute on a file or directory
3867 *
3868 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3869 *            kernelspace string pointer "attrname",
3870 *            userspace iovec array pointer "iovp", unsigned int iovcnt,
3871 *            proc "p"
3872 * Returns: 0 on success, an error number otherwise
3873 * Locks: none
3874 * References: vp must be a valid reference for the duration of the call
3875 */
3876static int
3877extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3878    struct iovec *iovp, unsigned iovcnt, struct proc *p)
3879{
3880	struct uio auio;
3881	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
3882	u_int iovlen, cnt;
3883	int error, i;
3884
3885	VOP_LEASE(vp, p, p->p_ucred, LEASE_READ);
3886	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3887
3888	iovlen = iovcnt * sizeof (struct iovec);
3889	if (iovcnt > UIO_SMALLIOV) {
3890		if (iovcnt > UIO_MAXIOV) {
3891			error = EINVAL;
3892			goto done;
3893		}
3894		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
3895		needfree = iov;
3896	} else
3897		iov = aiov;
3898	auio.uio_iov = iov;
3899	auio.uio_iovcnt = iovcnt;
3900	auio.uio_rw = UIO_READ;
3901	auio.uio_segflg = UIO_USERSPACE;
3902	auio.uio_procp = p;
3903	auio.uio_offset = 0;
3904	if ((error = copyin((caddr_t)iovp, (caddr_t)iov, iovlen)))
3905		goto done;
3906	auio.uio_resid = 0;
3907	for (i = 0; i < iovcnt; i++) {
3908		if (iov->iov_len > INT_MAX - auio.uio_resid) {
3909			error = EINVAL;
3910			goto done;
3911		}
3912		auio.uio_resid += iov->iov_len;
3913		iov++;
3914	}
3915	cnt = auio.uio_resid;
3916	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
3917	    p->p_ucred, p);
3918	cnt -= auio.uio_resid;
3919	p->p_retval[0] = cnt;
3920done:
3921	if (needfree)
3922		FREE(needfree, M_IOV);
3923	VOP_UNLOCK(vp, 0, p);
3924	return (error);
3925}
3926
3927int
3928extattr_get_file(p, uap)
3929	struct proc *p;
3930	struct extattr_get_file_args *uap;
3931{
3932	struct nameidata nd;
3933	char attrname[EXTATTR_MAXNAMELEN];
3934	int error;
3935
3936	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3937	    NULL);
3938	if (error)
3939		return (error);
3940
3941	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
3942	if ((error = namei(&nd)) != 0)
3943		return (error);
3944	NDFREE(&nd, NDF_ONLY_PNBUF);
3945
3946	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
3947	    SCARG(uap, iovp), SCARG(uap, iovcnt), p);
3948
3949	vrele(nd.ni_vp);
3950	return (error);
3951}
3952
3953int
3954extattr_get_fd(p, uap)
3955	struct proc *p;
3956	struct extattr_get_fd_args *uap;
3957{
3958	struct file *fp;
3959	char attrname[EXTATTR_MAXNAMELEN];
3960	int error;
3961
3962	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
3963	    NULL);
3964	if (error)
3965		return (error);
3966
3967	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3968		return (error);
3969
3970	error = extattr_get_vp((struct vnode *)fp->f_data,
3971	    SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp),
3972	    SCARG(uap, iovcnt), p);
3973
3974	return (error);
3975}
3976
3977/*
3978 * extattr_delete_vp(): Delete a named extended attribute on a file or
3979 *                      directory
3980 *
3981 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3982 *            kernelspace string pointer "attrname", proc "p"
3983 * Returns: 0 on success, an error number otherwise
3984 * Locks: none
3985 * References: vp must be a valid reference for the duration of the call
3986 */
3987static int
3988extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3989    struct proc *p)
3990{
3991	struct mount *mp;
3992	int error;
3993
3994	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3995		return (error);
3996	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
3997	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
3998
3999	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4000	    p->p_ucred, p);
4001
4002	VOP_UNLOCK(vp, 0, p);
4003	vn_finished_write(mp);
4004	return (error);
4005}
4006
4007int
4008extattr_delete_file(p, uap)
4009	struct proc *p;
4010	struct extattr_delete_file_args *uap;
4011{
4012	struct nameidata nd;
4013	char attrname[EXTATTR_MAXNAMELEN];
4014	int error;
4015
4016	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4017	     NULL);
4018	if (error)
4019		return(error);
4020
4021	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
4022	if ((error = namei(&nd)) != 0)
4023		return(error);
4024	NDFREE(&nd, NDF_ONLY_PNBUF);
4025
4026	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4027	    attrname, p);
4028
4029	vrele(nd.ni_vp);
4030	return(error);
4031}
4032
4033int
4034extattr_delete_fd(p, uap)
4035	struct proc *p;
4036	struct extattr_delete_fd_args *uap;
4037{
4038	struct file *fp;
4039	char attrname[EXTATTR_MAXNAMELEN];
4040	int error;
4041
4042	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4043	    NULL);
4044	if (error)
4045		return (error);
4046
4047	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
4048		return (error);
4049
4050	error = extattr_delete_vp((struct vnode *)fp->f_data,
4051	    SCARG(uap, attrnamespace), attrname, p);
4052
4053	return (error);
4054}
4055