vfs_extattr.c revision 92751
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 * $FreeBSD: head/sys/kern/vfs_extattr.c 92751 2002-03-20 04:09:59Z jeff $
40 */
41
42/* For 4.3 integer FS ID compatibility */
43#include "opt_compat.h"
44#include "opt_ffs.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/sysent.h>
51#include <sys/malloc.h>
52#include <sys/mount.h>
53#include <sys/mutex.h>
54#include <sys/sysproto.h>
55#include <sys/namei.h>
56#include <sys/filedesc.h>
57#include <sys/kernel.h>
58#include <sys/fcntl.h>
59#include <sys/file.h>
60#include <sys/linker.h>
61#include <sys/stat.h>
62#include <sys/sx.h>
63#include <sys/unistd.h>
64#include <sys/vnode.h>
65#include <sys/proc.h>
66#include <sys/dirent.h>
67#include <sys/extattr.h>
68#include <sys/jail.h>
69#include <sys/sysctl.h>
70
71#include <machine/limits.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/uma.h>
77
/*
 * Forward declarations for the file-local helpers.  change_dir(),
 * checkdirs() and chroot_refuse_vdir_fds() are defined further down in
 * this part of the file; the remaining helpers are defined elsewhere.
 */
static int change_dir(struct nameidata *ndp, struct thread *td);
static void checkdirs(struct vnode *olddp, struct vnode *newdp);
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * When zero, vfs_mount() demands superuser privilege before doing
 * anything else.  Exported read/write as the vfs.usermount sysctl below.
 */
static int	usermount = 0;	/* if 1, non-root can mount fs. */

/*
 * Hook pointer; not referenced in the visible part of this file.
 * NOTE(review): presumably installed by the union file system -- confirm.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
95
96/*
97 * Virtual File System System Calls
98 */
99
#ifndef _SYS_SYSPROTO_H_
struct nmount_args {
	struct iovec	*iovp;
	unsigned int	iovcnt;
	int		flags;
};
#endif
/*
 * nmount(2) placeholder.  The iovec-based mount interface is not
 * implemented yet: the arguments are ignored and every call fails
 * with EOPNOTSUPP.
 */
/* ARGSUSED */
int
nmount(struct thread *td, struct nmount_args *uap)
{

	return (EOPNOTSUPP);
}
120
121/*
122 * Mount a file system.
123 */
124#ifndef _SYS_SYSPROTO_H_
125struct mount_args {
126	char	*type;
127	char	*path;
128	int	flags;
129	caddr_t	data;
130};
131#endif
132/* ARGSUSED */
133int
134mount(td, uap)
135	struct thread *td;
136	struct mount_args /* {
137		syscallarg(char *) type;
138		syscallarg(char *) path;
139		syscallarg(int) flags;
140		syscallarg(caddr_t) data;
141	} */ *uap;
142{
143	char *fstype;
144	char *fspath;
145	int error;
146
147	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
148	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK | M_ZERO);
149
150	/*
151	 * vfs_mount() actually takes a kernel string for `type' and
152	 * `path' now, so extract them.
153	 */
154	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
155	if (error)
156		goto finish;
157	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
158	if (error)
159		goto finish;
160	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
161	    SCARG(uap, data));
162finish:
163	free(fstype, M_TEMP);
164	free(fspath, M_TEMP);
165	return (error);
166}
167
/*
 * vfs_mount(): actually attempt a filesystem mount.
 *
 * This routine is designed to be a "generic" entry point for routines
 * that wish to mount a filesystem. All parameters except `fsdata' are
 * pointers into kernel space. `fsdata' is currently still a pointer
 * into userspace.
 *
 * Parameters:
 *	td	thread requesting the mount; its credentials are checked.
 *	fstype	file system type name, matched against vfc_name on the
 *		vfsconf list (a module load is attempted if not found).
 *	fspath	path of the vnode to cover, looked up with UIO_SYSSPACE.
 *	fsflags	MNT_* flags; MNT_UPDATE selects the update path.
 *	fsdata	handed to VFS_MOUNT() without being interpreted here.
 *
 * Returns 0 on success or an errno value.
 */
int
vfs_mount(td, fstype, fspath, fsflags, fsdata)
	struct thread *td;
	const char *fstype;
	char *fspath;
	int fsflags;
	void *fsdata;
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved mount flags */
	struct vattr va;
	struct nameidata nd;

	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if ((strlen(fstype) >= MFSNAMELEN - 1) ||
	    (strlen(fspath) >= MNAMELEN - 1))
		return (ENAMETOOLONG);

	/* With usermount clear, only the superuser gets any further. */
	if (usermount == 0) {
		error = suser_td(td);
		if (error)
			return (error);
	}
	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (fsflags & MNT_EXPORTED) {
		error = suser_td(td);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (suser_xxx(td->td_ucred, 0, 0))
		fsflags |= MNT_NOSUID | MNT_NODEV;
	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (fsflags & MNT_UPDATE) {
		/* An update must name the root vnode of an existing mount. */
		if ((vp->v_flag & VROOT) == 0) {
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		/* Remember the current flags so a failed update can be undone. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((fsflags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
			error = suser_td(td);
			if (error) {
				vput(vp);
				return (error);
			}
		}
		/* Busy the mount without waiting; give up with EBUSY otherwise. */
		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
			vput(vp);
			return (EBUSY);
		}
		/*
		 * Claim the vnode by setting VMOUNT under its interlock.
		 * A vnode that already has VMOUNT set or is already
		 * covered by a mount is reported as EBUSY.
		 */
		mtx_lock(&vp->v_interlock);
		if ((vp->v_flag & VMOUNT) != 0 ||
		    vp->v_mountedhere != NULL) {
			mtx_unlock(&vp->v_interlock);
			vfs_unbusy(mp, td);
			vput(vp);
			return (EBUSY);
		}
		vp->v_flag |= VMOUNT;
		mtx_unlock(&vp->v_interlock);
		mp->mnt_flag |= fsflags &
		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
		VOP_UNLOCK(vp, 0, td);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
	if (error) {
		vput(vp);
		return (error);
	}
	if (va.va_uid != td->td_ucred->cr_uid) {
		error = suser_td(td);
		if (error) {
			vput(vp);
			return (error);
		}
	}
	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0))
	    != 0) {
		vput(vp);
		return (error);
	}
	if (vp->v_type != VDIR) {
		vput(vp);
		return (ENOTDIR);
	}
	/* Look the requested type up among the configured file systems. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstype))
			break;
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		error = suser_td(td);
		if (error) {
			vput(vp);
			return error;
		}
		/*
		 * NOTE(review): lf is examined below even when
		 * linker_load_file() returned an error, which assumes the
		 * linker always stores to lf -- confirm.
		 */
		error = linker_load_file(fstype, &lf);
		if (error || lf == NULL) {
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			return error;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (!strcmp(vfsp->vfc_name, fstype))
				break;
		if (vfsp == NULL) {
			/* The module did not register this type: undo the load. */
			lf->userrefs--;
			linker_file_unload(lf);
			vput(vp);
			return (ENODEV);
		}
	}
	/* Same VMOUNT claim as in the update path above. */
	mtx_lock(&vp->v_interlock);
	if ((vp->v_flag & VMOUNT) != 0 ||
	    vp->v_mountedhere != NULL) {
		mtx_unlock(&vp->v_interlock);
		vput(vp);
		return (EBUSY);
	}
	vp->v_flag |= VMOUNT;
	mtx_unlock(&vp->v_interlock);

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
	TAILQ_INIT(&mp->mnt_nvnodelist);
	TAILQ_INIT(&mp->mnt_reservedvnlist);
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
	/* Result ignored: mp was just allocated and is not on mountlist yet. */
	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	/* strncpy() may leave the buffer unterminated; terminate explicitly. */
	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
	mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0';
	mp->mnt_vnodecovered = vp;
	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
	mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0';
	mp->mnt_iosize_max = DFLTPHYS;
	VOP_UNLOCK(vp, 0, td);
update:
	/*
	 * Set the mount level flags.
	 */
	if (fsflags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ MNT_UPDATEMASK;
	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.  No freeing of cn_pnbuf.
	 */
	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update path: drop the transient request flags, and on
		 * failure put back the flags saved before the attempt.
		 */
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~
		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		/*
		 * Keep the syncer vnode in step with the read-only state.
		 * NOTE(review): the assignment below replaces any error
		 * returned by VFS_MOUNT() when a read/write mount has no
		 * syncer vnode -- confirm this is intended.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			if (mp->mnt_syncer == NULL)
				error = vfs_allocate_syncvnode(mp);
		} else {
			if (mp->mnt_syncer != NULL)
				vrele(mp->mnt_syncer);
			mp->mnt_syncer = NULL;
		}
		vfs_unbusy(mp, td);
		/* Release the VMOUNT claim taken before the update. */
		mtx_lock(&vp->v_interlock);
		vp->v_flag &= ~VMOUNT;
		mtx_unlock(&vp->v_interlock);
		vrele(vp);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	if (!error) {
		struct vnode *newdp;

		/* Publish the mount on the covered vnode and on mountlist. */
		mtx_lock(&vp->v_interlock);
		vp->v_flag &= ~VMOUNT;
		vp->v_mountedhere = mp;
		mtx_unlock(&vp->v_interlock);
		mtx_lock(&mountlist_mtx);
		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
		mtx_unlock(&mountlist_mtx);
		if (VFS_ROOT(mp, &newdp))
			panic("mount: lost mount");
		/* Repoint cwd/root references from the covered vnode to the new root. */
		checkdirs(vp, newdp);
		vput(newdp);
		VOP_UNLOCK(vp, 0, td);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by the VFS_START() assignment just below, so
		 * a failure here is never reported -- confirm.
		 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp, td);
		if ((error = VFS_START(mp, 0, td)) != 0)
			vrele(vp);
	} else {
		/* VFS_MOUNT() failed: undo the claim and free the new mount. */
		mtx_lock(&vp->v_interlock);
		vp->v_flag &= ~VMOUNT;
		mtx_unlock(&vp->v_interlock);
		mp->mnt_vfc->vfc_refcount--;
		vfs_unbusy(mp, td);
		free((caddr_t)mp, M_MOUNT);
		vput(vp);
	}
	return (error);
}
437
438/*
439 * Scan all active processes to see if any of them have a current
440 * or root directory of `olddp'. If so, replace them with the new
441 * mount point.
442 */
443static void
444checkdirs(olddp, newdp)
445	struct vnode *olddp, *newdp;
446{
447	struct filedesc *fdp;
448	struct proc *p;
449	int nrele;
450
451	if (olddp->v_usecount == 1)
452		return;
453	sx_slock(&allproc_lock);
454	LIST_FOREACH(p, &allproc, p_list) {
455		PROC_LOCK(p);
456		fdp = p->p_fd;
457		if (fdp == NULL) {
458			PROC_UNLOCK(p);
459			continue;
460		}
461		nrele = 0;
462		FILEDESC_LOCK(fdp);
463		if (fdp->fd_cdir == olddp) {
464			VREF(newdp);
465			fdp->fd_cdir = newdp;
466			nrele++;
467		}
468		if (fdp->fd_rdir == olddp) {
469			VREF(newdp);
470			fdp->fd_rdir = newdp;
471			nrele++;
472		}
473		FILEDESC_UNLOCK(fdp);
474		PROC_UNLOCK(p);
475		while (nrele--)
476			vrele(olddp);
477	}
478	sx_sunlock(&allproc_lock);
479	if (rootvnode == olddp) {
480		vrele(rootvnode);
481		VREF(newdp);
482		rootvnode = newdp;
483	}
484}
485
486/*
487 * Unmount a file system.
488 *
489 * Note: unmount takes a path to the vnode mounted on as argument,
490 * not special file (as before).
491 */
492#ifndef _SYS_SYSPROTO_H_
493struct unmount_args {
494	char	*path;
495	int	flags;
496};
497#endif
498/* ARGSUSED */
499int
500unmount(td, uap)
501	struct thread *td;
502	register struct unmount_args /* {
503		syscallarg(char *) path;
504		syscallarg(int) flags;
505	} */ *uap;
506{
507	register struct vnode *vp;
508	struct mount *mp;
509	int error;
510	struct nameidata nd;
511
512	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
513	    SCARG(uap, path), td);
514	if ((error = namei(&nd)) != 0)
515		return (error);
516	vp = nd.ni_vp;
517	NDFREE(&nd, NDF_ONLY_PNBUF);
518	mp = vp->v_mount;
519
520	/*
521	 * Only root, or the user that did the original mount is
522	 * permitted to unmount this filesystem.
523	 */
524	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
525		error = suser_td(td);
526		if (error) {
527			vput(vp);
528			return (error);
529		}
530	}
531
532	/*
533	 * Don't allow unmounting the root file system.
534	 */
535	if (mp->mnt_flag & MNT_ROOTFS) {
536		vput(vp);
537		return (EINVAL);
538	}
539
540	/*
541	 * Must be the root of the filesystem
542	 */
543	if ((vp->v_flag & VROOT) == 0) {
544		vput(vp);
545		return (EINVAL);
546	}
547	vput(vp);
548	return (dounmount(mp, SCARG(uap, flags), td));
549}
550
/*
 * Do the actual file system unmount.
 *
 * Parameters:
 *	mp	mount to tear down; freed here on success.
 *	flags	MNT_* flags; MNT_FORCE lets the lock drain wait and makes
 *		VFS_UNMOUNT() run even if the preceding sync failed.
 *	td	thread performing the unmount.
 *
 * Returns 0 on success.  On failure the errno value is returned and
 * the changes made here (directory references, rootvnode, MNT_ASYNC,
 * MNTK_UNMOUNT) are undone so the mount stays usable.
 */
int
dounmount(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct vnode *coveredvp, *fsrootvp;
	int error;
	int async_flag;

	/*
	 * Flag the mount as being unmounted and drain mnt_lock.  Unless
	 * MNT_FORCE is given the drain is attempted with LK_NOWAIT.
	 * mountlist_mtx is handed to lockmgr() as the interlock.
	 */
	mtx_lock(&mountlist_mtx);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
	if (error) {
		/* Could not drain: back out and wake anyone flagged MNTK_MWAIT. */
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup((caddr_t)mp);
		return (error);
	}
	vn_start_write(NULL, &mp, V_WAIT);

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* Remember MNT_ASYNC so it can be restored if the unmount fails. */
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	/*
	 * NOTE(review): mnt_syncer is released but not cleared here, yet
	 * the error path below tests it against NULL -- presumably it is
	 * cleared when the syncer vnode is reclaimed; confirm.
	 */
	if (mp->mnt_syncer != NULL)
		vrele(mp->mnt_syncer);
	/* Move process cdir/rdir refs on fs root to underlying vnode. */
	if (VFS_ROOT(mp, &fsrootvp) == 0) {
		if (mp->mnt_vnodecovered != NULL)
			checkdirs(fsrootvp, mp->mnt_vnodecovered);
		if (fsrootvp == rootvnode) {
			vrele(rootvnode);
			rootvnode = NULL;
		}
		vput(fsrootvp);
	}
	/*
	 * VFS_UNMOUNT() runs when the file system is read-only, or when
	 * VFS_SYNC() succeeded, or when MNT_FORCE is given.  Otherwise
	 * the sync error is kept and handled below.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
	    (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags, td);
	}
	vn_finished_write(mp);
	if (error) {
		/* Undo cdir/rdir and rootvnode changes made above. */
		if (VFS_ROOT(mp, &fsrootvp) == 0) {
			if (mp->mnt_vnodecovered != NULL)
				checkdirs(mp->mnt_vnodecovered, fsrootvp);
			if (rootvnode == NULL) {
				rootvnode = fsrootvp;
				vref(rootvnode);
			}
			vput(fsrootvp);
		}
		/* A read/write mount without a syncer vnode gets a new one. */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
			(void) vfs_allocate_syncvnode(mp);
		mtx_lock(&mountlist_mtx);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
		    &mountlist_mtx, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup((caddr_t)mp);
		return (error);
	}
	/*
	 * Success: unlink the mount from mountlist, detach it from the
	 * covered vnode, and release everything that belongs to it.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
		coveredvp->v_mountedhere = NULL;
	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
	lockdestroy(&mp->mnt_lock);
	if (coveredvp != NULL)
		vrele(coveredvp);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup((caddr_t)mp);
	free((caddr_t)mp, M_MOUNT);
	return (0);
}
639
640/*
641 * Sync each mounted filesystem.
642 */
643#ifndef _SYS_SYSPROTO_H_
644struct sync_args {
645        int     dummy;
646};
647#endif
648
649#ifdef DEBUG
650static int syncprt = 0;
651SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
652#endif
653
654/* ARGSUSED */
655int
656sync(td, uap)
657	struct thread *td;
658	struct sync_args *uap;
659{
660	struct mount *mp, *nmp;
661	int asyncflag;
662
663	mtx_lock(&mountlist_mtx);
664	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
665		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
666			nmp = TAILQ_NEXT(mp, mnt_list);
667			continue;
668		}
669		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
670		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
671			asyncflag = mp->mnt_flag & MNT_ASYNC;
672			mp->mnt_flag &= ~MNT_ASYNC;
673			vfs_msync(mp, MNT_NOWAIT);
674			VFS_SYNC(mp, MNT_NOWAIT,
675			    ((td != NULL) ? td->td_ucred : NOCRED), td);
676			mp->mnt_flag |= asyncflag;
677			vn_finished_write(mp);
678		}
679		mtx_lock(&mountlist_mtx);
680		nmp = TAILQ_NEXT(mp, mnt_list);
681		vfs_unbusy(mp, td);
682	}
683	mtx_unlock(&mountlist_mtx);
684#if 0
685/*
686 * XXX don't call vfs_bufstats() yet because that routine
687 * was not imported in the Lite2 merge.
688 */
689#ifdef DIAGNOSTIC
690	if (syncprt)
691		vfs_bufstats();
692#endif /* DIAGNOSTIC */
693#endif
694	return (0);
695}
696
697/* XXX PRISON: could be per prison flag */
698static int prison_quotas;
699#if 0
700SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
701#endif
702
703/*
704 * Change filesystem quotas.
705 */
706#ifndef _SYS_SYSPROTO_H_
707struct quotactl_args {
708	char *path;
709	int cmd;
710	int uid;
711	caddr_t arg;
712};
713#endif
714/* ARGSUSED */
715int
716quotactl(td, uap)
717	struct thread *td;
718	register struct quotactl_args /* {
719		syscallarg(char *) path;
720		syscallarg(int) cmd;
721		syscallarg(int) uid;
722		syscallarg(caddr_t) arg;
723	} */ *uap;
724{
725	struct mount *mp;
726	int error;
727	struct nameidata nd;
728
729	if (jailed(td->td_ucred) && !prison_quotas)
730		return (EPERM);
731	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
732	if ((error = namei(&nd)) != 0)
733		return (error);
734	NDFREE(&nd, NDF_ONLY_PNBUF);
735	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
736	vrele(nd.ni_vp);
737	if (error)
738		return (error);
739	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
740	    SCARG(uap, arg), td);
741	vn_finished_write(mp);
742	return (error);
743}
744
745/*
746 * Get filesystem statistics.
747 */
748#ifndef _SYS_SYSPROTO_H_
749struct statfs_args {
750	char *path;
751	struct statfs *buf;
752};
753#endif
754/* ARGSUSED */
755int
756statfs(td, uap)
757	struct thread *td;
758	register struct statfs_args /* {
759		syscallarg(char *) path;
760		syscallarg(struct statfs *) buf;
761	} */ *uap;
762{
763	register struct mount *mp;
764	register struct statfs *sp;
765	int error;
766	struct nameidata nd;
767	struct statfs sb;
768
769	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
770	if ((error = namei(&nd)) != 0)
771		return (error);
772	mp = nd.ni_vp->v_mount;
773	sp = &mp->mnt_stat;
774	NDFREE(&nd, NDF_ONLY_PNBUF);
775	vrele(nd.ni_vp);
776	error = VFS_STATFS(mp, sp, td);
777	if (error)
778		return (error);
779	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
780	if (suser_xxx(td->td_ucred, 0, 0)) {
781		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
782		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
783		sp = &sb;
784	}
785	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
786}
787
788/*
789 * Get filesystem statistics.
790 */
791#ifndef _SYS_SYSPROTO_H_
792struct fstatfs_args {
793	int fd;
794	struct statfs *buf;
795};
796#endif
797/* ARGSUSED */
798int
799fstatfs(td, uap)
800	struct thread *td;
801	register struct fstatfs_args /* {
802		syscallarg(int) fd;
803		syscallarg(struct statfs *) buf;
804	} */ *uap;
805{
806	struct file *fp;
807	struct mount *mp;
808	register struct statfs *sp;
809	int error;
810	struct statfs sb;
811
812	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
813		return (error);
814	mp = ((struct vnode *)fp->f_data)->v_mount;
815	fdrop(fp, td);
816	if (mp == NULL)
817		return (EBADF);
818	sp = &mp->mnt_stat;
819	error = VFS_STATFS(mp, sp, td);
820	if (error)
821		return (error);
822	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
823	if (suser_xxx(td->td_ucred, 0, 0)) {
824		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
825		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
826		sp = &sb;
827	}
828	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
829}
830
831/*
832 * Get statistics on all filesystems.
833 */
834#ifndef _SYS_SYSPROTO_H_
835struct getfsstat_args {
836	struct statfs *buf;
837	long bufsize;
838	int flags;
839};
840#endif
841int
842getfsstat(td, uap)
843	struct thread *td;
844	register struct getfsstat_args /* {
845		syscallarg(struct statfs *) buf;
846		syscallarg(long) bufsize;
847		syscallarg(int) flags;
848	} */ *uap;
849{
850	register struct mount *mp, *nmp;
851	register struct statfs *sp;
852	caddr_t sfsp;
853	long count, maxcount, error;
854
855	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
856	sfsp = (caddr_t)SCARG(uap, buf);
857	count = 0;
858	mtx_lock(&mountlist_mtx);
859	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
860		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
861			nmp = TAILQ_NEXT(mp, mnt_list);
862			continue;
863		}
864		if (sfsp && count < maxcount) {
865			sp = &mp->mnt_stat;
866			/*
867			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
868			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
869			 * overrides MNT_WAIT.
870			 */
871			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
872			    (SCARG(uap, flags) & MNT_WAIT)) &&
873			    (error = VFS_STATFS(mp, sp, td))) {
874				mtx_lock(&mountlist_mtx);
875				nmp = TAILQ_NEXT(mp, mnt_list);
876				vfs_unbusy(mp, td);
877				continue;
878			}
879			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
880			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
881			if (error) {
882				vfs_unbusy(mp, td);
883				return (error);
884			}
885			sfsp += sizeof(*sp);
886		}
887		count++;
888		mtx_lock(&mountlist_mtx);
889		nmp = TAILQ_NEXT(mp, mnt_list);
890		vfs_unbusy(mp, td);
891	}
892	mtx_unlock(&mountlist_mtx);
893	if (sfsp && count > maxcount)
894		td->td_retval[0] = maxcount;
895	else
896		td->td_retval[0] = count;
897	return (0);
898}
899
900/*
901 * Change current working directory to a given file descriptor.
902 */
903#ifndef _SYS_SYSPROTO_H_
904struct fchdir_args {
905	int	fd;
906};
907#endif
908/* ARGSUSED */
909int
910fchdir(td, uap)
911	struct thread *td;
912	struct fchdir_args /* {
913		syscallarg(int) fd;
914	} */ *uap;
915{
916	register struct filedesc *fdp = td->td_proc->p_fd;
917	struct vnode *vp, *tdp, *vpold;
918	struct mount *mp;
919	struct file *fp;
920	int error;
921
922	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
923		return (error);
924	vp = (struct vnode *)fp->f_data;
925	VREF(vp);
926	fdrop(fp, td);
927	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
928	if (vp->v_type != VDIR)
929		error = ENOTDIR;
930	else
931		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
932	while (!error && (mp = vp->v_mountedhere) != NULL) {
933		if (vfs_busy(mp, 0, 0, td))
934			continue;
935		error = VFS_ROOT(mp, &tdp);
936		vfs_unbusy(mp, td);
937		if (error)
938			break;
939		vput(vp);
940		vp = tdp;
941	}
942	if (error) {
943		vput(vp);
944		return (error);
945	}
946	VOP_UNLOCK(vp, 0, td);
947	FILEDESC_LOCK(fdp);
948	vpold = fdp->fd_cdir;
949	fdp->fd_cdir = vp;
950	FILEDESC_UNLOCK(fdp);
951	vrele(vpold);
952	return (0);
953}
954
955/*
956 * Change current working directory (``.'').
957 */
958#ifndef _SYS_SYSPROTO_H_
959struct chdir_args {
960	char	*path;
961};
962#endif
963/* ARGSUSED */
964int
965chdir(td, uap)
966	struct thread *td;
967	struct chdir_args /* {
968		syscallarg(char *) path;
969	} */ *uap;
970{
971	register struct filedesc *fdp = td->td_proc->p_fd;
972	int error;
973	struct nameidata nd;
974	struct vnode *vp;
975
976	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
977	    SCARG(uap, path), td);
978	if ((error = change_dir(&nd, td)) != 0)
979		return (error);
980	NDFREE(&nd, NDF_ONLY_PNBUF);
981	FILEDESC_LOCK(fdp);
982	vp = fdp->fd_cdir;
983	fdp->fd_cdir = nd.ni_vp;
984	FILEDESC_UNLOCK(fdp);
985	vrele(vp);
986	return (0);
987}
988
989/*
990 * Helper function for raised chroot(2) security function:  Refuse if
991 * any filedescriptors are open directories.
992 */
993static int
994chroot_refuse_vdir_fds(fdp)
995	struct filedesc *fdp;
996{
997	struct vnode *vp;
998	struct file *fp;
999	int fd;
1000
1001	FILEDESC_LOCK(fdp);
1002	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1003		fp = fget_locked(fdp, fd);
1004		if (fp == NULL)
1005			continue;
1006		if (fp->f_type == DTYPE_VNODE) {
1007			vp = (struct vnode *)fp->f_data;
1008			if (vp->v_type == VDIR) {
1009				FILEDESC_UNLOCK(fdp);
1010				return (EPERM);
1011			}
1012		}
1013	}
1014	FILEDESC_UNLOCK(fdp);
1015	return (0);
1016}
1017
/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * chroot() only tests for the values 0 and 1, so any other value
 * behaves like 2.  "Already chroot(2)'ed" is detected there by the
 * process root directory (fd_rdir) differing from rootvnode.
 */

/* Policy value; defaults to 1 and is writable as kern.chroot_allow_open_directories. */
static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
1030
1031/*
1032 * Change notion of root (``/'') directory.
1033 */
1034#ifndef _SYS_SYSPROTO_H_
1035struct chroot_args {
1036	char	*path;
1037};
1038#endif
1039/* ARGSUSED */
1040int
1041chroot(td, uap)
1042	struct thread *td;
1043	struct chroot_args /* {
1044		syscallarg(char *) path;
1045	} */ *uap;
1046{
1047	register struct filedesc *fdp = td->td_proc->p_fd;
1048	int error;
1049	struct nameidata nd;
1050	struct vnode *vp;
1051
1052	error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1053	if (error)
1054		return (error);
1055	FILEDESC_LOCK(fdp);
1056	if (chroot_allow_open_directories == 0 ||
1057	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1058		FILEDESC_UNLOCK(fdp);
1059		error = chroot_refuse_vdir_fds(fdp);
1060	} else
1061		FILEDESC_UNLOCK(fdp);
1062	if (error)
1063		return (error);
1064	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1065	    SCARG(uap, path), td);
1066	if ((error = change_dir(&nd, td)) != 0)
1067		return (error);
1068	NDFREE(&nd, NDF_ONLY_PNBUF);
1069	FILEDESC_LOCK(fdp);
1070	vp = fdp->fd_rdir;
1071	fdp->fd_rdir = nd.ni_vp;
1072	if (!fdp->fd_jdir) {
1073		fdp->fd_jdir = nd.ni_vp;
1074                VREF(fdp->fd_jdir);
1075	}
1076	FILEDESC_UNLOCK(fdp);
1077	vrele(vp);
1078	return (0);
1079}
1080
1081/*
1082 * Common routine for chroot and chdir.
1083 */
1084static int
1085change_dir(ndp, td)
1086	register struct nameidata *ndp;
1087	struct thread *td;
1088{
1089	struct vnode *vp;
1090	int error;
1091
1092	error = namei(ndp);
1093	if (error)
1094		return (error);
1095	vp = ndp->ni_vp;
1096	if (vp->v_type != VDIR)
1097		error = ENOTDIR;
1098	else
1099		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1100	if (error)
1101		vput(vp);
1102	else
1103		VOP_UNLOCK(vp, 0, td);
1104	return (error);
1105}
1106
1107/*
1108 * Check permissions, allocate an open file structure,
1109 * and call the device open routine if any.
1110 */
1111#ifndef _SYS_SYSPROTO_H_
1112struct open_args {
1113	char	*path;
1114	int	flags;
1115	int	mode;
1116};
1117#endif
1118int
1119open(td, uap)
1120	struct thread *td;
1121	register struct open_args /* {
1122		syscallarg(char *) path;
1123		syscallarg(int) flags;
1124		syscallarg(int) mode;
1125	} */ *uap;
1126{
1127	struct proc *p = td->td_proc;
1128	struct filedesc *fdp = p->p_fd;
1129	struct file *fp;
1130	struct vnode *vp;
1131	struct vattr vat;
1132	struct mount *mp;
1133	int cmode, flags, oflags;
1134	struct file *nfp;
1135	int type, indx, error;
1136	struct flock lf;
1137	struct nameidata nd;
1138
1139	oflags = SCARG(uap, flags);
1140	if ((oflags & O_ACCMODE) == O_ACCMODE)
1141		return (EINVAL);
1142	flags = FFLAGS(oflags);
1143	error = falloc(td, &nfp, &indx);
1144	if (error)
1145		return (error);
1146	fp = nfp;
1147	FILEDESC_LOCK(fdp);
1148	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1149	FILEDESC_UNLOCK(fdp);
1150	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1151	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1152	/*
1153	 * Bump the ref count to prevent another process from closing
1154	 * the descriptor while we are blocked in vn_open()
1155	 */
1156	fhold(fp);
1157	error = vn_open(&nd, &flags, cmode);
1158	if (error) {
1159		/*
1160		 * release our own reference
1161		 */
1162		fdrop(fp, td);
1163
1164		/*
1165		 * handle special fdopen() case.  bleh.  dupfdopen() is
1166		 * responsible for dropping the old contents of ofiles[indx]
1167		 * if it succeeds.
1168		 */
1169		if ((error == ENODEV || error == ENXIO) &&
1170		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1171		    (error =
1172			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1173			td->td_retval[0] = indx;
1174			return (0);
1175		}
1176		/*
1177		 * Clean up the descriptor, but only if another thread hadn't
1178		 * replaced or closed it.
1179		 */
1180		FILEDESC_LOCK(fdp);
1181		if (fdp->fd_ofiles[indx] == fp) {
1182			fdp->fd_ofiles[indx] = NULL;
1183			FILEDESC_UNLOCK(fdp);
1184			fdrop(fp, td);
1185		} else
1186			FILEDESC_UNLOCK(fdp);
1187
1188		if (error == ERESTART)
1189			error = EINTR;
1190		return (error);
1191	}
1192	td->td_dupfd = 0;
1193	NDFREE(&nd, NDF_ONLY_PNBUF);
1194	vp = nd.ni_vp;
1195
1196	/*
1197	 * There should be 2 references on the file, one from the descriptor
1198	 * table, and one for us.
1199	 *
1200	 * Handle the case where someone closed the file (via its file
1201	 * descriptor) while we were blocked.  The end result should look
1202	 * like opening the file succeeded but it was immediately closed.
1203	 */
1204	FILEDESC_LOCK(fdp);
1205	FILE_LOCK(fp);
1206	if (fp->f_count == 1) {
1207		KASSERT(fdp->fd_ofiles[indx] != fp,
1208		    ("Open file descriptor lost all refs"));
1209		FILEDESC_UNLOCK(fdp);
1210		FILE_UNLOCK(fp);
1211		VOP_UNLOCK(vp, 0, td);
1212		vn_close(vp, flags & FMASK, fp->f_cred, td);
1213		fdrop(fp, td);
1214		td->td_retval[0] = indx;
1215		return 0;
1216	}
1217
1218	fp->f_data = (caddr_t)vp;
1219	fp->f_flag = flags & FMASK;
1220	fp->f_ops = &vnops;
1221	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1222	FILEDESC_UNLOCK(fdp);
1223	FILE_UNLOCK(fp);
1224	VOP_UNLOCK(vp, 0, td);
1225	if (flags & (O_EXLOCK | O_SHLOCK)) {
1226		lf.l_whence = SEEK_SET;
1227		lf.l_start = 0;
1228		lf.l_len = 0;
1229		if (flags & O_EXLOCK)
1230			lf.l_type = F_WRLCK;
1231		else
1232			lf.l_type = F_RDLCK;
1233		type = F_FLOCK;
1234		if ((flags & FNONBLOCK) == 0)
1235			type |= F_WAIT;
1236		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1237			goto bad;
1238		fp->f_flag |= FHASLOCK;
1239	}
1240	if (flags & O_TRUNC) {
1241		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1242			goto bad;
1243		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1244		VATTR_NULL(&vat);
1245		vat.va_size = 0;
1246		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1247		error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1248		VOP_UNLOCK(vp, 0, td);
1249		vn_finished_write(mp);
1250		if (error)
1251			goto bad;
1252	}
1253	/* assert that vn_open created a backing object if one is needed */
1254	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1255		("open: vmio vnode has no backing object after vn_open"));
1256	/*
1257	 * Release our private reference, leaving the one associated with
1258	 * the descriptor table intact.
1259	 */
1260	fdrop(fp, td);
1261	td->td_retval[0] = indx;
1262	return (0);
1263bad:
1264	FILEDESC_LOCK(fdp);
1265	if (fdp->fd_ofiles[indx] == fp) {
1266		fdp->fd_ofiles[indx] = NULL;
1267		FILEDESC_UNLOCK(fdp);
1268		fdrop(fp, td);
1269	} else
1270		FILEDESC_UNLOCK(fdp);
1271	return (error);
1272}
1273
1274#ifdef COMPAT_43
1275/*
1276 * Create a file.
1277 */
1278#ifndef _SYS_SYSPROTO_H_
1279struct ocreat_args {
1280	char	*path;
1281	int	mode;
1282};
1283#endif
1284int
1285ocreat(td, uap)
1286	struct thread *td;
1287	register struct ocreat_args /* {
1288		syscallarg(char *) path;
1289		syscallarg(int) mode;
1290	} */ *uap;
1291{
1292	struct open_args /* {
1293		syscallarg(char *) path;
1294		syscallarg(int) flags;
1295		syscallarg(int) mode;
1296	} */ nuap;
1297
1298	SCARG(&nuap, path) = SCARG(uap, path);
1299	SCARG(&nuap, mode) = SCARG(uap, mode);
1300	SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
1301	return (open(td, &nuap));
1302}
1303#endif /* COMPAT_43 */
1304
1305/*
1306 * Create a special file.
1307 */
1308#ifndef _SYS_SYSPROTO_H_
1309struct mknod_args {
1310	char	*path;
1311	int	mode;
1312	int	dev;
1313};
1314#endif
1315/* ARGSUSED */
1316int
1317mknod(td, uap)
1318	struct thread *td;
1319	register struct mknod_args /* {
1320		syscallarg(char *) path;
1321		syscallarg(int) mode;
1322		syscallarg(int) dev;
1323	} */ *uap;
1324{
1325	struct vnode *vp;
1326	struct mount *mp;
1327	struct vattr vattr;
1328	int error;
1329	int whiteout = 0;
1330	struct nameidata nd;
1331
1332	switch (SCARG(uap, mode) & S_IFMT) {
1333	case S_IFCHR:
1334	case S_IFBLK:
1335		error = suser_td(td);
1336		break;
1337	default:
1338		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
1339		break;
1340	}
1341	if (error)
1342		return (error);
1343restart:
1344	bwillwrite();
1345	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1346	if ((error = namei(&nd)) != 0)
1347		return (error);
1348	vp = nd.ni_vp;
1349	if (vp != NULL) {
1350		vrele(vp);
1351		error = EEXIST;
1352	} else {
1353		VATTR_NULL(&vattr);
1354		FILEDESC_LOCK(td->td_proc->p_fd);
1355		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1356		FILEDESC_UNLOCK(td->td_proc->p_fd);
1357		vattr.va_rdev = SCARG(uap, dev);
1358		whiteout = 0;
1359
1360		switch (SCARG(uap, mode) & S_IFMT) {
1361		case S_IFMT:	/* used by badsect to flag bad sectors */
1362			vattr.va_type = VBAD;
1363			break;
1364		case S_IFCHR:
1365			vattr.va_type = VCHR;
1366			break;
1367		case S_IFBLK:
1368			vattr.va_type = VBLK;
1369			break;
1370		case S_IFWHT:
1371			whiteout = 1;
1372			break;
1373		default:
1374			error = EINVAL;
1375			break;
1376		}
1377	}
1378	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1379		NDFREE(&nd, NDF_ONLY_PNBUF);
1380		vput(nd.ni_dvp);
1381		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1382			return (error);
1383		goto restart;
1384	}
1385	if (!error) {
1386		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1387		if (whiteout)
1388			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1389		else {
1390			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1391						&nd.ni_cnd, &vattr);
1392			if (error == 0)
1393				vput(nd.ni_vp);
1394		}
1395	}
1396	NDFREE(&nd, NDF_ONLY_PNBUF);
1397	vput(nd.ni_dvp);
1398	vn_finished_write(mp);
1399	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1400	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1401	return (error);
1402}
1403
1404/*
1405 * Create a named pipe.
1406 */
1407#ifndef _SYS_SYSPROTO_H_
1408struct mkfifo_args {
1409	char	*path;
1410	int	mode;
1411};
1412#endif
1413/* ARGSUSED */
1414int
1415mkfifo(td, uap)
1416	struct thread *td;
1417	register struct mkfifo_args /* {
1418		syscallarg(char *) path;
1419		syscallarg(int) mode;
1420	} */ *uap;
1421{
1422	struct mount *mp;
1423	struct vattr vattr;
1424	int error;
1425	struct nameidata nd;
1426
1427restart:
1428	bwillwrite();
1429	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1430	if ((error = namei(&nd)) != 0)
1431		return (error);
1432	if (nd.ni_vp != NULL) {
1433		NDFREE(&nd, NDF_ONLY_PNBUF);
1434		vrele(nd.ni_vp);
1435		vput(nd.ni_dvp);
1436		return (EEXIST);
1437	}
1438	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1439		NDFREE(&nd, NDF_ONLY_PNBUF);
1440		vput(nd.ni_dvp);
1441		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1442			return (error);
1443		goto restart;
1444	}
1445	VATTR_NULL(&vattr);
1446	vattr.va_type = VFIFO;
1447	FILEDESC_LOCK(td->td_proc->p_fd);
1448	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1449	FILEDESC_UNLOCK(td->td_proc->p_fd);
1450	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1451	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1452	if (error == 0)
1453		vput(nd.ni_vp);
1454	NDFREE(&nd, NDF_ONLY_PNBUF);
1455	vput(nd.ni_dvp);
1456	vn_finished_write(mp);
1457	return (error);
1458}
1459
1460/*
1461 * Make a hard file link.
1462 */
1463#ifndef _SYS_SYSPROTO_H_
1464struct link_args {
1465	char	*path;
1466	char	*link;
1467};
1468#endif
1469/* ARGSUSED */
1470int
1471link(td, uap)
1472	struct thread *td;
1473	register struct link_args /* {
1474		syscallarg(char *) path;
1475		syscallarg(char *) link;
1476	} */ *uap;
1477{
1478	struct vnode *vp;
1479	struct mount *mp;
1480	struct nameidata nd;
1481	int error;
1482
1483	bwillwrite();
1484	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
1485	if ((error = namei(&nd)) != 0)
1486		return (error);
1487	NDFREE(&nd, NDF_ONLY_PNBUF);
1488	vp = nd.ni_vp;
1489	if (vp->v_type == VDIR) {
1490		vrele(vp);
1491		return (EPERM);		/* POSIX */
1492	}
1493	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1494		vrele(vp);
1495		return (error);
1496	}
1497	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1498	if ((error = namei(&nd)) == 0) {
1499		if (nd.ni_vp != NULL) {
1500			vrele(nd.ni_vp);
1501			error = EEXIST;
1502		} else {
1503			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1504			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1505			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1506		}
1507		NDFREE(&nd, NDF_ONLY_PNBUF);
1508		vput(nd.ni_dvp);
1509	}
1510	vrele(vp);
1511	vn_finished_write(mp);
1512	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1513	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1514	return (error);
1515}
1516
1517/*
1518 * Make a symbolic link.
1519 */
1520#ifndef _SYS_SYSPROTO_H_
1521struct symlink_args {
1522	char	*path;
1523	char	*link;
1524};
1525#endif
1526/* ARGSUSED */
1527int
1528symlink(td, uap)
1529	struct thread *td;
1530	register struct symlink_args /* {
1531		syscallarg(char *) path;
1532		syscallarg(char *) link;
1533	} */ *uap;
1534{
1535	struct mount *mp;
1536	struct vattr vattr;
1537	char *path;
1538	int error;
1539	struct nameidata nd;
1540
1541	path = uma_zalloc(namei_zone, M_WAITOK);
1542	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1543		goto out;
1544restart:
1545	bwillwrite();
1546	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
1547	if ((error = namei(&nd)) != 0)
1548		goto out;
1549	if (nd.ni_vp) {
1550		NDFREE(&nd, NDF_ONLY_PNBUF);
1551		vrele(nd.ni_vp);
1552		vput(nd.ni_dvp);
1553		error = EEXIST;
1554		goto out;
1555	}
1556	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1557		NDFREE(&nd, NDF_ONLY_PNBUF);
1558		vput(nd.ni_dvp);
1559		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1560			return (error);
1561		goto restart;
1562	}
1563	VATTR_NULL(&vattr);
1564	FILEDESC_LOCK(td->td_proc->p_fd);
1565	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1566	FILEDESC_UNLOCK(td->td_proc->p_fd);
1567	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1568	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1569	NDFREE(&nd, NDF_ONLY_PNBUF);
1570	if (error == 0)
1571		vput(nd.ni_vp);
1572	vput(nd.ni_dvp);
1573	vn_finished_write(mp);
1574	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1575	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1576out:
1577	uma_zfree(namei_zone, path);
1578	return (error);
1579}
1580
1581/*
1582 * Delete a whiteout from the filesystem.
1583 */
1584/* ARGSUSED */
1585int
1586undelete(td, uap)
1587	struct thread *td;
1588	register struct undelete_args /* {
1589		syscallarg(char *) path;
1590	} */ *uap;
1591{
1592	int error;
1593	struct mount *mp;
1594	struct nameidata nd;
1595
1596restart:
1597	bwillwrite();
1598	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1599	    SCARG(uap, path), td);
1600	error = namei(&nd);
1601	if (error)
1602		return (error);
1603
1604	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1605		NDFREE(&nd, NDF_ONLY_PNBUF);
1606		if (nd.ni_vp)
1607			vrele(nd.ni_vp);
1608		vput(nd.ni_dvp);
1609		return (EEXIST);
1610	}
1611	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1612		NDFREE(&nd, NDF_ONLY_PNBUF);
1613		vput(nd.ni_dvp);
1614		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1615			return (error);
1616		goto restart;
1617	}
1618	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1619	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1620	NDFREE(&nd, NDF_ONLY_PNBUF);
1621	vput(nd.ni_dvp);
1622	vn_finished_write(mp);
1623	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1624	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1625	return (error);
1626}
1627
1628/*
1629 * Delete a name from the filesystem.
1630 */
1631#ifndef _SYS_SYSPROTO_H_
1632struct unlink_args {
1633	char	*path;
1634};
1635#endif
1636/* ARGSUSED */
1637int
1638unlink(td, uap)
1639	struct thread *td;
1640	struct unlink_args /* {
1641		syscallarg(char *) path;
1642	} */ *uap;
1643{
1644	struct mount *mp;
1645	struct vnode *vp;
1646	int error;
1647	struct nameidata nd;
1648
1649restart:
1650	bwillwrite();
1651	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1652	if ((error = namei(&nd)) != 0)
1653		return (error);
1654	vp = nd.ni_vp;
1655	if (vp->v_type == VDIR)
1656		error = EPERM;		/* POSIX */
1657	else {
1658		/*
1659		 * The root of a mounted filesystem cannot be deleted.
1660		 *
1661		 * XXX: can this only be a VDIR case?
1662		 */
1663		if (vp->v_flag & VROOT)
1664			error = EBUSY;
1665	}
1666	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1667		NDFREE(&nd, NDF_ONLY_PNBUF);
1668		vrele(vp);
1669		vput(nd.ni_dvp);
1670		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1671			return (error);
1672		goto restart;
1673	}
1674	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1675	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1676	if (!error) {
1677		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1678		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1679	}
1680	NDFREE(&nd, NDF_ONLY_PNBUF);
1681	vput(nd.ni_dvp);
1682	vput(vp);
1683	vn_finished_write(mp);
1684	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1685	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1686	return (error);
1687}
1688
1689/*
1690 * Reposition read/write file offset.
1691 */
1692#ifndef _SYS_SYSPROTO_H_
1693struct lseek_args {
1694	int	fd;
1695	int	pad;
1696	off_t	offset;
1697	int	whence;
1698};
1699#endif
1700int
1701lseek(td, uap)
1702	struct thread *td;
1703	register struct lseek_args /* {
1704		syscallarg(int) fd;
1705		syscallarg(int) pad;
1706		syscallarg(off_t) offset;
1707		syscallarg(int) whence;
1708	} */ *uap;
1709{
1710	struct ucred *cred = td->td_ucred;
1711	struct file *fp;
1712	struct vnode *vp;
1713	struct vattr vattr;
1714	off_t offset;
1715	int error, noneg;
1716
1717	if ((error = fget(td, uap->fd, &fp)) != 0)
1718		return (error);
1719	if (fp->f_type != DTYPE_VNODE) {
1720		fdrop(fp, td);
1721		return (ESPIPE);
1722	}
1723	vp = (struct vnode *)fp->f_data;
1724	noneg = (vp->v_type != VCHR);
1725	offset = SCARG(uap, offset);
1726	switch (SCARG(uap, whence)) {
1727	case L_INCR:
1728		if (noneg &&
1729		    (fp->f_offset < 0 ||
1730		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1731			return (EOVERFLOW);
1732		offset += fp->f_offset;
1733		break;
1734	case L_XTND:
1735		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1736		error = VOP_GETATTR(vp, &vattr, cred, td);
1737		VOP_UNLOCK(vp, 0, td);
1738		if (error)
1739			return (error);
1740		if (noneg &&
1741		    (vattr.va_size > OFF_MAX ||
1742		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1743			return (EOVERFLOW);
1744		offset += vattr.va_size;
1745		break;
1746	case L_SET:
1747		break;
1748	default:
1749		fdrop(fp, td);
1750		return (EINVAL);
1751	}
1752	if (noneg && offset < 0)
1753		return (EINVAL);
1754	fp->f_offset = offset;
1755	*(off_t *)(td->td_retval) = fp->f_offset;
1756	fdrop(fp, td);
1757	return (0);
1758}
1759
1760#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1761/*
1762 * Reposition read/write file offset.
1763 */
1764#ifndef _SYS_SYSPROTO_H_
1765struct olseek_args {
1766	int	fd;
1767	long	offset;
1768	int	whence;
1769};
1770#endif
1771int
1772olseek(td, uap)
1773	struct thread *td;
1774	register struct olseek_args /* {
1775		syscallarg(int) fd;
1776		syscallarg(long) offset;
1777		syscallarg(int) whence;
1778	} */ *uap;
1779{
1780	struct lseek_args /* {
1781		syscallarg(int) fd;
1782		syscallarg(int) pad;
1783		syscallarg(off_t) offset;
1784		syscallarg(int) whence;
1785	} */ nuap;
1786	int error;
1787
1788	SCARG(&nuap, fd) = SCARG(uap, fd);
1789	SCARG(&nuap, offset) = SCARG(uap, offset);
1790	SCARG(&nuap, whence) = SCARG(uap, whence);
1791	error = lseek(td, &nuap);
1792	return (error);
1793}
1794#endif /* COMPAT_43 */
1795
1796/*
1797 * Check access permissions using passed credentials.
1798 */
1799static int
1800vn_access(vp, user_flags, cred, td)
1801	struct vnode	*vp;
1802	int		user_flags;
1803	struct ucred	*cred;
1804	struct thread	*td;
1805{
1806	int error, flags;
1807
1808	/* Flags == 0 means only check for existence. */
1809	error = 0;
1810	if (user_flags) {
1811		flags = 0;
1812		if (user_flags & R_OK)
1813			flags |= VREAD;
1814		if (user_flags & W_OK)
1815			flags |= VWRITE;
1816		if (user_flags & X_OK)
1817			flags |= VEXEC;
1818		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1819			error = VOP_ACCESS(vp, flags, cred, td);
1820	}
1821	return (error);
1822}
1823
1824/*
1825 * Check access permissions using "real" credentials.
1826 */
1827#ifndef _SYS_SYSPROTO_H_
1828struct access_args {
1829	char	*path;
1830	int	flags;
1831};
1832#endif
1833int
1834access(td, uap)
1835	struct thread *td;
1836	register struct access_args /* {
1837		syscallarg(char *) path;
1838		syscallarg(int) flags;
1839	} */ *uap;
1840{
1841	struct ucred *cred, *tmpcred;
1842	register struct vnode *vp;
1843	int error;
1844	struct nameidata nd;
1845
1846	/*
1847	 * Create and modify a temporary credential instead of one that
1848	 * is potentially shared.  This could also mess up socket
1849	 * buffer accounting which can run in an interrupt context.
1850	 *
1851	 * XXX - Depending on how "threads" are finally implemented, it
1852	 * may be better to explicitly pass the credential to namei()
1853	 * rather than to modify the potentially shared process structure.
1854	 */
1855	cred = td->td_ucred;
1856	tmpcred = crdup(cred);
1857	tmpcred->cr_uid = cred->cr_ruid;
1858	tmpcred->cr_groups[0] = cred->cr_rgid;
1859	td->td_ucred = tmpcred;
1860	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1861	    SCARG(uap, path), td);
1862	if ((error = namei(&nd)) != 0)
1863		goto out1;
1864	vp = nd.ni_vp;
1865
1866	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
1867	NDFREE(&nd, NDF_ONLY_PNBUF);
1868	vput(vp);
1869out1:
1870	td->td_ucred = cred;
1871	crfree(tmpcred);
1872	return (error);
1873}
1874
1875/*
1876 * Check access permissions using "effective" credentials.
1877 */
1878#ifndef _SYS_SYSPROTO_H_
1879struct eaccess_args {
1880	char	*path;
1881	int	flags;
1882};
1883#endif
1884int
1885eaccess(td, uap)
1886	struct thread *td;
1887	register struct eaccess_args /* {
1888		syscallarg(char *) path;
1889		syscallarg(int) flags;
1890	} */ *uap;
1891{
1892	struct nameidata nd;
1893	struct vnode *vp;
1894	int error;
1895
1896	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1897	    SCARG(uap, path), td);
1898	if ((error = namei(&nd)) != 0)
1899		return (error);
1900	vp = nd.ni_vp;
1901
1902	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
1903	NDFREE(&nd, NDF_ONLY_PNBUF);
1904	vput(vp);
1905	return (error);
1906}
1907
1908#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1909/*
1910 * Get file status; this version follows links.
1911 */
1912#ifndef _SYS_SYSPROTO_H_
1913struct ostat_args {
1914	char	*path;
1915	struct ostat *ub;
1916};
1917#endif
1918/* ARGSUSED */
1919int
1920ostat(td, uap)
1921	struct thread *td;
1922	register struct ostat_args /* {
1923		syscallarg(char *) path;
1924		syscallarg(struct ostat *) ub;
1925	} */ *uap;
1926{
1927	struct stat sb;
1928	struct ostat osb;
1929	int error;
1930	struct nameidata nd;
1931
1932	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1933	    SCARG(uap, path), td);
1934	if ((error = namei(&nd)) != 0)
1935		return (error);
1936	NDFREE(&nd, NDF_ONLY_PNBUF);
1937	error = vn_stat(nd.ni_vp, &sb, td);
1938	vput(nd.ni_vp);
1939	if (error)
1940		return (error);
1941	cvtstat(&sb, &osb);
1942	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1943	return (error);
1944}
1945
1946/*
1947 * Get file status; this version does not follow links.
1948 */
1949#ifndef _SYS_SYSPROTO_H_
1950struct olstat_args {
1951	char	*path;
1952	struct ostat *ub;
1953};
1954#endif
1955/* ARGSUSED */
1956int
1957olstat(td, uap)
1958	struct thread *td;
1959	register struct olstat_args /* {
1960		syscallarg(char *) path;
1961		syscallarg(struct ostat *) ub;
1962	} */ *uap;
1963{
1964	struct vnode *vp;
1965	struct stat sb;
1966	struct ostat osb;
1967	int error;
1968	struct nameidata nd;
1969
1970	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1971	    SCARG(uap, path), td);
1972	if ((error = namei(&nd)) != 0)
1973		return (error);
1974	vp = nd.ni_vp;
1975	error = vn_stat(vp, &sb, td);
1976	NDFREE(&nd, NDF_ONLY_PNBUF);
1977	vput(vp);
1978	if (error)
1979		return (error);
1980	cvtstat(&sb, &osb);
1981	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
1982	return (error);
1983}
1984
1985/*
1986 * Convert from an old to a new stat structure.
1987 */
1988void
1989cvtstat(st, ost)
1990	struct stat *st;
1991	struct ostat *ost;
1992{
1993
1994	ost->st_dev = st->st_dev;
1995	ost->st_ino = st->st_ino;
1996	ost->st_mode = st->st_mode;
1997	ost->st_nlink = st->st_nlink;
1998	ost->st_uid = st->st_uid;
1999	ost->st_gid = st->st_gid;
2000	ost->st_rdev = st->st_rdev;
2001	if (st->st_size < (quad_t)1 << 32)
2002		ost->st_size = st->st_size;
2003	else
2004		ost->st_size = -2;
2005	ost->st_atime = st->st_atime;
2006	ost->st_mtime = st->st_mtime;
2007	ost->st_ctime = st->st_ctime;
2008	ost->st_blksize = st->st_blksize;
2009	ost->st_blocks = st->st_blocks;
2010	ost->st_flags = st->st_flags;
2011	ost->st_gen = st->st_gen;
2012}
2013#endif /* COMPAT_43 || COMPAT_SUNOS */
2014
2015/*
2016 * Get file status; this version follows links.
2017 */
2018#ifndef _SYS_SYSPROTO_H_
2019struct stat_args {
2020	char	*path;
2021	struct stat *ub;
2022};
2023#endif
2024/* ARGSUSED */
2025int
2026stat(td, uap)
2027	struct thread *td;
2028	register struct stat_args /* {
2029		syscallarg(char *) path;
2030		syscallarg(struct stat *) ub;
2031	} */ *uap;
2032{
2033	struct stat sb;
2034	int error;
2035	struct nameidata nd;
2036
2037#ifdef LOOKUP_SHARED
2038	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2039	    UIO_USERSPACE, SCARG(uap, path), td);
2040#else
2041	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2042	    SCARG(uap, path), td);
2043#endif
2044	if ((error = namei(&nd)) != 0)
2045		return (error);
2046	error = vn_stat(nd.ni_vp, &sb, td);
2047	NDFREE(&nd, NDF_ONLY_PNBUF);
2048	vput(nd.ni_vp);
2049	if (error)
2050		return (error);
2051	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2052	return (error);
2053}
2054
2055/*
2056 * Get file status; this version does not follow links.
2057 */
2058#ifndef _SYS_SYSPROTO_H_
2059struct lstat_args {
2060	char	*path;
2061	struct stat *ub;
2062};
2063#endif
2064/* ARGSUSED */
2065int
2066lstat(td, uap)
2067	struct thread *td;
2068	register struct lstat_args /* {
2069		syscallarg(char *) path;
2070		syscallarg(struct stat *) ub;
2071	} */ *uap;
2072{
2073	int error;
2074	struct vnode *vp;
2075	struct stat sb;
2076	struct nameidata nd;
2077
2078	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2079	    SCARG(uap, path), td);
2080	if ((error = namei(&nd)) != 0)
2081		return (error);
2082	vp = nd.ni_vp;
2083	error = vn_stat(vp, &sb, td);
2084	NDFREE(&nd, NDF_ONLY_PNBUF);
2085	vput(vp);
2086	if (error)
2087		return (error);
2088	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2089	return (error);
2090}
2091
2092/*
2093 * Implementation of the NetBSD stat() function.
2094 * XXX This should probably be collapsed with the FreeBSD version,
2095 * as the differences are only due to vn_stat() clearing spares at
2096 * the end of the structures.  vn_stat could be split to avoid this,
2097 * and thus collapse the following to close to zero code.
2098 */
2099void
2100cvtnstat(sb, nsb)
2101	struct stat *sb;
2102	struct nstat *nsb;
2103{
2104	nsb->st_dev = sb->st_dev;
2105	nsb->st_ino = sb->st_ino;
2106	nsb->st_mode = sb->st_mode;
2107	nsb->st_nlink = sb->st_nlink;
2108	nsb->st_uid = sb->st_uid;
2109	nsb->st_gid = sb->st_gid;
2110	nsb->st_rdev = sb->st_rdev;
2111	nsb->st_atimespec = sb->st_atimespec;
2112	nsb->st_mtimespec = sb->st_mtimespec;
2113	nsb->st_ctimespec = sb->st_ctimespec;
2114	nsb->st_size = sb->st_size;
2115	nsb->st_blocks = sb->st_blocks;
2116	nsb->st_blksize = sb->st_blksize;
2117	nsb->st_flags = sb->st_flags;
2118	nsb->st_gen = sb->st_gen;
2119	nsb->st_qspare[0] = sb->st_qspare[0];
2120	nsb->st_qspare[1] = sb->st_qspare[1];
2121}
2122
2123#ifndef _SYS_SYSPROTO_H_
2124struct nstat_args {
2125	char	*path;
2126	struct nstat *ub;
2127};
2128#endif
2129/* ARGSUSED */
2130int
2131nstat(td, uap)
2132	struct thread *td;
2133	register struct nstat_args /* {
2134		syscallarg(char *) path;
2135		syscallarg(struct nstat *) ub;
2136	} */ *uap;
2137{
2138	struct stat sb;
2139	struct nstat nsb;
2140	int error;
2141	struct nameidata nd;
2142
2143	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2144	    SCARG(uap, path), td);
2145	if ((error = namei(&nd)) != 0)
2146		return (error);
2147	NDFREE(&nd, NDF_ONLY_PNBUF);
2148	error = vn_stat(nd.ni_vp, &sb, td);
2149	vput(nd.ni_vp);
2150	if (error)
2151		return (error);
2152	cvtnstat(&sb, &nsb);
2153	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2154	return (error);
2155}
2156
2157/*
2158 * NetBSD lstat.  Get file status; this version does not follow links.
2159 */
2160#ifndef _SYS_SYSPROTO_H_
2161struct lstat_args {
2162	char	*path;
2163	struct stat *ub;
2164};
2165#endif
2166/* ARGSUSED */
2167int
2168nlstat(td, uap)
2169	struct thread *td;
2170	register struct nlstat_args /* {
2171		syscallarg(char *) path;
2172		syscallarg(struct nstat *) ub;
2173	} */ *uap;
2174{
2175	int error;
2176	struct vnode *vp;
2177	struct stat sb;
2178	struct nstat nsb;
2179	struct nameidata nd;
2180
2181	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2182	    SCARG(uap, path), td);
2183	if ((error = namei(&nd)) != 0)
2184		return (error);
2185	vp = nd.ni_vp;
2186	NDFREE(&nd, NDF_ONLY_PNBUF);
2187	error = vn_stat(vp, &sb, td);
2188	vput(vp);
2189	if (error)
2190		return (error);
2191	cvtnstat(&sb, &nsb);
2192	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2193	return (error);
2194}
2195
2196/*
2197 * Get configurable pathname variables.
2198 */
2199#ifndef _SYS_SYSPROTO_H_
2200struct pathconf_args {
2201	char	*path;
2202	int	name;
2203};
2204#endif
2205/* ARGSUSED */
2206int
2207pathconf(td, uap)
2208	struct thread *td;
2209	register struct pathconf_args /* {
2210		syscallarg(char *) path;
2211		syscallarg(int) name;
2212	} */ *uap;
2213{
2214	int error;
2215	struct nameidata nd;
2216
2217	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2218	    SCARG(uap, path), td);
2219	if ((error = namei(&nd)) != 0)
2220		return (error);
2221	NDFREE(&nd, NDF_ONLY_PNBUF);
2222	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2223	vput(nd.ni_vp);
2224	return (error);
2225}
2226
2227/*
2228 * Return target name of a symbolic link.
2229 */
2230#ifndef _SYS_SYSPROTO_H_
2231struct readlink_args {
2232	char	*path;
2233	char	*buf;
2234	int	count;
2235};
2236#endif
2237/* ARGSUSED */
2238int
2239readlink(td, uap)
2240	struct thread *td;
2241	register struct readlink_args /* {
2242		syscallarg(char *) path;
2243		syscallarg(char *) buf;
2244		syscallarg(int) count;
2245	} */ *uap;
2246{
2247	register struct vnode *vp;
2248	struct iovec aiov;
2249	struct uio auio;
2250	int error;
2251	struct nameidata nd;
2252
2253	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2254	    SCARG(uap, path), td);
2255	if ((error = namei(&nd)) != 0)
2256		return (error);
2257	NDFREE(&nd, NDF_ONLY_PNBUF);
2258	vp = nd.ni_vp;
2259	if (vp->v_type != VLNK)
2260		error = EINVAL;
2261	else {
2262		aiov.iov_base = SCARG(uap, buf);
2263		aiov.iov_len = SCARG(uap, count);
2264		auio.uio_iov = &aiov;
2265		auio.uio_iovcnt = 1;
2266		auio.uio_offset = 0;
2267		auio.uio_rw = UIO_READ;
2268		auio.uio_segflg = UIO_USERSPACE;
2269		auio.uio_td = td;
2270		auio.uio_resid = SCARG(uap, count);
2271		error = VOP_READLINK(vp, &auio, td->td_ucred);
2272	}
2273	vput(vp);
2274	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2275	return (error);
2276}
2277
2278/*
2279 * Common implementation code for chflags() and fchflags().
2280 */
2281static int
2282setfflags(td, vp, flags)
2283	struct thread *td;
2284	struct vnode *vp;
2285	int flags;
2286{
2287	int error;
2288	struct mount *mp;
2289	struct vattr vattr;
2290
2291	/*
2292	 * Prevent non-root users from setting flags on devices.  When
2293	 * a device is reused, users can retain ownership of the device
2294	 * if they are allowed to set flags and programs assume that
2295	 * chown can't fail when done as root.
2296	 */
2297	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2298		error = suser_xxx(td->td_ucred, td->td_proc, PRISON_ROOT);
2299		if (error)
2300			return (error);
2301	}
2302
2303	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2304		return (error);
2305	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2306	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2307	VATTR_NULL(&vattr);
2308	vattr.va_flags = flags;
2309	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2310	VOP_UNLOCK(vp, 0, td);
2311	vn_finished_write(mp);
2312	return (error);
2313}
2314
2315/*
2316 * Change flags of a file given a path name.
2317 */
2318#ifndef _SYS_SYSPROTO_H_
2319struct chflags_args {
2320	char	*path;
2321	int	flags;
2322};
2323#endif
2324/* ARGSUSED */
2325int
2326chflags(td, uap)
2327	struct thread *td;
2328	register struct chflags_args /* {
2329		syscallarg(char *) path;
2330		syscallarg(int) flags;
2331	} */ *uap;
2332{
2333	int error;
2334	struct nameidata nd;
2335
2336	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2337	if ((error = namei(&nd)) != 0)
2338		return (error);
2339	NDFREE(&nd, NDF_ONLY_PNBUF);
2340	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2341	vrele(nd.ni_vp);
2342	return error;
2343}
2344
2345/*
2346 * Change flags of a file given a file descriptor.
2347 */
2348#ifndef _SYS_SYSPROTO_H_
2349struct fchflags_args {
2350	int	fd;
2351	int	flags;
2352};
2353#endif
2354/* ARGSUSED */
2355int
2356fchflags(td, uap)
2357	struct thread *td;
2358	register struct fchflags_args /* {
2359		syscallarg(int) fd;
2360		syscallarg(int) flags;
2361	} */ *uap;
2362{
2363	struct file *fp;
2364	int error;
2365
2366	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2367		return (error);
2368	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2369	fdrop(fp, td);
2370	return (error);
2371}
2372
2373/*
2374 * Common implementation code for chmod(), lchmod() and fchmod().
2375 */
2376static int
2377setfmode(td, vp, mode)
2378	struct thread *td;
2379	struct vnode *vp;
2380	int mode;
2381{
2382	int error;
2383	struct mount *mp;
2384	struct vattr vattr;
2385
2386	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2387		return (error);
2388	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2389	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2390	VATTR_NULL(&vattr);
2391	vattr.va_mode = mode & ALLPERMS;
2392	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2393	VOP_UNLOCK(vp, 0, td);
2394	vn_finished_write(mp);
2395	return error;
2396}
2397
2398/*
2399 * Change mode of a file given path name.
2400 */
2401#ifndef _SYS_SYSPROTO_H_
2402struct chmod_args {
2403	char	*path;
2404	int	mode;
2405};
2406#endif
2407/* ARGSUSED */
2408int
2409chmod(td, uap)
2410	struct thread *td;
2411	register struct chmod_args /* {
2412		syscallarg(char *) path;
2413		syscallarg(int) mode;
2414	} */ *uap;
2415{
2416	int error;
2417	struct nameidata nd;
2418
2419	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2420	if ((error = namei(&nd)) != 0)
2421		return (error);
2422	NDFREE(&nd, NDF_ONLY_PNBUF);
2423	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2424	vrele(nd.ni_vp);
2425	return error;
2426}
2427
2428/*
2429 * Change mode of a file given path name (don't follow links.)
2430 */
2431#ifndef _SYS_SYSPROTO_H_
2432struct lchmod_args {
2433	char	*path;
2434	int	mode;
2435};
2436#endif
2437/* ARGSUSED */
2438int
2439lchmod(td, uap)
2440	struct thread *td;
2441	register struct lchmod_args /* {
2442		syscallarg(char *) path;
2443		syscallarg(int) mode;
2444	} */ *uap;
2445{
2446	int error;
2447	struct nameidata nd;
2448
2449	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2450	if ((error = namei(&nd)) != 0)
2451		return (error);
2452	NDFREE(&nd, NDF_ONLY_PNBUF);
2453	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2454	vrele(nd.ni_vp);
2455	return error;
2456}
2457
2458/*
2459 * Change mode of a file given a file descriptor.
2460 */
2461#ifndef _SYS_SYSPROTO_H_
2462struct fchmod_args {
2463	int	fd;
2464	int	mode;
2465};
2466#endif
2467/* ARGSUSED */
2468int
2469fchmod(td, uap)
2470	struct thread *td;
2471	register struct fchmod_args /* {
2472		syscallarg(int) fd;
2473		syscallarg(int) mode;
2474	} */ *uap;
2475{
2476	struct file *fp;
2477	struct vnode *vp;
2478	int error;
2479
2480	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2481		return (error);
2482	vp = (struct vnode *)fp->f_data;
2483	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2484	fdrop(fp, td);
2485	return (error);
2486}
2487
2488/*
2489 * Common implementation for chown(), lchown(), and fchown()
2490 */
2491static int
2492setfown(td, vp, uid, gid)
2493	struct thread *td;
2494	struct vnode *vp;
2495	uid_t uid;
2496	gid_t gid;
2497{
2498	int error;
2499	struct mount *mp;
2500	struct vattr vattr;
2501
2502	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2503		return (error);
2504	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2505	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2506	VATTR_NULL(&vattr);
2507	vattr.va_uid = uid;
2508	vattr.va_gid = gid;
2509	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2510	VOP_UNLOCK(vp, 0, td);
2511	vn_finished_write(mp);
2512	return error;
2513}
2514
2515/*
2516 * Set ownership given a path name.
2517 */
2518#ifndef _SYS_SYSPROTO_H_
2519struct chown_args {
2520	char	*path;
2521	int	uid;
2522	int	gid;
2523};
2524#endif
2525/* ARGSUSED */
2526int
2527chown(td, uap)
2528	struct thread *td;
2529	register struct chown_args /* {
2530		syscallarg(char *) path;
2531		syscallarg(int) uid;
2532		syscallarg(int) gid;
2533	} */ *uap;
2534{
2535	int error;
2536	struct nameidata nd;
2537
2538	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2539	if ((error = namei(&nd)) != 0)
2540		return (error);
2541	NDFREE(&nd, NDF_ONLY_PNBUF);
2542	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2543	vrele(nd.ni_vp);
2544	return (error);
2545}
2546
2547/*
2548 * Set ownership given a path name, do not cross symlinks.
2549 */
2550#ifndef _SYS_SYSPROTO_H_
2551struct lchown_args {
2552	char	*path;
2553	int	uid;
2554	int	gid;
2555};
2556#endif
2557/* ARGSUSED */
2558int
2559lchown(td, uap)
2560	struct thread *td;
2561	register struct lchown_args /* {
2562		syscallarg(char *) path;
2563		syscallarg(int) uid;
2564		syscallarg(int) gid;
2565	} */ *uap;
2566{
2567	int error;
2568	struct nameidata nd;
2569
2570	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2571	if ((error = namei(&nd)) != 0)
2572		return (error);
2573	NDFREE(&nd, NDF_ONLY_PNBUF);
2574	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2575	vrele(nd.ni_vp);
2576	return (error);
2577}
2578
2579/*
2580 * Set ownership given a file descriptor.
2581 */
2582#ifndef _SYS_SYSPROTO_H_
2583struct fchown_args {
2584	int	fd;
2585	int	uid;
2586	int	gid;
2587};
2588#endif
2589/* ARGSUSED */
2590int
2591fchown(td, uap)
2592	struct thread *td;
2593	register struct fchown_args /* {
2594		syscallarg(int) fd;
2595		syscallarg(int) uid;
2596		syscallarg(int) gid;
2597	} */ *uap;
2598{
2599	struct file *fp;
2600	struct vnode *vp;
2601	int error;
2602
2603	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2604		return (error);
2605	vp = (struct vnode *)fp->f_data;
2606	error = setfown(td, (struct vnode *)fp->f_data,
2607		SCARG(uap, uid), SCARG(uap, gid));
2608	fdrop(fp, td);
2609	return (error);
2610}
2611
2612/*
2613 * Common implementation code for utimes(), lutimes(), and futimes().
2614 */
2615static int
2616getutimes(usrtvp, tsp)
2617	const struct timeval *usrtvp;
2618	struct timespec *tsp;
2619{
2620	struct timeval tv[2];
2621	int error;
2622
2623	if (usrtvp == NULL) {
2624		microtime(&tv[0]);
2625		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2626		tsp[1] = tsp[0];
2627	} else {
2628		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2629			return (error);
2630		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2631		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2632	}
2633	return 0;
2634}
2635
2636/*
2637 * Common implementation code for utimes(), lutimes(), and futimes().
2638 */
2639static int
2640setutimes(td, vp, ts, nullflag)
2641	struct thread *td;
2642	struct vnode *vp;
2643	const struct timespec *ts;
2644	int nullflag;
2645{
2646	int error;
2647	struct mount *mp;
2648	struct vattr vattr;
2649
2650	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2651		return (error);
2652	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2653	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2654	VATTR_NULL(&vattr);
2655	vattr.va_atime = ts[0];
2656	vattr.va_mtime = ts[1];
2657	if (nullflag)
2658		vattr.va_vaflags |= VA_UTIMES_NULL;
2659	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2660	VOP_UNLOCK(vp, 0, td);
2661	vn_finished_write(mp);
2662	return error;
2663}
2664
2665/*
2666 * Set the access and modification times of a file.
2667 */
2668#ifndef _SYS_SYSPROTO_H_
2669struct utimes_args {
2670	char	*path;
2671	struct	timeval *tptr;
2672};
2673#endif
2674/* ARGSUSED */
2675int
2676utimes(td, uap)
2677	struct thread *td;
2678	register struct utimes_args /* {
2679		syscallarg(char *) path;
2680		syscallarg(struct timeval *) tptr;
2681	} */ *uap;
2682{
2683	struct timespec ts[2];
2684	struct timeval *usrtvp;
2685	int error;
2686	struct nameidata nd;
2687
2688	usrtvp = SCARG(uap, tptr);
2689	if ((error = getutimes(usrtvp, ts)) != 0)
2690		return (error);
2691	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2692	if ((error = namei(&nd)) != 0)
2693		return (error);
2694	NDFREE(&nd, NDF_ONLY_PNBUF);
2695	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2696	vrele(nd.ni_vp);
2697	return (error);
2698}
2699
2700/*
2701 * Set the access and modification times of a file.
2702 */
2703#ifndef _SYS_SYSPROTO_H_
2704struct lutimes_args {
2705	char	*path;
2706	struct	timeval *tptr;
2707};
2708#endif
2709/* ARGSUSED */
2710int
2711lutimes(td, uap)
2712	struct thread *td;
2713	register struct lutimes_args /* {
2714		syscallarg(char *) path;
2715		syscallarg(struct timeval *) tptr;
2716	} */ *uap;
2717{
2718	struct timespec ts[2];
2719	struct timeval *usrtvp;
2720	int error;
2721	struct nameidata nd;
2722
2723	usrtvp = SCARG(uap, tptr);
2724	if ((error = getutimes(usrtvp, ts)) != 0)
2725		return (error);
2726	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2727	if ((error = namei(&nd)) != 0)
2728		return (error);
2729	NDFREE(&nd, NDF_ONLY_PNBUF);
2730	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
2731	vrele(nd.ni_vp);
2732	return (error);
2733}
2734
2735/*
2736 * Set the access and modification times of a file.
2737 */
2738#ifndef _SYS_SYSPROTO_H_
2739struct futimes_args {
2740	int	fd;
2741	struct	timeval *tptr;
2742};
2743#endif
2744/* ARGSUSED */
2745int
2746futimes(td, uap)
2747	struct thread *td;
2748	register struct futimes_args /* {
2749		syscallarg(int ) fd;
2750		syscallarg(struct timeval *) tptr;
2751	} */ *uap;
2752{
2753	struct timespec ts[2];
2754	struct file *fp;
2755	struct timeval *usrtvp;
2756	int error;
2757
2758	usrtvp = SCARG(uap, tptr);
2759	if ((error = getutimes(usrtvp, ts)) != 0)
2760		return (error);
2761	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2762		return (error);
2763	error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
2764	fdrop(fp, td);
2765	return (error);
2766}
2767
2768/*
2769 * Truncate a file given its path name.
2770 */
2771#ifndef _SYS_SYSPROTO_H_
2772struct truncate_args {
2773	char	*path;
2774	int	pad;
2775	off_t	length;
2776};
2777#endif
2778/* ARGSUSED */
2779int
2780truncate(td, uap)
2781	struct thread *td;
2782	register struct truncate_args /* {
2783		syscallarg(char *) path;
2784		syscallarg(int) pad;
2785		syscallarg(off_t) length;
2786	} */ *uap;
2787{
2788	struct mount *mp;
2789	struct vnode *vp;
2790	struct vattr vattr;
2791	int error;
2792	struct nameidata nd;
2793
2794	if (uap->length < 0)
2795		return(EINVAL);
2796	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2797	if ((error = namei(&nd)) != 0)
2798		return (error);
2799	vp = nd.ni_vp;
2800	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2801		vrele(vp);
2802		return (error);
2803	}
2804	NDFREE(&nd, NDF_ONLY_PNBUF);
2805	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2806	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2807	if (vp->v_type == VDIR)
2808		error = EISDIR;
2809	else if ((error = vn_writechk(vp)) == 0 &&
2810	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2811		VATTR_NULL(&vattr);
2812		vattr.va_size = SCARG(uap, length);
2813		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2814	}
2815	vput(vp);
2816	vn_finished_write(mp);
2817	return (error);
2818}
2819
2820/*
2821 * Truncate a file given a file descriptor.
2822 */
2823#ifndef _SYS_SYSPROTO_H_
2824struct ftruncate_args {
2825	int	fd;
2826	int	pad;
2827	off_t	length;
2828};
2829#endif
2830/* ARGSUSED */
2831int
2832ftruncate(td, uap)
2833	struct thread *td;
2834	register struct ftruncate_args /* {
2835		syscallarg(int) fd;
2836		syscallarg(int) pad;
2837		syscallarg(off_t) length;
2838	} */ *uap;
2839{
2840	struct mount *mp;
2841	struct vattr vattr;
2842	struct vnode *vp;
2843	struct file *fp;
2844	int error;
2845
2846	if (uap->length < 0)
2847		return(EINVAL);
2848	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2849		return (error);
2850	if ((fp->f_flag & FWRITE) == 0) {
2851		fdrop(fp, td);
2852		return (EINVAL);
2853	}
2854	vp = (struct vnode *)fp->f_data;
2855	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2856		fdrop(fp, td);
2857		return (error);
2858	}
2859	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2860	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2861	if (vp->v_type == VDIR)
2862		error = EISDIR;
2863	else if ((error = vn_writechk(vp)) == 0) {
2864		VATTR_NULL(&vattr);
2865		vattr.va_size = SCARG(uap, length);
2866		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2867	}
2868	VOP_UNLOCK(vp, 0, td);
2869	vn_finished_write(mp);
2870	fdrop(fp, td);
2871	return (error);
2872}
2873
2874#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2875/*
2876 * Truncate a file given its path name.
2877 */
2878#ifndef _SYS_SYSPROTO_H_
2879struct otruncate_args {
2880	char	*path;
2881	long	length;
2882};
2883#endif
2884/* ARGSUSED */
2885int
2886otruncate(td, uap)
2887	struct thread *td;
2888	register struct otruncate_args /* {
2889		syscallarg(char *) path;
2890		syscallarg(long) length;
2891	} */ *uap;
2892{
2893	struct truncate_args /* {
2894		syscallarg(char *) path;
2895		syscallarg(int) pad;
2896		syscallarg(off_t) length;
2897	} */ nuap;
2898
2899	SCARG(&nuap, path) = SCARG(uap, path);
2900	SCARG(&nuap, length) = SCARG(uap, length);
2901	return (truncate(td, &nuap));
2902}
2903
2904/*
2905 * Truncate a file given a file descriptor.
2906 */
2907#ifndef _SYS_SYSPROTO_H_
2908struct oftruncate_args {
2909	int	fd;
2910	long	length;
2911};
2912#endif
2913/* ARGSUSED */
2914int
2915oftruncate(td, uap)
2916	struct thread *td;
2917	register struct oftruncate_args /* {
2918		syscallarg(int) fd;
2919		syscallarg(long) length;
2920	} */ *uap;
2921{
2922	struct ftruncate_args /* {
2923		syscallarg(int) fd;
2924		syscallarg(int) pad;
2925		syscallarg(off_t) length;
2926	} */ nuap;
2927
2928	SCARG(&nuap, fd) = SCARG(uap, fd);
2929	SCARG(&nuap, length) = SCARG(uap, length);
2930	return (ftruncate(td, &nuap));
2931}
2932#endif /* COMPAT_43 || COMPAT_SUNOS */
2933
2934/*
2935 * Sync an open file.
2936 */
2937#ifndef _SYS_SYSPROTO_H_
2938struct fsync_args {
2939	int	fd;
2940};
2941#endif
2942/* ARGSUSED */
2943int
2944fsync(td, uap)
2945	struct thread *td;
2946	struct fsync_args /* {
2947		syscallarg(int) fd;
2948	} */ *uap;
2949{
2950	struct vnode *vp;
2951	struct mount *mp;
2952	struct file *fp;
2953	vm_object_t obj;
2954	int error;
2955
2956	GIANT_REQUIRED;
2957
2958	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2959		return (error);
2960	vp = (struct vnode *)fp->f_data;
2961	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2962		fdrop(fp, td);
2963		return (error);
2964	}
2965	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2966	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2967		vm_object_page_clean(obj, 0, 0, 0);
2968	}
2969	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2970#ifdef SOFTUPDATES
2971	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
2972	    error = softdep_fsync(vp);
2973#endif
2974
2975	VOP_UNLOCK(vp, 0, td);
2976	vn_finished_write(mp);
2977	fdrop(fp, td);
2978	return (error);
2979}
2980
2981/*
2982 * Rename files.  Source and destination must either both be directories,
2983 * or both not be directories.  If target is a directory, it must be empty.
2984 */
2985#ifndef _SYS_SYSPROTO_H_
2986struct rename_args {
2987	char	*from;
2988	char	*to;
2989};
2990#endif
2991/* ARGSUSED */
2992int
2993rename(td, uap)
2994	struct thread *td;
2995	register struct rename_args /* {
2996		syscallarg(char *) from;
2997		syscallarg(char *) to;
2998	} */ *uap;
2999{
3000	struct mount *mp;
3001	struct vnode *tvp, *fvp, *tdvp;
3002	struct nameidata fromnd, tond;
3003	int error;
3004
3005	bwillwrite();
3006	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3007	    SCARG(uap, from), td);
3008	if ((error = namei(&fromnd)) != 0)
3009		return (error);
3010	fvp = fromnd.ni_vp;
3011	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
3012		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3013		vrele(fromnd.ni_dvp);
3014		vrele(fvp);
3015		goto out1;
3016	}
3017	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
3018	    UIO_USERSPACE, SCARG(uap, to), td);
3019	if (fromnd.ni_vp->v_type == VDIR)
3020		tond.ni_cnd.cn_flags |= WILLBEDIR;
3021	if ((error = namei(&tond)) != 0) {
3022		/* Translate error code for rename("dir1", "dir2/."). */
3023		if (error == EISDIR && fvp->v_type == VDIR)
3024			error = EINVAL;
3025		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3026		vrele(fromnd.ni_dvp);
3027		vrele(fvp);
3028		goto out1;
3029	}
3030	tdvp = tond.ni_dvp;
3031	tvp = tond.ni_vp;
3032	if (tvp != NULL) {
3033		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3034			error = ENOTDIR;
3035			goto out;
3036		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3037			error = EISDIR;
3038			goto out;
3039		}
3040	}
3041	if (fvp == tdvp)
3042		error = EINVAL;
3043	/*
3044	 * If source is the same as the destination (that is the
3045	 * same inode number with the same name in the same directory),
3046	 * then there is nothing to do.
3047	 */
3048	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
3049	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3050	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3051	      fromnd.ni_cnd.cn_namelen))
3052		error = -1;
3053out:
3054	if (!error) {
3055		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3056		if (fromnd.ni_dvp != tdvp) {
3057			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3058		}
3059		if (tvp) {
3060			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3061		}
3062		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3063				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3064		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3065		NDFREE(&tond, NDF_ONLY_PNBUF);
3066	} else {
3067		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3068		NDFREE(&tond, NDF_ONLY_PNBUF);
3069		if (tdvp == tvp)
3070			vrele(tdvp);
3071		else
3072			vput(tdvp);
3073		if (tvp)
3074			vput(tvp);
3075		vrele(fromnd.ni_dvp);
3076		vrele(fvp);
3077	}
3078	vrele(tond.ni_startdir);
3079	vn_finished_write(mp);
3080	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3081	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3082	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3083	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3084out1:
3085	if (fromnd.ni_startdir)
3086		vrele(fromnd.ni_startdir);
3087	if (error == -1)
3088		return (0);
3089	return (error);
3090}
3091
3092/*
3093 * Make a directory file.
3094 */
3095#ifndef _SYS_SYSPROTO_H_
3096struct mkdir_args {
3097	char	*path;
3098	int	mode;
3099};
3100#endif
3101/* ARGSUSED */
3102int
3103mkdir(td, uap)
3104	struct thread *td;
3105	register struct mkdir_args /* {
3106		syscallarg(char *) path;
3107		syscallarg(int) mode;
3108	} */ *uap;
3109{
3110
3111	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3112}
3113
3114int
3115vn_mkdir(path, mode, segflg, td)
3116	char *path;
3117	int mode;
3118	enum uio_seg segflg;
3119	struct thread *td;
3120{
3121	struct mount *mp;
3122	struct vnode *vp;
3123	struct vattr vattr;
3124	int error;
3125	struct nameidata nd;
3126
3127restart:
3128	bwillwrite();
3129	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3130	nd.ni_cnd.cn_flags |= WILLBEDIR;
3131	if ((error = namei(&nd)) != 0)
3132		return (error);
3133	vp = nd.ni_vp;
3134	if (vp != NULL) {
3135		NDFREE(&nd, NDF_ONLY_PNBUF);
3136		vrele(vp);
3137		vput(nd.ni_dvp);
3138		return (EEXIST);
3139	}
3140	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3141		NDFREE(&nd, NDF_ONLY_PNBUF);
3142		vput(nd.ni_dvp);
3143		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3144			return (error);
3145		goto restart;
3146	}
3147	VATTR_NULL(&vattr);
3148	vattr.va_type = VDIR;
3149	FILEDESC_LOCK(td->td_proc->p_fd);
3150	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3151	FILEDESC_UNLOCK(td->td_proc->p_fd);
3152	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3153	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3154	NDFREE(&nd, NDF_ONLY_PNBUF);
3155	vput(nd.ni_dvp);
3156	if (!error)
3157		vput(nd.ni_vp);
3158	vn_finished_write(mp);
3159	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3160	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3161	return (error);
3162}
3163
3164/*
3165 * Remove a directory file.
3166 */
3167#ifndef _SYS_SYSPROTO_H_
3168struct rmdir_args {
3169	char	*path;
3170};
3171#endif
3172/* ARGSUSED */
3173int
3174rmdir(td, uap)
3175	struct thread *td;
3176	struct rmdir_args /* {
3177		syscallarg(char *) path;
3178	} */ *uap;
3179{
3180	struct mount *mp;
3181	struct vnode *vp;
3182	int error;
3183	struct nameidata nd;
3184
3185restart:
3186	bwillwrite();
3187	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3188	    SCARG(uap, path), td);
3189	if ((error = namei(&nd)) != 0)
3190		return (error);
3191	vp = nd.ni_vp;
3192	if (vp->v_type != VDIR) {
3193		error = ENOTDIR;
3194		goto out;
3195	}
3196	/*
3197	 * No rmdir "." please.
3198	 */
3199	if (nd.ni_dvp == vp) {
3200		error = EINVAL;
3201		goto out;
3202	}
3203	/*
3204	 * The root of a mounted filesystem cannot be deleted.
3205	 */
3206	if (vp->v_flag & VROOT) {
3207		error = EBUSY;
3208		goto out;
3209	}
3210	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3211		NDFREE(&nd, NDF_ONLY_PNBUF);
3212		if (nd.ni_dvp == vp)
3213			vrele(nd.ni_dvp);
3214		else
3215			vput(nd.ni_dvp);
3216		vput(vp);
3217		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3218			return (error);
3219		goto restart;
3220	}
3221	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3222	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3223	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3224	vn_finished_write(mp);
3225out:
3226	NDFREE(&nd, NDF_ONLY_PNBUF);
3227	if (nd.ni_dvp == vp)
3228		vrele(nd.ni_dvp);
3229	else
3230		vput(nd.ni_dvp);
3231	vput(vp);
3232	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3233	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3234	return (error);
3235}
3236
3237#ifdef COMPAT_43
3238/*
3239 * Read a block of directory entries in a file system independent format.
3240 */
3241#ifndef _SYS_SYSPROTO_H_
3242struct ogetdirentries_args {
3243	int	fd;
3244	char	*buf;
3245	u_int	count;
3246	long	*basep;
3247};
3248#endif
3249int
3250ogetdirentries(td, uap)
3251	struct thread *td;
3252	register struct ogetdirentries_args /* {
3253		syscallarg(int) fd;
3254		syscallarg(char *) buf;
3255		syscallarg(u_int) count;
3256		syscallarg(long *) basep;
3257	} */ *uap;
3258{
3259	struct vnode *vp;
3260	struct file *fp;
3261	struct uio auio, kuio;
3262	struct iovec aiov, kiov;
3263	struct dirent *dp, *edp;
3264	caddr_t dirbuf;
3265	int error, eofflag, readcnt;
3266	long loff;
3267
3268	/* XXX arbitrary sanity limit on `count'. */
3269	if (SCARG(uap, count) > 64 * 1024)
3270		return (EINVAL);
3271	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3272		return (error);
3273	if ((fp->f_flag & FREAD) == 0) {
3274		fdrop(fp, td);
3275		return (EBADF);
3276	}
3277	vp = (struct vnode *)fp->f_data;
3278unionread:
3279	if (vp->v_type != VDIR) {
3280		fdrop(fp, td);
3281		return (EINVAL);
3282	}
3283	aiov.iov_base = SCARG(uap, buf);
3284	aiov.iov_len = SCARG(uap, count);
3285	auio.uio_iov = &aiov;
3286	auio.uio_iovcnt = 1;
3287	auio.uio_rw = UIO_READ;
3288	auio.uio_segflg = UIO_USERSPACE;
3289	auio.uio_td = td;
3290	auio.uio_resid = SCARG(uap, count);
3291	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3292	loff = auio.uio_offset = fp->f_offset;
3293#	if (BYTE_ORDER != LITTLE_ENDIAN)
3294		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3295			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3296			    NULL, NULL);
3297			fp->f_offset = auio.uio_offset;
3298		} else
3299#	endif
3300	{
3301		kuio = auio;
3302		kuio.uio_iov = &kiov;
3303		kuio.uio_segflg = UIO_SYSSPACE;
3304		kiov.iov_len = SCARG(uap, count);
3305		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
3306		kiov.iov_base = dirbuf;
3307		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3308			    NULL, NULL);
3309		fp->f_offset = kuio.uio_offset;
3310		if (error == 0) {
3311			readcnt = SCARG(uap, count) - kuio.uio_resid;
3312			edp = (struct dirent *)&dirbuf[readcnt];
3313			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3314#				if (BYTE_ORDER == LITTLE_ENDIAN)
3315					/*
3316					 * The expected low byte of
3317					 * dp->d_namlen is our dp->d_type.
3318					 * The high MBZ byte of dp->d_namlen
3319					 * is our dp->d_namlen.
3320					 */
3321					dp->d_type = dp->d_namlen;
3322					dp->d_namlen = 0;
3323#				else
3324					/*
3325					 * The dp->d_type is the high byte
3326					 * of the expected dp->d_namlen,
3327					 * so must be zero'ed.
3328					 */
3329					dp->d_type = 0;
3330#				endif
3331				if (dp->d_reclen > 0) {
3332					dp = (struct dirent *)
3333					    ((char *)dp + dp->d_reclen);
3334				} else {
3335					error = EIO;
3336					break;
3337				}
3338			}
3339			if (dp >= edp)
3340				error = uiomove(dirbuf, readcnt, &auio);
3341		}
3342		FREE(dirbuf, M_TEMP);
3343	}
3344	VOP_UNLOCK(vp, 0, td);
3345	if (error) {
3346		fdrop(fp, td);
3347		return (error);
3348	}
3349	if (SCARG(uap, count) == auio.uio_resid) {
3350		if (union_dircheckp) {
3351			error = union_dircheckp(td, &vp, fp);
3352			if (error == -1)
3353				goto unionread;
3354			if (error) {
3355				fdrop(fp, td);
3356				return (error);
3357			}
3358		}
3359		if ((vp->v_flag & VROOT) &&
3360		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3361			struct vnode *tvp = vp;
3362			vp = vp->v_mount->mnt_vnodecovered;
3363			VREF(vp);
3364			fp->f_data = (caddr_t) vp;
3365			fp->f_offset = 0;
3366			vrele(tvp);
3367			goto unionread;
3368		}
3369	}
3370	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3371	    sizeof(long));
3372	fdrop(fp, td);
3373	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3374	return (error);
3375}
3376#endif /* COMPAT_43 */
3377
3378/*
3379 * Read a block of directory entries in a file system independent format.
3380 */
3381#ifndef _SYS_SYSPROTO_H_
3382struct getdirentries_args {
3383	int	fd;
3384	char	*buf;
3385	u_int	count;
3386	long	*basep;
3387};
3388#endif
3389int
3390getdirentries(td, uap)
3391	struct thread *td;
3392	register struct getdirentries_args /* {
3393		syscallarg(int) fd;
3394		syscallarg(char *) buf;
3395		syscallarg(u_int) count;
3396		syscallarg(long *) basep;
3397	} */ *uap;
3398{
3399	struct vnode *vp;
3400	struct file *fp;
3401	struct uio auio;
3402	struct iovec aiov;
3403	long loff;
3404	int error, eofflag;
3405
3406	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3407		return (error);
3408	if ((fp->f_flag & FREAD) == 0) {
3409		fdrop(fp, td);
3410		return (EBADF);
3411	}
3412	vp = (struct vnode *)fp->f_data;
3413unionread:
3414	if (vp->v_type != VDIR) {
3415		fdrop(fp, td);
3416		return (EINVAL);
3417	}
3418	aiov.iov_base = SCARG(uap, buf);
3419	aiov.iov_len = SCARG(uap, count);
3420	auio.uio_iov = &aiov;
3421	auio.uio_iovcnt = 1;
3422	auio.uio_rw = UIO_READ;
3423	auio.uio_segflg = UIO_USERSPACE;
3424	auio.uio_td = td;
3425	auio.uio_resid = SCARG(uap, count);
3426	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3427	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3428	loff = auio.uio_offset = fp->f_offset;
3429	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3430	fp->f_offset = auio.uio_offset;
3431	VOP_UNLOCK(vp, 0, td);
3432	if (error) {
3433		fdrop(fp, td);
3434		return (error);
3435	}
3436	if (SCARG(uap, count) == auio.uio_resid) {
3437		if (union_dircheckp) {
3438			error = union_dircheckp(td, &vp, fp);
3439			if (error == -1)
3440				goto unionread;
3441			if (error) {
3442				fdrop(fp, td);
3443				return (error);
3444			}
3445		}
3446		if ((vp->v_flag & VROOT) &&
3447		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3448			struct vnode *tvp = vp;
3449			vp = vp->v_mount->mnt_vnodecovered;
3450			VREF(vp);
3451			fp->f_data = (caddr_t) vp;
3452			fp->f_offset = 0;
3453			vrele(tvp);
3454			goto unionread;
3455		}
3456	}
3457	if (SCARG(uap, basep) != NULL) {
3458		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
3459		    sizeof(long));
3460	}
3461	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3462	fdrop(fp, td);
3463	return (error);
3464}
3465#ifndef _SYS_SYSPROTO_H_
3466struct getdents_args {
3467	int fd;
3468	char *buf;
3469	size_t count;
3470};
3471#endif
3472int
3473getdents(td, uap)
3474	struct thread *td;
3475	register struct getdents_args /* {
3476		syscallarg(int) fd;
3477		syscallarg(char *) buf;
3478		syscallarg(u_int) count;
3479	} */ *uap;
3480{
3481	struct getdirentries_args ap;
3482	ap.fd = uap->fd;
3483	ap.buf = uap->buf;
3484	ap.count = uap->count;
3485	ap.basep = NULL;
3486	return getdirentries(td, &ap);
3487}
3488
3489/*
3490 * Set the mode mask for creation of filesystem nodes.
3491 *
3492 * MP SAFE
3493 */
3494#ifndef _SYS_SYSPROTO_H_
3495struct umask_args {
3496	int	newmask;
3497};
3498#endif
3499int
3500umask(td, uap)
3501	struct thread *td;
3502	struct umask_args /* {
3503		syscallarg(int) newmask;
3504	} */ *uap;
3505{
3506	register struct filedesc *fdp;
3507
3508	FILEDESC_LOCK(td->td_proc->p_fd);
3509	fdp = td->td_proc->p_fd;
3510	td->td_retval[0] = fdp->fd_cmask;
3511	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3512	FILEDESC_UNLOCK(td->td_proc->p_fd);
3513	return (0);
3514}
3515
3516/*
3517 * Void all references to file by ripping underlying filesystem
3518 * away from vnode.
3519 */
3520#ifndef _SYS_SYSPROTO_H_
3521struct revoke_args {
3522	char	*path;
3523};
3524#endif
3525/* ARGSUSED */
3526int
3527revoke(td, uap)
3528	struct thread *td;
3529	register struct revoke_args /* {
3530		syscallarg(char *) path;
3531	} */ *uap;
3532{
3533	struct mount *mp;
3534	struct vnode *vp;
3535	struct vattr vattr;
3536	int error;
3537	struct nameidata nd;
3538
3539	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
3540	    td);
3541	if ((error = namei(&nd)) != 0)
3542		return (error);
3543	vp = nd.ni_vp;
3544	NDFREE(&nd, NDF_ONLY_PNBUF);
3545	if (vp->v_type != VCHR) {
3546		vput(vp);
3547		return (EINVAL);
3548	}
3549	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3550	if (error) {
3551		vput(vp);
3552		return (error);
3553	}
3554	VOP_UNLOCK(vp, 0, td);
3555	if (td->td_ucred->cr_uid != vattr.va_uid) {
3556		error = suser_xxx(0, td->td_proc, PRISON_ROOT);
3557		if (error)
3558			goto out;
3559	}
3560	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3561		goto out;
3562	if (vcount(vp) > 1)
3563		VOP_REVOKE(vp, REVOKEALL);
3564	vn_finished_write(mp);
3565out:
3566	vrele(vp);
3567	return (error);
3568}
3569
3570/*
3571 * Convert a user file descriptor to a kernel file entry.
3572 * The file entry is locked upon returning.
3573 */
3574int
3575getvnode(fdp, fd, fpp)
3576	struct filedesc *fdp;
3577	int fd;
3578	struct file **fpp;
3579{
3580	int error;
3581	struct file *fp;
3582
3583	fp = NULL;
3584	if (fdp == NULL)
3585		error = EBADF;
3586	else {
3587		FILEDESC_LOCK(fdp);
3588		if ((u_int)fd >= fdp->fd_nfiles ||
3589		    (fp = fdp->fd_ofiles[fd]) == NULL)
3590			error = EBADF;
3591		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3592			fp = NULL;
3593			error = EINVAL;
3594		} else {
3595			fhold(fp);
3596			error = 0;
3597		}
3598		FILEDESC_UNLOCK(fdp);
3599	}
3600	*fpp = fp;
3601	return (error);
3602}
3603/*
3604 * Get (NFS) file handle
3605 */
3606#ifndef _SYS_SYSPROTO_H_
3607struct getfh_args {
3608	char	*fname;
3609	fhandle_t *fhp;
3610};
3611#endif
3612int
3613getfh(td, uap)
3614	struct thread *td;
3615	register struct getfh_args *uap;
3616{
3617	struct nameidata nd;
3618	fhandle_t fh;
3619	register struct vnode *vp;
3620	int error;
3621
3622	/*
3623	 * Must be super user
3624	 */
3625	error = suser_td(td);
3626	if (error)
3627		return (error);
3628	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3629	error = namei(&nd);
3630	if (error)
3631		return (error);
3632	NDFREE(&nd, NDF_ONLY_PNBUF);
3633	vp = nd.ni_vp;
3634	bzero(&fh, sizeof(fh));
3635	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3636	error = VFS_VPTOFH(vp, &fh.fh_fid);
3637	vput(vp);
3638	if (error)
3639		return (error);
3640	error = copyout(&fh, uap->fhp, sizeof (fh));
3641	return (error);
3642}
3643
3644/*
3645 * syscall for the rpc.lockd to use to translate a NFS file handle into
3646 * an open descriptor.
3647 *
3648 * warning: do not remove the suser() call or this becomes one giant
3649 * security hole.
3650 */
3651#ifndef _SYS_SYSPROTO_H_
3652struct fhopen_args {
3653	const struct fhandle *u_fhp;
3654	int flags;
3655};
3656#endif
3657int
3658fhopen(td, uap)
3659	struct thread *td;
3660	struct fhopen_args /* {
3661		syscallarg(const struct fhandle *) u_fhp;
3662		syscallarg(int) flags;
3663	} */ *uap;
3664{
3665	struct proc *p = td->td_proc;
3666	struct mount *mp;
3667	struct vnode *vp;
3668	struct fhandle fhp;
3669	struct vattr vat;
3670	struct vattr *vap = &vat;
3671	struct flock lf;
3672	struct file *fp;
3673	register struct filedesc *fdp = p->p_fd;
3674	int fmode, mode, error, type;
3675	struct file *nfp;
3676	int indx;
3677
3678	/*
3679	 * Must be super user
3680	 */
3681	error = suser_td(td);
3682	if (error)
3683		return (error);
3684
3685	fmode = FFLAGS(SCARG(uap, flags));
3686	/* why not allow a non-read/write open for our lockd? */
3687	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3688		return (EINVAL);
3689	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3690	if (error)
3691		return(error);
3692	/* find the mount point */
3693	mp = vfs_getvfs(&fhp.fh_fsid);
3694	if (mp == NULL)
3695		return (ESTALE);
3696	/* now give me my vnode, it gets returned to me locked */
3697	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3698	if (error)
3699		return (error);
3700 	/*
3701	 * from now on we have to make sure not
3702	 * to forget about the vnode
3703	 * any error that causes an abort must vput(vp)
3704	 * just set error = err and 'goto bad;'.
3705	 */
3706
3707	/*
3708	 * from vn_open
3709	 */
3710	if (vp->v_type == VLNK) {
3711		error = EMLINK;
3712		goto bad;
3713	}
3714	if (vp->v_type == VSOCK) {
3715		error = EOPNOTSUPP;
3716		goto bad;
3717	}
3718	mode = 0;
3719	if (fmode & (FWRITE | O_TRUNC)) {
3720		if (vp->v_type == VDIR) {
3721			error = EISDIR;
3722			goto bad;
3723		}
3724		error = vn_writechk(vp);
3725		if (error)
3726			goto bad;
3727		mode |= VWRITE;
3728	}
3729	if (fmode & FREAD)
3730		mode |= VREAD;
3731	if (mode) {
3732		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3733		if (error)
3734			goto bad;
3735	}
3736	if (fmode & O_TRUNC) {
3737		VOP_UNLOCK(vp, 0, td);				/* XXX */
3738		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3739			vrele(vp);
3740			return (error);
3741		}
3742		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3743		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3744		VATTR_NULL(vap);
3745		vap->va_size = 0;
3746		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3747		vn_finished_write(mp);
3748		if (error)
3749			goto bad;
3750	}
3751	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3752	if (error)
3753		goto bad;
3754	/*
3755	 * Make sure that a VM object is created for VMIO support.
3756	 */
3757	if (vn_canvmio(vp) == TRUE) {
3758		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3759			goto bad;
3760	}
3761	if (fmode & FWRITE)
3762		vp->v_writecount++;
3763
3764	/*
3765	 * end of vn_open code
3766	 */
3767
3768	if ((error = falloc(td, &nfp, &indx)) != 0) {
3769		if (fmode & FWRITE)
3770			vp->v_writecount--;
3771		goto bad;
3772	}
3773	fp = nfp;
3774
3775	/*
3776	 * Hold an extra reference to avoid having fp ripped out
3777	 * from under us while we block in the lock op
3778	 */
3779	fhold(fp);
3780	nfp->f_data = (caddr_t)vp;
3781	nfp->f_flag = fmode & FMASK;
3782	nfp->f_ops = &vnops;
3783	nfp->f_type = DTYPE_VNODE;
3784	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3785		lf.l_whence = SEEK_SET;
3786		lf.l_start = 0;
3787		lf.l_len = 0;
3788		if (fmode & O_EXLOCK)
3789			lf.l_type = F_WRLCK;
3790		else
3791			lf.l_type = F_RDLCK;
3792		type = F_FLOCK;
3793		if ((fmode & FNONBLOCK) == 0)
3794			type |= F_WAIT;
3795		VOP_UNLOCK(vp, 0, td);
3796		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
3797			/*
3798			 * The lock request failed.  Normally close the
3799			 * descriptor but handle the case where someone might
3800			 * have dup()d or close()d it when we weren't looking.
3801			 */
3802			FILEDESC_LOCK(fdp);
3803			if (fdp->fd_ofiles[indx] == fp) {
3804				fdp->fd_ofiles[indx] = NULL;
3805				FILEDESC_UNLOCK(fdp);
3806				fdrop(fp, td);
3807			} else
3808				FILEDESC_UNLOCK(fdp);
3809			/*
3810			 * release our private reference
3811			 */
3812			fdrop(fp, td);
3813			return(error);
3814		}
3815		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3816		fp->f_flag |= FHASLOCK;
3817	}
3818	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3819		vfs_object_create(vp, td, td->td_ucred);
3820
3821	VOP_UNLOCK(vp, 0, td);
3822	fdrop(fp, td);
3823	td->td_retval[0] = indx;
3824	return (0);
3825
3826bad:
3827	vput(vp);
3828	return (error);
3829}
3830
3831/*
3832 * Stat an (NFS) file handle.
3833 */
3834#ifndef _SYS_SYSPROTO_H_
3835struct fhstat_args {
3836	struct fhandle *u_fhp;
3837	struct stat *sb;
3838};
3839#endif
3840int
3841fhstat(td, uap)
3842	struct thread *td;
3843	register struct fhstat_args /* {
3844		syscallarg(struct fhandle *) u_fhp;
3845		syscallarg(struct stat *) sb;
3846	} */ *uap;
3847{
3848	struct stat sb;
3849	fhandle_t fh;
3850	struct mount *mp;
3851	struct vnode *vp;
3852	int error;
3853
3854	/*
3855	 * Must be super user
3856	 */
3857	error = suser_td(td);
3858	if (error)
3859		return (error);
3860
3861	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3862	if (error)
3863		return (error);
3864
3865	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3866		return (ESTALE);
3867	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3868		return (error);
3869	error = vn_stat(vp, &sb, td);
3870	vput(vp);
3871	if (error)
3872		return (error);
3873	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3874	return (error);
3875}
3876
3877/*
3878 * Implement fstatfs() for (NFS) file handles.
3879 */
3880#ifndef _SYS_SYSPROTO_H_
3881struct fhstatfs_args {
3882	struct fhandle *u_fhp;
3883	struct statfs *buf;
3884};
3885#endif
3886int
3887fhstatfs(td, uap)
3888	struct thread *td;
3889	struct fhstatfs_args /* {
3890		syscallarg(struct fhandle) *u_fhp;
3891		syscallarg(struct statfs) *buf;
3892	} */ *uap;
3893{
3894	struct statfs *sp;
3895	struct mount *mp;
3896	struct vnode *vp;
3897	struct statfs sb;
3898	fhandle_t fh;
3899	int error;
3900
3901	/*
3902	 * Must be super user
3903	 */
3904	error = suser_td(td);
3905	if (error)
3906		return (error);
3907
3908	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3909		return (error);
3910
3911	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3912		return (ESTALE);
3913	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3914		return (error);
3915	mp = vp->v_mount;
3916	sp = &mp->mnt_stat;
3917	vput(vp);
3918	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3919		return (error);
3920	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3921	if (suser_xxx(td->td_ucred, 0, 0)) {
3922		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
3923		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3924		sp = &sb;
3925	}
3926	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3927}
3928
3929/*
3930 * Syscall to push extended attribute configuration information into the
3931 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3932 * a command (int cmd), and attribute name and misc data.  For now, the
3933 * attribute name is left in userspace for consumption by the VFS_op.
3934 * It will probably be changed to be copied into sysspace by the
3935 * syscall in the future, once issues with various consumers of the
3936 * attribute code have raised their hands.
3937 *
3938 * Currently this is used only by UFS Extended Attributes.
3939 */
3940int
3941extattrctl(td, uap)
3942	struct thread *td;
3943	struct extattrctl_args *uap;
3944{
3945	struct vnode *filename_vp;
3946	struct nameidata nd;
3947	struct mount *mp, *mp_writable;
3948	char attrname[EXTATTR_MAXNAMELEN];
3949	int error;
3950
3951	/*
3952	 * SCARG(uap, attrname) not always defined.  We check again later
3953	 * when we invoke the VFS call so as to pass in NULL there if needed.
3954	 */
3955	if (SCARG(uap, attrname) != NULL) {
3956		error = copyinstr(SCARG(uap, attrname), attrname,
3957		    EXTATTR_MAXNAMELEN, NULL);
3958		if (error)
3959			return (error);
3960	}
3961
3962	/*
3963	 * SCARG(uap, filename) not always defined.  If it is, grab
3964	 * a vnode lock, which VFS_EXTATTRCTL() will later release.
3965	 */
3966	filename_vp = NULL;
3967	if (SCARG(uap, filename) != NULL) {
3968		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3969		    SCARG(uap, filename), td);
3970		if ((error = namei(&nd)) != 0)
3971			return (error);
3972		filename_vp = nd.ni_vp;
3973		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3974	}
3975
3976	/* SCARG(uap, path) always defined. */
3977	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3978	if ((error = namei(&nd)) != 0) {
3979		if (filename_vp != NULL)
3980			vput(filename_vp);
3981		return (error);
3982	}
3983	mp = nd.ni_vp->v_mount;
3984	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3985	NDFREE(&nd, 0);
3986	if (error) {
3987		if (filename_vp != NULL)
3988			vput(filename_vp);
3989		return (error);
3990	}
3991
3992	if (SCARG(uap, attrname) != NULL) {
3993		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3994		    SCARG(uap, attrnamespace), attrname, td);
3995	} else {
3996		error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), filename_vp,
3997		    SCARG(uap, attrnamespace), NULL, td);
3998	}
3999
4000	vn_finished_write(mp_writable);
4001	/*
4002	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4003	 * filename_vp, so vrele it if it is defined.
4004	 */
4005	if (filename_vp != NULL)
4006		vrele(filename_vp);
4007
4008	return (error);
4009}
4010
4011/*-
4012 * Set a named extended attribute on a file or directory
4013 *
4014 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4015 *            kernelspace string pointer "attrname", userspace buffer
4016 *            pointer "data", buffer length "nbytes", thread "td".
4017 * Returns: 0 on success, an error number otherwise
4018 * Locks: none
4019 * References: vp must be a valid reference for the duration of the call
4020 */
4021static int
4022extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4023    void *data, size_t nbytes, struct thread *td)
4024{
4025	struct mount *mp;
4026	struct uio auio;
4027	struct iovec aiov;
4028	ssize_t cnt;
4029	int error;
4030
4031	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4032		return (error);
4033	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4034	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4035
4036	aiov.iov_base = data;
4037	aiov.iov_len = nbytes;
4038	auio.uio_iov = &aiov;
4039	auio.uio_iovcnt = 1;
4040	auio.uio_offset = 0;
4041	if (nbytes > INT_MAX) {
4042		error = EINVAL;
4043		goto done;
4044	}
4045	auio.uio_resid = nbytes;
4046	auio.uio_rw = UIO_WRITE;
4047	auio.uio_segflg = UIO_USERSPACE;
4048	auio.uio_td = td;
4049	cnt = nbytes;
4050
4051	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4052	    td->td_ucred, td);
4053	cnt -= auio.uio_resid;
4054	td->td_retval[0] = cnt;
4055
4056done:
4057	VOP_UNLOCK(vp, 0, td);
4058	vn_finished_write(mp);
4059	return (error);
4060}
4061
4062int
4063extattr_set_file(td, uap)
4064	struct thread *td;
4065	struct extattr_set_file_args *uap;
4066{
4067	struct nameidata nd;
4068	char attrname[EXTATTR_MAXNAMELEN];
4069	int error;
4070
4071	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4072	    NULL);
4073	if (error)
4074		return (error);
4075
4076	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4077	if ((error = namei(&nd)) != 0)
4078		return (error);
4079	NDFREE(&nd, NDF_ONLY_PNBUF);
4080
4081	error = extattr_set_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4082	    SCARG(uap, data), SCARG(uap, nbytes), td);
4083
4084	vrele(nd.ni_vp);
4085	return (error);
4086}
4087
4088int
4089extattr_set_fd(td, uap)
4090	struct thread *td;
4091	struct extattr_set_fd_args *uap;
4092{
4093	struct file *fp;
4094	char attrname[EXTATTR_MAXNAMELEN];
4095	int error;
4096
4097	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4098	    NULL);
4099	if (error)
4100		return (error);
4101
4102	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4103		return (error);
4104
4105	error = extattr_set_vp((struct vnode *)fp->f_data,
4106	    SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4107	    SCARG(uap, nbytes), td);
4108	fdrop(fp, td);
4109
4110	return (error);
4111}
4112
4113/*-
4114 * Get a named extended attribute on a file or directory
4115 *
4116 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4117 *            kernelspace string pointer "attrname", userspace buffer
4118 *            pointer "data", buffer length "nbytes", thread "td".
4119 * Returns: 0 on success, an error number otherwise
4120 * Locks: none
4121 * References: vp must be a valid reference for the duration of the call
4122 */
4123static int
4124extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4125    void *data, size_t nbytes, struct thread *td)
4126{
4127	struct uio auio;
4128	struct iovec aiov;
4129	ssize_t cnt;
4130	size_t size;
4131	int error;
4132
4133	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4134	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4135
4136	/*
4137	 * Slightly unusual semantics: if the user provides a NULL data
4138	 * pointer, they don't want to receive the data, just the
4139	 * maximum read length.
4140	 */
4141	if (data != NULL) {
4142		aiov.iov_base = data;
4143		aiov.iov_len = nbytes;
4144		auio.uio_iov = &aiov;
4145		auio.uio_offset = 0;
4146		if (nbytes > INT_MAX) {
4147			error = EINVAL;
4148			goto done;
4149		}
4150		auio.uio_resid = nbytes;
4151		auio.uio_rw = UIO_READ;
4152		auio.uio_segflg = UIO_USERSPACE;
4153		auio.uio_td = td;
4154		cnt = nbytes;
4155		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
4156		    NULL, td->td_ucred, td);
4157		cnt -= auio.uio_resid;
4158		td->td_retval[0] = cnt;
4159	} else {
4160		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, NULL,
4161		    &size, td->td_ucred, td);
4162		td->td_retval[0] = size;
4163	}
4164done:
4165	VOP_UNLOCK(vp, 0, td);
4166	return (error);
4167}
4168
4169int
4170extattr_get_file(td, uap)
4171	struct thread *td;
4172	struct extattr_get_file_args *uap;
4173{
4174	struct nameidata nd;
4175	char attrname[EXTATTR_MAXNAMELEN];
4176	int error;
4177
4178	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4179	    NULL);
4180	if (error)
4181		return (error);
4182
4183	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4184	if ((error = namei(&nd)) != 0)
4185		return (error);
4186	NDFREE(&nd, NDF_ONLY_PNBUF);
4187
4188	error = extattr_get_vp(nd.ni_vp, SCARG(uap, attrnamespace), attrname,
4189	    SCARG(uap, data), SCARG(uap, nbytes), td);
4190
4191	vrele(nd.ni_vp);
4192	return (error);
4193}
4194
4195int
4196extattr_get_fd(td, uap)
4197	struct thread *td;
4198	struct extattr_get_fd_args *uap;
4199{
4200	struct file *fp;
4201	char attrname[EXTATTR_MAXNAMELEN];
4202	int error;
4203
4204	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4205	    NULL);
4206	if (error)
4207		return (error);
4208
4209	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4210		return (error);
4211
4212	error = extattr_get_vp((struct vnode *)fp->f_data,
4213	    SCARG(uap, attrnamespace), attrname, SCARG(uap, data),
4214	    SCARG(uap, nbytes), td);
4215
4216	fdrop(fp, td);
4217	return (error);
4218}
4219
4220/*
4221 * extattr_delete_vp(): Delete a named extended attribute on a file or
4222 *                      directory
4223 *
4224 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4225 *            kernelspace string pointer "attrname", proc "p"
4226 * Returns: 0 on success, an error number otherwise
4227 * Locks: none
4228 * References: vp must be a valid reference for the duration of the call
4229 */
4230static int
4231extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4232    struct thread *td)
4233{
4234	struct mount *mp;
4235	int error;
4236
4237	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4238		return (error);
4239	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4240	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4241
4242	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4243	    td);
4244
4245	VOP_UNLOCK(vp, 0, td);
4246	vn_finished_write(mp);
4247	return (error);
4248}
4249
4250int
4251extattr_delete_file(td, uap)
4252	struct thread *td;
4253	struct extattr_delete_file_args *uap;
4254{
4255	struct nameidata nd;
4256	char attrname[EXTATTR_MAXNAMELEN];
4257	int error;
4258
4259	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4260	     NULL);
4261	if (error)
4262		return(error);
4263
4264	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
4265	if ((error = namei(&nd)) != 0)
4266		return(error);
4267	NDFREE(&nd, NDF_ONLY_PNBUF);
4268
4269	error = extattr_delete_vp(nd.ni_vp, SCARG(uap, attrnamespace),
4270	    attrname, td);
4271
4272	vrele(nd.ni_vp);
4273	return(error);
4274}
4275
4276int
4277extattr_delete_fd(td, uap)
4278	struct thread *td;
4279	struct extattr_delete_fd_args *uap;
4280{
4281	struct file *fp;
4282	struct vnode *vp;
4283	char attrname[EXTATTR_MAXNAMELEN];
4284	int error;
4285
4286	error = copyinstr(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN,
4287	    NULL);
4288	if (error)
4289		return (error);
4290
4291	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4292		return (error);
4293	vp = (struct vnode *)fp->f_data;
4294
4295	error = extattr_delete_vp((struct vnode *)fp->f_data,
4296	    SCARG(uap, attrnamespace), attrname, td);
4297
4298	fdrop(fp, td);
4299	return (error);
4300}
4301