vfs_syscalls.c revision 111935
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 * $FreeBSD: head/sys/kern/vfs_syscalls.c 111935 2003-03-05 23:15:23Z rwatson $
40 */
41
42/* For 4.3 integer FS ID compatibility */
43#include "opt_compat.h"
44#include "opt_mac.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/sysent.h>
51#include <sys/mac.h>
52#include <sys/malloc.h>
53#include <sys/mount.h>
54#include <sys/mutex.h>
55#include <sys/sysproto.h>
56#include <sys/namei.h>
57#include <sys/filedesc.h>
58#include <sys/kernel.h>
59#include <sys/fcntl.h>
60#include <sys/file.h>
61#include <sys/linker.h>
62#include <sys/stat.h>
63#include <sys/sx.h>
64#include <sys/unistd.h>
65#include <sys/vnode.h>
66#include <sys/proc.h>
67#include <sys/dirent.h>
68#include <sys/extattr.h>
69#include <sys/jail.h>
70#include <sys/syscallsubr.h>
71#include <sys/sysctl.h>
72
73#include <machine/limits.h>
74#include <machine/stdarg.h>
75
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_page.h>
79#include <vm/uma.h>
80
/*
 * Forward declarations for helper routines that are private to this file.
 */
static int change_dir(struct nameidata *ndp, struct thread *td);
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * Optional hooks reached through function pointers.  They are only defined
 * here; nothing in this part of the file assigns them.
 * NOTE(review): presumably installed by the union filesystem and the
 * soft-updates code respectively -- confirm against those subsystems.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
int (*softdep_fsync_hook)(struct vnode *);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;
102
/*
 * Sync each mounted filesystem.
 *
 * Walks the global mount list and, for every mount that can be marked
 * busy without sleeping, is not read-only and accepts a new writer without
 * waiting, starts a non-blocking (MNT_NOWAIT) flush.  The results of
 * vfs_msync() and VFS_SYNC() are not examined; the call always returns 0.
 */
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
        int     dummy;
};
#endif

#ifdef DEBUG
/* Debug knob; only consulted by the disabled (#if 0) code in sync(). */
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif

/* ARGSUSED */
int
sync(td, uap)
	struct thread *td;
	struct sync_args *uap;
{
	struct mount *mp, *nmp;
	int asyncflag;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * Skip mounts that cannot be busied right now.  The code
		 * relies on mountlist_mtx still being held on this failure
		 * path, and on it having been dropped on success (it is
		 * re-taken at the bottom of the loop).
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/*
			 * Clear MNT_ASYNC for the duration of the flush and
			 * put it back afterwards if it was set.
			 */
			asyncflag = mp->mnt_flag & MNT_ASYNC;
			mp->mnt_flag &= ~MNT_ASYNC;
			vfs_msync(mp, MNT_NOWAIT);
			VFS_SYNC(mp, MNT_NOWAIT,
			    ((td != NULL) ? td->td_ucred : NOCRED), td);
			mp->mnt_flag |= asyncflag;
			vn_finished_write(mp);
		}
		/*
		 * Re-take the list lock before reading the next pointer, and
		 * only then release the busy hold on this mount.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp, td);
	}
	mtx_unlock(&mountlist_mtx);
#if 0
/*
 * XXX don't call vfs_bufstats() yet because that routine
 * was not imported in the Lite2 merge.
 */
#ifdef DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
#endif
	return (0);
}
159
160/* XXX PRISON: could be per prison flag */
161static int prison_quotas;
162#if 0
163SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
164#endif
165
166/*
167 * Change filesystem quotas.
168 */
169#ifndef _SYS_SYSPROTO_H_
170struct quotactl_args {
171	char *path;
172	int cmd;
173	int uid;
174	caddr_t arg;
175};
176#endif
177/* ARGSUSED */
178int
179quotactl(td, uap)
180	struct thread *td;
181	register struct quotactl_args /* {
182		char *path;
183		int cmd;
184		int uid;
185		caddr_t arg;
186	} */ *uap;
187{
188	struct mount *mp;
189	int error;
190	struct nameidata nd;
191
192	if (jailed(td->td_ucred) && !prison_quotas)
193		return (EPERM);
194	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
195	if ((error = namei(&nd)) != 0)
196		return (error);
197	NDFREE(&nd, NDF_ONLY_PNBUF);
198	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
199	vrele(nd.ni_vp);
200	if (error)
201		return (error);
202	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203	vn_finished_write(mp);
204	return (error);
205}
206
207/*
208 * Get filesystem statistics.
209 */
210#ifndef _SYS_SYSPROTO_H_
211struct statfs_args {
212	char *path;
213	struct statfs *buf;
214};
215#endif
216/* ARGSUSED */
217int
218statfs(td, uap)
219	struct thread *td;
220	register struct statfs_args /* {
221		char *path;
222		struct statfs *buf;
223	} */ *uap;
224{
225	register struct mount *mp;
226	register struct statfs *sp;
227	int error;
228	struct nameidata nd;
229	struct statfs sb;
230
231	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
232	if ((error = namei(&nd)) != 0)
233		return (error);
234	mp = nd.ni_vp->v_mount;
235	sp = &mp->mnt_stat;
236	NDFREE(&nd, NDF_ONLY_PNBUF);
237	vrele(nd.ni_vp);
238#ifdef MAC
239	error = mac_check_mount_stat(td->td_ucred, mp);
240	if (error)
241		return (error);
242#endif
243	error = VFS_STATFS(mp, sp, td);
244	if (error)
245		return (error);
246	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
247	if (suser(td)) {
248		bcopy(sp, &sb, sizeof(sb));
249		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
250		sp = &sb;
251	}
252	return (copyout(sp, uap->buf, sizeof(*sp)));
253}
254
255/*
256 * Get filesystem statistics.
257 */
258#ifndef _SYS_SYSPROTO_H_
259struct fstatfs_args {
260	int fd;
261	struct statfs *buf;
262};
263#endif
264/* ARGSUSED */
265int
266fstatfs(td, uap)
267	struct thread *td;
268	register struct fstatfs_args /* {
269		int fd;
270		struct statfs *buf;
271	} */ *uap;
272{
273	struct file *fp;
274	struct mount *mp;
275	register struct statfs *sp;
276	int error;
277	struct statfs sb;
278
279	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
280		return (error);
281	mp = ((struct vnode *)fp->f_data)->v_mount;
282	fdrop(fp, td);
283	if (mp == NULL)
284		return (EBADF);
285#ifdef MAC
286	error = mac_check_mount_stat(td->td_ucred, mp);
287	if (error)
288		return (error);
289#endif
290	sp = &mp->mnt_stat;
291	error = VFS_STATFS(mp, sp, td);
292	if (error)
293		return (error);
294	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
295	if (suser(td)) {
296		bcopy(sp, &sb, sizeof(sb));
297		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
298		sp = &sb;
299	}
300	return (copyout(sp, uap->buf, sizeof(*sp)));
301}
302
303/*
304 * Get statistics on all filesystems.
305 */
306#ifndef _SYS_SYSPROTO_H_
307struct getfsstat_args {
308	struct statfs *buf;
309	long bufsize;
310	int flags;
311};
312#endif
313int
314getfsstat(td, uap)
315	struct thread *td;
316	register struct getfsstat_args /* {
317		struct statfs *buf;
318		long bufsize;
319		int flags;
320	} */ *uap;
321{
322	register struct mount *mp, *nmp;
323	register struct statfs *sp;
324	caddr_t sfsp;
325	long count, maxcount, error;
326
327	maxcount = uap->bufsize / sizeof(struct statfs);
328	sfsp = (caddr_t)uap->buf;
329	count = 0;
330	mtx_lock(&mountlist_mtx);
331	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
332#ifdef MAC
333		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
334			nmp = TAILQ_NEXT(mp, mnt_list);
335			continue;
336		}
337#endif
338		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
339			nmp = TAILQ_NEXT(mp, mnt_list);
340			continue;
341		}
342		if (sfsp && count < maxcount) {
343			sp = &mp->mnt_stat;
344			/*
345			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
346			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
347			 * overrides MNT_WAIT.
348			 */
349			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
350			    (uap->flags & MNT_WAIT)) &&
351			    (error = VFS_STATFS(mp, sp, td))) {
352				mtx_lock(&mountlist_mtx);
353				nmp = TAILQ_NEXT(mp, mnt_list);
354				vfs_unbusy(mp, td);
355				continue;
356			}
357			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
358			error = copyout(sp, sfsp, sizeof(*sp));
359			if (error) {
360				vfs_unbusy(mp, td);
361				return (error);
362			}
363			sfsp += sizeof(*sp);
364		}
365		count++;
366		mtx_lock(&mountlist_mtx);
367		nmp = TAILQ_NEXT(mp, mnt_list);
368		vfs_unbusy(mp, td);
369	}
370	mtx_unlock(&mountlist_mtx);
371	if (sfsp && count > maxcount)
372		td->td_retval[0] = maxcount;
373	else
374		td->td_retval[0] = count;
375	return (0);
376}
377
/*
 * Change current working directory to a given file descriptor.
 *
 * The descriptor must refer to a directory that passes the (optional) MAC
 * check and a VEXEC access check.  If a filesystem is mounted on that
 * directory, the root of the mounted filesystem is used instead, repeating
 * until a vnode with nothing mounted on it is reached.
 */
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
	int	fd;
};
#endif
/* ARGSUSED */
int
fchdir(td, uap)
	struct thread *td;
	struct fchdir_args /* {
		int fd;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct vnode *vp, *tdp, *vpold;
	struct mount *mp;
	struct file *fp;
	int error;

	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	vp = fp->f_data;
	/* Take our own vnode reference so the file can be dropped right away. */
	VREF(vp);
	fdrop(fp, td);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
#ifdef MAC
	/* Empty body: the assignment in the condition is the whole point. */
	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
	}
#endif
	else
		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	/* Step across mount points; retry while the mount cannot be busied. */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		if (vfs_busy(mp, 0, 0, td))
			continue;
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp, td);
		if (error)
			break;
		/* Swap the covered vnode for the mounted filesystem's root. */
		vput(vp);
		vp = tdp;
	}
	if (error) {
		vput(vp);
		return (error);
	}
	/* Keep the reference (it moves into fd_cdir) but drop the lock. */
	VOP_UNLOCK(vp, 0, td);
	FILEDESC_LOCK(fdp);
	vpold = fdp->fd_cdir;
	fdp->fd_cdir = vp;
	FILEDESC_UNLOCK(fdp);
	/* Release the previous cwd only after the filedesc lock is dropped. */
	vrele(vpold);
	return (0);
}
436
437/*
438 * Change current working directory (``.'').
439 */
440#ifndef _SYS_SYSPROTO_H_
441struct chdir_args {
442	char	*path;
443};
444#endif
445/* ARGSUSED */
446int
447chdir(td, uap)
448	struct thread *td;
449	struct chdir_args /* {
450		char *path;
451	} */ *uap;
452{
453
454	return (kern_chdir(td, uap->path, UIO_USERSPACE));
455}
456
457int
458kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
459{
460	register struct filedesc *fdp = td->td_proc->p_fd;
461	int error;
462	struct nameidata nd;
463	struct vnode *vp;
464
465	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
466	if ((error = change_dir(&nd, td)) != 0)
467		return (error);
468	VOP_UNLOCK(nd.ni_vp, 0, td);
469	NDFREE(&nd, NDF_ONLY_PNBUF);
470	FILEDESC_LOCK(fdp);
471	vp = fdp->fd_cdir;
472	fdp->fd_cdir = nd.ni_vp;
473	FILEDESC_UNLOCK(fdp);
474	vrele(vp);
475	return (0);
476}
477
478/*
479 * Helper function for raised chroot(2) security function:  Refuse if
480 * any filedescriptors are open directories.
481 */
482static int
483chroot_refuse_vdir_fds(fdp)
484	struct filedesc *fdp;
485{
486	struct vnode *vp;
487	struct file *fp;
488	int fd;
489
490	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
491	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
492		fp = fget_locked(fdp, fd);
493		if (fp == NULL)
494			continue;
495		if (fp->f_type == DTYPE_VNODE) {
496			vp = fp->f_data;
497			if (vp->v_type == VDIR)
498				return (EPERM);
499		}
500	}
501	return (0);
502}
503
/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * chroot() treats a process as "already chroot(2)'ed" when its fd_rdir
 * differs from rootvnode.  Only the values 0 and 1 are tested explicitly
 * there, so any other value behaves like 2.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
516
517/*
518 * Change notion of root (``/'') directory.
519 */
520#ifndef _SYS_SYSPROTO_H_
521struct chroot_args {
522	char	*path;
523};
524#endif
525/* ARGSUSED */
526int
527chroot(td, uap)
528	struct thread *td;
529	struct chroot_args /* {
530		char *path;
531	} */ *uap;
532{
533	register struct filedesc *fdp = td->td_proc->p_fd;
534	int error;
535	struct nameidata nd;
536	struct vnode *vp;
537
538	error = suser_cred(td->td_ucred, PRISON_ROOT);
539	if (error)
540		return (error);
541	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
542	mtx_lock(&Giant);
543	if ((error = change_dir(&nd, td)) != 0)
544		goto error;
545#ifdef MAC
546	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp))) {
547		vput(nd.ni_vp);
548		goto error;
549	}
550#endif
551	VOP_UNLOCK(nd.ni_vp, 0, td);
552	FILEDESC_LOCK(fdp);
553	if (chroot_allow_open_directories == 0 ||
554	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
555		error = chroot_refuse_vdir_fds(fdp);
556		if (error)
557			goto error_unlock;
558	}
559	vp = fdp->fd_rdir;
560	fdp->fd_rdir = nd.ni_vp;
561	if (!fdp->fd_jdir) {
562		fdp->fd_jdir = nd.ni_vp;
563                VREF(fdp->fd_jdir);
564	}
565	FILEDESC_UNLOCK(fdp);
566	NDFREE(&nd, NDF_ONLY_PNBUF);
567	vrele(vp);
568	mtx_unlock(&Giant);
569	return (0);
570error_unlock:
571	FILEDESC_UNLOCK(fdp);
572error:
573	mtx_unlock(&Giant);
574	NDFREE(&nd, NDF_ONLY_PNBUF);
575	return (error);
576}
577
578/*
579 * Common routine for chroot and chdir.  On success, the directory vnode
580 * is returned locked, and must be unlocked by the caller.
581 */
582static int
583change_dir(ndp, td)
584	register struct nameidata *ndp;
585	struct thread *td;
586{
587	struct vnode *vp;
588	int error;
589
590	error = namei(ndp);
591	if (error)
592		return (error);
593	vp = ndp->ni_vp;
594	if (vp->v_type != VDIR)
595		error = ENOTDIR;
596#ifdef MAC
597	if (error == 0)
598		error = mac_check_vnode_chdir(td->td_ucred, vp);
599#endif
600	if (error == 0)
601		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
602	if (error)
603		vput(vp);
604	return (error);
605}
606
607/*
608 * Check permissions, allocate an open file structure,
609 * and call the device open routine if any.
610 */
611#ifndef _SYS_SYSPROTO_H_
612struct open_args {
613	char	*path;
614	int	flags;
615	int	mode;
616};
617#endif
618int
619open(td, uap)
620	struct thread *td;
621	register struct open_args /* {
622		char *path;
623		int flags;
624		int mode;
625	} */ *uap;
626{
627
628	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
629}
630
/*
 * Open (and possibly create/truncate/lock) the file named by `path', which
 * lives in the address space given by `pathseg', and install it in a new
 * descriptor of the calling process.
 *
 * flags are open(2)-style flags; a value with both access-mode bits set
 * is rejected with EINVAL.  mode is filtered through the process's
 * fd_cmask and ALLPERMS, with S_ISTXT removed, before being passed to
 * vn_open().  On success td->td_retval[0] holds the new descriptor index
 * and 0 is returned; otherwise an errno value is returned (ERESTART from
 * vn_open() is reported as EINTR).
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode, oflags;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;

	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	/* oflags keeps the caller's flags; it is not read again below. */
	oflags = flags;
	flags = FFLAGS(flags);
	/* Reserve a file structure and descriptor slot before the lookup. */
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
	/*
	 * Bump the ref count to prevent another process from closing
	 * the descriptor while we are blocked in vn_open()
	 */
	fhold(fp);
	error = vn_open(&nd, &flags, cmode);
	if (error) {
		/*
		 * release our own reference
		 */
		fdrop(fp, td);

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[indx] == fp) {
			fdp->fd_ofiles[indx] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
		} else
			FILEDESC_UNLOCK(fdp);

		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * There should be 2 references on the file, one from the descriptor
	 * table, and one for us.
	 *
	 * Handle the case where someone closed the file (via its file
	 * descriptor) while we were blocked.  The end result should look
	 * like opening the file succeeded but it was immediately closed.
	 */
	FILEDESC_LOCK(fdp);
	FILE_LOCK(fp);
	if (fp->f_count == 1) {
		KASSERT(fdp->fd_ofiles[indx] != fp,
		    ("Open file descriptor lost all refs"));
		FILEDESC_UNLOCK(fdp);
		FILE_UNLOCK(fp);
		VOP_UNLOCK(vp, 0, td);
		/* fp was never wired to vp, so close the vnode by hand. */
		vn_close(vp, flags & FMASK, fp->f_cred, td);
		fdrop(fp, td);
		td->td_retval[0] = indx;
		return 0;
	}

	/* assert that vn_open created a backing object if one is needed */
	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
		("open: vmio vnode has no backing object after vn_open"));

	/* Wire the vnode into the file structure while both locks are held. */
	fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	fp->f_ops = &vnops;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	FILEDESC_UNLOCK(fdp);
	FILE_UNLOCK(fp);
	VOP_UNLOCK(vp, 0, td);
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file flock-style lock: start 0, length 0. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* Wait for the lock unless the open was non-blocking. */
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	if (flags & O_TRUNC) {
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		/* Truncate by setting only the size attribute to zero. */
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/*
	 * Undo the open: clear the descriptor slot (and drop its reference)
	 * if it still points at our file, then drop our private reference.
	 */
	FILEDESC_LOCK(fdp);
	if (fdp->fd_ofiles[indx] == fp) {
		fdp->fd_ofiles[indx] = NULL;
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
	} else
		FILEDESC_UNLOCK(fdp);
	fdrop(fp, td);
	return (error);
}
786
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Compatibility entry point: equivalent to open() on uap->path with
 * flags O_WRONLY | O_CREAT | O_TRUNC and the given mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	oa.path = uap->path;
	oa.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oa.mode = uap->mode;
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
817
818/*
819 * Create a special file.
820 */
821#ifndef _SYS_SYSPROTO_H_
822struct mknod_args {
823	char	*path;
824	int	mode;
825	int	dev;
826};
827#endif
828/* ARGSUSED */
829int
830mknod(td, uap)
831	struct thread *td;
832	register struct mknod_args /* {
833		char *path;
834		int mode;
835		int dev;
836	} */ *uap;
837{
838
839	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
840}
841
/*
 * Create the special file `path' (in the address space `pathseg') with
 * the file type and permissions encoded in `mode' and device number `dev'.
 *
 * Character and block devices require suser(); every other type requires
 * suser_cred(..., PRISON_ROOT).  Supported types are S_IFCHR, S_IFBLK,
 * S_IFWHT (creates a whiteout entry) and S_IFMT (creates a VBAD node);
 * anything else yields EINVAL.  Returns EEXIST if the name already exists.
 */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;
	int whiteout = 0;
	struct nameidata nd;

	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = suser(td);
		break;
	default:
		error = suser_cred(td->td_ucred, PRISON_ROOT);
		break;
	}
	if (error)
		return (error);
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* Name already exists; remember the error, clean up below. */
		vrele(vp);
		error = EEXIST;
	} else {
		VATTR_NULL(&vattr);
		/* Read the umask under the filedesc lock. */
		FILEDESC_LOCK(td->td_proc->p_fd);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		FILEDESC_UNLOCK(td->td_proc->p_fd);
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	/*
	 * This runs even when error is already set.  If write access cannot
	 * be started without waiting, release the parent directory, wait
	 * via V_XSLEEP, and redo the lookup from scratch.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* Whiteouts are not subjected to the MAC create check. */
	if (error == 0 && !whiteout)
		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (!error) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* The new vnode is not needed; release it at once. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	/* Common exit for success and for errors detected above. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
	return (error);
}
930
931/*
932 * Create a named pipe.
933 */
934#ifndef _SYS_SYSPROTO_H_
935struct mkfifo_args {
936	char	*path;
937	int	mode;
938};
939#endif
940/* ARGSUSED */
941int
942mkfifo(td, uap)
943	struct thread *td;
944	register struct mkfifo_args /* {
945		char *path;
946		int mode;
947	} */ *uap;
948{
949
950	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
951}
952
953int
954kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
955{
956	struct mount *mp;
957	struct vattr vattr;
958	int error;
959	struct nameidata nd;
960
961restart:
962	bwillwrite();
963	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
964	if ((error = namei(&nd)) != 0)
965		return (error);
966	if (nd.ni_vp != NULL) {
967		NDFREE(&nd, NDF_ONLY_PNBUF);
968		vrele(nd.ni_vp);
969		vput(nd.ni_dvp);
970		return (EEXIST);
971	}
972	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
973		NDFREE(&nd, NDF_ONLY_PNBUF);
974		vput(nd.ni_dvp);
975		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
976			return (error);
977		goto restart;
978	}
979	VATTR_NULL(&vattr);
980	vattr.va_type = VFIFO;
981	FILEDESC_LOCK(td->td_proc->p_fd);
982	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
983	FILEDESC_UNLOCK(td->td_proc->p_fd);
984#ifdef MAC
985	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
986	    &vattr);
987	if (error)
988		goto out;
989#endif
990	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
991	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
992	if (error == 0)
993		vput(nd.ni_vp);
994#ifdef MAC
995out:
996#endif
997	NDFREE(&nd, NDF_ONLY_PNBUF);
998	vput(nd.ni_dvp);
999	vn_finished_write(mp);
1000	return (error);
1001}
1002
1003/*
1004 * Make a hard file link.
1005 */
1006#ifndef _SYS_SYSPROTO_H_
1007struct link_args {
1008	char	*path;
1009	char	*link;
1010};
1011#endif
1012/* ARGSUSED */
1013int
1014link(td, uap)
1015	struct thread *td;
1016	register struct link_args /* {
1017		char *path;
1018		char *link;
1019	} */ *uap;
1020{
1021
1022	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1023}
1024
/*
 * Create a hard link named `link' to the existing file `path'; both names
 * live in the address space given by `segflg'.
 *
 * Directories cannot be linked (EPERM).  Returns EEXIST if `link' already
 * exists, otherwise 0 or the error from the lookups, the MAC check or
 * VOP_LINK().
 */
int
kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	int error;

	bwillwrite();
	/* First lookup: the existing file, returned referenced in vp. */
	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
		vrele(vp);
		return (error);
	}
	/* Second lookup reuses nd: the new name, with its parent locked. */
	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			vrele(nd.ni_vp);
			error = EEXIST;
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
		    == 0) {
			/* The source vnode is locked across VOP_LINK() only. */
			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
#ifdef MAC
			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
			    vp, &nd.ni_cnd);
			if (error == 0)
#endif
				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_UNLOCK(vp, 0, td);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
	}
	/* Drop the reference from the first lookup and end the write. */
	vrele(vp);
	vn_finished_write(mp);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
	return (error);
}
1073
1074/*
1075 * Make a symbolic link.
1076 */
1077#ifndef _SYS_SYSPROTO_H_
1078struct symlink_args {
1079	char	*path;
1080	char	*link;
1081};
1082#endif
1083/* ARGSUSED */
1084int
1085symlink(td, uap)
1086	struct thread *td;
1087	register struct symlink_args /* {
1088		char *path;
1089		char *link;
1090	} */ *uap;
1091{
1092
1093	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1094}
1095
1096int
1097kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1098{
1099	struct mount *mp;
1100	struct vattr vattr;
1101	char *syspath;
1102	int error;
1103	struct nameidata nd;
1104
1105	if (segflg == UIO_SYSSPACE) {
1106		syspath = path;
1107	} else {
1108		syspath = uma_zalloc(namei_zone, M_WAITOK);
1109		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1110			goto out;
1111	}
1112restart:
1113	bwillwrite();
1114	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1115	if ((error = namei(&nd)) != 0)
1116		goto out;
1117	if (nd.ni_vp) {
1118		NDFREE(&nd, NDF_ONLY_PNBUF);
1119		vrele(nd.ni_vp);
1120		vput(nd.ni_dvp);
1121		error = EEXIST;
1122		goto out;
1123	}
1124	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1125		NDFREE(&nd, NDF_ONLY_PNBUF);
1126		vput(nd.ni_dvp);
1127		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1128			return (error);
1129		goto restart;
1130	}
1131	VATTR_NULL(&vattr);
1132	FILEDESC_LOCK(td->td_proc->p_fd);
1133	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1134	FILEDESC_UNLOCK(td->td_proc->p_fd);
1135#ifdef MAC
1136	vattr.va_type = VLNK;
1137	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1138	    &vattr);
1139	if (error)
1140		goto out2;
1141#endif
1142	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1143	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1144	if (error == 0)
1145		vput(nd.ni_vp);
1146#ifdef MAC
1147out2:
1148#endif
1149	NDFREE(&nd, NDF_ONLY_PNBUF);
1150	vput(nd.ni_dvp);
1151	vn_finished_write(mp);
1152	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1153	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1154out:
1155	if (segflg != UIO_SYSSPACE)
1156		uma_zfree(namei_zone, syspath);
1157	return (error);
1158}
1159
1160/*
1161 * Delete a whiteout from the filesystem.
1162 */
1163/* ARGSUSED */
1164int
1165undelete(td, uap)
1166	struct thread *td;
1167	register struct undelete_args /* {
1168		char *path;
1169	} */ *uap;
1170{
1171	int error;
1172	struct mount *mp;
1173	struct nameidata nd;
1174
1175restart:
1176	bwillwrite();
1177	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1178	    uap->path, td);
1179	error = namei(&nd);
1180	if (error)
1181		return (error);
1182
1183	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1184		NDFREE(&nd, NDF_ONLY_PNBUF);
1185		if (nd.ni_vp)
1186			vrele(nd.ni_vp);
1187		vput(nd.ni_dvp);
1188		return (EEXIST);
1189	}
1190	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1191		NDFREE(&nd, NDF_ONLY_PNBUF);
1192		vput(nd.ni_dvp);
1193		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1194			return (error);
1195		goto restart;
1196	}
1197	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1198	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1199	NDFREE(&nd, NDF_ONLY_PNBUF);
1200	vput(nd.ni_dvp);
1201	vn_finished_write(mp);
1202	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1203	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1204	return (error);
1205}
1206
1207/*
1208 * Delete a name from the filesystem.
1209 */
1210#ifndef _SYS_SYSPROTO_H_
1211struct unlink_args {
1212	char	*path;
1213};
1214#endif
1215/* ARGSUSED */
1216int
1217unlink(td, uap)
1218	struct thread *td;
1219	struct unlink_args /* {
1220		char *path;
1221	} */ *uap;
1222{
1223
1224	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1225}
1226
/*
 * Remove the directory entry `path' (in the address space `pathseg').
 *
 * Directories are refused with EPERM and vnodes flagged VV_ROOT with
 * EBUSY.  Otherwise returns 0 or the error from the lookup, the MAC check
 * or VOP_REMOVE().
 */
int
kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;

restart:
	bwillwrite();
	/* Both the parent directory and the leaf are requested locked. */
	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR)
		error = EPERM;		/* POSIX */
	else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		/*
		 * If write access cannot be started without waiting, release
		 * both vnodes, wait via V_XSLEEP, and redo the lookup.
		 */
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			NDFREE(&nd, NDF_ONLY_PNBUF);
			/*
			 * When leaf and parent are the same vnode, only drop
			 * the leaf's reference here; the vput() of the parent
			 * below does the unlock.
			 */
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			vput(nd.ni_dvp);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error)
			goto out;
#endif
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/* Common cleanup, mirroring the release logic used before restart. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	vput(nd.ni_dvp);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
	return (error);
}
1288
1289/*
1290 * Reposition read/write file offset.
1291 */
1292#ifndef _SYS_SYSPROTO_H_
1293struct lseek_args {
1294	int	fd;
1295	int	pad;
1296	off_t	offset;
1297	int	whence;
1298};
1299#endif
1300int
1301lseek(td, uap)
1302	struct thread *td;
1303	register struct lseek_args /* {
1304		int fd;
1305		int pad;
1306		off_t offset;
1307		int whence;
1308	} */ *uap;
1309{
1310	struct ucred *cred = td->td_ucred;
1311	struct file *fp;
1312	struct vnode *vp;
1313	struct vattr vattr;
1314	off_t offset;
1315	int error, noneg;
1316
1317	if ((error = fget(td, uap->fd, &fp)) != 0)
1318		return (error);
1319	if (fp->f_type != DTYPE_VNODE) {
1320		fdrop(fp, td);
1321		return (ESPIPE);
1322	}
1323	vp = fp->f_data;
1324	noneg = (vp->v_type != VCHR);
1325	offset = uap->offset;
1326	switch (uap->whence) {
1327	case L_INCR:
1328		if (noneg &&
1329		    (fp->f_offset < 0 ||
1330		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1331			error = EOVERFLOW;
1332			break;
1333		}
1334		offset += fp->f_offset;
1335		break;
1336	case L_XTND:
1337		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1338		error = VOP_GETATTR(vp, &vattr, cred, td);
1339		VOP_UNLOCK(vp, 0, td);
1340		if (error)
1341			break;
1342		if (noneg &&
1343		    (vattr.va_size > OFF_MAX ||
1344		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1345			error = EOVERFLOW;
1346			break;
1347		}
1348		offset += vattr.va_size;
1349		break;
1350	case L_SET:
1351		break;
1352	default:
1353		error = EINVAL;
1354	}
1355	if (error == 0 && noneg && offset < 0)
1356		error = EINVAL;
1357	if (error != 0) {
1358		fdrop(fp, td);
1359		return (error);
1360	}
1361	fp->f_offset = offset;
1362	*(off_t *)(td->td_retval) = fp->f_offset;
1363	fdrop(fp, td);
1364	return (0);
1365}
1366
1367#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1368/*
1369 * Reposition read/write file offset.
1370 */
1371#ifndef _SYS_SYSPROTO_H_
1372struct olseek_args {
1373	int	fd;
1374	long	offset;
1375	int	whence;
1376};
1377#endif
1378int
1379olseek(td, uap)
1380	struct thread *td;
1381	register struct olseek_args /* {
1382		int fd;
1383		long offset;
1384		int whence;
1385	} */ *uap;
1386{
1387	struct lseek_args /* {
1388		int fd;
1389		int pad;
1390		off_t offset;
1391		int whence;
1392	} */ nuap;
1393	int error;
1394
1395	nuap.fd = uap->fd;
1396	nuap.offset = uap->offset;
1397	nuap.whence = uap->whence;
1398	error = lseek(td, &nuap);
1399	return (error);
1400}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1402
1403/*
1404 * Check access permissions using passed credentials.
1405 */
1406static int
1407vn_access(vp, user_flags, cred, td)
1408	struct vnode	*vp;
1409	int		user_flags;
1410	struct ucred	*cred;
1411	struct thread	*td;
1412{
1413	int error, flags;
1414
1415	/* Flags == 0 means only check for existence. */
1416	error = 0;
1417	if (user_flags) {
1418		flags = 0;
1419		if (user_flags & R_OK)
1420			flags |= VREAD;
1421		if (user_flags & W_OK)
1422			flags |= VWRITE;
1423		if (user_flags & X_OK)
1424			flags |= VEXEC;
1425#ifdef MAC
1426		error = mac_check_vnode_access(cred, vp, flags);
1427		if (error)
1428			return (error);
1429#endif
1430		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1431			error = VOP_ACCESS(vp, flags, cred, td);
1432	}
1433	return (error);
1434}
1435
1436/*
1437 * Check access permissions using "real" credentials.
1438 */
1439#ifndef _SYS_SYSPROTO_H_
1440struct access_args {
1441	char	*path;
1442	int	flags;
1443};
1444#endif
1445int
1446access(td, uap)
1447	struct thread *td;
1448	register struct access_args /* {
1449		char *path;
1450		int flags;
1451	} */ *uap;
1452{
1453
1454	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1455}
1456
1457int
1458kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1459{
1460	struct ucred *cred, *tmpcred;
1461	register struct vnode *vp;
1462	int error;
1463	struct nameidata nd;
1464
1465	/*
1466	 * Create and modify a temporary credential instead of one that
1467	 * is potentially shared.  This could also mess up socket
1468	 * buffer accounting which can run in an interrupt context.
1469	 *
1470	 * XXX - Depending on how "threads" are finally implemented, it
1471	 * may be better to explicitly pass the credential to namei()
1472	 * rather than to modify the potentially shared process structure.
1473	 */
1474	cred = td->td_ucred;
1475	tmpcred = crdup(cred);
1476	tmpcred->cr_uid = cred->cr_ruid;
1477	tmpcred->cr_groups[0] = cred->cr_rgid;
1478	td->td_ucred = tmpcred;
1479	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1480	if ((error = namei(&nd)) != 0)
1481		goto out1;
1482	vp = nd.ni_vp;
1483
1484	error = vn_access(vp, flags, tmpcred, td);
1485	NDFREE(&nd, NDF_ONLY_PNBUF);
1486	vput(vp);
1487out1:
1488	td->td_ucred = cred;
1489	crfree(tmpcred);
1490	return (error);
1491}
1492
1493/*
1494 * Check access permissions using "effective" credentials.
1495 */
1496#ifndef _SYS_SYSPROTO_H_
1497struct eaccess_args {
1498	char	*path;
1499	int	flags;
1500};
1501#endif
1502int
1503eaccess(td, uap)
1504	struct thread *td;
1505	register struct eaccess_args /* {
1506		char *path;
1507		int flags;
1508	} */ *uap;
1509{
1510	struct nameidata nd;
1511	struct vnode *vp;
1512	int error;
1513
1514	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1515	    uap->path, td);
1516	if ((error = namei(&nd)) != 0)
1517		return (error);
1518	vp = nd.ni_vp;
1519
1520	error = vn_access(vp, uap->flags, td->td_ucred, td);
1521	NDFREE(&nd, NDF_ONLY_PNBUF);
1522	vput(vp);
1523	return (error);
1524}
1525
1526#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1527/*
1528 * Get file status; this version follows links.
1529 */
1530#ifndef _SYS_SYSPROTO_H_
1531struct ostat_args {
1532	char	*path;
1533	struct ostat *ub;
1534};
1535#endif
1536/* ARGSUSED */
1537int
1538ostat(td, uap)
1539	struct thread *td;
1540	register struct ostat_args /* {
1541		char *path;
1542		struct ostat *ub;
1543	} */ *uap;
1544{
1545	struct stat sb;
1546	struct ostat osb;
1547	int error;
1548	struct nameidata nd;
1549
1550	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1551	    uap->path, td);
1552	if ((error = namei(&nd)) != 0)
1553		return (error);
1554	NDFREE(&nd, NDF_ONLY_PNBUF);
1555	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1556	vput(nd.ni_vp);
1557	if (error)
1558		return (error);
1559	cvtstat(&sb, &osb);
1560	error = copyout(&osb, uap->ub, sizeof (osb));
1561	return (error);
1562}
1563
1564/*
1565 * Get file status; this version does not follow links.
1566 */
1567#ifndef _SYS_SYSPROTO_H_
1568struct olstat_args {
1569	char	*path;
1570	struct ostat *ub;
1571};
1572#endif
1573/* ARGSUSED */
1574int
1575olstat(td, uap)
1576	struct thread *td;
1577	register struct olstat_args /* {
1578		char *path;
1579		struct ostat *ub;
1580	} */ *uap;
1581{
1582	struct vnode *vp;
1583	struct stat sb;
1584	struct ostat osb;
1585	int error;
1586	struct nameidata nd;
1587
1588	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1589	    uap->path, td);
1590	if ((error = namei(&nd)) != 0)
1591		return (error);
1592	vp = nd.ni_vp;
1593	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1594	NDFREE(&nd, NDF_ONLY_PNBUF);
1595	vput(vp);
1596	if (error)
1597		return (error);
1598	cvtstat(&sb, &osb);
1599	error = copyout(&osb, uap->ub, sizeof (osb));
1600	return (error);
1601}
1602
/*
 * Convert from a new (struct stat) to an old (struct ostat) stat structure.
 */
1606void
1607cvtstat(st, ost)
1608	struct stat *st;
1609	struct ostat *ost;
1610{
1611
1612	ost->st_dev = st->st_dev;
1613	ost->st_ino = st->st_ino;
1614	ost->st_mode = st->st_mode;
1615	ost->st_nlink = st->st_nlink;
1616	ost->st_uid = st->st_uid;
1617	ost->st_gid = st->st_gid;
1618	ost->st_rdev = st->st_rdev;
1619	if (st->st_size < (quad_t)1 << 32)
1620		ost->st_size = st->st_size;
1621	else
1622		ost->st_size = -2;
1623	ost->st_atime = st->st_atime;
1624	ost->st_mtime = st->st_mtime;
1625	ost->st_ctime = st->st_ctime;
1626	ost->st_blksize = st->st_blksize;
1627	ost->st_blocks = st->st_blocks;
1628	ost->st_flags = st->st_flags;
1629	ost->st_gen = st->st_gen;
1630}
1631#endif /* COMPAT_43 || COMPAT_SUNOS */
1632
1633/*
1634 * Get file status; this version follows links.
1635 */
1636#ifndef _SYS_SYSPROTO_H_
1637struct stat_args {
1638	char	*path;
1639	struct stat *ub;
1640};
1641#endif
1642/* ARGSUSED */
1643int
1644stat(td, uap)
1645	struct thread *td;
1646	register struct stat_args /* {
1647		char *path;
1648		struct stat *ub;
1649	} */ *uap;
1650{
1651	struct stat sb;
1652	int error;
1653	struct nameidata nd;
1654
1655#ifdef LOOKUP_SHARED
1656	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1657	    UIO_USERSPACE, uap->path, td);
1658#else
1659	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1660	    uap->path, td);
1661#endif
1662	if ((error = namei(&nd)) != 0)
1663		return (error);
1664	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1665	NDFREE(&nd, NDF_ONLY_PNBUF);
1666	vput(nd.ni_vp);
1667	if (error)
1668		return (error);
1669	error = copyout(&sb, uap->ub, sizeof (sb));
1670	return (error);
1671}
1672
1673/*
1674 * Get file status; this version does not follow links.
1675 */
1676#ifndef _SYS_SYSPROTO_H_
1677struct lstat_args {
1678	char	*path;
1679	struct stat *ub;
1680};
1681#endif
1682/* ARGSUSED */
1683int
1684lstat(td, uap)
1685	struct thread *td;
1686	register struct lstat_args /* {
1687		char *path;
1688		struct stat *ub;
1689	} */ *uap;
1690{
1691	int error;
1692	struct vnode *vp;
1693	struct stat sb;
1694	struct nameidata nd;
1695
1696	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1697	    uap->path, td);
1698	if ((error = namei(&nd)) != 0)
1699		return (error);
1700	vp = nd.ni_vp;
1701	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1702	NDFREE(&nd, NDF_ONLY_PNBUF);
1703	vput(vp);
1704	if (error)
1705		return (error);
1706	error = copyout(&sb, uap->ub, sizeof (sb));
1707	return (error);
1708}
1709
1710/*
1711 * Implementation of the NetBSD stat() function.
1712 * XXX This should probably be collapsed with the FreeBSD version,
1713 * as the differences are only due to vn_stat() clearing spares at
1714 * the end of the structures.  vn_stat could be split to avoid this,
1715 * and thus collapse the following to close to zero code.
1716 */
1717void
1718cvtnstat(sb, nsb)
1719	struct stat *sb;
1720	struct nstat *nsb;
1721{
1722	bzero(nsb, sizeof *nsb);
1723	nsb->st_dev = sb->st_dev;
1724	nsb->st_ino = sb->st_ino;
1725	nsb->st_mode = sb->st_mode;
1726	nsb->st_nlink = sb->st_nlink;
1727	nsb->st_uid = sb->st_uid;
1728	nsb->st_gid = sb->st_gid;
1729	nsb->st_rdev = sb->st_rdev;
1730	nsb->st_atimespec = sb->st_atimespec;
1731	nsb->st_mtimespec = sb->st_mtimespec;
1732	nsb->st_ctimespec = sb->st_ctimespec;
1733	nsb->st_size = sb->st_size;
1734	nsb->st_blocks = sb->st_blocks;
1735	nsb->st_blksize = sb->st_blksize;
1736	nsb->st_flags = sb->st_flags;
1737	nsb->st_gen = sb->st_gen;
1738	nsb->st_birthtimespec = sb->st_birthtimespec;
1739}
1740
1741#ifndef _SYS_SYSPROTO_H_
1742struct nstat_args {
1743	char	*path;
1744	struct nstat *ub;
1745};
1746#endif
1747/* ARGSUSED */
1748int
1749nstat(td, uap)
1750	struct thread *td;
1751	register struct nstat_args /* {
1752		char *path;
1753		struct nstat *ub;
1754	} */ *uap;
1755{
1756	struct stat sb;
1757	struct nstat nsb;
1758	int error;
1759	struct nameidata nd;
1760
1761	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1762	    uap->path, td);
1763	if ((error = namei(&nd)) != 0)
1764		return (error);
1765	NDFREE(&nd, NDF_ONLY_PNBUF);
1766	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1767	vput(nd.ni_vp);
1768	if (error)
1769		return (error);
1770	cvtnstat(&sb, &nsb);
1771	error = copyout(&nsb, uap->ub, sizeof (nsb));
1772	return (error);
1773}
1774
1775/*
1776 * NetBSD lstat.  Get file status; this version does not follow links.
1777 */
1778#ifndef _SYS_SYSPROTO_H_
1779struct lstat_args {
1780	char	*path;
1781	struct stat *ub;
1782};
1783#endif
1784/* ARGSUSED */
1785int
1786nlstat(td, uap)
1787	struct thread *td;
1788	register struct nlstat_args /* {
1789		char *path;
1790		struct nstat *ub;
1791	} */ *uap;
1792{
1793	int error;
1794	struct vnode *vp;
1795	struct stat sb;
1796	struct nstat nsb;
1797	struct nameidata nd;
1798
1799	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1800	    uap->path, td);
1801	if ((error = namei(&nd)) != 0)
1802		return (error);
1803	vp = nd.ni_vp;
1804	NDFREE(&nd, NDF_ONLY_PNBUF);
1805	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1806	vput(vp);
1807	if (error)
1808		return (error);
1809	cvtnstat(&sb, &nsb);
1810	error = copyout(&nsb, uap->ub, sizeof (nsb));
1811	return (error);
1812}
1813
1814/*
1815 * Get configurable pathname variables.
1816 */
1817#ifndef _SYS_SYSPROTO_H_
1818struct pathconf_args {
1819	char	*path;
1820	int	name;
1821};
1822#endif
1823/* ARGSUSED */
1824int
1825pathconf(td, uap)
1826	struct thread *td;
1827	register struct pathconf_args /* {
1828		char *path;
1829		int name;
1830	} */ *uap;
1831{
1832	int error;
1833	struct nameidata nd;
1834
1835	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1836	    uap->path, td);
1837	if ((error = namei(&nd)) != 0)
1838		return (error);
1839	NDFREE(&nd, NDF_ONLY_PNBUF);
1840
1841	/* If asynchronous I/O is available, it works for all files. */
1842	if (uap->name == _PC_ASYNC_IO)
1843		td->td_retval[0] = async_io_version;
1844	else
1845		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1846	vput(nd.ni_vp);
1847	return (error);
1848}
1849
1850/*
1851 * Return target name of a symbolic link.
1852 */
1853#ifndef _SYS_SYSPROTO_H_
1854struct readlink_args {
1855	char	*path;
1856	char	*buf;
1857	int	count;
1858};
1859#endif
1860/* ARGSUSED */
1861int
1862readlink(td, uap)
1863	struct thread *td;
1864	register struct readlink_args /* {
1865		char *path;
1866		char *buf;
1867		int count;
1868	} */ *uap;
1869{
1870
1871	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1872	    UIO_USERSPACE, uap->count));
1873}
1874
1875int
1876kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1877    enum uio_seg bufseg, int count)
1878{
1879	register struct vnode *vp;
1880	struct iovec aiov;
1881	struct uio auio;
1882	int error;
1883	struct nameidata nd;
1884
1885	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1886	if ((error = namei(&nd)) != 0)
1887		return (error);
1888	NDFREE(&nd, NDF_ONLY_PNBUF);
1889	vp = nd.ni_vp;
1890#ifdef MAC
1891	error = mac_check_vnode_readlink(td->td_ucred, vp);
1892	if (error) {
1893		vput(vp);
1894		return (error);
1895	}
1896#endif
1897	if (vp->v_type != VLNK)
1898		error = EINVAL;
1899	else {
1900		aiov.iov_base = buf;
1901		aiov.iov_len = count;
1902		auio.uio_iov = &aiov;
1903		auio.uio_iovcnt = 1;
1904		auio.uio_offset = 0;
1905		auio.uio_rw = UIO_READ;
1906		auio.uio_segflg = bufseg;
1907		auio.uio_td = td;
1908		auio.uio_resid = count;
1909		error = VOP_READLINK(vp, &auio, td->td_ucred);
1910	}
1911	vput(vp);
1912	td->td_retval[0] = count - auio.uio_resid;
1913	return (error);
1914}
1915
1916/*
1917 * Common implementation code for chflags() and fchflags().
1918 */
1919static int
1920setfflags(td, vp, flags)
1921	struct thread *td;
1922	struct vnode *vp;
1923	int flags;
1924{
1925	int error;
1926	struct mount *mp;
1927	struct vattr vattr;
1928
1929	/*
1930	 * Prevent non-root users from setting flags on devices.  When
1931	 * a device is reused, users can retain ownership of the device
1932	 * if they are allowed to set flags and programs assume that
1933	 * chown can't fail when done as root.
1934	 */
1935	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1936		error = suser_cred(td->td_ucred, PRISON_ROOT);
1937		if (error)
1938			return (error);
1939	}
1940
1941	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1942		return (error);
1943	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1944	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1945	VATTR_NULL(&vattr);
1946	vattr.va_flags = flags;
1947#ifdef MAC
1948	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1949	if (error == 0)
1950#endif
1951		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1952	VOP_UNLOCK(vp, 0, td);
1953	vn_finished_write(mp);
1954	return (error);
1955}
1956
1957/*
1958 * Change flags of a file given a path name.
1959 */
1960#ifndef _SYS_SYSPROTO_H_
1961struct chflags_args {
1962	char	*path;
1963	int	flags;
1964};
1965#endif
1966/* ARGSUSED */
1967int
1968chflags(td, uap)
1969	struct thread *td;
1970	register struct chflags_args /* {
1971		char *path;
1972		int flags;
1973	} */ *uap;
1974{
1975	int error;
1976	struct nameidata nd;
1977
1978	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
1979	if ((error = namei(&nd)) != 0)
1980		return (error);
1981	NDFREE(&nd, NDF_ONLY_PNBUF);
1982	error = setfflags(td, nd.ni_vp, uap->flags);
1983	vrele(nd.ni_vp);
1984	return error;
1985}
1986
1987/*
1988 * Same as chflags() but doesn't follow symlinks.
1989 */
1990int
1991lchflags(td, uap)
1992	struct thread *td;
1993	register struct lchflags_args /* {
1994		char *path;
1995		int flags;
1996	} */ *uap;
1997{
1998	int error;
1999	struct nameidata nd;
2000
2001	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2002	if ((error = namei(&nd)) != 0)
2003		return (error);
2004	NDFREE(&nd, NDF_ONLY_PNBUF);
2005	error = setfflags(td, nd.ni_vp, uap->flags);
2006	vrele(nd.ni_vp);
2007	return error;
2008}
2009
2010/*
2011 * Change flags of a file given a file descriptor.
2012 */
2013#ifndef _SYS_SYSPROTO_H_
2014struct fchflags_args {
2015	int	fd;
2016	int	flags;
2017};
2018#endif
2019/* ARGSUSED */
2020int
2021fchflags(td, uap)
2022	struct thread *td;
2023	register struct fchflags_args /* {
2024		int fd;
2025		int flags;
2026	} */ *uap;
2027{
2028	struct file *fp;
2029	int error;
2030
2031	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2032		return (error);
2033	error = setfflags(td, fp->f_data, uap->flags);
2034	fdrop(fp, td);
2035	return (error);
2036}
2037
2038/*
2039 * Common implementation code for chmod(), lchmod() and fchmod().
2040 */
2041static int
2042setfmode(td, vp, mode)
2043	struct thread *td;
2044	struct vnode *vp;
2045	int mode;
2046{
2047	int error;
2048	struct mount *mp;
2049	struct vattr vattr;
2050
2051	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2052		return (error);
2053	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2054	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2055	VATTR_NULL(&vattr);
2056	vattr.va_mode = mode & ALLPERMS;
2057#ifdef MAC
2058	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2059	if (error == 0)
2060#endif
2061		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2062	VOP_UNLOCK(vp, 0, td);
2063	vn_finished_write(mp);
2064	return error;
2065}
2066
2067/*
2068 * Change mode of a file given path name.
2069 */
2070#ifndef _SYS_SYSPROTO_H_
2071struct chmod_args {
2072	char	*path;
2073	int	mode;
2074};
2075#endif
2076/* ARGSUSED */
2077int
2078chmod(td, uap)
2079	struct thread *td;
2080	register struct chmod_args /* {
2081		char *path;
2082		int mode;
2083	} */ *uap;
2084{
2085
2086	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2087}
2088
2089int
2090kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2091{
2092	int error;
2093	struct nameidata nd;
2094
2095	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2096	if ((error = namei(&nd)) != 0)
2097		return (error);
2098	NDFREE(&nd, NDF_ONLY_PNBUF);
2099	error = setfmode(td, nd.ni_vp, mode);
2100	vrele(nd.ni_vp);
2101	return error;
2102}
2103
2104/*
2105 * Change mode of a file given path name (don't follow links.)
2106 */
2107#ifndef _SYS_SYSPROTO_H_
2108struct lchmod_args {
2109	char	*path;
2110	int	mode;
2111};
2112#endif
2113/* ARGSUSED */
2114int
2115lchmod(td, uap)
2116	struct thread *td;
2117	register struct lchmod_args /* {
2118		char *path;
2119		int mode;
2120	} */ *uap;
2121{
2122	int error;
2123	struct nameidata nd;
2124
2125	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2126	if ((error = namei(&nd)) != 0)
2127		return (error);
2128	NDFREE(&nd, NDF_ONLY_PNBUF);
2129	error = setfmode(td, nd.ni_vp, uap->mode);
2130	vrele(nd.ni_vp);
2131	return error;
2132}
2133
2134/*
2135 * Change mode of a file given a file descriptor.
2136 */
2137#ifndef _SYS_SYSPROTO_H_
2138struct fchmod_args {
2139	int	fd;
2140	int	mode;
2141};
2142#endif
2143/* ARGSUSED */
2144int
2145fchmod(td, uap)
2146	struct thread *td;
2147	register struct fchmod_args /* {
2148		int fd;
2149		int mode;
2150	} */ *uap;
2151{
2152	struct file *fp;
2153	struct vnode *vp;
2154	int error;
2155
2156	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2157		return (error);
2158	vp = fp->f_data;
2159	error = setfmode(td, fp->f_data, uap->mode);
2160	fdrop(fp, td);
2161	return (error);
2162}
2163
2164/*
2165 * Common implementation for chown(), lchown(), and fchown()
2166 */
2167static int
2168setfown(td, vp, uid, gid)
2169	struct thread *td;
2170	struct vnode *vp;
2171	uid_t uid;
2172	gid_t gid;
2173{
2174	int error;
2175	struct mount *mp;
2176	struct vattr vattr;
2177
2178	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2179		return (error);
2180	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2181	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2182	VATTR_NULL(&vattr);
2183	vattr.va_uid = uid;
2184	vattr.va_gid = gid;
2185#ifdef MAC
2186	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2187	    vattr.va_gid);
2188	if (error == 0)
2189#endif
2190		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2191	VOP_UNLOCK(vp, 0, td);
2192	vn_finished_write(mp);
2193	return error;
2194}
2195
2196/*
2197 * Set ownership given a path name.
2198 */
2199#ifndef _SYS_SYSPROTO_H_
2200struct chown_args {
2201	char	*path;
2202	int	uid;
2203	int	gid;
2204};
2205#endif
2206/* ARGSUSED */
2207int
2208chown(td, uap)
2209	struct thread *td;
2210	register struct chown_args /* {
2211		char *path;
2212		int uid;
2213		int gid;
2214	} */ *uap;
2215{
2216
2217	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2218}
2219
2220int
2221kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2222    int gid)
2223{
2224	int error;
2225	struct nameidata nd;
2226
2227	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2228	if ((error = namei(&nd)) != 0)
2229		return (error);
2230	NDFREE(&nd, NDF_ONLY_PNBUF);
2231	error = setfown(td, nd.ni_vp, uid, gid);
2232	vrele(nd.ni_vp);
2233	return (error);
2234}
2235
2236/*
2237 * Set ownership given a path name, do not cross symlinks.
2238 */
2239#ifndef _SYS_SYSPROTO_H_
2240struct lchown_args {
2241	char	*path;
2242	int	uid;
2243	int	gid;
2244};
2245#endif
2246/* ARGSUSED */
2247int
2248lchown(td, uap)
2249	struct thread *td;
2250	register struct lchown_args /* {
2251		char *path;
2252		int uid;
2253		int gid;
2254	} */ *uap;
2255{
2256
2257	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2258}
2259
2260int
2261kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2262    int gid)
2263{
2264	int error;
2265	struct nameidata nd;
2266
2267	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2268	if ((error = namei(&nd)) != 0)
2269		return (error);
2270	NDFREE(&nd, NDF_ONLY_PNBUF);
2271	error = setfown(td, nd.ni_vp, uid, gid);
2272	vrele(nd.ni_vp);
2273	return (error);
2274}
2275
2276/*
2277 * Set ownership given a file descriptor.
2278 */
2279#ifndef _SYS_SYSPROTO_H_
2280struct fchown_args {
2281	int	fd;
2282	int	uid;
2283	int	gid;
2284};
2285#endif
2286/* ARGSUSED */
2287int
2288fchown(td, uap)
2289	struct thread *td;
2290	register struct fchown_args /* {
2291		int fd;
2292		int uid;
2293		int gid;
2294	} */ *uap;
2295{
2296	struct file *fp;
2297	struct vnode *vp;
2298	int error;
2299
2300	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2301		return (error);
2302	vp = fp->f_data;
2303	error = setfown(td, fp->f_data, uap->uid, uap->gid);
2304	fdrop(fp, td);
2305	return (error);
2306}
2307
2308/*
2309 * Common implementation code for utimes(), lutimes(), and futimes().
2310 */
2311static int
2312getutimes(usrtvp, tvpseg, tsp)
2313	const struct timeval *usrtvp;
2314	enum uio_seg tvpseg;
2315	struct timespec *tsp;
2316{
2317	struct timeval tv[2];
2318	const struct timeval *tvp;
2319	int error;
2320
2321	if (usrtvp == NULL) {
2322		microtime(&tv[0]);
2323		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2324		tsp[1] = tsp[0];
2325	} else {
2326		if (tvpseg == UIO_SYSSPACE) {
2327			tvp = usrtvp;
2328		} else {
2329			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2330				return (error);
2331			tvp = tv;
2332		}
2333
2334		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2335		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2336	}
2337	return 0;
2338}
2339
2340/*
2341 * Common implementation code for utimes(), lutimes(), and futimes().
2342 */
2343static int
2344setutimes(td, vp, ts, numtimes, nullflag)
2345	struct thread *td;
2346	struct vnode *vp;
2347	const struct timespec *ts;
2348	int numtimes;
2349	int nullflag;
2350{
2351	int error, setbirthtime;
2352	struct mount *mp;
2353	struct vattr vattr;
2354
2355	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2356		return (error);
2357	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2358	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2359	setbirthtime = 0;
2360	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2361	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2362		setbirthtime = 1;
2363	VATTR_NULL(&vattr);
2364	vattr.va_atime = ts[0];
2365	vattr.va_mtime = ts[1];
2366	if (setbirthtime)
2367		vattr.va_birthtime = ts[1];
2368	if (numtimes > 2)
2369		vattr.va_birthtime = ts[2];
2370	if (nullflag)
2371		vattr.va_vaflags |= VA_UTIMES_NULL;
2372#ifdef MAC
2373	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2374	    vattr.va_mtime);
2375#endif
2376	if (error == 0)
2377		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2378	VOP_UNLOCK(vp, 0, td);
2379	vn_finished_write(mp);
2380	return error;
2381}
2382
2383/*
2384 * Set the access and modification times of a file.
2385 */
2386#ifndef _SYS_SYSPROTO_H_
2387struct utimes_args {
2388	char	*path;
2389	struct	timeval *tptr;
2390};
2391#endif
2392/* ARGSUSED */
2393int
2394utimes(td, uap)
2395	struct thread *td;
2396	register struct utimes_args /* {
2397		char *path;
2398		struct timeval *tptr;
2399	} */ *uap;
2400{
2401
2402	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2403	    UIO_USERSPACE));
2404}
2405
2406int
2407kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2408    struct timeval *tptr, enum uio_seg tptrseg)
2409{
2410	struct timespec ts[2];
2411	int error;
2412	struct nameidata nd;
2413
2414	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2415		return (error);
2416	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2417	if ((error = namei(&nd)) != 0)
2418		return (error);
2419	NDFREE(&nd, NDF_ONLY_PNBUF);
2420	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2421	vrele(nd.ni_vp);
2422	return (error);
2423}
2424
/*
 * Set the access and modification times of a file; this version does
 * not follow links.
 */
2428#ifndef _SYS_SYSPROTO_H_
2429struct lutimes_args {
2430	char	*path;
2431	struct	timeval *tptr;
2432};
2433#endif
2434/* ARGSUSED */
2435int
2436lutimes(td, uap)
2437	struct thread *td;
2438	register struct lutimes_args /* {
2439		char *path;
2440		struct timeval *tptr;
2441	} */ *uap;
2442{
2443
2444	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2445	    UIO_USERSPACE));
2446}
2447
2448int
2449kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2450    struct timeval *tptr, enum uio_seg tptrseg)
2451{
2452	struct timespec ts[2];
2453	int error;
2454	struct nameidata nd;
2455
2456	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2457		return (error);
2458	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2459	if ((error = namei(&nd)) != 0)
2460		return (error);
2461	NDFREE(&nd, NDF_ONLY_PNBUF);
2462	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2463	vrele(nd.ni_vp);
2464	return (error);
2465}
2466
/*
 * Set the access and modification times of a file given a file descriptor.
 */
2470#ifndef _SYS_SYSPROTO_H_
2471struct futimes_args {
2472	int	fd;
2473	struct	timeval *tptr;
2474};
2475#endif
2476/* ARGSUSED */
2477int
2478futimes(td, uap)
2479	struct thread *td;
2480	register struct futimes_args /* {
2481		int  fd;
2482		struct timeval *tptr;
2483	} */ *uap;
2484{
2485
2486	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2487}
2488
2489int
2490kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2491    enum uio_seg tptrseg)
2492{
2493	struct timespec ts[2];
2494	struct file *fp;
2495	int error;
2496
2497	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2498		return (error);
2499	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2500		return (error);
2501	error = setutimes(td, fp->f_data, ts, 2, tptr == NULL);
2502	fdrop(fp, td);
2503	return (error);
2504}
2505
2506/*
2507 * Truncate a file given its path name.
2508 */
2509#ifndef _SYS_SYSPROTO_H_
2510struct truncate_args {
2511	char	*path;
2512	int	pad;
2513	off_t	length;
2514};
2515#endif
2516/* ARGSUSED */
2517int
2518truncate(td, uap)
2519	struct thread *td;
2520	register struct truncate_args /* {
2521		char *path;
2522		int pad;
2523		off_t length;
2524	} */ *uap;
2525{
2526
2527	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2528}
2529
2530int
2531kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2532{
2533	struct mount *mp;
2534	struct vnode *vp;
2535	struct vattr vattr;
2536	int error;
2537	struct nameidata nd;
2538
2539	if (length < 0)
2540		return(EINVAL);
2541	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2542	if ((error = namei(&nd)) != 0)
2543		return (error);
2544	vp = nd.ni_vp;
2545	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2546		vrele(vp);
2547		return (error);
2548	}
2549	NDFREE(&nd, NDF_ONLY_PNBUF);
2550	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2551	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2552	if (vp->v_type == VDIR)
2553		error = EISDIR;
2554#ifdef MAC
2555	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2556	}
2557#endif
2558	else if ((error = vn_writechk(vp)) == 0 &&
2559	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2560		VATTR_NULL(&vattr);
2561		vattr.va_size = length;
2562		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2563	}
2564	vput(vp);
2565	vn_finished_write(mp);
2566	return (error);
2567}
2568
2569/*
2570 * Truncate a file given a file descriptor.
2571 */
2572#ifndef _SYS_SYSPROTO_H_
2573struct ftruncate_args {
2574	int	fd;
2575	int	pad;
2576	off_t	length;
2577};
2578#endif
/*
 * Returns 0 on success; EINVAL if uap->length is negative or the
 * descriptor lacks FWRITE; EISDIR if the vnode is a directory; otherwise
 * the error from getvnode(), vn_start_write(), the MAC check,
 * vn_writechk() or VOP_SETATTR().
 *
 * Unlike kern_truncate() this path makes no VOP_ACCESS() call, and it
 * passes the credential stored in the file (fp->f_cred) to VOP_SETATTR().
 */
2579/* ARGSUSED */
2580int
2581ftruncate(td, uap)
2582	struct thread *td;
2583	register struct ftruncate_args /* {
2584		int fd;
2585		int pad;
2586		off_t length;
2587	} */ *uap;
2588{
2589	struct mount *mp;
2590	struct vattr vattr;
2591	struct vnode *vp;
2592	struct file *fp;
2593	int error;
2594
2595	if (uap->length < 0)
2596		return(EINVAL);
	/* getvnode() returns fp with a reference; every exit below fdrop()s it. */
2597	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2598		return (error);
2599	if ((fp->f_flag & FWRITE) == 0) {
2600		fdrop(fp, td);
2601		return (EINVAL);
2602	}
2603	vp = fp->f_data;
2604	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2605		fdrop(fp, td);
2606		return (error);
2607	}
2608	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2609	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/*
	 * Single if/else-if chain spanning the MAC conditional: the size is
	 * only changed when none of the earlier checks set error.
	 */
2610	if (vp->v_type == VDIR)
2611		error = EISDIR;
2612#ifdef MAC
2613	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2614	    vp))) {
2615	}
2616#endif
2617	else if ((error = vn_writechk(vp)) == 0) {
2618		VATTR_NULL(&vattr);
2619		vattr.va_size = uap->length;
2620		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2621	}
	/* Only unlock: the vnode reference belongs to fp, dropped below. */
2622	VOP_UNLOCK(vp, 0, td);
2623	vn_finished_write(mp);
2624	fdrop(fp, td);
2625	return (error);
2626}
2627
2628#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2629/*
2630 * Truncate a file given its path name.
2631 */
2632#ifndef _SYS_SYSPROTO_H_
2633struct otruncate_args {
2634	char	*path;
2635	long	length;
2636};
2637#endif
2638/* ARGSUSED */
2639int
2640otruncate(td, uap)
2641	struct thread *td;
2642	register struct otruncate_args /* {
2643		char *path;
2644		long length;
2645	} */ *uap;
2646{
2647	struct truncate_args /* {
2648		char *path;
2649		int pad;
2650		off_t length;
2651	} */ nuap;
2652
2653	nuap.path = uap->path;
2654	nuap.length = uap->length;
2655	return (truncate(td, &nuap));
2656}
2657
2658/*
2659 * Truncate a file given a file descriptor.
2660 */
2661#ifndef _SYS_SYSPROTO_H_
2662struct oftruncate_args {
2663	int	fd;
2664	long	length;
2665};
2666#endif
2667/* ARGSUSED */
2668int
2669oftruncate(td, uap)
2670	struct thread *td;
2671	register struct oftruncate_args /* {
2672		int fd;
2673		long length;
2674	} */ *uap;
2675{
2676	struct ftruncate_args /* {
2677		int fd;
2678		int pad;
2679		off_t length;
2680	} */ nuap;
2681
2682	nuap.fd = uap->fd;
2683	nuap.length = uap->length;
2684	return (ftruncate(td, &nuap));
2685}
2686#endif /* COMPAT_43 || COMPAT_SUNOS */
2687
2688/*
2689 * Sync an open file.
2690 */
2691#ifndef _SYS_SYSPROTO_H_
2692struct fsync_args {
2693	int	fd;
2694};
2695#endif
2696/* ARGSUSED */
2697int
2698fsync(td, uap)
2699	struct thread *td;
2700	struct fsync_args /* {
2701		int fd;
2702	} */ *uap;
2703{
2704	struct vnode *vp;
2705	struct mount *mp;
2706	struct file *fp;
2707	vm_object_t obj;
2708	int error;
2709
2710	GIANT_REQUIRED;
2711
2712	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2713		return (error);
2714	vp = fp->f_data;
2715	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2716		fdrop(fp, td);
2717		return (error);
2718	}
2719	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2720	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2721		vm_object_page_clean(obj, 0, 0, 0);
2722	}
2723	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2724	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2725	    && softdep_fsync_hook != NULL)
2726		error = (*softdep_fsync_hook)(vp);
2727
2728	VOP_UNLOCK(vp, 0, td);
2729	vn_finished_write(mp);
2730	fdrop(fp, td);
2731	return (error);
2732}
2733
2734/*
2735 * Rename files.  Source and destination must either both be directories,
2736 * or both not be directories.  If target is a directory, it must be empty.
2737 */
2738#ifndef _SYS_SYSPROTO_H_
2739struct rename_args {
2740	char	*from;
2741	char	*to;
2742};
2743#endif
2744/* ARGSUSED */
2745int
2746rename(td, uap)
2747	struct thread *td;
2748	register struct rename_args /* {
2749		char *from;
2750		char *to;
2751	} */ *uap;
2752{
2753
2754	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2755}
2756
2757int
2758kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2759{
2760	struct mount *mp = NULL;
2761	struct vnode *tvp, *fvp, *tdvp;
2762	struct nameidata fromnd, tond;
2763	int error;
2764
2765	bwillwrite();
2766#ifdef MAC
2767	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2768	    from, td);
2769#else
2770	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2771#endif
2772	if ((error = namei(&fromnd)) != 0)
2773		return (error);
2774#ifdef MAC
2775	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2776	    fromnd.ni_vp, &fromnd.ni_cnd);
2777	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2778	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2779#endif
2780	fvp = fromnd.ni_vp;
2781	if (error == 0)
2782		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2783	if (error != 0) {
2784		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2785		vrele(fromnd.ni_dvp);
2786		vrele(fvp);
2787		goto out1;
2788	}
2789	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2790	    NOOBJ, pathseg, to, td);
2791	if (fromnd.ni_vp->v_type == VDIR)
2792		tond.ni_cnd.cn_flags |= WILLBEDIR;
2793	if ((error = namei(&tond)) != 0) {
2794		/* Translate error code for rename("dir1", "dir2/."). */
2795		if (error == EISDIR && fvp->v_type == VDIR)
2796			error = EINVAL;
2797		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2798		vrele(fromnd.ni_dvp);
2799		vrele(fvp);
2800		goto out1;
2801	}
2802	tdvp = tond.ni_dvp;
2803	tvp = tond.ni_vp;
2804	if (tvp != NULL) {
2805		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2806			error = ENOTDIR;
2807			goto out;
2808		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2809			error = EISDIR;
2810			goto out;
2811		}
2812	}
2813	if (fvp == tdvp)
2814		error = EINVAL;
2815	/*
2816	 * If the source is the same as the destination (that is, if they
2817	 * are links to the same vnode), then there is nothing to do.
2818	 */
2819	if (fvp == tvp)
2820		error = -1;
2821#ifdef MAC
2822	else
2823		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2824		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2825#endif
2826out:
2827	if (!error) {
2828		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2829		if (fromnd.ni_dvp != tdvp) {
2830			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2831		}
2832		if (tvp) {
2833			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2834		}
2835		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2836				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2837		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2838		NDFREE(&tond, NDF_ONLY_PNBUF);
2839	} else {
2840		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2841		NDFREE(&tond, NDF_ONLY_PNBUF);
2842		if (tdvp == tvp)
2843			vrele(tdvp);
2844		else
2845			vput(tdvp);
2846		if (tvp)
2847			vput(tvp);
2848		vrele(fromnd.ni_dvp);
2849		vrele(fvp);
2850	}
2851	vrele(tond.ni_startdir);
2852	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2853	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2854	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2855	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2856out1:
2857	vn_finished_write(mp);
2858	if (fromnd.ni_startdir)
2859		vrele(fromnd.ni_startdir);
2860	if (error == -1)
2861		return (0);
2862	return (error);
2863}
2864
2865/*
2866 * Make a directory file.
2867 */
2868#ifndef _SYS_SYSPROTO_H_
2869struct mkdir_args {
2870	char	*path;
2871	int	mode;
2872};
2873#endif
2874/* ARGSUSED */
2875int
2876mkdir(td, uap)
2877	struct thread *td;
2878	register struct mkdir_args /* {
2879		char *path;
2880		int mode;
2881	} */ *uap;
2882{
2883
2884	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2885}
2886
/*
 * Create the directory named by `path' (read from the address space given
 * by `segflg') with permission bits `mode', filtered through the process
 * file creation mask.
 *
 * Returns 0 on success, EEXIST if the lookup finds an existing vnode, or
 * the error from namei(), vn_start_write(), the MAC check or VOP_MKDIR().
 */
2887int
2888kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2889{
2890	struct mount *mp;
2891	struct vnode *vp;
2892	struct vattr vattr;
2893	int error;
2894	struct nameidata nd;
2895
2896restart:
2897	bwillwrite();
2898	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2899	nd.ni_cnd.cn_flags |= WILLBEDIR;
2900	if ((error = namei(&nd)) != 0)
2901		return (error);
2902	vp = nd.ni_vp;
	/* A non-NULL leaf means the name already exists. */
2903	if (vp != NULL) {
2904		NDFREE(&nd, NDF_ONLY_PNBUF);
2905		vrele(vp);
2906		/*
2907		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2908		 * the strange behaviour of leaving the vnode unlocked
2909		 * if the target is the same vnode as the parent.
2910		 */
2911		if (vp == nd.ni_dvp)
2912			vrele(nd.ni_dvp);
2913		else
2914			vput(nd.ni_dvp);
2915		return (EEXIST);
2916	}
	/*
	 * Try to start the write without waiting.  If that fails, release
	 * the lookup results, wait via the V_XSLEEP call (with no vnode
	 * held) and redo the whole lookup from `restart'.
	 */
2917	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2918		NDFREE(&nd, NDF_ONLY_PNBUF);
2919		vput(nd.ni_dvp);
2920		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2921			return (error);
2922		goto restart;
2923	}
2924	VATTR_NULL(&vattr);
2925	vattr.va_type = VDIR;
	/* fd_cmask is read with the file descriptor table locked. */
2926	FILEDESC_LOCK(td->td_proc->p_fd);
2927	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2928	FILEDESC_UNLOCK(td->td_proc->p_fd);
2929#ifdef MAC
2930	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2931	    &vattr);
2932	if (error)
2933		goto out;
2934#endif
2935	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2936	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2937#ifdef MAC
2938out:
2939#endif
2940	NDFREE(&nd, NDF_ONLY_PNBUF);
2941	vput(nd.ni_dvp);
	/* The new directory vnode is released only if VOP_MKDIR() succeeded. */
2942	if (!error)
2943		vput(nd.ni_vp);
2944	vn_finished_write(mp);
2945	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2946	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2947	return (error);
2948}
2949
2950/*
2951 * Remove a directory file.
2952 */
2953#ifndef _SYS_SYSPROTO_H_
2954struct rmdir_args {
2955	char	*path;
2956};
2957#endif
2958/* ARGSUSED */
2959int
2960rmdir(td, uap)
2961	struct thread *td;
2962	struct rmdir_args /* {
2963		char *path;
2964	} */ *uap;
2965{
2966
2967	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2968}
2969
/*
 * Remove the directory named by `path' (read from the address space given
 * by `pathseg').
 *
 * Returns 0 on success; ENOTDIR if the target is not a directory; EINVAL
 * if the target is the same vnode as its parent; EBUSY if the target has
 * VV_ROOT set; otherwise the error from namei(), the MAC check,
 * vn_start_write() or VOP_RMDIR().
 */
2970int
2971kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2972{
2973	struct mount *mp;
2974	struct vnode *vp;
2975	int error;
2976	struct nameidata nd;
2977
2978restart:
2979	bwillwrite();
2980	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
2981	if ((error = namei(&nd)) != 0)
2982		return (error);
2983	vp = nd.ni_vp;
2984	if (vp->v_type != VDIR) {
2985		error = ENOTDIR;
2986		goto out;
2987	}
2988	/*
2989	 * No rmdir "." please.
2990	 */
2991	if (nd.ni_dvp == vp) {
2992		error = EINVAL;
2993		goto out;
2994	}
2995	/*
2996	 * The root of a mounted filesystem cannot be deleted.
2997	 */
2998	if (vp->v_vflag & VV_ROOT) {
2999		error = EBUSY;
3000		goto out;
3001	}
3002#ifdef MAC
3003	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3004	    &nd.ni_cnd);
3005	if (error)
3006		goto out;
3007#endif
	/*
	 * Try to start the write without waiting.  If that fails, release
	 * both vnodes, wait via the V_XSLEEP call (with no vnode held) and
	 * redo the lookup from `restart'.  The nd.ni_dvp == vp case was
	 * already rejected with EINVAL above.
	 */
3008	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3009		NDFREE(&nd, NDF_ONLY_PNBUF);
3010		if (nd.ni_dvp == vp)
3011			vrele(nd.ni_dvp);
3012		else
3013			vput(nd.ni_dvp);
3014		vput(vp);
3015		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3016			return (error);
3017		goto restart;
3018	}
3019	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3020	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3021	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	/*
	 * vn_finished_write() sits before the `out' label because every
	 * `goto out' above happens before vn_start_write() is called.
	 */
3022	vn_finished_write(mp);
3023out:
3024	NDFREE(&nd, NDF_ONLY_PNBUF);
3025	if (nd.ni_dvp == vp)
3026		vrele(nd.ni_dvp);
3027	else
3028		vput(nd.ni_dvp);
3029	vput(vp);
3030	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3031	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3032	return (error);
3033}
3034
3035#ifdef COMPAT_43
3036/*
3037 * Read a block of directory entries in a filesystem independent format.
3038 */
/*
 * COMPAT_43 variant.  Requests larger than 64KB are rejected with EINVAL,
 * the descriptor must be open for reading (EBADF otherwise) and must
 * refer to a directory (EINVAL otherwise).
 *
 * On success td->td_retval[0] is uap->count minus the residual of the
 * user-space uio, and the file offset from before the read is copied out
 * to uap->basep; the function's return value is the copyout() result.
 */
3039#ifndef _SYS_SYSPROTO_H_
3040struct ogetdirentries_args {
3041	int	fd;
3042	char	*buf;
3043	u_int	count;
3044	long	*basep;
3045};
3046#endif
3047int
3048ogetdirentries(td, uap)
3049	struct thread *td;
3050	register struct ogetdirentries_args /* {
3051		int fd;
3052		char *buf;
3053		u_int count;
3054		long *basep;
3055	} */ *uap;
3056{
3057	struct vnode *vp;
3058	struct file *fp;
3059	struct uio auio, kuio;
3060	struct iovec aiov, kiov;
3061	struct dirent *dp, *edp;
3062	caddr_t dirbuf;
3063	int error, eofflag, readcnt;
3064	long loff;
3065
3066	/* XXX arbitrary sanity limit on `count'. */
3067	if (uap->count > 64 * 1024)
3068		return (EINVAL);
3069	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3070		return (error);
3071	if ((fp->f_flag & FREAD) == 0) {
3072		fdrop(fp, td);
3073		return (EBADF);
3074	}
3075	vp = fp->f_data;
	/* Re-entered whenever the read is retried on a different vnode. */
3076unionread:
3077	if (vp->v_type != VDIR) {
3078		fdrop(fp, td);
3079		return (EINVAL);
3080	}
	/* auio describes the caller's buffer in user space. */
3081	aiov.iov_base = uap->buf;
3082	aiov.iov_len = uap->count;
3083	auio.uio_iov = &aiov;
3084	auio.uio_iovcnt = 1;
3085	auio.uio_rw = UIO_READ;
3086	auio.uio_segflg = UIO_USERSPACE;
3087	auio.uio_td = td;
3088	auio.uio_resid = uap->count;
3089	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* loff remembers the starting offset for the copyout to basep. */
3090	loff = auio.uio_offset = fp->f_offset;
3091#ifdef MAC
3092	error = mac_check_vnode_readdir(td->td_ucred, vp);
3093	if (error) {
3094		VOP_UNLOCK(vp, 0, td);
3095		fdrop(fp, td);
3096		return (error);
3097	}
3098#endif
	/*
	 * When BYTE_ORDER is not LITTLE_ENDIAN and mnt_maxsymlinklen <= 0,
	 * the entries are read straight into the user buffer.  In every
	 * other case they are read into a temporary kernel buffer, fixed up
	 * entry by entry, and then copied out with uiomove().
	 */
3099#	if (BYTE_ORDER != LITTLE_ENDIAN)
3100		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3101			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3102			    NULL, NULL);
3103			fp->f_offset = auio.uio_offset;
3104		} else
3105#	endif
3106	{
		/* kuio is a copy of auio redirected at the kernel buffer. */
3107		kuio = auio;
3108		kuio.uio_iov = &kiov;
3109		kuio.uio_segflg = UIO_SYSSPACE;
3110		kiov.iov_len = uap->count;
3111		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3112		kiov.iov_base = dirbuf;
3113		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3114			    NULL, NULL);
3115		fp->f_offset = kuio.uio_offset;
3116		if (error == 0) {
3117			readcnt = uap->count - kuio.uio_resid;
3118			edp = (struct dirent *)&dirbuf[readcnt];
3119			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3120#				if (BYTE_ORDER == LITTLE_ENDIAN)
3121					/*
3122					 * The expected low byte of
3123					 * dp->d_namlen is our dp->d_type.
3124					 * The high MBZ byte of dp->d_namlen
3125					 * is our dp->d_namlen.
3126					 */
3127					dp->d_type = dp->d_namlen;
3128					dp->d_namlen = 0;
3129#				else
3130					/*
3131					 * The dp->d_type is the high byte
3132					 * of the expected dp->d_namlen,
3133					 * so must be zero'ed.
3134					 */
3135					dp->d_type = 0;
3136#				endif
				/*
				 * Step to the next record; a record length of
				 * zero would never advance, so it ends the
				 * loop with EIO.
				 */
3137				if (dp->d_reclen > 0) {
3138					dp = (struct dirent *)
3139					    ((char *)dp + dp->d_reclen);
3140				} else {
3141					error = EIO;
3142					break;
3143				}
3144			}
			/* Copy out only if the whole buffer was walked. */
3145			if (dp >= edp)
3146				error = uiomove(dirbuf, readcnt, &auio);
3147		}
3148		FREE(dirbuf, M_TEMP);
3149	}
3150	VOP_UNLOCK(vp, 0, td);
3151	if (error) {
3152		fdrop(fp, td);
3153		return (error);
3154	}
	/*
	 * Nothing was transferred.  Give union_dircheckp (if set) a chance
	 * to switch vnodes (-1 means retry), then, if vp is the root of a
	 * MNT_UNION mount, continue with the covered vnode from offset 0.
	 */
3155	if (uap->count == auio.uio_resid) {
3156		if (union_dircheckp) {
3157			error = union_dircheckp(td, &vp, fp);
3158			if (error == -1)
3159				goto unionread;
3160			if (error) {
3161				fdrop(fp, td);
3162				return (error);
3163			}
3164		}
3165		/*
3166		 * XXX We could delay dropping the lock above but
3167		 * union_dircheckp complicates things.
3168		 */
3169		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3170		if ((vp->v_vflag & VV_ROOT) &&
3171		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3172			struct vnode *tvp = vp;
3173			vp = vp->v_mount->mnt_vnodecovered;
3174			VREF(vp);
3175			fp->f_data = vp;
3176			fp->f_offset = 0;
3177			vput(tvp);
3178			goto unionread;
3179		}
3180		VOP_UNLOCK(vp, 0, td);
3181	}
3182	error = copyout(&loff, uap->basep, sizeof(long));
3183	fdrop(fp, td);
3184	td->td_retval[0] = uap->count - auio.uio_resid;
3185	return (error);
3186}
3187#endif /* COMPAT_43 */
3188
3189/*
3190 * Read a block of directory entries in a filesystem independent format.
3191 */
/*
 * The descriptor must be open for reading (EBADF otherwise) and must
 * refer to a directory (EINVAL otherwise).
 *
 * On success td->td_retval[0] is uap->count minus the residual of the
 * uio.  If uap->basep is not NULL, the file offset from before the read
 * is copied out to it and the copyout() result becomes the return value.
 */
3192#ifndef _SYS_SYSPROTO_H_
3193struct getdirentries_args {
3194	int	fd;
3195	char	*buf;
3196	u_int	count;
3197	long	*basep;
3198};
3199#endif
3200int
3201getdirentries(td, uap)
3202	struct thread *td;
3203	register struct getdirentries_args /* {
3204		int fd;
3205		char *buf;
3206		u_int count;
3207		long *basep;
3208	} */ *uap;
3209{
3210	struct vnode *vp;
3211	struct file *fp;
3212	struct uio auio;
3213	struct iovec aiov;
3214	long loff;
3215	int error, eofflag;
3216
3217	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3218		return (error);
3219	if ((fp->f_flag & FREAD) == 0) {
3220		fdrop(fp, td);
3221		return (EBADF);
3222	}
3223	vp = fp->f_data;
	/* Re-entered whenever the read is retried on a different vnode. */
3224unionread:
3225	if (vp->v_type != VDIR) {
3226		fdrop(fp, td);
3227		return (EINVAL);
3228	}
	/* Entries are read directly into the caller's user-space buffer. */
3229	aiov.iov_base = uap->buf;
3230	aiov.iov_len = uap->count;
3231	auio.uio_iov = &aiov;
3232	auio.uio_iovcnt = 1;
3233	auio.uio_rw = UIO_READ;
3234	auio.uio_segflg = UIO_USERSPACE;
3235	auio.uio_td = td;
3236	auio.uio_resid = uap->count;
3237	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3238	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* loff remembers the starting offset for the copyout to basep. */
3239	loff = auio.uio_offset = fp->f_offset;
	/* In MAC kernels VOP_READDIR() runs only if the MAC check passes. */
3240#ifdef MAC
3241	error = mac_check_vnode_readdir(td->td_ucred, vp);
3242	if (error == 0)
3243#endif
3244		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3245		    NULL);
3246	fp->f_offset = auio.uio_offset;
3247	VOP_UNLOCK(vp, 0, td);
3248	if (error) {
3249		fdrop(fp, td);
3250		return (error);
3251	}
	/*
	 * Nothing was transferred.  Give union_dircheckp (if set) a chance
	 * to switch vnodes (-1 means retry), then, if vp is the root of a
	 * MNT_UNION mount, continue with the covered vnode from offset 0.
	 */
3252	if (uap->count == auio.uio_resid) {
3253		if (union_dircheckp) {
3254			error = union_dircheckp(td, &vp, fp);
3255			if (error == -1)
3256				goto unionread;
3257			if (error) {
3258				fdrop(fp, td);
3259				return (error);
3260			}
3261		}
3262		/*
3263		 * XXX We could delay dropping the lock above but
3264		 * union_dircheckp complicates things.
3265		 */
3266		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3267		if ((vp->v_vflag & VV_ROOT) &&
3268		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3269			struct vnode *tvp = vp;
3270			vp = vp->v_mount->mnt_vnodecovered;
3271			VREF(vp);
3272			fp->f_data = vp;
3273			fp->f_offset = 0;
3274			vput(tvp);
3275			goto unionread;
3276		}
3277		VOP_UNLOCK(vp, 0, td);
3278	}
	/* basep is optional here; getdents() passes NULL. */
3279	if (uap->basep != NULL) {
3280		error = copyout(&loff, uap->basep, sizeof(long));
3281	}
3282	td->td_retval[0] = uap->count - auio.uio_resid;
3283	fdrop(fp, td);
3284	return (error);
3285}
3286#ifndef _SYS_SYSPROTO_H_
3287struct getdents_args {
3288	int fd;
3289	char *buf;
3290	size_t count;
3291};
3292#endif
3293int
3294getdents(td, uap)
3295	struct thread *td;
3296	register struct getdents_args /* {
3297		int fd;
3298		char *buf;
3299		u_int count;
3300	} */ *uap;
3301{
3302	struct getdirentries_args ap;
3303	ap.fd = uap->fd;
3304	ap.buf = uap->buf;
3305	ap.count = uap->count;
3306	ap.basep = NULL;
3307	return getdirentries(td, &ap);
3308}
3309
3310/*
3311 * Set the mode mask for creation of filesystem nodes.
3312 *
3313 * MP SAFE
3314 */
3315#ifndef _SYS_SYSPROTO_H_
3316struct umask_args {
3317	int	newmask;
3318};
3319#endif
3320int
3321umask(td, uap)
3322	struct thread *td;
3323	struct umask_args /* {
3324		int newmask;
3325	} */ *uap;
3326{
3327	register struct filedesc *fdp;
3328
3329	FILEDESC_LOCK(td->td_proc->p_fd);
3330	fdp = td->td_proc->p_fd;
3331	td->td_retval[0] = fdp->fd_cmask;
3332	fdp->fd_cmask = uap->newmask & ALLPERMS;
3333	FILEDESC_UNLOCK(td->td_proc->p_fd);
3334	return (0);
3335}
3336
3337/*
3338 * Void all references to file by ripping underlying filesystem
3339 * away from vnode.
3340 */
/*
 * Only character devices (VCHR) are accepted; anything else gets EINVAL.
 * A caller whose uid differs from the owner reported by VOP_GETATTR()
 * must pass suser_cred(..., PRISON_ROOT).  Returns 0 or an errno value.
 */
3341#ifndef _SYS_SYSPROTO_H_
3342struct revoke_args {
3343	char	*path;
3344};
3345#endif
3346/* ARGSUSED */
3347int
3348revoke(td, uap)
3349	struct thread *td;
3350	register struct revoke_args /* {
3351		char *path;
3352	} */ *uap;
3353{
3354	struct mount *mp;
3355	struct vnode *vp;
3356	struct vattr vattr;
3357	int error;
3358	struct nameidata nd;
3359
3360	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3361	if ((error = namei(&nd)) != 0)
3362		return (error);
3363	vp = nd.ni_vp;
3364	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* The early exits below use vput() to release vp. */
3365	if (vp->v_type != VCHR) {
3366		vput(vp);
3367		return (EINVAL);
3368	}
3369#ifdef MAC
3370	error = mac_check_vnode_revoke(td->td_ucred, vp);
3371	if (error) {
3372		vput(vp);
3373		return (error);
3374	}
3375#endif
3376	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3377	if (error) {
3378		vput(vp);
3379		return (error);
3380	}
	/* From here on vp is unlocked, so exits go through vrele() at `out'. */
3381	VOP_UNLOCK(vp, 0, td);
3382	if (td->td_ucred->cr_uid != vattr.va_uid) {
3383		error = suser_cred(td->td_ucred, PRISON_ROOT);
3384		if (error)
3385			goto out;
3386	}
3387	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3388		goto out;
	/* VOP_REVOKE()'s result is not examined; error stays 0 here. */
3389	if (vcount(vp) > 1)
3390		VOP_REVOKE(vp, REVOKEALL);
3391	vn_finished_write(mp);
3392out:
3393	vrele(vp);
3394	return (error);
3395}
3396
3397/*
3398 * Convert a user file descriptor to a kernel file entry.
3399 * The file entry is locked upon returning.
3400 */
3401int
3402getvnode(fdp, fd, fpp)
3403	struct filedesc *fdp;
3404	int fd;
3405	struct file **fpp;
3406{
3407	int error;
3408	struct file *fp;
3409
3410	fp = NULL;
3411	if (fdp == NULL)
3412		error = EBADF;
3413	else {
3414		FILEDESC_LOCK(fdp);
3415		if ((u_int)fd >= fdp->fd_nfiles ||
3416		    (fp = fdp->fd_ofiles[fd]) == NULL)
3417			error = EBADF;
3418		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3419			fp = NULL;
3420			error = EINVAL;
3421		} else {
3422			fhold(fp);
3423			error = 0;
3424		}
3425		FILEDESC_UNLOCK(fdp);
3426	}
3427	*fpp = fp;
3428	return (error);
3429}
3430/*
3431 * Get (NFS) file handle
3432 */
3433#ifndef _SYS_SYSPROTO_H_
3434struct getfh_args {
3435	char	*fname;
3436	fhandle_t *fhp;
3437};
3438#endif
3439int
3440getfh(td, uap)
3441	struct thread *td;
3442	register struct getfh_args *uap;
3443{
3444	struct nameidata nd;
3445	fhandle_t fh;
3446	register struct vnode *vp;
3447	int error;
3448
3449	/*
3450	 * Must be super user
3451	 */
3452	error = suser(td);
3453	if (error)
3454		return (error);
3455	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3456	error = namei(&nd);
3457	if (error)
3458		return (error);
3459	NDFREE(&nd, NDF_ONLY_PNBUF);
3460	vp = nd.ni_vp;
3461	bzero(&fh, sizeof(fh));
3462	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3463	error = VFS_VPTOFH(vp, &fh.fh_fid);
3464	vput(vp);
3465	if (error)
3466		return (error);
3467	error = copyout(&fh, uap->fhp, sizeof (fh));
3468	return (error);
3469}
3470
3471/*
3472 * syscall for the rpc.lockd to use to translate a NFS file handle into
3473 * an open descriptor.
3474 *
3475 * warning: do not remove the suser() call or this becomes one giant
3476 * security hole.
3477 */
/*
 * On success the new descriptor index is stored in td->td_retval[0] and
 * 0 is returned.  Flags must include FREAD or FWRITE and must not
 * include O_CREAT (EINVAL otherwise); ESTALE is returned when the
 * handle's fsid matches no mount.
 */
3478#ifndef _SYS_SYSPROTO_H_
3479struct fhopen_args {
3480	const struct fhandle *u_fhp;
3481	int flags;
3482};
3483#endif
3484int
3485fhopen(td, uap)
3486	struct thread *td;
3487	struct fhopen_args /* {
3488		const struct fhandle *u_fhp;
3489		int flags;
3490	} */ *uap;
3491{
3492	struct proc *p = td->td_proc;
3493	struct mount *mp;
3494	struct vnode *vp;
3495	struct fhandle fhp;
3496	struct vattr vat;
3497	struct vattr *vap = &vat;
3498	struct flock lf;
3499	struct file *fp;
3500	register struct filedesc *fdp = p->p_fd;
3501	int fmode, mode, error, type;
3502	struct file *nfp;
3503	int indx;
3504
3505	/*
3506	 * Must be super user
3507	 */
3508	error = suser(td);
3509	if (error)
3510		return (error);
3511
3512	fmode = FFLAGS(uap->flags);
3513	/* why not allow a non-read/write open for our lockd? */
3514	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3515		return (EINVAL);
3516	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3517	if (error)
3518		return(error);
3519	/* find the mount point */
3520	mp = vfs_getvfs(&fhp.fh_fsid);
3521	if (mp == NULL)
3522		return (ESTALE);
3523	/* now give me my vnode, it gets returned to me locked */
3524	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3525	if (error)
3526		return (error);
3527 	/*
3528	 * from now on we have to make sure not
3529	 * to forget about the vnode
3530	 * any error that causes an abort must vput(vp)
3531	 * just set error = err and 'goto bad;'.
3532	 */
3533
3534	/*
3535	 * from vn_open
3536	 */
3537	if (vp->v_type == VLNK) {
3538		error = EMLINK;
3539		goto bad;
3540	}
3541	if (vp->v_type == VSOCK) {
3542		error = EOPNOTSUPP;
3543		goto bad;
3544	}
	/* Build the access mode to check from the open flags. */
3545	mode = 0;
3546	if (fmode & (FWRITE | O_TRUNC)) {
3547		if (vp->v_type == VDIR) {
3548			error = EISDIR;
3549			goto bad;
3550		}
3551		error = vn_writechk(vp);
3552		if (error)
3553			goto bad;
3554		mode |= VWRITE;
3555	}
3556	if (fmode & FREAD)
3557		mode |= VREAD;
3558	if (fmode & O_APPEND)
3559		mode |= VAPPEND;
3560#ifdef MAC
3561	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3562	if (error)
3563		goto bad;
3564#endif
3565	if (mode) {
3566		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3567		if (error)
3568			goto bad;
3569	}
	/*
	 * O_TRUNC: the vnode is unlocked around vn_start_write() and locked
	 * again before the size is set to 0.  If vn_start_write() fails the
	 * vnode is unlocked at that point, so vrele() is used rather than
	 * the vput() at `bad'.
	 */
3570	if (fmode & O_TRUNC) {
3571		VOP_UNLOCK(vp, 0, td);				/* XXX */
3572		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3573			vrele(vp);
3574			return (error);
3575		}
3576		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3577		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3578#ifdef MAC
3579		/*
3580		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3581		 * should be right.
3582		 */
3583		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3584		if (error == 0) {
3585#endif
3586			VATTR_NULL(vap);
3587			vap->va_size = 0;
3588			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3589#ifdef MAC
3590		}
3591#endif
3592		vn_finished_write(mp);
3593		if (error)
3594			goto bad;
3595	}
3596	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3597	if (error)
3598		goto bad;
3599	/*
3600	 * Make sure that a VM object is created for VMIO support.
3601	 */
3602	if (vn_canvmio(vp) == TRUE) {
3603		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3604			goto bad;
3605	}
3606	if (fmode & FWRITE)
3607		vp->v_writecount++;
3608
3609	/*
3610	 * end of vn_open code
3611	 */
3612
	/* If no file entry can be allocated, undo the writecount bump. */
3613	if ((error = falloc(td, &nfp, &indx)) != 0) {
3614		if (fmode & FWRITE)
3615			vp->v_writecount--;
3616		goto bad;
3617	}
3618	fp = nfp;
3619
3620	/*
3621	 * Hold an extra reference to avoid having fp ripped out
3622	 * from under us while we block in the lock op
3623	 */
3624	fhold(fp);
3625	nfp->f_data = vp;
3626	nfp->f_flag = fmode & FMASK;
3627	nfp->f_ops = &vnops;
3628	nfp->f_type = DTYPE_VNODE;
	/* O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock. */
3629	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3630		lf.l_whence = SEEK_SET;
3631		lf.l_start = 0;
3632		lf.l_len = 0;
3633		if (fmode & O_EXLOCK)
3634			lf.l_type = F_WRLCK;
3635		else
3636			lf.l_type = F_RDLCK;
3637		type = F_FLOCK;
3638		if ((fmode & FNONBLOCK) == 0)
3639			type |= F_WAIT;
		/* The vnode is unlocked for the duration of the lock request. */
3640		VOP_UNLOCK(vp, 0, td);
3641		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3642			    type)) != 0) {
3643			/*
3644			 * The lock request failed.  Normally close the
3645			 * descriptor but handle the case where someone might
3646			 * have dup()d or close()d it when we weren't looking.
3647			 */
3648			FILEDESC_LOCK(fdp);
3649			if (fdp->fd_ofiles[indx] == fp) {
3650				fdp->fd_ofiles[indx] = NULL;
3651				FILEDESC_UNLOCK(fdp);
3652				fdrop(fp, td);
3653			} else
3654				FILEDESC_UNLOCK(fdp);
3655			/*
3656			 * release our private reference
3657			 */
3658			fdrop(fp, td);
3659			return(error);
3660		}
3661		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3662		fp->f_flag |= FHASLOCK;
3663	}
3664	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3665		vfs_object_create(vp, td, td->td_ucred);
3666
	/* Success: unlock the vnode and drop the extra hold taken above. */
3667	VOP_UNLOCK(vp, 0, td);
3668	fdrop(fp, td);
3669	td->td_retval[0] = indx;
3670	return (0);
3671
3672bad:
3673	vput(vp);
3674	return (error);
3675}
3676
3677/*
3678 * Stat an (NFS) file handle.
3679 */
3680#ifndef _SYS_SYSPROTO_H_
3681struct fhstat_args {
3682	struct fhandle *u_fhp;
3683	struct stat *sb;
3684};
3685#endif
3686int
3687fhstat(td, uap)
3688	struct thread *td;
3689	register struct fhstat_args /* {
3690		struct fhandle *u_fhp;
3691		struct stat *sb;
3692	} */ *uap;
3693{
3694	struct stat sb;
3695	fhandle_t fh;
3696	struct mount *mp;
3697	struct vnode *vp;
3698	int error;
3699
3700	/*
3701	 * Must be super user
3702	 */
3703	error = suser(td);
3704	if (error)
3705		return (error);
3706
3707	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3708	if (error)
3709		return (error);
3710
3711	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3712		return (ESTALE);
3713	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3714		return (error);
3715	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3716	vput(vp);
3717	if (error)
3718		return (error);
3719	error = copyout(&sb, uap->sb, sizeof(sb));
3720	return (error);
3721}
3722
3723/*
3724 * Implement fstatfs() for (NFS) file handles.
3725 */
/*
 * The handle is copied in from uap->u_fhp and resolved to a vnode only to
 * find its mount; the mount's statistics are refreshed with VFS_STATFS()
 * and copied out to uap->buf.  Returns 0, ESTALE if the handle's fsid
 * matches no mount, or another errno value.
 */
3726#ifndef _SYS_SYSPROTO_H_
3727struct fhstatfs_args {
3728	struct fhandle *u_fhp;
3729	struct statfs *buf;
3730};
3731#endif
3732int
3733fhstatfs(td, uap)
3734	struct thread *td;
3735	struct fhstatfs_args /* {
3736		struct fhandle *u_fhp;
3737		struct statfs *buf;
3738	} */ *uap;
3739{
3740	struct statfs *sp;
3741	struct mount *mp;
3742	struct vnode *vp;
3743	struct statfs sb;
3744	fhandle_t fh;
3745	int error;
3746
3747	/*
3748	 * Must be super user
3749	 */
3750	error = suser(td);
3751	if (error)
3752		return (error);
3753
3754	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3755		return (error);
3756
3757	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3758		return (ESTALE);
3759	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3760		return (error);
	/* Switch to the vnode's own mount, then release the vnode. */
3761	mp = vp->v_mount;
3762	sp = &mp->mnt_stat;
3763	vput(vp);
3764#ifdef MAC
3765	error = mac_check_mount_stat(td->td_ucred, mp);
3766	if (error)
3767		return (error);
3768#endif
3769	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3770		return (error);
3771	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/*
	 * If suser() fails, copy out a private copy with the fsid zeroed.
	 * NOTE(review): suser(td) already had to succeed at the top of this
	 * function, so this branch looks unreachable unless the result can
	 * change between the two calls; confirm.
	 */
3772	if (suser(td)) {
3773		bcopy(sp, &sb, sizeof(sb));
3774		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3775		sp = &sb;
3776	}
3777	return (copyout(sp, uap->buf, sizeof(*sp)));
3778}
3779
3780/*
3781 * Syscall to push extended attribute configuration information into the
3782 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3783 * a command (int cmd), and attribute name and misc data.  For now, the
3784 * attribute name is left in userspace for consumption by the VFS_op.
3785 * It will probably be changed to be copied into sysspace by the
3786 * syscall in the future, once issues with various consumers of the
3787 * attribute code have raised their hands.
3788 *
3789 * Currently this is used only by UFS Extended Attributes.
3790 */
/*
 * NOTE(review): the comment above says the attribute name is left in
 * userspace, but the code below copies it into a kernel buffer with
 * copyinstr() and passes that buffer (or NULL) to VFS_EXTATTRCTL().
 *
 * Returns 0 or an errno value from copyinstr(), namei(),
 * vn_start_write() or VFS_EXTATTRCTL().
 */
3791int
3792extattrctl(td, uap)
3793	struct thread *td;
3794	struct extattrctl_args /* {
3795		const char *path;
3796		int cmd;
3797		const char *filename;
3798		int attrnamespace;
3799		const char *attrname;
3800	} */ *uap;
3801{
3802	struct vnode *filename_vp;
3803	struct nameidata nd;
3804	struct mount *mp, *mp_writable;
3805	char attrname[EXTATTR_MAXNAMELEN];
3806	int error;
3807
3808	/*
3809	 * uap->attrname is not always defined.  We check again later when we
3810	 * invoke the VFS call so as to pass in NULL there if needed.
3811	 */
3812	if (uap->attrname != NULL) {
3813		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3814		    NULL);
3815		if (error)
3816			return (error);
3817	}
3818
3819	/*
3820	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3821	 * which VFS_EXTATTRCTL() will later release.
3822	 */
3823	filename_vp = NULL;
3824	if (uap->filename != NULL) {
3825		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3826		    uap->filename, td);
3827		error = namei(&nd);
3828		if (error)
3829			return (error);
3830		filename_vp = nd.ni_vp;
3831		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3832	}
3833
	/* The error exits below vput() filename_vp when it was looked up. */
3834	/* uap->path is always defined. */
3835	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3836	error = namei(&nd);
3837	if (error) {
3838		if (filename_vp != NULL)
3839			vput(filename_vp);
3840		return (error);
3841	}
	/*
	 * mp (the mount that receives the command) is taken from the vnode
	 * directly; mp_writable is whatever vn_start_write() hands back and
	 * is the one later passed to vn_finished_write().
	 */
3842	mp = nd.ni_vp->v_mount;
3843	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3844	NDFREE(&nd, 0);
3845	if (error) {
3846		if (filename_vp != NULL)
3847			vput(filename_vp);
3848		return (error);
3849	}
3850
3851	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3852	    uap->attrname != NULL ? attrname : NULL, td);
3853
3854	vn_finished_write(mp_writable);
3855	/*
3856	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3857	 * filename_vp, so vrele it if it is defined.
3858	 */
3859	if (filename_vp != NULL)
3860		vrele(filename_vp);
3861	return (error);
3862}
3863
3864/*-
3865 * Set a named extended attribute on a file or directory
3866 *
3867 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3868 *            kernelspace string pointer "attrname", userspace buffer
3869 *            pointer "data", buffer length "nbytes", thread "td".
3870 * Returns: 0 on success, an error number otherwise
3871 * Locks: none
3872 * References: vp must be a valid reference for the duration of the call
3873 */
3874static int
3875extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3876    void *data, size_t nbytes, struct thread *td)
3877{
3878	struct mount *mp;
3879	struct uio auio;
3880	struct iovec aiov;
3881	ssize_t cnt;
3882	int error;
3883
3884	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3885	if (error)
3886		return (error);
3887	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3888	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3889
3890	aiov.iov_base = data;
3891	aiov.iov_len = nbytes;
3892	auio.uio_iov = &aiov;
3893	auio.uio_iovcnt = 1;
3894	auio.uio_offset = 0;
3895	if (nbytes > INT_MAX) {
3896		error = EINVAL;
3897		goto done;
3898	}
3899	auio.uio_resid = nbytes;
3900	auio.uio_rw = UIO_WRITE;
3901	auio.uio_segflg = UIO_USERSPACE;
3902	auio.uio_td = td;
3903	cnt = nbytes;
3904
3905#ifdef MAC
3906	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3907	    attrname, &auio);
3908	if (error)
3909		goto done;
3910#endif
3911
3912	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3913	    td->td_ucred, td);
3914	cnt -= auio.uio_resid;
3915	td->td_retval[0] = cnt;
3916
3917done:
3918	VOP_UNLOCK(vp, 0, td);
3919	vn_finished_write(mp);
3920	return (error);
3921}
3922
3923int
3924extattr_set_fd(td, uap)
3925	struct thread *td;
3926	struct extattr_set_fd_args /* {
3927		int fd;
3928		int attrnamespace;
3929		const char *attrname;
3930		void *data;
3931		size_t nbytes;
3932	} */ *uap;
3933{
3934	struct file *fp;
3935	char attrname[EXTATTR_MAXNAMELEN];
3936	int error;
3937
3938	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3939	if (error)
3940		return (error);
3941
3942	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3943	if (error)
3944		return (error);
3945
3946	error = extattr_set_vp(fp->f_data, uap->attrnamespace,
3947	    attrname, uap->data, uap->nbytes, td);
3948	fdrop(fp, td);
3949
3950	return (error);
3951}
3952
3953int
3954extattr_set_file(td, uap)
3955	struct thread *td;
3956	struct extattr_set_file_args /* {
3957		const char *path;
3958		int attrnamespace;
3959		const char *attrname;
3960		void *data;
3961		size_t nbytes;
3962	} */ *uap;
3963{
3964	struct nameidata nd;
3965	char attrname[EXTATTR_MAXNAMELEN];
3966	int error;
3967
3968	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3969	if (error)
3970		return (error);
3971
3972	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3973	error = namei(&nd);
3974	if (error)
3975		return (error);
3976	NDFREE(&nd, NDF_ONLY_PNBUF);
3977
3978	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3979	    uap->data, uap->nbytes, td);
3980
3981	vrele(nd.ni_vp);
3982	return (error);
3983}
3984
3985int
3986extattr_set_link(td, uap)
3987	struct thread *td;
3988	struct extattr_set_link_args /* {
3989		const char *path;
3990		int attrnamespace;
3991		const char *attrname;
3992		void *data;
3993		size_t nbytes;
3994	} */ *uap;
3995{
3996	struct nameidata nd;
3997	char attrname[EXTATTR_MAXNAMELEN];
3998	int error;
3999
4000	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4001	if (error)
4002		return (error);
4003
4004	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4005	error = namei(&nd);
4006	if (error)
4007		return (error);
4008	NDFREE(&nd, NDF_ONLY_PNBUF);
4009
4010	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4011	    uap->data, uap->nbytes, td);
4012
4013	vrele(nd.ni_vp);
4014	return (error);
4015}
4016
4017/*-
4018 * Get a named extended attribute on a file or directory
4019 *
4020 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4021 *            kernelspace string pointer "attrname", userspace buffer
4022 *            pointer "data", buffer length "nbytes", thread "td".
4023 * Returns: 0 on success, an error number otherwise
4024 * Locks: none
4025 * References: vp must be a valid reference for the duration of the call
4026 */
4027static int
4028extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4029    void *data, size_t nbytes, struct thread *td)
4030{
4031	struct uio auio, *auiop;
4032	struct iovec aiov;
4033	ssize_t cnt;
4034	size_t size, *sizep;
4035	int error;
4036
4037	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4038	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4039
4040	/*
4041	 * Slightly unusual semantics: if the user provides a NULL data
4042	 * pointer, they don't want to receive the data, just the
4043	 * maximum read length.
4044	 */
4045	auiop = NULL;
4046	sizep = NULL;
4047	cnt = 0;
4048	if (data != NULL) {
4049		aiov.iov_base = data;
4050		aiov.iov_len = nbytes;
4051		auio.uio_iov = &aiov;
4052		auio.uio_offset = 0;
4053		if (nbytes > INT_MAX) {
4054			error = EINVAL;
4055			goto done;
4056		}
4057		auio.uio_resid = nbytes;
4058		auio.uio_rw = UIO_READ;
4059		auio.uio_segflg = UIO_USERSPACE;
4060		auio.uio_td = td;
4061		auiop = &auio;
4062		cnt = nbytes;
4063	} else
4064		sizep = &size;
4065
4066#ifdef MAC
4067	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4068	    attrname, &auio);
4069	if (error)
4070		goto done;
4071#endif
4072
4073	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4074	    td->td_ucred, td);
4075
4076	if (auiop != NULL) {
4077		cnt -= auio.uio_resid;
4078		td->td_retval[0] = cnt;
4079	} else
4080		td->td_retval[0] = size;
4081
4082done:
4083	VOP_UNLOCK(vp, 0, td);
4084	return (error);
4085}
4086
4087int
4088extattr_get_fd(td, uap)
4089	struct thread *td;
4090	struct extattr_get_fd_args /* {
4091		int fd;
4092		int attrnamespace;
4093		const char *attrname;
4094		void *data;
4095		size_t nbytes;
4096	} */ *uap;
4097{
4098	struct file *fp;
4099	char attrname[EXTATTR_MAXNAMELEN];
4100	int error;
4101
4102	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4103	if (error)
4104		return (error);
4105
4106	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4107	if (error)
4108		return (error);
4109
4110	error = extattr_get_vp(fp->f_data, uap->attrnamespace,
4111	    attrname, uap->data, uap->nbytes, td);
4112
4113	fdrop(fp, td);
4114	return (error);
4115}
4116
4117int
4118extattr_get_file(td, uap)
4119	struct thread *td;
4120	struct extattr_get_file_args /* {
4121		const char *path;
4122		int attrnamespace;
4123		const char *attrname;
4124		void *data;
4125		size_t nbytes;
4126	} */ *uap;
4127{
4128	struct nameidata nd;
4129	char attrname[EXTATTR_MAXNAMELEN];
4130	int error;
4131
4132	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4133	if (error)
4134		return (error);
4135
4136	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4137	error = namei(&nd);
4138	if (error)
4139		return (error);
4140	NDFREE(&nd, NDF_ONLY_PNBUF);
4141
4142	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4143	    uap->data, uap->nbytes, td);
4144
4145	vrele(nd.ni_vp);
4146	return (error);
4147}
4148
4149int
4150extattr_get_link(td, uap)
4151	struct thread *td;
4152	struct extattr_get_link_args /* {
4153		const char *path;
4154		int attrnamespace;
4155		const char *attrname;
4156		void *data;
4157		size_t nbytes;
4158	} */ *uap;
4159{
4160	struct nameidata nd;
4161	char attrname[EXTATTR_MAXNAMELEN];
4162	int error;
4163
4164	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4165	if (error)
4166		return (error);
4167
4168	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4169	error = namei(&nd);
4170	if (error)
4171		return (error);
4172	NDFREE(&nd, NDF_ONLY_PNBUF);
4173
4174	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4175	    uap->data, uap->nbytes, td);
4176
4177	vrele(nd.ni_vp);
4178	return (error);
4179}
4180
4181/*
4182 * extattr_delete_vp(): Delete a named extended attribute on a file or
4183 *                      directory
4184 *
4185 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4186 *            kernelspace string pointer "attrname", proc "p"
4187 * Returns: 0 on success, an error number otherwise
4188 * Locks: none
4189 * References: vp must be a valid reference for the duration of the call
4190 */
4191static int
4192extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4193    struct thread *td)
4194{
4195	struct mount *mp;
4196	int error;
4197
4198	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4199	if (error)
4200		return (error);
4201	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4202	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4203
4204#ifdef MAC
4205	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4206	    attrname, NULL);
4207	if (error)
4208		goto done;
4209#endif
4210
4211	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4212	    td);
4213#ifdef MAC
4214done:
4215#endif
4216	VOP_UNLOCK(vp, 0, td);
4217	vn_finished_write(mp);
4218	return (error);
4219}
4220
4221int
4222extattr_delete_fd(td, uap)
4223	struct thread *td;
4224	struct extattr_delete_fd_args /* {
4225		int fd;
4226		int attrnamespace;
4227		const char *attrname;
4228	} */ *uap;
4229{
4230	struct file *fp;
4231	struct vnode *vp;
4232	char attrname[EXTATTR_MAXNAMELEN];
4233	int error;
4234
4235	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4236	if (error)
4237		return (error);
4238
4239	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4240	if (error)
4241		return (error);
4242	vp = fp->f_data;
4243
4244	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4245	fdrop(fp, td);
4246	return (error);
4247}
4248
4249int
4250extattr_delete_file(td, uap)
4251	struct thread *td;
4252	struct extattr_delete_file_args /* {
4253		const char *path;
4254		int attrnamespace;
4255		const char *attrname;
4256	} */ *uap;
4257{
4258	struct nameidata nd;
4259	char attrname[EXTATTR_MAXNAMELEN];
4260	int error;
4261
4262	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4263	if (error)
4264		return(error);
4265
4266	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4267	error = namei(&nd);
4268	if (error)
4269		return(error);
4270	NDFREE(&nd, NDF_ONLY_PNBUF);
4271
4272	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4273	vrele(nd.ni_vp);
4274	return(error);
4275}
4276
4277int
4278extattr_delete_link(td, uap)
4279	struct thread *td;
4280	struct extattr_delete_link_args /* {
4281		const char *path;
4282		int attrnamespace;
4283		const char *attrname;
4284	} */ *uap;
4285{
4286	struct nameidata nd;
4287	char attrname[EXTATTR_MAXNAMELEN];
4288	int error;
4289
4290	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4291	if (error)
4292		return(error);
4293
4294	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4295	error = namei(&nd);
4296	if (error)
4297		return(error);
4298	NDFREE(&nd, NDF_ONLY_PNBUF);
4299
4300	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4301	vrele(nd.ni_vp);
4302	return(error);
4303}
4304