vfs_extattr.c revision 109123
133965Sjdp/*
2130561Sobrien * Copyright (c) 1989, 1993
333965Sjdp *	The Regents of the University of California.  All rights reserved.
433965Sjdp * (c) UNIX System Laboratories, Inc.
533965Sjdp * All or some portions of this file are derived from material licensed
633965Sjdp * to the University of California by American Telephone and Telegraph
733965Sjdp * Co. or Unix System Laboratories, Inc. and are reproduced herein with
833965Sjdp * the permission of UNIX System Laboratories, Inc.
933965Sjdp *
1033965Sjdp * Redistribution and use in source and binary forms, with or without
1133965Sjdp * modification, are permitted provided that the following conditions
1233965Sjdp * are met:
1333965Sjdp * 1. Redistributions of source code must retain the above copyright
1433965Sjdp *    notice, this list of conditions and the following disclaimer.
1533965Sjdp * 2. Redistributions in binary form must reproduce the above copyright
1633965Sjdp *    notice, this list of conditions and the following disclaimer in the
1733965Sjdp *    documentation and/or other materials provided with the distribution.
1833965Sjdp * 3. All advertising materials mentioning features or use of this software
19218822Sdim *    must display the following acknowledgement:
20218822Sdim *	This product includes software developed by the University of
2133965Sjdp *	California, Berkeley and its contributors.
2233965Sjdp * 4. Neither the name of the University nor the names of its contributors
2333965Sjdp *    may be used to endorse or promote products derived from this software
2433965Sjdp *    without specific prior written permission.
2533965Sjdp *
2633965Sjdp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2733965Sjdp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2833965Sjdp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29130561Sobrien * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3033965Sjdp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3133965Sjdp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3233965Sjdp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33130561Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34130561Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3533965Sjdp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3633965Sjdp * SUCH DAMAGE.
3733965Sjdp *
38130561Sobrien *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
3933965Sjdp * $FreeBSD: head/sys/kern/vfs_extattr.c 109123 2003-01-12 01:37:13Z dillon $
40130561Sobrien */
4133965Sjdp
42130561Sobrien/* For 4.3 integer FS ID compatibility */
43130561Sobrien#include "opt_compat.h"
4433965Sjdp#include "opt_mac.h"
45130561Sobrien
46130561Sobrien#include <sys/param.h>
4733965Sjdp#include <sys/systm.h>
4833965Sjdp#include <sys/bio.h>
4933965Sjdp#include <sys/buf.h>
50130561Sobrien#include <sys/sysent.h>
5133965Sjdp#include <sys/mac.h>
52130561Sobrien#include <sys/malloc.h>
5333965Sjdp#include <sys/mount.h>
5433965Sjdp#include <sys/mutex.h>
5533965Sjdp#include <sys/sysproto.h>
56130561Sobrien#include <sys/namei.h>
5733965Sjdp#include <sys/filedesc.h>
5833965Sjdp#include <sys/kernel.h>
59#include <sys/fcntl.h>
60#include <sys/file.h>
61#include <sys/linker.h>
62#include <sys/stat.h>
63#include <sys/sx.h>
64#include <sys/unistd.h>
65#include <sys/vnode.h>
66#include <sys/proc.h>
67#include <sys/dirent.h>
68#include <sys/extattr.h>
69#include <sys/jail.h>
70#include <sys/syscallsubr.h>
71#include <sys/sysctl.h>
72
73#include <machine/limits.h>
74#include <machine/stdarg.h>
75
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_page.h>
79#include <vm/uma.h>
80
81static int change_dir(struct nameidata *ndp, struct thread *td);
82static int chroot_refuse_vdir_fds(struct filedesc *fdp);
83static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
84static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
85static int setfmode(struct thread *td, struct vnode *, int);
86static int setfflags(struct thread *td, struct vnode *, int);
87static int setutimes(struct thread *td, struct vnode *,
88    const struct timespec *, int, int);
89static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
90    struct thread *td);
91
92int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
93int (*softdep_fsync_hook)(struct vnode *);
94
95/*
96 * The module initialization routine for POSIX asynchronous I/O will
97 * set this to the version of AIO that it implements.  (Zero means
98 * that it is not implemented.)  This value is used here by pathconf()
99 * and in kern_descrip.c by fpathconf().
100 */
101int async_io_version;
102
103/*
104 * Sync each mounted filesystem.
105 */
106#ifndef _SYS_SYSPROTO_H_
107struct sync_args {
108        int     dummy;
109};
110#endif
111
112#ifdef DEBUG
113static int syncprt = 0;
114SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
115#endif
116
117/* ARGSUSED */
118int
119sync(td, uap)
120	struct thread *td;
121	struct sync_args *uap;
122{
123	struct mount *mp, *nmp;
124	int asyncflag;
125
126	mtx_lock(&mountlist_mtx);
127	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
128		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
129			nmp = TAILQ_NEXT(mp, mnt_list);
130			continue;
131		}
132		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
133		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
134			asyncflag = mp->mnt_flag & MNT_ASYNC;
135			mp->mnt_flag &= ~MNT_ASYNC;
136			vfs_msync(mp, MNT_NOWAIT);
137			VFS_SYNC(mp, MNT_NOWAIT,
138			    ((td != NULL) ? td->td_ucred : NOCRED), td);
139			mp->mnt_flag |= asyncflag;
140			vn_finished_write(mp);
141		}
142		mtx_lock(&mountlist_mtx);
143		nmp = TAILQ_NEXT(mp, mnt_list);
144		vfs_unbusy(mp, td);
145	}
146	mtx_unlock(&mountlist_mtx);
147#if 0
148/*
149 * XXX don't call vfs_bufstats() yet because that routine
150 * was not imported in the Lite2 merge.
151 */
152#ifdef DIAGNOSTIC
153	if (syncprt)
154		vfs_bufstats();
155#endif /* DIAGNOSTIC */
156#endif
157	return (0);
158}
159
160/* XXX PRISON: could be per prison flag */
161static int prison_quotas;
162#if 0
163SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
164#endif
165
166/*
167 * Change filesystem quotas.
168 */
169#ifndef _SYS_SYSPROTO_H_
170struct quotactl_args {
171	char *path;
172	int cmd;
173	int uid;
174	caddr_t arg;
175};
176#endif
177/* ARGSUSED */
178int
179quotactl(td, uap)
180	struct thread *td;
181	register struct quotactl_args /* {
182		char *path;
183		int cmd;
184		int uid;
185		caddr_t arg;
186	} */ *uap;
187{
188	struct mount *mp;
189	int error;
190	struct nameidata nd;
191
192	if (jailed(td->td_ucred) && !prison_quotas)
193		return (EPERM);
194	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
195	if ((error = namei(&nd)) != 0)
196		return (error);
197	NDFREE(&nd, NDF_ONLY_PNBUF);
198	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
199	vrele(nd.ni_vp);
200	if (error)
201		return (error);
202	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203	vn_finished_write(mp);
204	return (error);
205}
206
207/*
208 * Get filesystem statistics.
209 */
210#ifndef _SYS_SYSPROTO_H_
211struct statfs_args {
212	char *path;
213	struct statfs *buf;
214};
215#endif
216/* ARGSUSED */
217int
218statfs(td, uap)
219	struct thread *td;
220	register struct statfs_args /* {
221		char *path;
222		struct statfs *buf;
223	} */ *uap;
224{
225	register struct mount *mp;
226	register struct statfs *sp;
227	int error;
228	struct nameidata nd;
229	struct statfs sb;
230
231	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
232	if ((error = namei(&nd)) != 0)
233		return (error);
234	mp = nd.ni_vp->v_mount;
235	sp = &mp->mnt_stat;
236	NDFREE(&nd, NDF_ONLY_PNBUF);
237	vrele(nd.ni_vp);
238#ifdef MAC
239	error = mac_check_mount_stat(td->td_ucred, mp);
240	if (error)
241		return (error);
242#endif
243	error = VFS_STATFS(mp, sp, td);
244	if (error)
245		return (error);
246	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
247	if (suser(td)) {
248		bcopy(sp, &sb, sizeof(sb));
249		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
250		sp = &sb;
251	}
252	return (copyout(sp, uap->buf, sizeof(*sp)));
253}
254
255/*
256 * Get filesystem statistics.
257 */
258#ifndef _SYS_SYSPROTO_H_
259struct fstatfs_args {
260	int fd;
261	struct statfs *buf;
262};
263#endif
264/* ARGSUSED */
265int
266fstatfs(td, uap)
267	struct thread *td;
268	register struct fstatfs_args /* {
269		int fd;
270		struct statfs *buf;
271	} */ *uap;
272{
273	struct file *fp;
274	struct mount *mp;
275	register struct statfs *sp;
276	int error;
277	struct statfs sb;
278
279	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
280		return (error);
281	mp = (fp->un_data.vnode)->v_mount;
282	fdrop(fp, td);
283	if (mp == NULL)
284		return (EBADF);
285#ifdef MAC
286	error = mac_check_mount_stat(td->td_ucred, mp);
287	if (error)
288		return (error);
289#endif
290	sp = &mp->mnt_stat;
291	error = VFS_STATFS(mp, sp, td);
292	if (error)
293		return (error);
294	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
295	if (suser(td)) {
296		bcopy(sp, &sb, sizeof(sb));
297		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
298		sp = &sb;
299	}
300	return (copyout(sp, uap->buf, sizeof(*sp)));
301}
302
303/*
304 * Get statistics on all filesystems.
305 */
306#ifndef _SYS_SYSPROTO_H_
307struct getfsstat_args {
308	struct statfs *buf;
309	long bufsize;
310	int flags;
311};
312#endif
313int
314getfsstat(td, uap)
315	struct thread *td;
316	register struct getfsstat_args /* {
317		struct statfs *buf;
318		long bufsize;
319		int flags;
320	} */ *uap;
321{
322	register struct mount *mp, *nmp;
323	register struct statfs *sp;
324	caddr_t sfsp;
325	long count, maxcount, error;
326
327	maxcount = uap->bufsize / sizeof(struct statfs);
328	sfsp = (caddr_t)uap->buf;
329	count = 0;
330	mtx_lock(&mountlist_mtx);
331	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
332#ifdef MAC
333		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
334			nmp = TAILQ_NEXT(mp, mnt_list);
335			continue;
336		}
337#endif
338		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
339			nmp = TAILQ_NEXT(mp, mnt_list);
340			continue;
341		}
342		if (sfsp && count < maxcount) {
343			sp = &mp->mnt_stat;
344			/*
345			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
346			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
347			 * overrides MNT_WAIT.
348			 */
349			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
350			    (uap->flags & MNT_WAIT)) &&
351			    (error = VFS_STATFS(mp, sp, td))) {
352				mtx_lock(&mountlist_mtx);
353				nmp = TAILQ_NEXT(mp, mnt_list);
354				vfs_unbusy(mp, td);
355				continue;
356			}
357			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
358			error = copyout(sp, sfsp, sizeof(*sp));
359			if (error) {
360				vfs_unbusy(mp, td);
361				return (error);
362			}
363			sfsp += sizeof(*sp);
364		}
365		count++;
366		mtx_lock(&mountlist_mtx);
367		nmp = TAILQ_NEXT(mp, mnt_list);
368		vfs_unbusy(mp, td);
369	}
370	mtx_unlock(&mountlist_mtx);
371	if (sfsp && count > maxcount)
372		td->td_retval[0] = maxcount;
373	else
374		td->td_retval[0] = count;
375	return (0);
376}
377
378/*
379 * Change current working directory to a given file descriptor.
380 */
381#ifndef _SYS_SYSPROTO_H_
382struct fchdir_args {
383	int	fd;
384};
385#endif
386/* ARGSUSED */
387int
388fchdir(td, uap)
389	struct thread *td;
390	struct fchdir_args /* {
391		int fd;
392	} */ *uap;
393{
394	register struct filedesc *fdp = td->td_proc->p_fd;
395	struct vnode *vp, *tdp, *vpold;
396	struct mount *mp;
397	struct file *fp;
398	int error;
399
400	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
401		return (error);
402	vp = fp->un_data.vnode;
403	VREF(vp);
404	fdrop(fp, td);
405	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
406	if (vp->v_type != VDIR)
407		error = ENOTDIR;
408#ifdef MAC
409	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
410	}
411#endif
412	else
413		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
414	while (!error && (mp = vp->v_mountedhere) != NULL) {
415		if (vfs_busy(mp, 0, 0, td))
416			continue;
417		error = VFS_ROOT(mp, &tdp);
418		vfs_unbusy(mp, td);
419		if (error)
420			break;
421		vput(vp);
422		vp = tdp;
423	}
424	if (error) {
425		vput(vp);
426		return (error);
427	}
428	VOP_UNLOCK(vp, 0, td);
429	FILEDESC_LOCK(fdp);
430	vpold = fdp->fd_cdir;
431	fdp->fd_cdir = vp;
432	FILEDESC_UNLOCK(fdp);
433	vrele(vpold);
434	return (0);
435}
436
437/*
438 * Change current working directory (``.'').
439 */
440#ifndef _SYS_SYSPROTO_H_
441struct chdir_args {
442	char	*path;
443};
444#endif
445/* ARGSUSED */
446int
447chdir(td, uap)
448	struct thread *td;
449	struct chdir_args /* {
450		char *path;
451	} */ *uap;
452{
453
454	return (kern_chdir(td, uap->path, UIO_USERSPACE));
455}
456
457int
458kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
459{
460	register struct filedesc *fdp = td->td_proc->p_fd;
461	int error;
462	struct nameidata nd;
463	struct vnode *vp;
464
465	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
466	if ((error = change_dir(&nd, td)) != 0)
467		return (error);
468	NDFREE(&nd, NDF_ONLY_PNBUF);
469	FILEDESC_LOCK(fdp);
470	vp = fdp->fd_cdir;
471	fdp->fd_cdir = nd.ni_vp;
472	FILEDESC_UNLOCK(fdp);
473	vrele(vp);
474	return (0);
475}
476
477/*
478 * Helper function for raised chroot(2) security function:  Refuse if
479 * any filedescriptors are open directories.
480 */
481static int
482chroot_refuse_vdir_fds(fdp)
483	struct filedesc *fdp;
484{
485	struct vnode *vp;
486	struct file *fp;
487	int fd;
488
489	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
490	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
491		fp = fget_locked(fdp, fd);
492		if (fp == NULL)
493			continue;
494		if (fp->f_type == DTYPE_VNODE) {
495			vp = fp->un_data.vnode;
496			if (vp->v_type == VDIR)
497				return (EPERM);
498		}
499	}
500	return (0);
501}
502
503/*
504 * This sysctl determines if we will allow a process to chroot(2) if it
505 * has a directory open:
506 *	0: disallowed for all processes.
507 *	1: allowed for processes that were not already chroot(2)'ed.
508 *	2: allowed for all processes.
509 */
510
511static int chroot_allow_open_directories = 1;
512
513SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
514     &chroot_allow_open_directories, 0, "");
515
516/*
517 * Change notion of root (``/'') directory.
518 */
519#ifndef _SYS_SYSPROTO_H_
520struct chroot_args {
521	char	*path;
522};
523#endif
524/* ARGSUSED */
525int
526chroot(td, uap)
527	struct thread *td;
528	struct chroot_args /* {
529		char *path;
530	} */ *uap;
531{
532	register struct filedesc *fdp = td->td_proc->p_fd;
533	int error;
534	struct nameidata nd;
535	struct vnode *vp;
536
537	error = suser_cred(td->td_ucred, PRISON_ROOT);
538	if (error)
539		return (error);
540	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
541	mtx_lock(&Giant);
542	if ((error = change_dir(&nd, td)) != 0)
543		goto error;
544#ifdef MAC
545	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
546		goto error;
547#endif
548	FILEDESC_LOCK(fdp);
549	if (chroot_allow_open_directories == 0 ||
550	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
551		error = chroot_refuse_vdir_fds(fdp);
552		if (error)
553			goto error_unlock;
554	}
555	vp = fdp->fd_rdir;
556	fdp->fd_rdir = nd.ni_vp;
557	if (!fdp->fd_jdir) {
558		fdp->fd_jdir = nd.ni_vp;
559                VREF(fdp->fd_jdir);
560	}
561	FILEDESC_UNLOCK(fdp);
562	NDFREE(&nd, NDF_ONLY_PNBUF);
563	vrele(vp);
564	mtx_unlock(&Giant);
565	return (0);
566error_unlock:
567	FILEDESC_UNLOCK(fdp);
568error:
569	mtx_unlock(&Giant);
570	NDFREE(&nd, 0);
571	return (error);
572}
573
574/*
575 * Common routine for chroot and chdir.
576 */
577static int
578change_dir(ndp, td)
579	register struct nameidata *ndp;
580	struct thread *td;
581{
582	struct vnode *vp;
583	int error;
584
585	error = namei(ndp);
586	if (error)
587		return (error);
588	vp = ndp->ni_vp;
589	if (vp->v_type != VDIR)
590		error = ENOTDIR;
591#ifdef MAC
592	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
593	}
594#endif
595	else
596		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
597	if (error)
598		vput(vp);
599	else
600		VOP_UNLOCK(vp, 0, td);
601	return (error);
602}
603
604/*
605 * Check permissions, allocate an open file structure,
606 * and call the device open routine if any.
607 */
608#ifndef _SYS_SYSPROTO_H_
609struct open_args {
610	char	*path;
611	int	flags;
612	int	mode;
613};
614#endif
615int
616open(td, uap)
617	struct thread *td;
618	register struct open_args /* {
619		char *path;
620		int flags;
621		int mode;
622	} */ *uap;
623{
624
625	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
626}
627
628int
629kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
630    int mode)
631{
632	struct proc *p = td->td_proc;
633	struct filedesc *fdp = p->p_fd;
634	struct file *fp;
635	struct vnode *vp;
636	struct vattr vat;
637	struct mount *mp;
638	int cmode, oflags;
639	struct file *nfp;
640	int type, indx, error;
641	struct flock lf;
642	struct nameidata nd;
643
644	if ((flags & O_ACCMODE) == O_ACCMODE)
645		return (EINVAL);
646	oflags = flags;
647	flags = FFLAGS(flags);
648	error = falloc(td, &nfp, &indx);
649	if (error)
650		return (error);
651	fp = nfp;
652	FILEDESC_LOCK(fdp);
653	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
654	FILEDESC_UNLOCK(fdp);
655	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
656	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
657	/*
658	 * Bump the ref count to prevent another process from closing
659	 * the descriptor while we are blocked in vn_open()
660	 */
661	fhold(fp);
662	error = vn_open(&nd, &flags, cmode);
663	if (error) {
664		/*
665		 * release our own reference
666		 */
667		fdrop(fp, td);
668
669		/*
670		 * handle special fdopen() case.  bleh.  dupfdopen() is
671		 * responsible for dropping the old contents of ofiles[indx]
672		 * if it succeeds.
673		 */
674		if ((error == ENODEV || error == ENXIO) &&
675		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
676		    (error =
677			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
678			td->td_retval[0] = indx;
679			return (0);
680		}
681		/*
682		 * Clean up the descriptor, but only if another thread hadn't
683		 * replaced or closed it.
684		 */
685		FILEDESC_LOCK(fdp);
686		if (fdp->fd_ofiles[indx] == fp) {
687			fdp->fd_ofiles[indx] = NULL;
688			FILEDESC_UNLOCK(fdp);
689			fdrop(fp, td);
690		} else
691			FILEDESC_UNLOCK(fdp);
692
693		if (error == ERESTART)
694			error = EINTR;
695		return (error);
696	}
697	td->td_dupfd = 0;
698	NDFREE(&nd, NDF_ONLY_PNBUF);
699	vp = nd.ni_vp;
700
701	/*
702	 * There should be 2 references on the file, one from the descriptor
703	 * table, and one for us.
704	 *
705	 * Handle the case where someone closed the file (via its file
706	 * descriptor) while we were blocked.  The end result should look
707	 * like opening the file succeeded but it was immediately closed.
708	 */
709	FILEDESC_LOCK(fdp);
710	FILE_LOCK(fp);
711	if (fp->f_count == 1) {
712		KASSERT(fdp->fd_ofiles[indx] != fp,
713		    ("Open file descriptor lost all refs"));
714		FILEDESC_UNLOCK(fdp);
715		FILE_UNLOCK(fp);
716		VOP_UNLOCK(vp, 0, td);
717		vn_close(vp, flags & FMASK, fp->f_cred, td);
718		fdrop(fp, td);
719		td->td_retval[0] = indx;
720		return 0;
721	}
722
723	/* assert that vn_open created a backing object if one is needed */
724	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
725		("open: vmio vnode has no backing object after vn_open"));
726
727	fp->un_data.vnode = vp;
728	fp->f_flag = flags & FMASK;
729	fp->f_ops = &vnops;
730	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
731	FILEDESC_UNLOCK(fdp);
732	FILE_UNLOCK(fp);
733	VOP_UNLOCK(vp, 0, td);
734	if (flags & (O_EXLOCK | O_SHLOCK)) {
735		lf.l_whence = SEEK_SET;
736		lf.l_start = 0;
737		lf.l_len = 0;
738		if (flags & O_EXLOCK)
739			lf.l_type = F_WRLCK;
740		else
741			lf.l_type = F_RDLCK;
742		type = F_FLOCK;
743		if ((flags & FNONBLOCK) == 0)
744			type |= F_WAIT;
745		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
746			    type)) != 0)
747			goto bad;
748		fp->f_flag |= FHASLOCK;
749	}
750	if (flags & O_TRUNC) {
751		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
752			goto bad;
753		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
754		VATTR_NULL(&vat);
755		vat.va_size = 0;
756		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
757#ifdef MAC
758		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
759		if (error == 0)
760#endif
761			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
762		VOP_UNLOCK(vp, 0, td);
763		vn_finished_write(mp);
764		if (error)
765			goto bad;
766	}
767	/*
768	 * Release our private reference, leaving the one associated with
769	 * the descriptor table intact.
770	 */
771	fdrop(fp, td);
772	td->td_retval[0] = indx;
773	return (0);
774bad:
775	FILEDESC_LOCK(fdp);
776	if (fdp->fd_ofiles[indx] == fp) {
777		fdp->fd_ofiles[indx] = NULL;
778		FILEDESC_UNLOCK(fdp);
779		fdrop(fp, td);
780	} else
781		FILEDESC_UNLOCK(fdp);
782	fdrop(fp, td);
783	return (error);
784}
785
#ifdef COMPAT_43
/*
 * Old creat(2) entry point: create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Implemented as open() with the flags fixed to
 * O_WRONLY | O_CREAT | O_TRUNC; path and mode are passed through.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oargs;

	oargs.path = uap->path;
	oargs.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oargs.mode = uap->mode;
	return (open(td, &oargs));
}
#endif /* COMPAT_43 */
816
817/*
818 * Create a special file.
819 */
820#ifndef _SYS_SYSPROTO_H_
821struct mknod_args {
822	char	*path;
823	int	mode;
824	int	dev;
825};
826#endif
827/* ARGSUSED */
828int
829mknod(td, uap)
830	struct thread *td;
831	register struct mknod_args /* {
832		char *path;
833		int mode;
834		int dev;
835	} */ *uap;
836{
837
838	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
839}
840
841int
842kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
843    int dev)
844{
845	struct vnode *vp;
846	struct mount *mp;
847	struct vattr vattr;
848	int error;
849	int whiteout = 0;
850	struct nameidata nd;
851
852	switch (mode & S_IFMT) {
853	case S_IFCHR:
854	case S_IFBLK:
855		error = suser(td);
856		break;
857	default:
858		error = suser_cred(td->td_ucred, PRISON_ROOT);
859		break;
860	}
861	if (error)
862		return (error);
863restart:
864	bwillwrite();
865	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
866	if ((error = namei(&nd)) != 0)
867		return (error);
868	vp = nd.ni_vp;
869	if (vp != NULL) {
870		vrele(vp);
871		error = EEXIST;
872	} else {
873		VATTR_NULL(&vattr);
874		FILEDESC_LOCK(td->td_proc->p_fd);
875		vattr.va_mode = (mode & ALLPERMS) &
876		    ~td->td_proc->p_fd->fd_cmask;
877		FILEDESC_UNLOCK(td->td_proc->p_fd);
878		vattr.va_rdev = dev;
879		whiteout = 0;
880
881		switch (mode & S_IFMT) {
882		case S_IFMT:	/* used by badsect to flag bad sectors */
883			vattr.va_type = VBAD;
884			break;
885		case S_IFCHR:
886			vattr.va_type = VCHR;
887			break;
888		case S_IFBLK:
889			vattr.va_type = VBLK;
890			break;
891		case S_IFWHT:
892			whiteout = 1;
893			break;
894		default:
895			error = EINVAL;
896			break;
897		}
898	}
899	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
900		NDFREE(&nd, NDF_ONLY_PNBUF);
901		vput(nd.ni_dvp);
902		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
903			return (error);
904		goto restart;
905	}
906#ifdef MAC
907	if (error == 0 && !whiteout)
908		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
909		    &nd.ni_cnd, &vattr);
910#endif
911	if (!error) {
912		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
913		if (whiteout)
914			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
915		else {
916			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
917						&nd.ni_cnd, &vattr);
918			if (error == 0)
919				vput(nd.ni_vp);
920		}
921	}
922	NDFREE(&nd, NDF_ONLY_PNBUF);
923	vput(nd.ni_dvp);
924	vn_finished_write(mp);
925	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
926	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
927	return (error);
928}
929
930/*
931 * Create a named pipe.
932 */
933#ifndef _SYS_SYSPROTO_H_
934struct mkfifo_args {
935	char	*path;
936	int	mode;
937};
938#endif
939/* ARGSUSED */
940int
941mkfifo(td, uap)
942	struct thread *td;
943	register struct mkfifo_args /* {
944		char *path;
945		int mode;
946	} */ *uap;
947{
948
949	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
950}
951
952int
953kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
954{
955	struct mount *mp;
956	struct vattr vattr;
957	int error;
958	struct nameidata nd;
959
960restart:
961	bwillwrite();
962	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
963	if ((error = namei(&nd)) != 0)
964		return (error);
965	if (nd.ni_vp != NULL) {
966		NDFREE(&nd, NDF_ONLY_PNBUF);
967		vrele(nd.ni_vp);
968		vput(nd.ni_dvp);
969		return (EEXIST);
970	}
971	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
972		NDFREE(&nd, NDF_ONLY_PNBUF);
973		vput(nd.ni_dvp);
974		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
975			return (error);
976		goto restart;
977	}
978	VATTR_NULL(&vattr);
979	vattr.va_type = VFIFO;
980	FILEDESC_LOCK(td->td_proc->p_fd);
981	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
982	FILEDESC_UNLOCK(td->td_proc->p_fd);
983#ifdef MAC
984	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
985	    &vattr);
986	if (error)
987		goto out;
988#endif
989	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
990	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
991	if (error == 0)
992		vput(nd.ni_vp);
993#ifdef MAC
994out:
995#endif
996	NDFREE(&nd, NDF_ONLY_PNBUF);
997	vput(nd.ni_dvp);
998	vn_finished_write(mp);
999	return (error);
1000}
1001
1002/*
1003 * Make a hard file link.
1004 */
1005#ifndef _SYS_SYSPROTO_H_
1006struct link_args {
1007	char	*path;
1008	char	*link;
1009};
1010#endif
1011/* ARGSUSED */
1012int
1013link(td, uap)
1014	struct thread *td;
1015	register struct link_args /* {
1016		char *path;
1017		char *link;
1018	} */ *uap;
1019{
1020
1021	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1022}
1023
1024int
1025kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1026{
1027	struct vnode *vp;
1028	struct mount *mp;
1029	struct nameidata nd;
1030	int error;
1031
1032	bwillwrite();
1033	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1034	if ((error = namei(&nd)) != 0)
1035		return (error);
1036	NDFREE(&nd, NDF_ONLY_PNBUF);
1037	vp = nd.ni_vp;
1038	if (vp->v_type == VDIR) {
1039		vrele(vp);
1040		return (EPERM);		/* POSIX */
1041	}
1042	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1043		vrele(vp);
1044		return (error);
1045	}
1046	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1047	if ((error = namei(&nd)) == 0) {
1048		if (nd.ni_vp != NULL) {
1049			vrele(nd.ni_vp);
1050			error = EEXIST;
1051		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1052		    == 0) {
1053			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1054			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1055#ifdef MAC
1056			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1057			    vp, &nd.ni_cnd);
1058			if (error == 0)
1059#endif
1060				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1061			VOP_UNLOCK(vp, 0, td);
1062		}
1063		NDFREE(&nd, NDF_ONLY_PNBUF);
1064		vput(nd.ni_dvp);
1065	}
1066	vrele(vp);
1067	vn_finished_write(mp);
1068	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1069	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1070	return (error);
1071}
1072
1073/*
1074 * Make a symbolic link.
1075 */
1076#ifndef _SYS_SYSPROTO_H_
1077struct symlink_args {
1078	char	*path;
1079	char	*link;
1080};
1081#endif
1082/* ARGSUSED */
1083int
1084symlink(td, uap)
1085	struct thread *td;
1086	register struct symlink_args /* {
1087		char *path;
1088		char *link;
1089	} */ *uap;
1090{
1091
1092	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1093}
1094
1095int
1096kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1097{
1098	struct mount *mp;
1099	struct vattr vattr;
1100	char *syspath;
1101	int error;
1102	struct nameidata nd;
1103
1104	if (segflg == UIO_SYSSPACE) {
1105		syspath = path;
1106	} else {
1107		syspath = uma_zalloc(namei_zone, M_WAITOK);
1108		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1109			goto out;
1110	}
1111restart:
1112	bwillwrite();
1113	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1114	if ((error = namei(&nd)) != 0)
1115		goto out;
1116	if (nd.ni_vp) {
1117		NDFREE(&nd, NDF_ONLY_PNBUF);
1118		vrele(nd.ni_vp);
1119		vput(nd.ni_dvp);
1120		error = EEXIST;
1121		goto out;
1122	}
1123	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1124		NDFREE(&nd, NDF_ONLY_PNBUF);
1125		vput(nd.ni_dvp);
1126		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1127			return (error);
1128		goto restart;
1129	}
1130	VATTR_NULL(&vattr);
1131	FILEDESC_LOCK(td->td_proc->p_fd);
1132	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1133	FILEDESC_UNLOCK(td->td_proc->p_fd);
1134#ifdef MAC
1135	vattr.va_type = VLNK;
1136	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1137	    &vattr);
1138	if (error)
1139		goto out2;
1140#endif
1141	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1142	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1143	if (error == 0)
1144		vput(nd.ni_vp);
1145#ifdef MAC
1146out2:
1147#endif
1148	NDFREE(&nd, NDF_ONLY_PNBUF);
1149	vput(nd.ni_dvp);
1150	vn_finished_write(mp);
1151	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1152	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1153out:
1154	if (segflg != UIO_SYSSPACE)
1155		uma_zfree(namei_zone, syspath);
1156	return (error);
1157}
1158
1159/*
1160 * Delete a whiteout from the filesystem.
1161 */
1162/* ARGSUSED */
1163int
1164undelete(td, uap)
1165	struct thread *td;
1166	register struct undelete_args /* {
1167		char *path;
1168	} */ *uap;
1169{
1170	int error;
1171	struct mount *mp;
1172	struct nameidata nd;
1173
1174restart:
1175	bwillwrite();
1176	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1177	    uap->path, td);
1178	error = namei(&nd);
1179	if (error)
1180		return (error);
1181
1182	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1183		NDFREE(&nd, NDF_ONLY_PNBUF);
1184		if (nd.ni_vp)
1185			vrele(nd.ni_vp);
1186		vput(nd.ni_dvp);
1187		return (EEXIST);
1188	}
1189	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1190		NDFREE(&nd, NDF_ONLY_PNBUF);
1191		vput(nd.ni_dvp);
1192		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1193			return (error);
1194		goto restart;
1195	}
1196	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1197	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1198	NDFREE(&nd, NDF_ONLY_PNBUF);
1199	vput(nd.ni_dvp);
1200	vn_finished_write(mp);
1201	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1202	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1203	return (error);
1204}
1205
1206/*
1207 * Delete a name from the filesystem.
1208 */
1209#ifndef _SYS_SYSPROTO_H_
1210struct unlink_args {
1211	char	*path;
1212};
1213#endif
1214/* ARGSUSED */
1215int
1216unlink(td, uap)
1217	struct thread *td;
1218	struct unlink_args /* {
1219		char *path;
1220	} */ *uap;
1221{
1222
1223	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1224}
1225
1226int
1227kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1228{
1229	struct mount *mp;
1230	struct vnode *vp;
1231	int error;
1232	struct nameidata nd;
1233
1234restart:
1235	bwillwrite();
1236	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1237	if ((error = namei(&nd)) != 0)
1238		return (error);
1239	vp = nd.ni_vp;
1240	if (vp->v_type == VDIR)
1241		error = EPERM;		/* POSIX */
1242	else {
1243		/*
1244		 * The root of a mounted filesystem cannot be deleted.
1245		 *
1246		 * XXX: can this only be a VDIR case?
1247		 */
1248		if (vp->v_vflag & VV_ROOT)
1249			error = EBUSY;
1250	}
1251	if (error == 0) {
1252		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1253			NDFREE(&nd, NDF_ONLY_PNBUF);
1254			if (vp == nd.ni_dvp)
1255				vrele(vp);
1256			else
1257				vput(vp);
1258			vput(nd.ni_dvp);
1259			if ((error = vn_start_write(NULL, &mp,
1260			    V_XSLEEP | PCATCH)) != 0)
1261				return (error);
1262			goto restart;
1263		}
1264#ifdef MAC
1265		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1266		    &nd.ni_cnd);
1267		if (error)
1268			goto out;
1269#endif
1270		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1271		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1272#ifdef MAC
1273out:
1274#endif
1275		vn_finished_write(mp);
1276	}
1277	NDFREE(&nd, NDF_ONLY_PNBUF);
1278	if (vp == nd.ni_dvp)
1279		vrele(vp);
1280	else
1281		vput(vp);
1282	vput(nd.ni_dvp);
1283	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1284	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1285	return (error);
1286}
1287
1288/*
1289 * Reposition read/write file offset.
1290 */
1291#ifndef _SYS_SYSPROTO_H_
1292struct lseek_args {
1293	int	fd;
1294	int	pad;
1295	off_t	offset;
1296	int	whence;
1297};
1298#endif
1299int
1300lseek(td, uap)
1301	struct thread *td;
1302	register struct lseek_args /* {
1303		int fd;
1304		int pad;
1305		off_t offset;
1306		int whence;
1307	} */ *uap;
1308{
1309	struct ucred *cred = td->td_ucred;
1310	struct file *fp;
1311	struct vnode *vp;
1312	struct vattr vattr;
1313	off_t offset;
1314	int error, noneg;
1315
1316	if ((error = fget(td, uap->fd, &fp)) != 0)
1317		return (error);
1318	if (fp->f_type != DTYPE_VNODE) {
1319		fdrop(fp, td);
1320		return (ESPIPE);
1321	}
1322	vp = fp->un_data.vnode;
1323	noneg = (vp->v_type != VCHR);
1324	offset = uap->offset;
1325	switch (uap->whence) {
1326	case L_INCR:
1327		if (noneg &&
1328		    (fp->f_offset < 0 ||
1329		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1330			error = EOVERFLOW;
1331			break;
1332		}
1333		offset += fp->f_offset;
1334		break;
1335	case L_XTND:
1336		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1337		error = VOP_GETATTR(vp, &vattr, cred, td);
1338		VOP_UNLOCK(vp, 0, td);
1339		if (error)
1340			break;
1341		if (noneg &&
1342		    (vattr.va_size > OFF_MAX ||
1343		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1344			error = EOVERFLOW;
1345			break;
1346		}
1347		offset += vattr.va_size;
1348		break;
1349	case L_SET:
1350		break;
1351	default:
1352		error = EINVAL;
1353	}
1354	if (error == 0 && noneg && offset < 0)
1355		error = EINVAL;
1356	if (error != 0) {
1357		fdrop(fp, td);
1358		return (error);
1359	}
1360	fp->f_offset = offset;
1361	*(off_t *)(td->td_retval) = fp->f_offset;
1362	fdrop(fp, td);
1363	return (0);
1364}
1365
1366#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1367/*
1368 * Reposition read/write file offset.
1369 */
1370#ifndef _SYS_SYSPROTO_H_
1371struct olseek_args {
1372	int	fd;
1373	long	offset;
1374	int	whence;
1375};
1376#endif
1377int
1378olseek(td, uap)
1379	struct thread *td;
1380	register struct olseek_args /* {
1381		int fd;
1382		long offset;
1383		int whence;
1384	} */ *uap;
1385{
1386	struct lseek_args /* {
1387		int fd;
1388		int pad;
1389		off_t offset;
1390		int whence;
1391	} */ nuap;
1392	int error;
1393
1394	nuap.fd = uap->fd;
1395	nuap.offset = uap->offset;
1396	nuap.whence = uap->whence;
1397	error = lseek(td, &nuap);
1398	return (error);
1399}
1400#endif /* COMPAT_43 */
1401
1402/*
1403 * Check access permissions using passed credentials.
1404 */
1405static int
1406vn_access(vp, user_flags, cred, td)
1407	struct vnode	*vp;
1408	int		user_flags;
1409	struct ucred	*cred;
1410	struct thread	*td;
1411{
1412	int error, flags;
1413
1414	/* Flags == 0 means only check for existence. */
1415	error = 0;
1416	if (user_flags) {
1417		flags = 0;
1418		if (user_flags & R_OK)
1419			flags |= VREAD;
1420		if (user_flags & W_OK)
1421			flags |= VWRITE;
1422		if (user_flags & X_OK)
1423			flags |= VEXEC;
1424#ifdef MAC
1425		error = mac_check_vnode_access(cred, vp, flags);
1426		if (error)
1427			return (error);
1428#endif
1429		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1430			error = VOP_ACCESS(vp, flags, cred, td);
1431	}
1432	return (error);
1433}
1434
1435/*
1436 * Check access permissions using "real" credentials.
1437 */
1438#ifndef _SYS_SYSPROTO_H_
1439struct access_args {
1440	char	*path;
1441	int	flags;
1442};
1443#endif
1444int
1445access(td, uap)
1446	struct thread *td;
1447	register struct access_args /* {
1448		char *path;
1449		int flags;
1450	} */ *uap;
1451{
1452
1453	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1454}
1455
1456int
1457kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1458{
1459	struct ucred *cred, *tmpcred;
1460	register struct vnode *vp;
1461	int error;
1462	struct nameidata nd;
1463
1464	/*
1465	 * Create and modify a temporary credential instead of one that
1466	 * is potentially shared.  This could also mess up socket
1467	 * buffer accounting which can run in an interrupt context.
1468	 *
1469	 * XXX - Depending on how "threads" are finally implemented, it
1470	 * may be better to explicitly pass the credential to namei()
1471	 * rather than to modify the potentially shared process structure.
1472	 */
1473	cred = td->td_ucred;
1474	tmpcred = crdup(cred);
1475	tmpcred->cr_uid = cred->cr_ruid;
1476	tmpcred->cr_groups[0] = cred->cr_rgid;
1477	td->td_ucred = tmpcred;
1478	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1479	if ((error = namei(&nd)) != 0)
1480		goto out1;
1481	vp = nd.ni_vp;
1482
1483	error = vn_access(vp, flags, tmpcred, td);
1484	NDFREE(&nd, NDF_ONLY_PNBUF);
1485	vput(vp);
1486out1:
1487	td->td_ucred = cred;
1488	crfree(tmpcred);
1489	return (error);
1490}
1491
1492/*
1493 * Check access permissions using "effective" credentials.
1494 */
1495#ifndef _SYS_SYSPROTO_H_
1496struct eaccess_args {
1497	char	*path;
1498	int	flags;
1499};
1500#endif
1501int
1502eaccess(td, uap)
1503	struct thread *td;
1504	register struct eaccess_args /* {
1505		char *path;
1506		int flags;
1507	} */ *uap;
1508{
1509	struct nameidata nd;
1510	struct vnode *vp;
1511	int error;
1512
1513	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1514	    uap->path, td);
1515	if ((error = namei(&nd)) != 0)
1516		return (error);
1517	vp = nd.ni_vp;
1518
1519	error = vn_access(vp, uap->flags, td->td_ucred, td);
1520	NDFREE(&nd, NDF_ONLY_PNBUF);
1521	vput(vp);
1522	return (error);
1523}
1524
1525#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1526/*
1527 * Get file status; this version follows links.
1528 */
1529#ifndef _SYS_SYSPROTO_H_
1530struct ostat_args {
1531	char	*path;
1532	struct ostat *ub;
1533};
1534#endif
1535/* ARGSUSED */
1536int
1537ostat(td, uap)
1538	struct thread *td;
1539	register struct ostat_args /* {
1540		char *path;
1541		struct ostat *ub;
1542	} */ *uap;
1543{
1544	struct stat sb;
1545	struct ostat osb;
1546	int error;
1547	struct nameidata nd;
1548
1549	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1550	    uap->path, td);
1551	if ((error = namei(&nd)) != 0)
1552		return (error);
1553	NDFREE(&nd, NDF_ONLY_PNBUF);
1554	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1555	vput(nd.ni_vp);
1556	if (error)
1557		return (error);
1558	cvtstat(&sb, &osb);
1559	error = copyout(&osb, uap->ub, sizeof (osb));
1560	return (error);
1561}
1562
1563/*
1564 * Get file status; this version does not follow links.
1565 */
1566#ifndef _SYS_SYSPROTO_H_
1567struct olstat_args {
1568	char	*path;
1569	struct ostat *ub;
1570};
1571#endif
1572/* ARGSUSED */
1573int
1574olstat(td, uap)
1575	struct thread *td;
1576	register struct olstat_args /* {
1577		char *path;
1578		struct ostat *ub;
1579	} */ *uap;
1580{
1581	struct vnode *vp;
1582	struct stat sb;
1583	struct ostat osb;
1584	int error;
1585	struct nameidata nd;
1586
1587	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1588	    uap->path, td);
1589	if ((error = namei(&nd)) != 0)
1590		return (error);
1591	vp = nd.ni_vp;
1592	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1593	NDFREE(&nd, NDF_ONLY_PNBUF);
1594	vput(vp);
1595	if (error)
1596		return (error);
1597	cvtstat(&sb, &osb);
1598	error = copyout(&osb, uap->ub, sizeof (osb));
1599	return (error);
1600}
1601
1602/*
1603 * Convert from an old to a new stat structure.
1604 */
1605void
1606cvtstat(st, ost)
1607	struct stat *st;
1608	struct ostat *ost;
1609{
1610
1611	ost->st_dev = st->st_dev;
1612	ost->st_ino = st->st_ino;
1613	ost->st_mode = st->st_mode;
1614	ost->st_nlink = st->st_nlink;
1615	ost->st_uid = st->st_uid;
1616	ost->st_gid = st->st_gid;
1617	ost->st_rdev = st->st_rdev;
1618	if (st->st_size < (quad_t)1 << 32)
1619		ost->st_size = st->st_size;
1620	else
1621		ost->st_size = -2;
1622	ost->st_atime = st->st_atime;
1623	ost->st_mtime = st->st_mtime;
1624	ost->st_ctime = st->st_ctime;
1625	ost->st_blksize = st->st_blksize;
1626	ost->st_blocks = st->st_blocks;
1627	ost->st_flags = st->st_flags;
1628	ost->st_gen = st->st_gen;
1629}
1630#endif /* COMPAT_43 || COMPAT_SUNOS */
1631
1632/*
1633 * Get file status; this version follows links.
1634 */
1635#ifndef _SYS_SYSPROTO_H_
1636struct stat_args {
1637	char	*path;
1638	struct stat *ub;
1639};
1640#endif
1641/* ARGSUSED */
1642int
1643stat(td, uap)
1644	struct thread *td;
1645	register struct stat_args /* {
1646		char *path;
1647		struct stat *ub;
1648	} */ *uap;
1649{
1650	struct stat sb;
1651	int error;
1652	struct nameidata nd;
1653
1654#ifdef LOOKUP_SHARED
1655	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1656	    UIO_USERSPACE, uap->path, td);
1657#else
1658	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1659	    uap->path, td);
1660#endif
1661	if ((error = namei(&nd)) != 0)
1662		return (error);
1663	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1664	NDFREE(&nd, NDF_ONLY_PNBUF);
1665	vput(nd.ni_vp);
1666	if (error)
1667		return (error);
1668	error = copyout(&sb, uap->ub, sizeof (sb));
1669	return (error);
1670}
1671
1672/*
1673 * Get file status; this version does not follow links.
1674 */
1675#ifndef _SYS_SYSPROTO_H_
1676struct lstat_args {
1677	char	*path;
1678	struct stat *ub;
1679};
1680#endif
1681/* ARGSUSED */
1682int
1683lstat(td, uap)
1684	struct thread *td;
1685	register struct lstat_args /* {
1686		char *path;
1687		struct stat *ub;
1688	} */ *uap;
1689{
1690	int error;
1691	struct vnode *vp;
1692	struct stat sb;
1693	struct nameidata nd;
1694
1695	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1696	    uap->path, td);
1697	if ((error = namei(&nd)) != 0)
1698		return (error);
1699	vp = nd.ni_vp;
1700	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1701	NDFREE(&nd, NDF_ONLY_PNBUF);
1702	vput(vp);
1703	if (error)
1704		return (error);
1705	error = copyout(&sb, uap->ub, sizeof (sb));
1706	return (error);
1707}
1708
1709/*
1710 * Implementation of the NetBSD stat() function.
1711 * XXX This should probably be collapsed with the FreeBSD version,
1712 * as the differences are only due to vn_stat() clearing spares at
1713 * the end of the structures.  vn_stat could be split to avoid this,
1714 * and thus collapse the following to close to zero code.
1715 */
1716void
1717cvtnstat(sb, nsb)
1718	struct stat *sb;
1719	struct nstat *nsb;
1720{
1721	bzero(nsb, sizeof *nsb);
1722	nsb->st_dev = sb->st_dev;
1723	nsb->st_ino = sb->st_ino;
1724	nsb->st_mode = sb->st_mode;
1725	nsb->st_nlink = sb->st_nlink;
1726	nsb->st_uid = sb->st_uid;
1727	nsb->st_gid = sb->st_gid;
1728	nsb->st_rdev = sb->st_rdev;
1729	nsb->st_atimespec = sb->st_atimespec;
1730	nsb->st_mtimespec = sb->st_mtimespec;
1731	nsb->st_ctimespec = sb->st_ctimespec;
1732	nsb->st_size = sb->st_size;
1733	nsb->st_blocks = sb->st_blocks;
1734	nsb->st_blksize = sb->st_blksize;
1735	nsb->st_flags = sb->st_flags;
1736	nsb->st_gen = sb->st_gen;
1737	nsb->st_birthtimespec = sb->st_birthtimespec;
1738}
1739
1740#ifndef _SYS_SYSPROTO_H_
1741struct nstat_args {
1742	char	*path;
1743	struct nstat *ub;
1744};
1745#endif
1746/* ARGSUSED */
1747int
1748nstat(td, uap)
1749	struct thread *td;
1750	register struct nstat_args /* {
1751		char *path;
1752		struct nstat *ub;
1753	} */ *uap;
1754{
1755	struct stat sb;
1756	struct nstat nsb;
1757	int error;
1758	struct nameidata nd;
1759
1760	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1761	    uap->path, td);
1762	if ((error = namei(&nd)) != 0)
1763		return (error);
1764	NDFREE(&nd, NDF_ONLY_PNBUF);
1765	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1766	vput(nd.ni_vp);
1767	if (error)
1768		return (error);
1769	cvtnstat(&sb, &nsb);
1770	error = copyout(&nsb, uap->ub, sizeof (nsb));
1771	return (error);
1772}
1773
1774/*
1775 * NetBSD lstat.  Get file status; this version does not follow links.
1776 */
1777#ifndef _SYS_SYSPROTO_H_
1778struct lstat_args {
1779	char	*path;
1780	struct stat *ub;
1781};
1782#endif
1783/* ARGSUSED */
1784int
1785nlstat(td, uap)
1786	struct thread *td;
1787	register struct nlstat_args /* {
1788		char *path;
1789		struct nstat *ub;
1790	} */ *uap;
1791{
1792	int error;
1793	struct vnode *vp;
1794	struct stat sb;
1795	struct nstat nsb;
1796	struct nameidata nd;
1797
1798	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1799	    uap->path, td);
1800	if ((error = namei(&nd)) != 0)
1801		return (error);
1802	vp = nd.ni_vp;
1803	NDFREE(&nd, NDF_ONLY_PNBUF);
1804	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1805	vput(vp);
1806	if (error)
1807		return (error);
1808	cvtnstat(&sb, &nsb);
1809	error = copyout(&nsb, uap->ub, sizeof (nsb));
1810	return (error);
1811}
1812
1813/*
1814 * Get configurable pathname variables.
1815 */
1816#ifndef _SYS_SYSPROTO_H_
1817struct pathconf_args {
1818	char	*path;
1819	int	name;
1820};
1821#endif
1822/* ARGSUSED */
1823int
1824pathconf(td, uap)
1825	struct thread *td;
1826	register struct pathconf_args /* {
1827		char *path;
1828		int name;
1829	} */ *uap;
1830{
1831	int error;
1832	struct nameidata nd;
1833
1834	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1835	    uap->path, td);
1836	if ((error = namei(&nd)) != 0)
1837		return (error);
1838	NDFREE(&nd, NDF_ONLY_PNBUF);
1839
1840	/* If asynchronous I/O is available, it works for all files. */
1841	if (uap->name == _PC_ASYNC_IO)
1842		td->td_retval[0] = async_io_version;
1843	else
1844		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1845	vput(nd.ni_vp);
1846	return (error);
1847}
1848
1849/*
1850 * Return target name of a symbolic link.
1851 */
1852#ifndef _SYS_SYSPROTO_H_
1853struct readlink_args {
1854	char	*path;
1855	char	*buf;
1856	int	count;
1857};
1858#endif
1859/* ARGSUSED */
1860int
1861readlink(td, uap)
1862	struct thread *td;
1863	register struct readlink_args /* {
1864		char *path;
1865		char *buf;
1866		int count;
1867	} */ *uap;
1868{
1869
1870	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1871	    UIO_USERSPACE, uap->count));
1872}
1873
1874int
1875kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1876    enum uio_seg bufseg, int count)
1877{
1878	register struct vnode *vp;
1879	struct iovec aiov;
1880	struct uio auio;
1881	int error;
1882	struct nameidata nd;
1883
1884	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1885	if ((error = namei(&nd)) != 0)
1886		return (error);
1887	NDFREE(&nd, NDF_ONLY_PNBUF);
1888	vp = nd.ni_vp;
1889#ifdef MAC
1890	error = mac_check_vnode_readlink(td->td_ucred, vp);
1891	if (error) {
1892		vput(vp);
1893		return (error);
1894	}
1895#endif
1896	if (vp->v_type != VLNK)
1897		error = EINVAL;
1898	else {
1899		aiov.iov_base = buf;
1900		aiov.iov_len = count;
1901		auio.uio_iov = &aiov;
1902		auio.uio_iovcnt = 1;
1903		auio.uio_offset = 0;
1904		auio.uio_rw = UIO_READ;
1905		auio.uio_segflg = bufseg;
1906		auio.uio_td = td;
1907		auio.uio_resid = count;
1908		error = VOP_READLINK(vp, &auio, td->td_ucred);
1909	}
1910	vput(vp);
1911	td->td_retval[0] = count - auio.uio_resid;
1912	return (error);
1913}
1914
1915/*
1916 * Common implementation code for chflags() and fchflags().
1917 */
1918static int
1919setfflags(td, vp, flags)
1920	struct thread *td;
1921	struct vnode *vp;
1922	int flags;
1923{
1924	int error;
1925	struct mount *mp;
1926	struct vattr vattr;
1927
1928	/*
1929	 * Prevent non-root users from setting flags on devices.  When
1930	 * a device is reused, users can retain ownership of the device
1931	 * if they are allowed to set flags and programs assume that
1932	 * chown can't fail when done as root.
1933	 */
1934	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1935		error = suser_cred(td->td_ucred, PRISON_ROOT);
1936		if (error)
1937			return (error);
1938	}
1939
1940	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1941		return (error);
1942	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1943	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1944#ifdef MAC
1945	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1946	if (error == 0) {
1947#endif
1948		VATTR_NULL(&vattr);
1949		vattr.va_flags = flags;
1950		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1951#ifdef MAC
1952	}
1953#endif
1954	VOP_UNLOCK(vp, 0, td);
1955	vn_finished_write(mp);
1956	return (error);
1957}
1958
1959/*
1960 * Change flags of a file given a path name.
1961 */
1962#ifndef _SYS_SYSPROTO_H_
1963struct chflags_args {
1964	char	*path;
1965	int	flags;
1966};
1967#endif
1968/* ARGSUSED */
1969int
1970chflags(td, uap)
1971	struct thread *td;
1972	register struct chflags_args /* {
1973		char *path;
1974		int flags;
1975	} */ *uap;
1976{
1977	int error;
1978	struct nameidata nd;
1979
1980	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
1981	if ((error = namei(&nd)) != 0)
1982		return (error);
1983	NDFREE(&nd, NDF_ONLY_PNBUF);
1984	error = setfflags(td, nd.ni_vp, uap->flags);
1985	vrele(nd.ni_vp);
1986	return error;
1987}
1988
1989/*
1990 * Same as chflags() but doesn't follow symlinks.
1991 */
1992int
1993lchflags(td, uap)
1994	struct thread *td;
1995	register struct lchflags_args /* {
1996		char *path;
1997		int flags;
1998	} */ *uap;
1999{
2000	int error;
2001	struct nameidata nd;
2002
2003	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2004	if ((error = namei(&nd)) != 0)
2005		return (error);
2006	NDFREE(&nd, NDF_ONLY_PNBUF);
2007	error = setfflags(td, nd.ni_vp, uap->flags);
2008	vrele(nd.ni_vp);
2009	return error;
2010}
2011
2012/*
2013 * Change flags of a file given a file descriptor.
2014 */
2015#ifndef _SYS_SYSPROTO_H_
2016struct fchflags_args {
2017	int	fd;
2018	int	flags;
2019};
2020#endif
2021/* ARGSUSED */
2022int
2023fchflags(td, uap)
2024	struct thread *td;
2025	register struct fchflags_args /* {
2026		int fd;
2027		int flags;
2028	} */ *uap;
2029{
2030	struct file *fp;
2031	int error;
2032
2033	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2034		return (error);
2035	error = setfflags(td, fp->un_data.vnode, uap->flags);
2036	fdrop(fp, td);
2037	return (error);
2038}
2039
2040/*
2041 * Common implementation code for chmod(), lchmod() and fchmod().
2042 */
2043static int
2044setfmode(td, vp, mode)
2045	struct thread *td;
2046	struct vnode *vp;
2047	int mode;
2048{
2049	int error;
2050	struct mount *mp;
2051	struct vattr vattr;
2052
2053	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2054		return (error);
2055	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2056	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2057	VATTR_NULL(&vattr);
2058	vattr.va_mode = mode & ALLPERMS;
2059#ifdef MAC
2060	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2061	if (error == 0)
2062#endif
2063		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2064	VOP_UNLOCK(vp, 0, td);
2065	vn_finished_write(mp);
2066	return error;
2067}
2068
2069/*
2070 * Change mode of a file given path name.
2071 */
2072#ifndef _SYS_SYSPROTO_H_
2073struct chmod_args {
2074	char	*path;
2075	int	mode;
2076};
2077#endif
2078/* ARGSUSED */
2079int
2080chmod(td, uap)
2081	struct thread *td;
2082	register struct chmod_args /* {
2083		char *path;
2084		int mode;
2085	} */ *uap;
2086{
2087
2088	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2089}
2090
2091int
2092kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2093{
2094	int error;
2095	struct nameidata nd;
2096
2097	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2098	if ((error = namei(&nd)) != 0)
2099		return (error);
2100	NDFREE(&nd, NDF_ONLY_PNBUF);
2101	error = setfmode(td, nd.ni_vp, mode);
2102	vrele(nd.ni_vp);
2103	return error;
2104}
2105
2106/*
2107 * Change mode of a file given path name (don't follow links.)
2108 */
2109#ifndef _SYS_SYSPROTO_H_
2110struct lchmod_args {
2111	char	*path;
2112	int	mode;
2113};
2114#endif
2115/* ARGSUSED */
2116int
2117lchmod(td, uap)
2118	struct thread *td;
2119	register struct lchmod_args /* {
2120		char *path;
2121		int mode;
2122	} */ *uap;
2123{
2124	int error;
2125	struct nameidata nd;
2126
2127	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2128	if ((error = namei(&nd)) != 0)
2129		return (error);
2130	NDFREE(&nd, NDF_ONLY_PNBUF);
2131	error = setfmode(td, nd.ni_vp, uap->mode);
2132	vrele(nd.ni_vp);
2133	return error;
2134}
2135
2136/*
2137 * Change mode of a file given a file descriptor.
2138 */
2139#ifndef _SYS_SYSPROTO_H_
2140struct fchmod_args {
2141	int	fd;
2142	int	mode;
2143};
2144#endif
2145/* ARGSUSED */
2146int
2147fchmod(td, uap)
2148	struct thread *td;
2149	register struct fchmod_args /* {
2150		int fd;
2151		int mode;
2152	} */ *uap;
2153{
2154	struct file *fp;
2155	struct vnode *vp;
2156	int error;
2157
2158	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2159		return (error);
2160	vp = fp->un_data.vnode;
2161	error = setfmode(td, fp->un_data.vnode, uap->mode);
2162	fdrop(fp, td);
2163	return (error);
2164}
2165
2166/*
2167 * Common implementation for chown(), lchown(), and fchown()
2168 */
2169static int
2170setfown(td, vp, uid, gid)
2171	struct thread *td;
2172	struct vnode *vp;
2173	uid_t uid;
2174	gid_t gid;
2175{
2176	int error;
2177	struct mount *mp;
2178	struct vattr vattr;
2179
2180	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2181		return (error);
2182	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2183	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2184	VATTR_NULL(&vattr);
2185	vattr.va_uid = uid;
2186	vattr.va_gid = gid;
2187#ifdef MAC
2188	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2189	    vattr.va_gid);
2190	if (error == 0)
2191#endif
2192		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2193	VOP_UNLOCK(vp, 0, td);
2194	vn_finished_write(mp);
2195	return error;
2196}
2197
2198/*
2199 * Set ownership given a path name.
2200 */
2201#ifndef _SYS_SYSPROTO_H_
2202struct chown_args {
2203	char	*path;
2204	int	uid;
2205	int	gid;
2206};
2207#endif
2208/* ARGSUSED */
2209int
2210chown(td, uap)
2211	struct thread *td;
2212	register struct chown_args /* {
2213		char *path;
2214		int uid;
2215		int gid;
2216	} */ *uap;
2217{
2218
2219	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2220}
2221
2222int
2223kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2224    int gid)
2225{
2226	int error;
2227	struct nameidata nd;
2228
2229	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2230	if ((error = namei(&nd)) != 0)
2231		return (error);
2232	NDFREE(&nd, NDF_ONLY_PNBUF);
2233	error = setfown(td, nd.ni_vp, uid, gid);
2234	vrele(nd.ni_vp);
2235	return (error);
2236}
2237
2238/*
2239 * Set ownership given a path name, do not cross symlinks.
2240 */
2241#ifndef _SYS_SYSPROTO_H_
2242struct lchown_args {
2243	char	*path;
2244	int	uid;
2245	int	gid;
2246};
2247#endif
2248/* ARGSUSED */
2249int
2250lchown(td, uap)
2251	struct thread *td;
2252	register struct lchown_args /* {
2253		char *path;
2254		int uid;
2255		int gid;
2256	} */ *uap;
2257{
2258
2259	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2260}
2261
2262int
2263kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2264    int gid)
2265{
2266	int error;
2267	struct nameidata nd;
2268
2269	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2270	if ((error = namei(&nd)) != 0)
2271		return (error);
2272	NDFREE(&nd, NDF_ONLY_PNBUF);
2273	error = setfown(td, nd.ni_vp, uid, gid);
2274	vrele(nd.ni_vp);
2275	return (error);
2276}
2277
2278/*
2279 * Set ownership given a file descriptor.
2280 */
2281#ifndef _SYS_SYSPROTO_H_
2282struct fchown_args {
2283	int	fd;
2284	int	uid;
2285	int	gid;
2286};
2287#endif
2288/* ARGSUSED */
2289int
2290fchown(td, uap)
2291	struct thread *td;
2292	register struct fchown_args /* {
2293		int fd;
2294		int uid;
2295		int gid;
2296	} */ *uap;
2297{
2298	struct file *fp;
2299	struct vnode *vp;
2300	int error;
2301
2302	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2303		return (error);
2304	vp = fp->un_data.vnode;
2305	error = setfown(td, fp->un_data.vnode, uap->uid, uap->gid);
2306	fdrop(fp, td);
2307	return (error);
2308}
2309
2310/*
2311 * Common implementation code for utimes(), lutimes(), and futimes().
2312 */
2313static int
2314getutimes(usrtvp, tvpseg, tsp)
2315	const struct timeval *usrtvp;
2316	enum uio_seg tvpseg;
2317	struct timespec *tsp;
2318{
2319	struct timeval tv[2];
2320	const struct timeval *tvp;
2321	int error;
2322
2323	if (usrtvp == NULL) {
2324		microtime(&tv[0]);
2325		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2326		tsp[1] = tsp[0];
2327	} else {
2328		if (tvpseg == UIO_SYSSPACE) {
2329			tvp = usrtvp;
2330		} else {
2331			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2332				return (error);
2333			tvp = tv;
2334		}
2335
2336		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2337		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2338	}
2339	return 0;
2340}
2341
2342/*
2343 * Common implementation code for utimes(), lutimes(), and futimes().
2344 */
2345static int
2346setutimes(td, vp, ts, numtimes, nullflag)
2347	struct thread *td;
2348	struct vnode *vp;
2349	const struct timespec *ts;
2350	int numtimes;
2351	int nullflag;
2352{
2353	int error, setbirthtime;
2354	struct mount *mp;
2355	struct vattr vattr;
2356
2357	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2358		return (error);
2359	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2360	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2361	setbirthtime = 0;
2362	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2363	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2364		setbirthtime = 1;
2365	VATTR_NULL(&vattr);
2366	vattr.va_atime = ts[0];
2367	vattr.va_mtime = ts[1];
2368	if (setbirthtime)
2369		vattr.va_birthtime = ts[1];
2370	if (numtimes > 2)
2371		vattr.va_birthtime = ts[2];
2372	if (nullflag)
2373		vattr.va_vaflags |= VA_UTIMES_NULL;
2374#ifdef MAC
2375	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2376	    vattr.va_mtime);
2377#endif
2378	if (error == 0)
2379		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2380	VOP_UNLOCK(vp, 0, td);
2381	vn_finished_write(mp);
2382	return error;
2383}
2384
2385/*
2386 * Set the access and modification times of a file.
2387 */
2388#ifndef _SYS_SYSPROTO_H_
2389struct utimes_args {
2390	char	*path;
2391	struct	timeval *tptr;
2392};
2393#endif
2394/* ARGSUSED */
2395int
2396utimes(td, uap)
2397	struct thread *td;
2398	register struct utimes_args /* {
2399		char *path;
2400		struct timeval *tptr;
2401	} */ *uap;
2402{
2403
2404	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2405	    UIO_USERSPACE));
2406}
2407
2408int
2409kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2410    struct timeval *tptr, enum uio_seg tptrseg)
2411{
2412	struct timespec ts[2];
2413	int error;
2414	struct nameidata nd;
2415
2416	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2417		return (error);
2418	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2419	if ((error = namei(&nd)) != 0)
2420		return (error);
2421	NDFREE(&nd, NDF_ONLY_PNBUF);
2422	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2423	vrele(nd.ni_vp);
2424	return (error);
2425}
2426
2427/*
2428 * Set the access and modification times of a file.
2429 */
2430#ifndef _SYS_SYSPROTO_H_
2431struct lutimes_args {
2432	char	*path;
2433	struct	timeval *tptr;
2434};
2435#endif
2436/* ARGSUSED */
2437int
2438lutimes(td, uap)
2439	struct thread *td;
2440	register struct lutimes_args /* {
2441		char *path;
2442		struct timeval *tptr;
2443	} */ *uap;
2444{
2445
2446	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2447	    UIO_USERSPACE));
2448}
2449
2450int
2451kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2452    struct timeval *tptr, enum uio_seg tptrseg)
2453{
2454	struct timespec ts[2];
2455	int error;
2456	struct nameidata nd;
2457
2458	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2459		return (error);
2460	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2461	if ((error = namei(&nd)) != 0)
2462		return (error);
2463	NDFREE(&nd, NDF_ONLY_PNBUF);
2464	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2465	vrele(nd.ni_vp);
2466	return (error);
2467}
2468
2469/*
2470 * Set the access and modification times of a file.
2471 */
2472#ifndef _SYS_SYSPROTO_H_
2473struct futimes_args {
2474	int	fd;
2475	struct	timeval *tptr;
2476};
2477#endif
2478/* ARGSUSED */
2479int
2480futimes(td, uap)
2481	struct thread *td;
2482	register struct futimes_args /* {
2483		int  fd;
2484		struct timeval *tptr;
2485	} */ *uap;
2486{
2487
2488	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2489}
2490
2491int
2492kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2493    enum uio_seg tptrseg)
2494{
2495	struct timespec ts[2];
2496	struct file *fp;
2497	int error;
2498
2499	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2500		return (error);
2501	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2502		return (error);
2503	error = setutimes(td, fp->un_data.vnode, ts, 2, tptr == NULL);
2504	fdrop(fp, td);
2505	return (error);
2506}
2507
2508/*
2509 * Truncate a file given its path name.
2510 */
2511#ifndef _SYS_SYSPROTO_H_
2512struct truncate_args {
2513	char	*path;
2514	int	pad;
2515	off_t	length;
2516};
2517#endif
2518/* ARGSUSED */
2519int
2520truncate(td, uap)
2521	struct thread *td;
2522	register struct truncate_args /* {
2523		char *path;
2524		int pad;
2525		off_t length;
2526	} */ *uap;
2527{
2528
2529	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2530}
2531
2532int
2533kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2534{
2535	struct mount *mp;
2536	struct vnode *vp;
2537	struct vattr vattr;
2538	int error;
2539	struct nameidata nd;
2540
2541	if (length < 0)
2542		return(EINVAL);
2543	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2544	if ((error = namei(&nd)) != 0)
2545		return (error);
2546	vp = nd.ni_vp;
2547	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2548		vrele(vp);
2549		return (error);
2550	}
2551	NDFREE(&nd, NDF_ONLY_PNBUF);
2552	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2553	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2554	if (vp->v_type == VDIR)
2555		error = EISDIR;
2556#ifdef MAC
2557	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2558	}
2559#endif
2560	else if ((error = vn_writechk(vp)) == 0 &&
2561	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2562		VATTR_NULL(&vattr);
2563		vattr.va_size = length;
2564		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2565	}
2566	vput(vp);
2567	vn_finished_write(mp);
2568	return (error);
2569}
2570
2571/*
2572 * Truncate a file given a file descriptor.
2573 */
2574#ifndef _SYS_SYSPROTO_H_
2575struct ftruncate_args {
2576	int	fd;
2577	int	pad;
2578	off_t	length;
2579};
2580#endif
2581/* ARGSUSED */
2582int
2583ftruncate(td, uap)
2584	struct thread *td;
2585	register struct ftruncate_args /* {
2586		int fd;
2587		int pad;
2588		off_t length;
2589	} */ *uap;
2590{
2591	struct mount *mp;
2592	struct vattr vattr;
2593	struct vnode *vp;
2594	struct file *fp;
2595	int error;
2596
2597	if (uap->length < 0)
2598		return(EINVAL);
2599	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2600		return (error);
2601	if ((fp->f_flag & FWRITE) == 0) {
2602		fdrop(fp, td);
2603		return (EINVAL);
2604	}
2605	vp = fp->un_data.vnode;
2606	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2607		fdrop(fp, td);
2608		return (error);
2609	}
2610	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2611	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2612	if (vp->v_type == VDIR)
2613		error = EISDIR;
2614#ifdef MAC
2615	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2616	    vp))) {
2617	}
2618#endif
2619	else if ((error = vn_writechk(vp)) == 0) {
2620		VATTR_NULL(&vattr);
2621		vattr.va_size = uap->length;
2622		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2623	}
2624	VOP_UNLOCK(vp, 0, td);
2625	vn_finished_write(mp);
2626	fdrop(fp, td);
2627	return (error);
2628}
2629
2630#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2631/*
2632 * Truncate a file given its path name.
2633 */
2634#ifndef _SYS_SYSPROTO_H_
2635struct otruncate_args {
2636	char	*path;
2637	long	length;
2638};
2639#endif
2640/* ARGSUSED */
2641int
2642otruncate(td, uap)
2643	struct thread *td;
2644	register struct otruncate_args /* {
2645		char *path;
2646		long length;
2647	} */ *uap;
2648{
2649	struct truncate_args /* {
2650		char *path;
2651		int pad;
2652		off_t length;
2653	} */ nuap;
2654
2655	nuap.path = uap->path;
2656	nuap.length = uap->length;
2657	return (truncate(td, &nuap));
2658}
2659
2660/*
2661 * Truncate a file given a file descriptor.
2662 */
2663#ifndef _SYS_SYSPROTO_H_
2664struct oftruncate_args {
2665	int	fd;
2666	long	length;
2667};
2668#endif
2669/* ARGSUSED */
2670int
2671oftruncate(td, uap)
2672	struct thread *td;
2673	register struct oftruncate_args /* {
2674		int fd;
2675		long length;
2676	} */ *uap;
2677{
2678	struct ftruncate_args /* {
2679		int fd;
2680		int pad;
2681		off_t length;
2682	} */ nuap;
2683
2684	nuap.fd = uap->fd;
2685	nuap.length = uap->length;
2686	return (ftruncate(td, &nuap));
2687}
2688#endif /* COMPAT_43 || COMPAT_SUNOS */
2689
2690/*
2691 * Sync an open file.
2692 */
2693#ifndef _SYS_SYSPROTO_H_
2694struct fsync_args {
2695	int	fd;
2696};
2697#endif
2698/* ARGSUSED */
2699int
2700fsync(td, uap)
2701	struct thread *td;
2702	struct fsync_args /* {
2703		int fd;
2704	} */ *uap;
2705{
2706	struct vnode *vp;
2707	struct mount *mp;
2708	struct file *fp;
2709	vm_object_t obj;
2710	int error;
2711
2712	GIANT_REQUIRED;
2713
2714	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2715		return (error);
2716	vp = fp->un_data.vnode;
2717	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2718		fdrop(fp, td);
2719		return (error);
2720	}
2721	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2722	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2723		vm_object_page_clean(obj, 0, 0, 0);
2724	}
2725	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2726	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2727	    && softdep_fsync_hook != NULL)
2728		error = (*softdep_fsync_hook)(vp);
2729
2730	VOP_UNLOCK(vp, 0, td);
2731	vn_finished_write(mp);
2732	fdrop(fp, td);
2733	return (error);
2734}
2735
2736/*
2737 * Rename files.  Source and destination must either both be directories,
2738 * or both not be directories.  If target is a directory, it must be empty.
2739 */
2740#ifndef _SYS_SYSPROTO_H_
2741struct rename_args {
2742	char	*from;
2743	char	*to;
2744};
2745#endif
2746/* ARGSUSED */
2747int
2748rename(td, uap)
2749	struct thread *td;
2750	register struct rename_args /* {
2751		char *from;
2752		char *to;
2753	} */ *uap;
2754{
2755
2756	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2757}
2758
2759int
2760kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2761{
2762	struct mount *mp = NULL;
2763	struct vnode *tvp, *fvp, *tdvp;
2764	struct nameidata fromnd, tond;
2765	int error;
2766
2767	bwillwrite();
2768#ifdef MAC
2769	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2770	    from, td);
2771#else
2772	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2773#endif
2774	if ((error = namei(&fromnd)) != 0)
2775		return (error);
2776#ifdef MAC
2777	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2778	    fromnd.ni_vp, &fromnd.ni_cnd);
2779	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2780	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2781#endif
2782	fvp = fromnd.ni_vp;
2783	if (error == 0)
2784		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2785	if (error != 0) {
2786		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2787		vrele(fromnd.ni_dvp);
2788		vrele(fvp);
2789		goto out1;
2790	}
2791	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2792	    NOOBJ, pathseg, to, td);
2793	if (fromnd.ni_vp->v_type == VDIR)
2794		tond.ni_cnd.cn_flags |= WILLBEDIR;
2795	if ((error = namei(&tond)) != 0) {
2796		/* Translate error code for rename("dir1", "dir2/."). */
2797		if (error == EISDIR && fvp->v_type == VDIR)
2798			error = EINVAL;
2799		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2800		vrele(fromnd.ni_dvp);
2801		vrele(fvp);
2802		goto out1;
2803	}
2804	tdvp = tond.ni_dvp;
2805	tvp = tond.ni_vp;
2806	if (tvp != NULL) {
2807		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2808			error = ENOTDIR;
2809			goto out;
2810		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2811			error = EISDIR;
2812			goto out;
2813		}
2814	}
2815	if (fvp == tdvp)
2816		error = EINVAL;
2817	/*
2818	 * If the source is the same as the destination (that is, if they
2819	 * are links to the same vnode), then there is nothing to do.
2820	 */
2821	if (fvp == tvp)
2822		error = -1;
2823#ifdef MAC
2824	else
2825		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2826		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2827#endif
2828out:
2829	if (!error) {
2830		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2831		if (fromnd.ni_dvp != tdvp) {
2832			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2833		}
2834		if (tvp) {
2835			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2836		}
2837		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2838				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2839		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2840		NDFREE(&tond, NDF_ONLY_PNBUF);
2841	} else {
2842		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2843		NDFREE(&tond, NDF_ONLY_PNBUF);
2844		if (tdvp == tvp)
2845			vrele(tdvp);
2846		else
2847			vput(tdvp);
2848		if (tvp)
2849			vput(tvp);
2850		vrele(fromnd.ni_dvp);
2851		vrele(fvp);
2852	}
2853	vrele(tond.ni_startdir);
2854	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2855	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2856	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2857	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2858out1:
2859	vn_finished_write(mp);
2860	if (fromnd.ni_startdir)
2861		vrele(fromnd.ni_startdir);
2862	if (error == -1)
2863		return (0);
2864	return (error);
2865}
2866
2867/*
2868 * Make a directory file.
2869 */
2870#ifndef _SYS_SYSPROTO_H_
2871struct mkdir_args {
2872	char	*path;
2873	int	mode;
2874};
2875#endif
2876/* ARGSUSED */
2877int
2878mkdir(td, uap)
2879	struct thread *td;
2880	register struct mkdir_args /* {
2881		char *path;
2882		int mode;
2883	} */ *uap;
2884{
2885
2886	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2887}
2888
/*
 * Create the directory named by path.  segflg tells namei() which address
 * space the path string lives in; mode supplies the permission bits, which
 * are masked with ACCESSPERMS and filtered through the process's fd_cmask.
 *
 * Returns 0 on success, EEXIST if the name already resolves to a vnode,
 * or the error from the lookup, the write-suspension wait, the MAC check
 * or VOP_MKDIR().
 */
int
kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;

restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	/* A non-NULL leaf means the name is already taken. */
	if (vp != NULL) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vrele(vp);
		/*
		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
		 * the strange behaviour of leaving the vnode unlocked
		 * if the target is the same vnode as the parent.
		 */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		return (EEXIST);
	}
	/*
	 * Try to start the write without sleeping.  If that fails, release
	 * the lookup results, wait via V_XSLEEP, and redo the whole lookup
	 * from the top.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	/* fd_cmask is read under the filedesc lock. */
	FILEDESC_LOCK(td->td_proc->p_fd);
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
	FILEDESC_UNLOCK(td->td_proc->p_fd);
#ifdef MAC
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error)
		goto out;
#endif
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* Only a successful VOP_MKDIR() leaves a new vnode to release. */
	if (!error)
		vput(nd.ni_vp);
	vn_finished_write(mp);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
	return (error);
}
2951
2952/*
2953 * Remove a directory file.
2954 */
2955#ifndef _SYS_SYSPROTO_H_
2956struct rmdir_args {
2957	char	*path;
2958};
2959#endif
2960/* ARGSUSED */
2961int
2962rmdir(td, uap)
2963	struct thread *td;
2964	struct rmdir_args /* {
2965		char *path;
2966	} */ *uap;
2967{
2968
2969	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2970}
2971
/*
 * Remove the directory named by path.  pathseg tells namei() which address
 * space the path string lives in.
 *
 * Returns 0 on success; ENOTDIR if the target is not a directory, EINVAL
 * if the target is the same vnode as its parent, EBUSY if the target is
 * flagged VV_ROOT, or the error from the lookup, the MAC check, the
 * write-suspension wait or VOP_RMDIR().
 */
int
kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
{
	struct mount *mp;
	struct vnode *vp;
	int error;
	struct nameidata nd;

restart:
	bwillwrite();
	/* Both the parent directory and the target are requested locked. */
	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_vflag & VV_ROOT) {
		error = EBUSY;
		goto out;
	}
#ifdef MAC
	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
	    &nd.ni_cnd);
	if (error)
		goto out;
#endif
	/*
	 * Try to start the write without sleeping.  If that fails, release
	 * the lookup results, wait via V_XSLEEP, and redo the whole lookup
	 * from the top.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vput(vp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	/*
	 * The write section is closed before the shared cleanup below,
	 * because the early "goto out" paths never opened one.
	 */
	vn_finished_write(mp);
out:
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* When parent and target are the same vnode, vput() only once. */
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	vput(vp);
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
	return (error);
}
3036
3037#ifdef COMPAT_43
3038/*
3039 * Read a block of directory entries in a filesystem independent format.
3040 */
3041#ifndef _SYS_SYSPROTO_H_
3042struct ogetdirentries_args {
3043	int	fd;
3044	char	*buf;
3045	u_int	count;
3046	long	*basep;
3047};
3048#endif
/*
 * Old-format (COMPAT_43) directory read.  Reads up to uap->count bytes of
 * directory entries from descriptor uap->fd into the user buffer uap->buf
 * and copies the file offset at which the read started out to uap->basep.
 * The number of bytes transferred is returned in td->td_retval[0].
 *
 * Entries are read into a temporary kernel buffer so that the d_type and
 * d_namlen fields of each entry can be rewritten before the data is
 * moved to user space (see the per-entry comments in the loop).
 */
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* The descriptor must be open for reading. */
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->un_data.vnode;
	/* Re-entered whenever the union handling below switches vnodes. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as a single-segment user-space uio. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets copied to basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian builds, mounts with mnt_maxsymlinklen <= 0
	 * are read straight into the user buffer with no rewriting.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/* Same request as auio, but targeting a kernel buffer. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/*
				 * Step to the next record.  A zero record
				 * length would never advance, so it is
				 * reported as EIO instead.
				 */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the walk reached the end. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether a union layer applies. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 from the hook requests a retry of the read. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * At the root of a MNT_UNION mount, repoint the file at the
		 * covered vnode, reset the offset and read from there.
		 */
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->un_data.vnode = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	/* basep is written unconditionally; its error is the return value. */
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3189#endif /* COMPAT_43 */
3190
3191/*
3192 * Read a block of directory entries in a filesystem independent format.
3193 */
3194#ifndef _SYS_SYSPROTO_H_
3195struct getdirentries_args {
3196	int	fd;
3197	char	*buf;
3198	u_int	count;
3199	long	*basep;
3200};
3201#endif
/*
 * Read up to uap->count bytes of directory entries from descriptor uap->fd
 * directly into the user buffer uap->buf.  If uap->basep is non-NULL, the
 * file offset at which the read started is copied out to it.  The number
 * of bytes transferred is returned in td->td_retval[0].
 */
int
getdirentries(td, uap)
	struct thread *td;
	register struct getdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	long loff;
	int error, eofflag;

	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* The descriptor must be open for reading. */
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->un_data.vnode;
	/* Re-entered whenever the union handling below switches vnodes. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as a single-segment user-space uio. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets copied to basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error == 0)
#endif
		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
		    NULL);
	fp->f_offset = auio.uio_offset;
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether a union layer applies. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 from the hook requests a retry of the read. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		/*
		 * At the root of a MNT_UNION mount, repoint the file at the
		 * covered vnode, reset the offset and read from there.
		 */
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->un_data.vnode = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	/* basep is optional here; getdents() passes NULL. */
	if (uap->basep != NULL) {
		error = copyout(&loff, uap->basep, sizeof(long));
	}
	td->td_retval[0] = uap->count - auio.uio_resid;
	fdrop(fp, td);
	return (error);
}
3288#ifndef _SYS_SYSPROTO_H_
3289struct getdents_args {
3290	int fd;
3291	char *buf;
3292	size_t count;
3293};
3294#endif
3295int
3296getdents(td, uap)
3297	struct thread *td;
3298	register struct getdents_args /* {
3299		int fd;
3300		char *buf;
3301		u_int count;
3302	} */ *uap;
3303{
3304	struct getdirentries_args ap;
3305	ap.fd = uap->fd;
3306	ap.buf = uap->buf;
3307	ap.count = uap->count;
3308	ap.basep = NULL;
3309	return getdirentries(td, &ap);
3310}
3311
3312/*
3313 * Set the mode mask for creation of filesystem nodes.
3314 *
3315 * MP SAFE
3316 */
3317#ifndef _SYS_SYSPROTO_H_
3318struct umask_args {
3319	int	newmask;
3320};
3321#endif
3322int
3323umask(td, uap)
3324	struct thread *td;
3325	struct umask_args /* {
3326		int newmask;
3327	} */ *uap;
3328{
3329	register struct filedesc *fdp;
3330
3331	FILEDESC_LOCK(td->td_proc->p_fd);
3332	fdp = td->td_proc->p_fd;
3333	td->td_retval[0] = fdp->fd_cmask;
3334	fdp->fd_cmask = uap->newmask & ALLPERMS;
3335	FILEDESC_UNLOCK(td->td_proc->p_fd);
3336	return (0);
3337}
3338
3339/*
3340 * Void all references to file by ripping underlying filesystem
3341 * away from vnode.
3342 */
3343#ifndef _SYS_SYSPROTO_H_
3344struct revoke_args {
3345	char	*path;
3346};
3347#endif
3348/* ARGSUSED */
3349int
3350revoke(td, uap)
3351	struct thread *td;
3352	register struct revoke_args /* {
3353		char *path;
3354	} */ *uap;
3355{
3356	struct mount *mp;
3357	struct vnode *vp;
3358	struct vattr vattr;
3359	int error;
3360	struct nameidata nd;
3361
3362	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3363	if ((error = namei(&nd)) != 0)
3364		return (error);
3365	vp = nd.ni_vp;
3366	NDFREE(&nd, NDF_ONLY_PNBUF);
3367	if (vp->v_type != VCHR) {
3368		vput(vp);
3369		return (EINVAL);
3370	}
3371#ifdef MAC
3372	error = mac_check_vnode_revoke(td->td_ucred, vp);
3373	if (error) {
3374		vput(vp);
3375		return (error);
3376	}
3377#endif
3378	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3379	if (error) {
3380		vput(vp);
3381		return (error);
3382	}
3383	VOP_UNLOCK(vp, 0, td);
3384	if (td->td_ucred->cr_uid != vattr.va_uid) {
3385		error = suser_cred(td->td_ucred, PRISON_ROOT);
3386		if (error)
3387			goto out;
3388	}
3389	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3390		goto out;
3391	if (vcount(vp) > 1)
3392		VOP_REVOKE(vp, REVOKEALL);
3393	vn_finished_write(mp);
3394out:
3395	vrele(vp);
3396	return (error);
3397}
3398
3399/*
3400 * Convert a user file descriptor to a kernel file entry.
3401 * The file entry is locked upon returning.
3402 */
3403int
3404getvnode(fdp, fd, fpp)
3405	struct filedesc *fdp;
3406	int fd;
3407	struct file **fpp;
3408{
3409	int error;
3410	struct file *fp;
3411
3412	fp = NULL;
3413	if (fdp == NULL)
3414		error = EBADF;
3415	else {
3416		FILEDESC_LOCK(fdp);
3417		if ((u_int)fd >= fdp->fd_nfiles ||
3418		    (fp = fdp->fd_ofiles[fd]) == NULL)
3419			error = EBADF;
3420		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3421			fp = NULL;
3422			error = EINVAL;
3423		} else {
3424			fhold(fp);
3425			error = 0;
3426		}
3427		FILEDESC_UNLOCK(fdp);
3428	}
3429	*fpp = fp;
3430	return (error);
3431}
3432/*
3433 * Get (NFS) file handle
3434 */
3435#ifndef _SYS_SYSPROTO_H_
3436struct getfh_args {
3437	char	*fname;
3438	fhandle_t *fhp;
3439};
3440#endif
3441int
3442getfh(td, uap)
3443	struct thread *td;
3444	register struct getfh_args *uap;
3445{
3446	struct nameidata nd;
3447	fhandle_t fh;
3448	register struct vnode *vp;
3449	int error;
3450
3451	/*
3452	 * Must be super user
3453	 */
3454	error = suser(td);
3455	if (error)
3456		return (error);
3457	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3458	error = namei(&nd);
3459	if (error)
3460		return (error);
3461	NDFREE(&nd, NDF_ONLY_PNBUF);
3462	vp = nd.ni_vp;
3463	bzero(&fh, sizeof(fh));
3464	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3465	error = VFS_VPTOFH(vp, &fh.fh_fid);
3466	vput(vp);
3467	if (error)
3468		return (error);
3469	error = copyout(&fh, uap->fhp, sizeof (fh));
3470	return (error);
3471}
3472
3473/*
3474 * syscall for the rpc.lockd to use to translate a NFS file handle into
3475 * an open descriptor.
3476 *
3477 * warning: do not remove the suser() call or this becomes one giant
3478 * security hole.
3479 */
3480#ifndef _SYS_SYSPROTO_H_
3481struct fhopen_args {
3482	const struct fhandle *u_fhp;
3483	int flags;
3484};
3485#endif
/*
 * Open the file identified by the file handle at uap->u_fhp with the
 * open flags uap->flags.  On success the new descriptor index is returned
 * in td->td_retval[0].
 *
 * The caller must pass suser().  O_CREAT, and opens requesting neither
 * read nor write access, are rejected with EINVAL; an unknown filesystem
 * id yields ESTALE.  Once the vnode has been obtained, most failures exit
 * through the 'bad' label, which vput()s it.
 */
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
 	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Translate the open flags into the access bits to check. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write() and locked
		 * again afterwards.  If the wait fails the vnode is
		 * unlocked, so it is released with vrele() rather than via
		 * the 'bad' label.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			return (error);
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting the size attribute to zero. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	/* Allocate the file entry; undo the write count if that fails. */
	if ((error = falloc(td, &nfp, &indx)) != 0) {
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * Hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op
	 */
	fhold(fp);
	nfp->un_data.vnode = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: exclusive for O_EXLOCK, shared otherwise. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped for the duration of the lock op. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			FILEDESC_LOCK(fdp);
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
			} else
				FILEDESC_UNLOCK(fdp);
			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			return(error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td, td->td_ucred);

	VOP_UNLOCK(vp, 0, td);
	/* Drop the extra reference taken with fhold() above. */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Common failure exit for paths that still hold the vnode locked. */
	vput(vp);
	return (error);
}
3678
3679/*
3680 * Stat an (NFS) file handle.
3681 */
3682#ifndef _SYS_SYSPROTO_H_
3683struct fhstat_args {
3684	struct fhandle *u_fhp;
3685	struct stat *sb;
3686};
3687#endif
3688int
3689fhstat(td, uap)
3690	struct thread *td;
3691	register struct fhstat_args /* {
3692		struct fhandle *u_fhp;
3693		struct stat *sb;
3694	} */ *uap;
3695{
3696	struct stat sb;
3697	fhandle_t fh;
3698	struct mount *mp;
3699	struct vnode *vp;
3700	int error;
3701
3702	/*
3703	 * Must be super user
3704	 */
3705	error = suser(td);
3706	if (error)
3707		return (error);
3708
3709	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3710	if (error)
3711		return (error);
3712
3713	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3714		return (ESTALE);
3715	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3716		return (error);
3717	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3718	vput(vp);
3719	if (error)
3720		return (error);
3721	error = copyout(&sb, uap->sb, sizeof(sb));
3722	return (error);
3723}
3724
3725/*
3726 * Implement fstatfs() for (NFS) file handles.
3727 */
3728#ifndef _SYS_SYSPROTO_H_
3729struct fhstatfs_args {
3730	struct fhandle *u_fhp;
3731	struct statfs *buf;
3732};
3733#endif
3734int
3735fhstatfs(td, uap)
3736	struct thread *td;
3737	struct fhstatfs_args /* {
3738		struct fhandle *u_fhp;
3739		struct statfs *buf;
3740	} */ *uap;
3741{
3742	struct statfs *sp;
3743	struct mount *mp;
3744	struct vnode *vp;
3745	struct statfs sb;
3746	fhandle_t fh;
3747	int error;
3748
3749	/*
3750	 * Must be super user
3751	 */
3752	error = suser(td);
3753	if (error)
3754		return (error);
3755
3756	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3757		return (error);
3758
3759	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3760		return (ESTALE);
3761	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3762		return (error);
3763	mp = vp->v_mount;
3764	sp = &mp->mnt_stat;
3765	vput(vp);
3766#ifdef MAC
3767	error = mac_check_mount_stat(td->td_ucred, mp);
3768	if (error)
3769		return (error);
3770#endif
3771	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3772		return (error);
3773	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3774	if (suser(td)) {
3775		bcopy(sp, &sb, sizeof(sb));
3776		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3777		sp = &sb;
3778	}
3779	return (copyout(sp, uap->buf, sizeof(*sp)));
3780}
3781
3782/*
3783 * Syscall to push extended attribute configuration information into the
3784 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3785 * a command (int cmd), and attribute name and misc data.  For now, the
3786 * attribute name is left in userspace for consumption by the VFS_op.
3787 * It will probably be changed to be copied into sysspace by the
3788 * syscall in the future, once issues with various consumers of the
3789 * attribute code have raised their hands.
3790 *
3791 * Currently this is used only by UFS Extended Attributes.
3792 */
3793int
3794extattrctl(td, uap)
3795	struct thread *td;
3796	struct extattrctl_args /* {
3797		const char *path;
3798		int cmd;
3799		const char *filename;
3800		int attrnamespace;
3801		const char *attrname;
3802	} */ *uap;
3803{
3804	struct vnode *filename_vp;
3805	struct nameidata nd;
3806	struct mount *mp, *mp_writable;
3807	char attrname[EXTATTR_MAXNAMELEN];
3808	int error;
3809
3810	/*
3811	 * uap->attrname is not always defined.  We check again later when we
3812	 * invoke the VFS call so as to pass in NULL there if needed.
3813	 */
3814	if (uap->attrname != NULL) {
3815		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3816		    NULL);
3817		if (error)
3818			return (error);
3819	}
3820
3821	/*
3822	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3823	 * which VFS_EXTATTRCTL() will later release.
3824	 */
3825	filename_vp = NULL;
3826	if (uap->filename != NULL) {
3827		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3828		    uap->filename, td);
3829		error = namei(&nd);
3830		if (error)
3831			return (error);
3832		filename_vp = nd.ni_vp;
3833		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3834	}
3835
3836	/* uap->path is always defined. */
3837	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3838	error = namei(&nd);
3839	if (error) {
3840		if (filename_vp != NULL)
3841			vput(filename_vp);
3842		return (error);
3843	}
3844	mp = nd.ni_vp->v_mount;
3845	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3846	NDFREE(&nd, 0);
3847	if (error) {
3848		if (filename_vp != NULL)
3849			vput(filename_vp);
3850		return (error);
3851	}
3852
3853	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3854	    uap->attrname != NULL ? attrname : NULL, td);
3855
3856	vn_finished_write(mp_writable);
3857	/*
3858	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3859	 * filename_vp, so vrele it if it is defined.
3860	 */
3861	if (filename_vp != NULL)
3862		vrele(filename_vp);
3863	return (error);
3864}
3865
3866/*-
3867 * Set a named extended attribute on a file or directory
3868 *
3869 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3870 *            kernelspace string pointer "attrname", userspace buffer
3871 *            pointer "data", buffer length "nbytes", thread "td".
3872 * Returns: 0 on success, an error number otherwise
3873 * Locks: none
3874 * References: vp must be a valid reference for the duration of the call
3875 */
3876static int
3877extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3878    void *data, size_t nbytes, struct thread *td)
3879{
3880	struct mount *mp;
3881	struct uio auio;
3882	struct iovec aiov;
3883	ssize_t cnt;
3884	int error;
3885
3886	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3887	if (error)
3888		return (error);
3889	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3890	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3891
3892	aiov.iov_base = data;
3893	aiov.iov_len = nbytes;
3894	auio.uio_iov = &aiov;
3895	auio.uio_iovcnt = 1;
3896	auio.uio_offset = 0;
3897	if (nbytes > INT_MAX) {
3898		error = EINVAL;
3899		goto done;
3900	}
3901	auio.uio_resid = nbytes;
3902	auio.uio_rw = UIO_WRITE;
3903	auio.uio_segflg = UIO_USERSPACE;
3904	auio.uio_td = td;
3905	cnt = nbytes;
3906
3907#ifdef MAC
3908	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3909	    attrname, &auio);
3910	if (error)
3911		goto done;
3912#endif
3913
3914	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3915	    td->td_ucred, td);
3916	cnt -= auio.uio_resid;
3917	td->td_retval[0] = cnt;
3918
3919done:
3920	VOP_UNLOCK(vp, 0, td);
3921	vn_finished_write(mp);
3922	return (error);
3923}
3924
3925int
3926extattr_set_fd(td, uap)
3927	struct thread *td;
3928	struct extattr_set_fd_args /* {
3929		int fd;
3930		int attrnamespace;
3931		const char *attrname;
3932		void *data;
3933		size_t nbytes;
3934	} */ *uap;
3935{
3936	struct file *fp;
3937	char attrname[EXTATTR_MAXNAMELEN];
3938	int error;
3939
3940	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3941	if (error)
3942		return (error);
3943
3944	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3945	if (error)
3946		return (error);
3947
3948	error = extattr_set_vp(fp->un_data.vnode, uap->attrnamespace,
3949	    attrname, uap->data, uap->nbytes, td);
3950	fdrop(fp, td);
3951
3952	return (error);
3953}
3954
3955int
3956extattr_set_file(td, uap)
3957	struct thread *td;
3958	struct extattr_set_file_args /* {
3959		const char *path;
3960		int attrnamespace;
3961		const char *attrname;
3962		void *data;
3963		size_t nbytes;
3964	} */ *uap;
3965{
3966	struct nameidata nd;
3967	char attrname[EXTATTR_MAXNAMELEN];
3968	int error;
3969
3970	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3971	if (error)
3972		return (error);
3973
3974	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3975	error = namei(&nd);
3976	if (error)
3977		return (error);
3978	NDFREE(&nd, NDF_ONLY_PNBUF);
3979
3980	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3981	    uap->data, uap->nbytes, td);
3982
3983	vrele(nd.ni_vp);
3984	return (error);
3985}
3986
3987int
3988extattr_set_link(td, uap)
3989	struct thread *td;
3990	struct extattr_set_link_args /* {
3991		const char *path;
3992		int attrnamespace;
3993		const char *attrname;
3994		void *data;
3995		size_t nbytes;
3996	} */ *uap;
3997{
3998	struct nameidata nd;
3999	char attrname[EXTATTR_MAXNAMELEN];
4000	int error;
4001
4002	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4003	if (error)
4004		return (error);
4005
4006	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4007	error = namei(&nd);
4008	if (error)
4009		return (error);
4010	NDFREE(&nd, NDF_ONLY_PNBUF);
4011
4012	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4013	    uap->data, uap->nbytes, td);
4014
4015	vrele(nd.ni_vp);
4016	return (error);
4017}
4018
4019/*-
4020 * Get a named extended attribute on a file or directory
4021 *
4022 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4023 *            kernelspace string pointer "attrname", userspace buffer
4024 *            pointer "data", buffer length "nbytes", thread "td".
4025 * Returns: 0 on success, an error number otherwise
4026 * Locks: none
4027 * References: vp must be a valid reference for the duration of the call
4028 */
4029static int
4030extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4031    void *data, size_t nbytes, struct thread *td)
4032{
4033	struct uio auio, *auiop;
4034	struct iovec aiov;
4035	ssize_t cnt;
4036	size_t size, *sizep;
4037	int error;
4038
4039	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4040	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4041
4042	/*
4043	 * Slightly unusual semantics: if the user provides a NULL data
4044	 * pointer, they don't want to receive the data, just the
4045	 * maximum read length.
4046	 */
4047	auiop = NULL;
4048	sizep = NULL;
4049	cnt = 0;
4050	if (data != NULL) {
4051		aiov.iov_base = data;
4052		aiov.iov_len = nbytes;
4053		auio.uio_iov = &aiov;
4054		auio.uio_offset = 0;
4055		if (nbytes > INT_MAX) {
4056			error = EINVAL;
4057			goto done;
4058		}
4059		auio.uio_resid = nbytes;
4060		auio.uio_rw = UIO_READ;
4061		auio.uio_segflg = UIO_USERSPACE;
4062		auio.uio_td = td;
4063		auiop = &auio;
4064		cnt = nbytes;
4065	} else
4066		sizep = &size;
4067
4068#ifdef MAC
4069	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4070	    attrname, &auio);
4071	if (error)
4072		goto done;
4073#endif
4074
4075	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4076	    td->td_ucred, td);
4077
4078	if (auiop != NULL) {
4079		cnt -= auio.uio_resid;
4080		td->td_retval[0] = cnt;
4081	} else
4082		td->td_retval[0] = size;
4083
4084done:
4085	VOP_UNLOCK(vp, 0, td);
4086	return (error);
4087}
4088
4089int
4090extattr_get_fd(td, uap)
4091	struct thread *td;
4092	struct extattr_get_fd_args /* {
4093		int fd;
4094		int attrnamespace;
4095		const char *attrname;
4096		void *data;
4097		size_t nbytes;
4098	} */ *uap;
4099{
4100	struct file *fp;
4101	char attrname[EXTATTR_MAXNAMELEN];
4102	int error;
4103
4104	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4105	if (error)
4106		return (error);
4107
4108	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4109	if (error)
4110		return (error);
4111
4112	error = extattr_get_vp(fp->un_data.vnode, uap->attrnamespace,
4113	    attrname, uap->data, uap->nbytes, td);
4114
4115	fdrop(fp, td);
4116	return (error);
4117}
4118
4119int
4120extattr_get_file(td, uap)
4121	struct thread *td;
4122	struct extattr_get_file_args /* {
4123		const char *path;
4124		int attrnamespace;
4125		const char *attrname;
4126		void *data;
4127		size_t nbytes;
4128	} */ *uap;
4129{
4130	struct nameidata nd;
4131	char attrname[EXTATTR_MAXNAMELEN];
4132	int error;
4133
4134	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4135	if (error)
4136		return (error);
4137
4138	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4139	error = namei(&nd);
4140	if (error)
4141		return (error);
4142	NDFREE(&nd, NDF_ONLY_PNBUF);
4143
4144	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4145	    uap->data, uap->nbytes, td);
4146
4147	vrele(nd.ni_vp);
4148	return (error);
4149}
4150
4151int
4152extattr_get_link(td, uap)
4153	struct thread *td;
4154	struct extattr_get_link_args /* {
4155		const char *path;
4156		int attrnamespace;
4157		const char *attrname;
4158		void *data;
4159		size_t nbytes;
4160	} */ *uap;
4161{
4162	struct nameidata nd;
4163	char attrname[EXTATTR_MAXNAMELEN];
4164	int error;
4165
4166	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4167	if (error)
4168		return (error);
4169
4170	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4171	error = namei(&nd);
4172	if (error)
4173		return (error);
4174	NDFREE(&nd, NDF_ONLY_PNBUF);
4175
4176	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4177	    uap->data, uap->nbytes, td);
4178
4179	vrele(nd.ni_vp);
4180	return (error);
4181}
4182
4183/*
4184 * extattr_delete_vp(): Delete a named extended attribute on a file or
4185 *                      directory
4186 *
4187 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4188 *            kernelspace string pointer "attrname", proc "p"
4189 * Returns: 0 on success, an error number otherwise
4190 * Locks: none
4191 * References: vp must be a valid reference for the duration of the call
4192 */
4193static int
4194extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4195    struct thread *td)
4196{
4197	struct mount *mp;
4198	int error;
4199
4200	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4201	if (error)
4202		return (error);
4203	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4204	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4205
4206#ifdef MAC
4207	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4208	    attrname, NULL);
4209	if (error)
4210		goto done;
4211#endif
4212
4213	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4214	    td);
4215#ifdef MAC
4216done:
4217#endif
4218	VOP_UNLOCK(vp, 0, td);
4219	vn_finished_write(mp);
4220	return (error);
4221}
4222
4223int
4224extattr_delete_fd(td, uap)
4225	struct thread *td;
4226	struct extattr_delete_fd_args /* {
4227		int fd;
4228		int attrnamespace;
4229		const char *attrname;
4230	} */ *uap;
4231{
4232	struct file *fp;
4233	struct vnode *vp;
4234	char attrname[EXTATTR_MAXNAMELEN];
4235	int error;
4236
4237	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4238	if (error)
4239		return (error);
4240
4241	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4242	if (error)
4243		return (error);
4244	vp = fp->un_data.vnode;
4245
4246	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4247	fdrop(fp, td);
4248	return (error);
4249}
4250
4251int
4252extattr_delete_file(td, uap)
4253	struct thread *td;
4254	struct extattr_delete_file_args /* {
4255		const char *path;
4256		int attrnamespace;
4257		const char *attrname;
4258	} */ *uap;
4259{
4260	struct nameidata nd;
4261	char attrname[EXTATTR_MAXNAMELEN];
4262	int error;
4263
4264	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4265	if (error)
4266		return(error);
4267
4268	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4269	error = namei(&nd);
4270	if (error)
4271		return(error);
4272	NDFREE(&nd, NDF_ONLY_PNBUF);
4273
4274	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4275	vrele(nd.ni_vp);
4276	return(error);
4277}
4278
4279int
4280extattr_delete_link(td, uap)
4281	struct thread *td;
4282	struct extattr_delete_link_args /* {
4283		const char *path;
4284		int attrnamespace;
4285		const char *attrname;
4286	} */ *uap;
4287{
4288	struct nameidata nd;
4289	char attrname[EXTATTR_MAXNAMELEN];
4290	int error;
4291
4292	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4293	if (error)
4294		return(error);
4295
4296	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4297	error = namei(&nd);
4298	if (error)
4299		return(error);
4300	NDFREE(&nd, NDF_ONLY_PNBUF);
4301
4302	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4303	vrele(nd.ni_vp);
4304	return(error);
4305}
4306