vfs_extattr.c revision 113275
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 * $FreeBSD: head/sys/kern/vfs_extattr.c 113275 2003-04-09 02:55:18Z mike $
40 */
41
42/* For 4.3 integer FS ID compatibility */
43#include "opt_compat.h"
44#include "opt_mac.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/sysent.h>
51#include <sys/mac.h>
52#include <sys/malloc.h>
53#include <sys/mount.h>
54#include <sys/mutex.h>
55#include <sys/sysproto.h>
56#include <sys/namei.h>
57#include <sys/filedesc.h>
58#include <sys/kernel.h>
59#include <sys/fcntl.h>
60#include <sys/file.h>
61#include <sys/linker.h>
62#include <sys/stat.h>
63#include <sys/sx.h>
64#include <sys/unistd.h>
65#include <sys/vnode.h>
66#include <sys/proc.h>
67#include <sys/dirent.h>
68#include <sys/extattr.h>
69#include <sys/jail.h>
70#include <sys/syscallsubr.h>
71#include <sys/sysctl.h>
72
73#include <machine/limits.h>
74#include <machine/stdarg.h>
75
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_page.h>
79#include <vm/uma.h>
80
81static int chroot_refuse_vdir_fds(struct filedesc *fdp);
82static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
83static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
84static int setfmode(struct thread *td, struct vnode *, int);
85static int setfflags(struct thread *td, struct vnode *, int);
86static int setutimes(struct thread *td, struct vnode *,
87    const struct timespec *, int, int);
88static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
89    struct thread *td);
90
91int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92int (*softdep_fsync_hook)(struct vnode *);
93
94/*
95 * The module initialization routine for POSIX asynchronous I/O will
96 * set this to the version of AIO that it implements.  (Zero means
97 * that it is not implemented.)  This value is used here by pathconf()
98 * and in kern_descrip.c by fpathconf().
99 */
100int async_io_version;
101
102/*
103 * Sync each mounted filesystem.
104 */
105#ifndef _SYS_SYSPROTO_H_
106struct sync_args {
107        int     dummy;
108};
109#endif
110
111#ifdef DEBUG
112static int syncprt = 0;
113SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
114#endif
115
116/* ARGSUSED */
117int
118sync(td, uap)
119	struct thread *td;
120	struct sync_args *uap;
121{
122	struct mount *mp, *nmp;
123	int asyncflag;
124
125	mtx_lock(&mountlist_mtx);
126	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128			nmp = TAILQ_NEXT(mp, mnt_list);
129			continue;
130		}
131		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133			asyncflag = mp->mnt_flag & MNT_ASYNC;
134			mp->mnt_flag &= ~MNT_ASYNC;
135			vfs_msync(mp, MNT_NOWAIT);
136			VFS_SYNC(mp, MNT_NOWAIT,
137			    ((td != NULL) ? td->td_ucred : NOCRED), td);
138			mp->mnt_flag |= asyncflag;
139			vn_finished_write(mp);
140		}
141		mtx_lock(&mountlist_mtx);
142		nmp = TAILQ_NEXT(mp, mnt_list);
143		vfs_unbusy(mp, td);
144	}
145	mtx_unlock(&mountlist_mtx);
146#if 0
147/*
148 * XXX don't call vfs_bufstats() yet because that routine
149 * was not imported in the Lite2 merge.
150 */
151#ifdef DIAGNOSTIC
152	if (syncprt)
153		vfs_bufstats();
154#endif /* DIAGNOSTIC */
155#endif
156	return (0);
157}
158
159/* XXX PRISON: could be per prison flag */
160static int prison_quotas;
161#if 0
162SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163#endif
164
165/*
166 * Change filesystem quotas.
167 */
168#ifndef _SYS_SYSPROTO_H_
169struct quotactl_args {
170	char *path;
171	int cmd;
172	int uid;
173	caddr_t arg;
174};
175#endif
176/* ARGSUSED */
177int
178quotactl(td, uap)
179	struct thread *td;
180	register struct quotactl_args /* {
181		char *path;
182		int cmd;
183		int uid;
184		caddr_t arg;
185	} */ *uap;
186{
187	struct mount *mp;
188	int error;
189	struct nameidata nd;
190
191	if (jailed(td->td_ucred) && !prison_quotas)
192		return (EPERM);
193	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
194	if ((error = namei(&nd)) != 0)
195		return (error);
196	NDFREE(&nd, NDF_ONLY_PNBUF);
197	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
198	vrele(nd.ni_vp);
199	if (error)
200		return (error);
201	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202	vn_finished_write(mp);
203	return (error);
204}
205
206/*
207 * Get filesystem statistics.
208 */
209#ifndef _SYS_SYSPROTO_H_
210struct statfs_args {
211	char *path;
212	struct statfs *buf;
213};
214#endif
215/* ARGSUSED */
216int
217statfs(td, uap)
218	struct thread *td;
219	register struct statfs_args /* {
220		char *path;
221		struct statfs *buf;
222	} */ *uap;
223{
224	register struct mount *mp;
225	register struct statfs *sp;
226	int error;
227	struct nameidata nd;
228	struct statfs sb;
229
230	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
231	if ((error = namei(&nd)) != 0)
232		return (error);
233	mp = nd.ni_vp->v_mount;
234	sp = &mp->mnt_stat;
235	NDFREE(&nd, NDF_ONLY_PNBUF);
236	vrele(nd.ni_vp);
237#ifdef MAC
238	error = mac_check_mount_stat(td->td_ucred, mp);
239	if (error)
240		return (error);
241#endif
242	error = VFS_STATFS(mp, sp, td);
243	if (error)
244		return (error);
245	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
246	if (suser(td)) {
247		bcopy(sp, &sb, sizeof(sb));
248		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
249		sp = &sb;
250	}
251	return (copyout(sp, uap->buf, sizeof(*sp)));
252}
253
254/*
255 * Get filesystem statistics.
256 */
257#ifndef _SYS_SYSPROTO_H_
258struct fstatfs_args {
259	int fd;
260	struct statfs *buf;
261};
262#endif
263/* ARGSUSED */
264int
265fstatfs(td, uap)
266	struct thread *td;
267	register struct fstatfs_args /* {
268		int fd;
269		struct statfs *buf;
270	} */ *uap;
271{
272	struct file *fp;
273	struct mount *mp;
274	register struct statfs *sp;
275	int error;
276	struct statfs sb;
277
278	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
279		return (error);
280	mp = ((struct vnode *)fp->f_data)->v_mount;
281	fdrop(fp, td);
282	if (mp == NULL)
283		return (EBADF);
284#ifdef MAC
285	error = mac_check_mount_stat(td->td_ucred, mp);
286	if (error)
287		return (error);
288#endif
289	sp = &mp->mnt_stat;
290	error = VFS_STATFS(mp, sp, td);
291	if (error)
292		return (error);
293	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
294	if (suser(td)) {
295		bcopy(sp, &sb, sizeof(sb));
296		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
297		sp = &sb;
298	}
299	return (copyout(sp, uap->buf, sizeof(*sp)));
300}
301
302/*
303 * Get statistics on all filesystems.
304 */
305#ifndef _SYS_SYSPROTO_H_
306struct getfsstat_args {
307	struct statfs *buf;
308	long bufsize;
309	int flags;
310};
311#endif
312int
313getfsstat(td, uap)
314	struct thread *td;
315	register struct getfsstat_args /* {
316		struct statfs *buf;
317		long bufsize;
318		int flags;
319	} */ *uap;
320{
321	register struct mount *mp, *nmp;
322	register struct statfs *sp;
323	caddr_t sfsp;
324	long count, maxcount, error;
325
326	maxcount = uap->bufsize / sizeof(struct statfs);
327	sfsp = (caddr_t)uap->buf;
328	count = 0;
329	mtx_lock(&mountlist_mtx);
330	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
331#ifdef MAC
332		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
333			nmp = TAILQ_NEXT(mp, mnt_list);
334			continue;
335		}
336#endif
337		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
338			nmp = TAILQ_NEXT(mp, mnt_list);
339			continue;
340		}
341		if (sfsp && count < maxcount) {
342			sp = &mp->mnt_stat;
343			/*
344			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
345			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
346			 * overrides MNT_WAIT.
347			 */
348			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
349			    (uap->flags & MNT_WAIT)) &&
350			    (error = VFS_STATFS(mp, sp, td))) {
351				mtx_lock(&mountlist_mtx);
352				nmp = TAILQ_NEXT(mp, mnt_list);
353				vfs_unbusy(mp, td);
354				continue;
355			}
356			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
357			error = copyout(sp, sfsp, sizeof(*sp));
358			if (error) {
359				vfs_unbusy(mp, td);
360				return (error);
361			}
362			sfsp += sizeof(*sp);
363		}
364		count++;
365		mtx_lock(&mountlist_mtx);
366		nmp = TAILQ_NEXT(mp, mnt_list);
367		vfs_unbusy(mp, td);
368	}
369	mtx_unlock(&mountlist_mtx);
370	if (sfsp && count > maxcount)
371		td->td_retval[0] = maxcount;
372	else
373		td->td_retval[0] = count;
374	return (0);
375}
376
377/*
378 * Change current working directory to a given file descriptor.
379 */
380#ifndef _SYS_SYSPROTO_H_
381struct fchdir_args {
382	int	fd;
383};
384#endif
385/* ARGSUSED */
386int
387fchdir(td, uap)
388	struct thread *td;
389	struct fchdir_args /* {
390		int fd;
391	} */ *uap;
392{
393	register struct filedesc *fdp = td->td_proc->p_fd;
394	struct vnode *vp, *tdp, *vpold;
395	struct mount *mp;
396	struct file *fp;
397	int error;
398
399	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
400		return (error);
401	vp = fp->f_data;
402	VREF(vp);
403	fdrop(fp, td);
404	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
405	if (vp->v_type != VDIR)
406		error = ENOTDIR;
407#ifdef MAC
408	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
409	}
410#endif
411	else
412		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
413	while (!error && (mp = vp->v_mountedhere) != NULL) {
414		if (vfs_busy(mp, 0, 0, td))
415			continue;
416		error = VFS_ROOT(mp, &tdp);
417		vfs_unbusy(mp, td);
418		if (error)
419			break;
420		vput(vp);
421		vp = tdp;
422	}
423	if (error) {
424		vput(vp);
425		return (error);
426	}
427	VOP_UNLOCK(vp, 0, td);
428	FILEDESC_LOCK(fdp);
429	vpold = fdp->fd_cdir;
430	fdp->fd_cdir = vp;
431	FILEDESC_UNLOCK(fdp);
432	vrele(vpold);
433	return (0);
434}
435
436/*
437 * Change current working directory (``.'').
438 */
439#ifndef _SYS_SYSPROTO_H_
440struct chdir_args {
441	char	*path;
442};
443#endif
444/* ARGSUSED */
445int
446chdir(td, uap)
447	struct thread *td;
448	struct chdir_args /* {
449		char *path;
450	} */ *uap;
451{
452
453	return (kern_chdir(td, uap->path, UIO_USERSPACE));
454}
455
456int
457kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
458{
459	register struct filedesc *fdp = td->td_proc->p_fd;
460	int error;
461	struct nameidata nd;
462	struct vnode *vp;
463
464	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
465	if ((error = namei(&nd)) != 0)
466		return (error);
467	if ((error = change_dir(nd.ni_vp, td)) != 0) {
468		vput(nd.ni_vp);
469		NDFREE(&nd, NDF_ONLY_PNBUF);
470		return (error);
471	}
472	VOP_UNLOCK(nd.ni_vp, 0, td);
473	NDFREE(&nd, NDF_ONLY_PNBUF);
474	FILEDESC_LOCK(fdp);
475	vp = fdp->fd_cdir;
476	fdp->fd_cdir = nd.ni_vp;
477	FILEDESC_UNLOCK(fdp);
478	vrele(vp);
479	return (0);
480}
481
482/*
483 * Helper function for raised chroot(2) security function:  Refuse if
484 * any filedescriptors are open directories.
485 */
486static int
487chroot_refuse_vdir_fds(fdp)
488	struct filedesc *fdp;
489{
490	struct vnode *vp;
491	struct file *fp;
492	int fd;
493
494	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
495	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
496		fp = fget_locked(fdp, fd);
497		if (fp == NULL)
498			continue;
499		if (fp->f_type == DTYPE_VNODE) {
500			vp = fp->f_data;
501			if (vp->v_type == VDIR)
502				return (EPERM);
503		}
504	}
505	return (0);
506}
507
508/*
509 * This sysctl determines if we will allow a process to chroot(2) if it
510 * has a directory open:
511 *	0: disallowed for all processes.
512 *	1: allowed for processes that were not already chroot(2)'ed.
513 *	2: allowed for all processes.
514 */
515
516static int chroot_allow_open_directories = 1;
517
518SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
519     &chroot_allow_open_directories, 0, "");
520
521/*
522 * Change notion of root (``/'') directory.
523 */
524#ifndef _SYS_SYSPROTO_H_
525struct chroot_args {
526	char	*path;
527};
528#endif
529/* ARGSUSED */
530int
531chroot(td, uap)
532	struct thread *td;
533	struct chroot_args /* {
534		char *path;
535	} */ *uap;
536{
537	int error;
538	struct nameidata nd;
539
540	error = suser_cred(td->td_ucred, PRISON_ROOT);
541	if (error)
542		return (error);
543	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
544	mtx_lock(&Giant);
545	error = namei(&nd);
546	if (error)
547		goto error;
548	if ((error = change_dir(nd.ni_vp, td)) != 0)
549		goto e_vunlock;
550#ifdef MAC
551	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
552		goto e_vunlock;
553#endif
554	VOP_UNLOCK(nd.ni_vp, 0, td);
555	error = change_root(nd.ni_vp, td);
556	vrele(nd.ni_vp);
557	NDFREE(&nd, NDF_ONLY_PNBUF);
558	mtx_unlock(&Giant);
559	return (error);
560e_vunlock:
561	vput(nd.ni_vp);
562error:
563	mtx_unlock(&Giant);
564	NDFREE(&nd, NDF_ONLY_PNBUF);
565	return (error);
566}
567
568/*
569 * Common routine for chroot and chdir.  Callers must provide a locked vnode
570 * instance.
571 */
572int
573change_dir(vp, td)
574	struct vnode *vp;
575	struct thread *td;
576{
577	int error;
578
579	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
580	if (vp->v_type != VDIR)
581		return (ENOTDIR);
582#ifdef MAC
583	error = mac_check_vnode_chdir(td->td_ucred, vp);
584	if (error)
585		return (error);
586#endif
587	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
588	return (error);
589}
590
591/*
592 * Common routine for kern_chroot() and jail_attach().  The caller is
593 * responsible for invoking suser() and mac_check_chroot() to authorize this
594 * operation.
595 */
596int
597change_root(vp, td)
598	struct vnode *vp;
599	struct thread *td;
600{
601	struct filedesc *fdp;
602	struct vnode *oldvp;
603	int error;
604
605	mtx_assert(&Giant, MA_OWNED);
606	fdp = td->td_proc->p_fd;
607	FILEDESC_LOCK(fdp);
608	if (chroot_allow_open_directories == 0 ||
609	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
610		error = chroot_refuse_vdir_fds(fdp);
611		if (error) {
612			FILEDESC_UNLOCK(fdp);
613			return (error);
614		}
615	}
616	oldvp = fdp->fd_rdir;
617	fdp->fd_rdir = vp;
618	VREF(fdp->fd_rdir);
619	if (!fdp->fd_jdir) {
620		fdp->fd_jdir = vp;
621		VREF(fdp->fd_jdir);
622	}
623	FILEDESC_UNLOCK(fdp);
624	vrele(oldvp);
625	return (0);
626}
627
628/*
629 * Check permissions, allocate an open file structure,
630 * and call the device open routine if any.
631 */
632#ifndef _SYS_SYSPROTO_H_
633struct open_args {
634	char	*path;
635	int	flags;
636	int	mode;
637};
638#endif
639int
640open(td, uap)
641	struct thread *td;
642	register struct open_args /* {
643		char *path;
644		int flags;
645		int mode;
646	} */ *uap;
647{
648
649	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
650}
651
652int
653kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
654    int mode)
655{
656	struct proc *p = td->td_proc;
657	struct filedesc *fdp = p->p_fd;
658	struct file *fp;
659	struct vnode *vp;
660	struct vattr vat;
661	struct mount *mp;
662	int cmode, oflags;
663	struct file *nfp;
664	int type, indx, error;
665	struct flock lf;
666	struct nameidata nd;
667
668	if ((flags & O_ACCMODE) == O_ACCMODE)
669		return (EINVAL);
670	oflags = flags;
671	flags = FFLAGS(flags);
672	error = falloc(td, &nfp, &indx);
673	if (error)
674		return (error);
675	fp = nfp;
676	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
677	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
678	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
679	/*
680	 * Bump the ref count to prevent another process from closing
681	 * the descriptor while we are blocked in vn_open()
682	 */
683	fhold(fp);
684	error = vn_open(&nd, &flags, cmode);
685	if (error) {
686		/*
687		 * release our own reference
688		 */
689		fdrop(fp, td);
690
691		/*
692		 * handle special fdopen() case.  bleh.  dupfdopen() is
693		 * responsible for dropping the old contents of ofiles[indx]
694		 * if it succeeds.
695		 */
696		if ((error == ENODEV || error == ENXIO) &&
697		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
698		    (error =
699			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
700			td->td_retval[0] = indx;
701			return (0);
702		}
703		/*
704		 * Clean up the descriptor, but only if another thread hadn't
705		 * replaced or closed it.
706		 */
707		FILEDESC_LOCK(fdp);
708		if (fdp->fd_ofiles[indx] == fp) {
709			fdp->fd_ofiles[indx] = NULL;
710			FILEDESC_UNLOCK(fdp);
711			fdrop(fp, td);
712		} else
713			FILEDESC_UNLOCK(fdp);
714
715		if (error == ERESTART)
716			error = EINTR;
717		return (error);
718	}
719	td->td_dupfd = 0;
720	NDFREE(&nd, NDF_ONLY_PNBUF);
721	vp = nd.ni_vp;
722
723	/*
724	 * There should be 2 references on the file, one from the descriptor
725	 * table, and one for us.
726	 *
727	 * Handle the case where someone closed the file (via its file
728	 * descriptor) while we were blocked.  The end result should look
729	 * like opening the file succeeded but it was immediately closed.
730	 */
731	FILEDESC_LOCK(fdp);
732	FILE_LOCK(fp);
733	if (fp->f_count == 1) {
734		KASSERT(fdp->fd_ofiles[indx] != fp,
735		    ("Open file descriptor lost all refs"));
736		FILEDESC_UNLOCK(fdp);
737		FILE_UNLOCK(fp);
738		VOP_UNLOCK(vp, 0, td);
739		vn_close(vp, flags & FMASK, fp->f_cred, td);
740		fdrop(fp, td);
741		td->td_retval[0] = indx;
742		return 0;
743	}
744
745	/* assert that vn_open created a backing object if one is needed */
746	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
747		("open: vmio vnode has no backing object after vn_open"));
748
749	fp->f_data = vp;
750	fp->f_flag = flags & FMASK;
751	fp->f_ops = &vnops;
752	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
753	FILEDESC_UNLOCK(fdp);
754	FILE_UNLOCK(fp);
755	VOP_UNLOCK(vp, 0, td);
756	if (flags & (O_EXLOCK | O_SHLOCK)) {
757		lf.l_whence = SEEK_SET;
758		lf.l_start = 0;
759		lf.l_len = 0;
760		if (flags & O_EXLOCK)
761			lf.l_type = F_WRLCK;
762		else
763			lf.l_type = F_RDLCK;
764		type = F_FLOCK;
765		if ((flags & FNONBLOCK) == 0)
766			type |= F_WAIT;
767		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
768			    type)) != 0)
769			goto bad;
770		fp->f_flag |= FHASLOCK;
771	}
772	if (flags & O_TRUNC) {
773		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
774			goto bad;
775		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
776		VATTR_NULL(&vat);
777		vat.va_size = 0;
778		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
779#ifdef MAC
780		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
781		if (error == 0)
782#endif
783			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
784		VOP_UNLOCK(vp, 0, td);
785		vn_finished_write(mp);
786		if (error)
787			goto bad;
788	}
789	/*
790	 * Release our private reference, leaving the one associated with
791	 * the descriptor table intact.
792	 */
793	fdrop(fp, td);
794	td->td_retval[0] = indx;
795	return (0);
796bad:
797	FILEDESC_LOCK(fdp);
798	if (fdp->fd_ofiles[indx] == fp) {
799		fdp->fd_ofiles[indx] = NULL;
800		FILEDESC_UNLOCK(fdp);
801		fdrop(fp, td);
802	} else
803		FILEDESC_UNLOCK(fdp);
804	fdrop(fp, td);
805	return (error);
806}
807
#ifdef COMPAT_43
/*
 * Old creat(2): create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Implemented as open() with O_WRONLY | O_CREAT | O_TRUNC on the same
 * path and mode.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	oa.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oa.mode = uap->mode;
	oa.path = uap->path;
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
838
839/*
840 * Create a special file.
841 */
842#ifndef _SYS_SYSPROTO_H_
843struct mknod_args {
844	char	*path;
845	int	mode;
846	int	dev;
847};
848#endif
849/* ARGSUSED */
850int
851mknod(td, uap)
852	struct thread *td;
853	register struct mknod_args /* {
854		char *path;
855		int mode;
856		int dev;
857	} */ *uap;
858{
859
860	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
861}
862
863int
864kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
865    int dev)
866{
867	struct vnode *vp;
868	struct mount *mp;
869	struct vattr vattr;
870	int error;
871	int whiteout = 0;
872	struct nameidata nd;
873
874	switch (mode & S_IFMT) {
875	case S_IFCHR:
876	case S_IFBLK:
877		error = suser(td);
878		break;
879	default:
880		error = suser_cred(td->td_ucred, PRISON_ROOT);
881		break;
882	}
883	if (error)
884		return (error);
885restart:
886	bwillwrite();
887	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
888	if ((error = namei(&nd)) != 0)
889		return (error);
890	vp = nd.ni_vp;
891	if (vp != NULL) {
892		vrele(vp);
893		error = EEXIST;
894	} else {
895		VATTR_NULL(&vattr);
896		FILEDESC_LOCK(td->td_proc->p_fd);
897		vattr.va_mode = (mode & ALLPERMS) &
898		    ~td->td_proc->p_fd->fd_cmask;
899		FILEDESC_UNLOCK(td->td_proc->p_fd);
900		vattr.va_rdev = dev;
901		whiteout = 0;
902
903		switch (mode & S_IFMT) {
904		case S_IFMT:	/* used by badsect to flag bad sectors */
905			vattr.va_type = VBAD;
906			break;
907		case S_IFCHR:
908			vattr.va_type = VCHR;
909			break;
910		case S_IFBLK:
911			vattr.va_type = VBLK;
912			break;
913		case S_IFWHT:
914			whiteout = 1;
915			break;
916		default:
917			error = EINVAL;
918			break;
919		}
920	}
921	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
922		NDFREE(&nd, NDF_ONLY_PNBUF);
923		vput(nd.ni_dvp);
924		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
925			return (error);
926		goto restart;
927	}
928#ifdef MAC
929	if (error == 0 && !whiteout)
930		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
931		    &nd.ni_cnd, &vattr);
932#endif
933	if (!error) {
934		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
935		if (whiteout)
936			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
937		else {
938			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
939						&nd.ni_cnd, &vattr);
940			if (error == 0)
941				vput(nd.ni_vp);
942		}
943	}
944	NDFREE(&nd, NDF_ONLY_PNBUF);
945	vput(nd.ni_dvp);
946	vn_finished_write(mp);
947	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
948	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
949	return (error);
950}
951
952/*
953 * Create a named pipe.
954 */
955#ifndef _SYS_SYSPROTO_H_
956struct mkfifo_args {
957	char	*path;
958	int	mode;
959};
960#endif
961/* ARGSUSED */
962int
963mkfifo(td, uap)
964	struct thread *td;
965	register struct mkfifo_args /* {
966		char *path;
967		int mode;
968	} */ *uap;
969{
970
971	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
972}
973
974int
975kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
976{
977	struct mount *mp;
978	struct vattr vattr;
979	int error;
980	struct nameidata nd;
981
982restart:
983	bwillwrite();
984	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
985	if ((error = namei(&nd)) != 0)
986		return (error);
987	if (nd.ni_vp != NULL) {
988		NDFREE(&nd, NDF_ONLY_PNBUF);
989		vrele(nd.ni_vp);
990		vput(nd.ni_dvp);
991		return (EEXIST);
992	}
993	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
994		NDFREE(&nd, NDF_ONLY_PNBUF);
995		vput(nd.ni_dvp);
996		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
997			return (error);
998		goto restart;
999	}
1000	VATTR_NULL(&vattr);
1001	vattr.va_type = VFIFO;
1002	FILEDESC_LOCK(td->td_proc->p_fd);
1003	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1004	FILEDESC_UNLOCK(td->td_proc->p_fd);
1005#ifdef MAC
1006	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1007	    &vattr);
1008	if (error)
1009		goto out;
1010#endif
1011	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1012	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1013	if (error == 0)
1014		vput(nd.ni_vp);
1015#ifdef MAC
1016out:
1017#endif
1018	NDFREE(&nd, NDF_ONLY_PNBUF);
1019	vput(nd.ni_dvp);
1020	vn_finished_write(mp);
1021	return (error);
1022}
1023
1024/*
1025 * Make a hard file link.
1026 */
1027#ifndef _SYS_SYSPROTO_H_
1028struct link_args {
1029	char	*path;
1030	char	*link;
1031};
1032#endif
1033/* ARGSUSED */
1034int
1035link(td, uap)
1036	struct thread *td;
1037	register struct link_args /* {
1038		char *path;
1039		char *link;
1040	} */ *uap;
1041{
1042
1043	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1044}
1045
1046int
1047kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1048{
1049	struct vnode *vp;
1050	struct mount *mp;
1051	struct nameidata nd;
1052	int error;
1053
1054	bwillwrite();
1055	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1056	if ((error = namei(&nd)) != 0)
1057		return (error);
1058	NDFREE(&nd, NDF_ONLY_PNBUF);
1059	vp = nd.ni_vp;
1060	if (vp->v_type == VDIR) {
1061		vrele(vp);
1062		return (EPERM);		/* POSIX */
1063	}
1064	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1065		vrele(vp);
1066		return (error);
1067	}
1068	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1069	if ((error = namei(&nd)) == 0) {
1070		if (nd.ni_vp != NULL) {
1071			vrele(nd.ni_vp);
1072			error = EEXIST;
1073		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1074		    == 0) {
1075			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1076			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1077#ifdef MAC
1078			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1079			    vp, &nd.ni_cnd);
1080			if (error == 0)
1081#endif
1082				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1083			VOP_UNLOCK(vp, 0, td);
1084		}
1085		NDFREE(&nd, NDF_ONLY_PNBUF);
1086		vput(nd.ni_dvp);
1087	}
1088	vrele(vp);
1089	vn_finished_write(mp);
1090	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1091	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1092	return (error);
1093}
1094
1095/*
1096 * Make a symbolic link.
1097 */
1098#ifndef _SYS_SYSPROTO_H_
1099struct symlink_args {
1100	char	*path;
1101	char	*link;
1102};
1103#endif
1104/* ARGSUSED */
1105int
1106symlink(td, uap)
1107	struct thread *td;
1108	register struct symlink_args /* {
1109		char *path;
1110		char *link;
1111	} */ *uap;
1112{
1113
1114	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1115}
1116
1117int
1118kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1119{
1120	struct mount *mp;
1121	struct vattr vattr;
1122	char *syspath;
1123	int error;
1124	struct nameidata nd;
1125
1126	if (segflg == UIO_SYSSPACE) {
1127		syspath = path;
1128	} else {
1129		syspath = uma_zalloc(namei_zone, M_WAITOK);
1130		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1131			goto out;
1132	}
1133restart:
1134	bwillwrite();
1135	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1136	if ((error = namei(&nd)) != 0)
1137		goto out;
1138	if (nd.ni_vp) {
1139		NDFREE(&nd, NDF_ONLY_PNBUF);
1140		vrele(nd.ni_vp);
1141		vput(nd.ni_dvp);
1142		error = EEXIST;
1143		goto out;
1144	}
1145	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1146		NDFREE(&nd, NDF_ONLY_PNBUF);
1147		vput(nd.ni_dvp);
1148		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1149			return (error);
1150		goto restart;
1151	}
1152	VATTR_NULL(&vattr);
1153	FILEDESC_LOCK(td->td_proc->p_fd);
1154	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1155	FILEDESC_UNLOCK(td->td_proc->p_fd);
1156#ifdef MAC
1157	vattr.va_type = VLNK;
1158	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1159	    &vattr);
1160	if (error)
1161		goto out2;
1162#endif
1163	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1164	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1165	if (error == 0)
1166		vput(nd.ni_vp);
1167#ifdef MAC
1168out2:
1169#endif
1170	NDFREE(&nd, NDF_ONLY_PNBUF);
1171	vput(nd.ni_dvp);
1172	vn_finished_write(mp);
1173	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1174	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1175out:
1176	if (segflg != UIO_SYSSPACE)
1177		uma_zfree(namei_zone, syspath);
1178	return (error);
1179}
1180
1181/*
1182 * Delete a whiteout from the filesystem.
1183 */
1184/* ARGSUSED */
1185int
1186undelete(td, uap)
1187	struct thread *td;
1188	register struct undelete_args /* {
1189		char *path;
1190	} */ *uap;
1191{
1192	int error;
1193	struct mount *mp;
1194	struct nameidata nd;
1195
1196restart:
1197	bwillwrite();
1198	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1199	    uap->path, td);
1200	error = namei(&nd);
1201	if (error)
1202		return (error);
1203
1204	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1205		NDFREE(&nd, NDF_ONLY_PNBUF);
1206		if (nd.ni_vp)
1207			vrele(nd.ni_vp);
1208		vput(nd.ni_dvp);
1209		return (EEXIST);
1210	}
1211	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1212		NDFREE(&nd, NDF_ONLY_PNBUF);
1213		vput(nd.ni_dvp);
1214		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1215			return (error);
1216		goto restart;
1217	}
1218	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1219	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1220	NDFREE(&nd, NDF_ONLY_PNBUF);
1221	vput(nd.ni_dvp);
1222	vn_finished_write(mp);
1223	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1224	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1225	return (error);
1226}
1227
1228/*
1229 * Delete a name from the filesystem.
1230 */
1231#ifndef _SYS_SYSPROTO_H_
1232struct unlink_args {
1233	char	*path;
1234};
1235#endif
1236/* ARGSUSED */
1237int
1238unlink(td, uap)
1239	struct thread *td;
1240	struct unlink_args /* {
1241		char *path;
1242	} */ *uap;
1243{
1244
1245	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1246}
1247
1248int
1249kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1250{
1251	struct mount *mp;
1252	struct vnode *vp;
1253	int error;
1254	struct nameidata nd;
1255
1256restart:
1257	bwillwrite();
1258	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1259	if ((error = namei(&nd)) != 0)
1260		return (error);
1261	vp = nd.ni_vp;
1262	if (vp->v_type == VDIR)
1263		error = EPERM;		/* POSIX */
1264	else {
1265		/*
1266		 * The root of a mounted filesystem cannot be deleted.
1267		 *
1268		 * XXX: can this only be a VDIR case?
1269		 */
1270		if (vp->v_vflag & VV_ROOT)
1271			error = EBUSY;
1272	}
1273	if (error == 0) {
1274		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1275			NDFREE(&nd, NDF_ONLY_PNBUF);
1276			if (vp == nd.ni_dvp)
1277				vrele(vp);
1278			else
1279				vput(vp);
1280			vput(nd.ni_dvp);
1281			if ((error = vn_start_write(NULL, &mp,
1282			    V_XSLEEP | PCATCH)) != 0)
1283				return (error);
1284			goto restart;
1285		}
1286#ifdef MAC
1287		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1288		    &nd.ni_cnd);
1289		if (error)
1290			goto out;
1291#endif
1292		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1293		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1294#ifdef MAC
1295out:
1296#endif
1297		vn_finished_write(mp);
1298	}
1299	NDFREE(&nd, NDF_ONLY_PNBUF);
1300	if (vp == nd.ni_dvp)
1301		vrele(vp);
1302	else
1303		vput(vp);
1304	vput(nd.ni_dvp);
1305	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1306	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1307	return (error);
1308}
1309
1310/*
1311 * Reposition read/write file offset.
1312 */
1313#ifndef _SYS_SYSPROTO_H_
1314struct lseek_args {
1315	int	fd;
1316	int	pad;
1317	off_t	offset;
1318	int	whence;
1319};
1320#endif
1321int
1322lseek(td, uap)
1323	struct thread *td;
1324	register struct lseek_args /* {
1325		int fd;
1326		int pad;
1327		off_t offset;
1328		int whence;
1329	} */ *uap;
1330{
1331	struct ucred *cred = td->td_ucred;
1332	struct file *fp;
1333	struct vnode *vp;
1334	struct vattr vattr;
1335	off_t offset;
1336	int error, noneg;
1337
1338	if ((error = fget(td, uap->fd, &fp)) != 0)
1339		return (error);
1340	if (fp->f_type != DTYPE_VNODE) {
1341		fdrop(fp, td);
1342		return (ESPIPE);
1343	}
1344	vp = fp->f_data;
1345	noneg = (vp->v_type != VCHR);
1346	offset = uap->offset;
1347	switch (uap->whence) {
1348	case L_INCR:
1349		if (noneg &&
1350		    (fp->f_offset < 0 ||
1351		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1352			error = EOVERFLOW;
1353			break;
1354		}
1355		offset += fp->f_offset;
1356		break;
1357	case L_XTND:
1358		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1359		error = VOP_GETATTR(vp, &vattr, cred, td);
1360		VOP_UNLOCK(vp, 0, td);
1361		if (error)
1362			break;
1363		if (noneg &&
1364		    (vattr.va_size > OFF_MAX ||
1365		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1366			error = EOVERFLOW;
1367			break;
1368		}
1369		offset += vattr.va_size;
1370		break;
1371	case L_SET:
1372		break;
1373	default:
1374		error = EINVAL;
1375	}
1376	if (error == 0 && noneg && offset < 0)
1377		error = EINVAL;
1378	if (error != 0) {
1379		fdrop(fp, td);
1380		return (error);
1381	}
1382	fp->f_offset = offset;
1383	*(off_t *)(td->td_retval) = fp->f_offset;
1384	fdrop(fp, td);
1385	return (0);
1386}
1387
1388#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1389/*
1390 * Reposition read/write file offset.
1391 */
1392#ifndef _SYS_SYSPROTO_H_
1393struct olseek_args {
1394	int	fd;
1395	long	offset;
1396	int	whence;
1397};
1398#endif
1399int
1400olseek(td, uap)
1401	struct thread *td;
1402	register struct olseek_args /* {
1403		int fd;
1404		long offset;
1405		int whence;
1406	} */ *uap;
1407{
1408	struct lseek_args /* {
1409		int fd;
1410		int pad;
1411		off_t offset;
1412		int whence;
1413	} */ nuap;
1414	int error;
1415
1416	nuap.fd = uap->fd;
1417	nuap.offset = uap->offset;
1418	nuap.whence = uap->whence;
1419	error = lseek(td, &nuap);
1420	return (error);
1421}
1422#endif /* COMPAT_43 */
1423
1424/*
1425 * Check access permissions using passed credentials.
1426 */
1427static int
1428vn_access(vp, user_flags, cred, td)
1429	struct vnode	*vp;
1430	int		user_flags;
1431	struct ucred	*cred;
1432	struct thread	*td;
1433{
1434	int error, flags;
1435
1436	/* Flags == 0 means only check for existence. */
1437	error = 0;
1438	if (user_flags) {
1439		flags = 0;
1440		if (user_flags & R_OK)
1441			flags |= VREAD;
1442		if (user_flags & W_OK)
1443			flags |= VWRITE;
1444		if (user_flags & X_OK)
1445			flags |= VEXEC;
1446#ifdef MAC
1447		error = mac_check_vnode_access(cred, vp, flags);
1448		if (error)
1449			return (error);
1450#endif
1451		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1452			error = VOP_ACCESS(vp, flags, cred, td);
1453	}
1454	return (error);
1455}
1456
1457/*
1458 * Check access permissions using "real" credentials.
1459 */
1460#ifndef _SYS_SYSPROTO_H_
1461struct access_args {
1462	char	*path;
1463	int	flags;
1464};
1465#endif
1466int
1467access(td, uap)
1468	struct thread *td;
1469	register struct access_args /* {
1470		char *path;
1471		int flags;
1472	} */ *uap;
1473{
1474
1475	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1476}
1477
1478int
1479kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1480{
1481	struct ucred *cred, *tmpcred;
1482	register struct vnode *vp;
1483	int error;
1484	struct nameidata nd;
1485
1486	/*
1487	 * Create and modify a temporary credential instead of one that
1488	 * is potentially shared.  This could also mess up socket
1489	 * buffer accounting which can run in an interrupt context.
1490	 *
1491	 * XXX - Depending on how "threads" are finally implemented, it
1492	 * may be better to explicitly pass the credential to namei()
1493	 * rather than to modify the potentially shared process structure.
1494	 */
1495	cred = td->td_ucred;
1496	tmpcred = crdup(cred);
1497	tmpcred->cr_uid = cred->cr_ruid;
1498	tmpcred->cr_groups[0] = cred->cr_rgid;
1499	td->td_ucred = tmpcred;
1500	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1501	if ((error = namei(&nd)) != 0)
1502		goto out1;
1503	vp = nd.ni_vp;
1504
1505	error = vn_access(vp, flags, tmpcred, td);
1506	NDFREE(&nd, NDF_ONLY_PNBUF);
1507	vput(vp);
1508out1:
1509	td->td_ucred = cred;
1510	crfree(tmpcred);
1511	return (error);
1512}
1513
1514/*
1515 * Check access permissions using "effective" credentials.
1516 */
1517#ifndef _SYS_SYSPROTO_H_
1518struct eaccess_args {
1519	char	*path;
1520	int	flags;
1521};
1522#endif
1523int
1524eaccess(td, uap)
1525	struct thread *td;
1526	register struct eaccess_args /* {
1527		char *path;
1528		int flags;
1529	} */ *uap;
1530{
1531	struct nameidata nd;
1532	struct vnode *vp;
1533	int error;
1534
1535	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1536	    uap->path, td);
1537	if ((error = namei(&nd)) != 0)
1538		return (error);
1539	vp = nd.ni_vp;
1540
1541	error = vn_access(vp, uap->flags, td->td_ucred, td);
1542	NDFREE(&nd, NDF_ONLY_PNBUF);
1543	vput(vp);
1544	return (error);
1545}
1546
1547#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1548/*
1549 * Get file status; this version follows links.
1550 */
1551#ifndef _SYS_SYSPROTO_H_
1552struct ostat_args {
1553	char	*path;
1554	struct ostat *ub;
1555};
1556#endif
1557/* ARGSUSED */
1558int
1559ostat(td, uap)
1560	struct thread *td;
1561	register struct ostat_args /* {
1562		char *path;
1563		struct ostat *ub;
1564	} */ *uap;
1565{
1566	struct stat sb;
1567	struct ostat osb;
1568	int error;
1569	struct nameidata nd;
1570
1571	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1572	    uap->path, td);
1573	if ((error = namei(&nd)) != 0)
1574		return (error);
1575	NDFREE(&nd, NDF_ONLY_PNBUF);
1576	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1577	vput(nd.ni_vp);
1578	if (error)
1579		return (error);
1580	cvtstat(&sb, &osb);
1581	error = copyout(&osb, uap->ub, sizeof (osb));
1582	return (error);
1583}
1584
1585/*
1586 * Get file status; this version does not follow links.
1587 */
1588#ifndef _SYS_SYSPROTO_H_
1589struct olstat_args {
1590	char	*path;
1591	struct ostat *ub;
1592};
1593#endif
1594/* ARGSUSED */
1595int
1596olstat(td, uap)
1597	struct thread *td;
1598	register struct olstat_args /* {
1599		char *path;
1600		struct ostat *ub;
1601	} */ *uap;
1602{
1603	struct vnode *vp;
1604	struct stat sb;
1605	struct ostat osb;
1606	int error;
1607	struct nameidata nd;
1608
1609	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1610	    uap->path, td);
1611	if ((error = namei(&nd)) != 0)
1612		return (error);
1613	vp = nd.ni_vp;
1614	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1615	NDFREE(&nd, NDF_ONLY_PNBUF);
1616	vput(vp);
1617	if (error)
1618		return (error);
1619	cvtstat(&sb, &osb);
1620	error = copyout(&osb, uap->ub, sizeof (osb));
1621	return (error);
1622}
1623
1624/*
1625 * Convert from an old to a new stat structure.
1626 */
1627void
1628cvtstat(st, ost)
1629	struct stat *st;
1630	struct ostat *ost;
1631{
1632
1633	ost->st_dev = st->st_dev;
1634	ost->st_ino = st->st_ino;
1635	ost->st_mode = st->st_mode;
1636	ost->st_nlink = st->st_nlink;
1637	ost->st_uid = st->st_uid;
1638	ost->st_gid = st->st_gid;
1639	ost->st_rdev = st->st_rdev;
1640	if (st->st_size < (quad_t)1 << 32)
1641		ost->st_size = st->st_size;
1642	else
1643		ost->st_size = -2;
1644	ost->st_atime = st->st_atime;
1645	ost->st_mtime = st->st_mtime;
1646	ost->st_ctime = st->st_ctime;
1647	ost->st_blksize = st->st_blksize;
1648	ost->st_blocks = st->st_blocks;
1649	ost->st_flags = st->st_flags;
1650	ost->st_gen = st->st_gen;
1651}
1652#endif /* COMPAT_43 || COMPAT_SUNOS */
1653
1654/*
1655 * Get file status; this version follows links.
1656 */
1657#ifndef _SYS_SYSPROTO_H_
1658struct stat_args {
1659	char	*path;
1660	struct stat *ub;
1661};
1662#endif
1663/* ARGSUSED */
1664int
1665stat(td, uap)
1666	struct thread *td;
1667	register struct stat_args /* {
1668		char *path;
1669		struct stat *ub;
1670	} */ *uap;
1671{
1672	struct stat sb;
1673	int error;
1674	struct nameidata nd;
1675
1676#ifdef LOOKUP_SHARED
1677	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1678	    UIO_USERSPACE, uap->path, td);
1679#else
1680	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1681	    uap->path, td);
1682#endif
1683	if ((error = namei(&nd)) != 0)
1684		return (error);
1685	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1686	NDFREE(&nd, NDF_ONLY_PNBUF);
1687	vput(nd.ni_vp);
1688	if (error)
1689		return (error);
1690	error = copyout(&sb, uap->ub, sizeof (sb));
1691	return (error);
1692}
1693
1694/*
1695 * Get file status; this version does not follow links.
1696 */
1697#ifndef _SYS_SYSPROTO_H_
1698struct lstat_args {
1699	char	*path;
1700	struct stat *ub;
1701};
1702#endif
1703/* ARGSUSED */
1704int
1705lstat(td, uap)
1706	struct thread *td;
1707	register struct lstat_args /* {
1708		char *path;
1709		struct stat *ub;
1710	} */ *uap;
1711{
1712	int error;
1713	struct vnode *vp;
1714	struct stat sb;
1715	struct nameidata nd;
1716
1717	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1718	    uap->path, td);
1719	if ((error = namei(&nd)) != 0)
1720		return (error);
1721	vp = nd.ni_vp;
1722	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1723	NDFREE(&nd, NDF_ONLY_PNBUF);
1724	vput(vp);
1725	if (error)
1726		return (error);
1727	error = copyout(&sb, uap->ub, sizeof (sb));
1728	return (error);
1729}
1730
1731/*
1732 * Implementation of the NetBSD stat() function.
1733 * XXX This should probably be collapsed with the FreeBSD version,
1734 * as the differences are only due to vn_stat() clearing spares at
1735 * the end of the structures.  vn_stat could be split to avoid this,
1736 * and thus collapse the following to close to zero code.
1737 */
1738void
1739cvtnstat(sb, nsb)
1740	struct stat *sb;
1741	struct nstat *nsb;
1742{
1743	bzero(nsb, sizeof *nsb);
1744	nsb->st_dev = sb->st_dev;
1745	nsb->st_ino = sb->st_ino;
1746	nsb->st_mode = sb->st_mode;
1747	nsb->st_nlink = sb->st_nlink;
1748	nsb->st_uid = sb->st_uid;
1749	nsb->st_gid = sb->st_gid;
1750	nsb->st_rdev = sb->st_rdev;
1751	nsb->st_atimespec = sb->st_atimespec;
1752	nsb->st_mtimespec = sb->st_mtimespec;
1753	nsb->st_ctimespec = sb->st_ctimespec;
1754	nsb->st_size = sb->st_size;
1755	nsb->st_blocks = sb->st_blocks;
1756	nsb->st_blksize = sb->st_blksize;
1757	nsb->st_flags = sb->st_flags;
1758	nsb->st_gen = sb->st_gen;
1759	nsb->st_birthtimespec = sb->st_birthtimespec;
1760}
1761
1762#ifndef _SYS_SYSPROTO_H_
1763struct nstat_args {
1764	char	*path;
1765	struct nstat *ub;
1766};
1767#endif
1768/* ARGSUSED */
1769int
1770nstat(td, uap)
1771	struct thread *td;
1772	register struct nstat_args /* {
1773		char *path;
1774		struct nstat *ub;
1775	} */ *uap;
1776{
1777	struct stat sb;
1778	struct nstat nsb;
1779	int error;
1780	struct nameidata nd;
1781
1782	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1783	    uap->path, td);
1784	if ((error = namei(&nd)) != 0)
1785		return (error);
1786	NDFREE(&nd, NDF_ONLY_PNBUF);
1787	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1788	vput(nd.ni_vp);
1789	if (error)
1790		return (error);
1791	cvtnstat(&sb, &nsb);
1792	error = copyout(&nsb, uap->ub, sizeof (nsb));
1793	return (error);
1794}
1795
1796/*
1797 * NetBSD lstat.  Get file status; this version does not follow links.
1798 */
1799#ifndef _SYS_SYSPROTO_H_
1800struct lstat_args {
1801	char	*path;
1802	struct stat *ub;
1803};
1804#endif
1805/* ARGSUSED */
1806int
1807nlstat(td, uap)
1808	struct thread *td;
1809	register struct nlstat_args /* {
1810		char *path;
1811		struct nstat *ub;
1812	} */ *uap;
1813{
1814	int error;
1815	struct vnode *vp;
1816	struct stat sb;
1817	struct nstat nsb;
1818	struct nameidata nd;
1819
1820	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1821	    uap->path, td);
1822	if ((error = namei(&nd)) != 0)
1823		return (error);
1824	vp = nd.ni_vp;
1825	NDFREE(&nd, NDF_ONLY_PNBUF);
1826	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1827	vput(vp);
1828	if (error)
1829		return (error);
1830	cvtnstat(&sb, &nsb);
1831	error = copyout(&nsb, uap->ub, sizeof (nsb));
1832	return (error);
1833}
1834
1835/*
1836 * Get configurable pathname variables.
1837 */
1838#ifndef _SYS_SYSPROTO_H_
1839struct pathconf_args {
1840	char	*path;
1841	int	name;
1842};
1843#endif
1844/* ARGSUSED */
1845int
1846pathconf(td, uap)
1847	struct thread *td;
1848	register struct pathconf_args /* {
1849		char *path;
1850		int name;
1851	} */ *uap;
1852{
1853	int error;
1854	struct nameidata nd;
1855
1856	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1857	    uap->path, td);
1858	if ((error = namei(&nd)) != 0)
1859		return (error);
1860	NDFREE(&nd, NDF_ONLY_PNBUF);
1861
1862	/* If asynchronous I/O is available, it works for all files. */
1863	if (uap->name == _PC_ASYNC_IO)
1864		td->td_retval[0] = async_io_version;
1865	else
1866		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1867	vput(nd.ni_vp);
1868	return (error);
1869}
1870
1871/*
1872 * Return target name of a symbolic link.
1873 */
1874#ifndef _SYS_SYSPROTO_H_
1875struct readlink_args {
1876	char	*path;
1877	char	*buf;
1878	int	count;
1879};
1880#endif
1881/* ARGSUSED */
1882int
1883readlink(td, uap)
1884	struct thread *td;
1885	register struct readlink_args /* {
1886		char *path;
1887		char *buf;
1888		int count;
1889	} */ *uap;
1890{
1891
1892	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1893	    UIO_USERSPACE, uap->count));
1894}
1895
1896int
1897kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1898    enum uio_seg bufseg, int count)
1899{
1900	register struct vnode *vp;
1901	struct iovec aiov;
1902	struct uio auio;
1903	int error;
1904	struct nameidata nd;
1905
1906	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1907	if ((error = namei(&nd)) != 0)
1908		return (error);
1909	NDFREE(&nd, NDF_ONLY_PNBUF);
1910	vp = nd.ni_vp;
1911#ifdef MAC
1912	error = mac_check_vnode_readlink(td->td_ucred, vp);
1913	if (error) {
1914		vput(vp);
1915		return (error);
1916	}
1917#endif
1918	if (vp->v_type != VLNK)
1919		error = EINVAL;
1920	else {
1921		aiov.iov_base = buf;
1922		aiov.iov_len = count;
1923		auio.uio_iov = &aiov;
1924		auio.uio_iovcnt = 1;
1925		auio.uio_offset = 0;
1926		auio.uio_rw = UIO_READ;
1927		auio.uio_segflg = bufseg;
1928		auio.uio_td = td;
1929		auio.uio_resid = count;
1930		error = VOP_READLINK(vp, &auio, td->td_ucred);
1931	}
1932	vput(vp);
1933	td->td_retval[0] = count - auio.uio_resid;
1934	return (error);
1935}
1936
1937/*
1938 * Common implementation code for chflags() and fchflags().
1939 */
1940static int
1941setfflags(td, vp, flags)
1942	struct thread *td;
1943	struct vnode *vp;
1944	int flags;
1945{
1946	int error;
1947	struct mount *mp;
1948	struct vattr vattr;
1949
1950	/*
1951	 * Prevent non-root users from setting flags on devices.  When
1952	 * a device is reused, users can retain ownership of the device
1953	 * if they are allowed to set flags and programs assume that
1954	 * chown can't fail when done as root.
1955	 */
1956	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1957		error = suser_cred(td->td_ucred, PRISON_ROOT);
1958		if (error)
1959			return (error);
1960	}
1961
1962	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1963		return (error);
1964	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1965	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1966	VATTR_NULL(&vattr);
1967	vattr.va_flags = flags;
1968#ifdef MAC
1969	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1970	if (error == 0)
1971#endif
1972		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1973	VOP_UNLOCK(vp, 0, td);
1974	vn_finished_write(mp);
1975	return (error);
1976}
1977
1978/*
1979 * Change flags of a file given a path name.
1980 */
1981#ifndef _SYS_SYSPROTO_H_
1982struct chflags_args {
1983	char	*path;
1984	int	flags;
1985};
1986#endif
1987/* ARGSUSED */
1988int
1989chflags(td, uap)
1990	struct thread *td;
1991	register struct chflags_args /* {
1992		char *path;
1993		int flags;
1994	} */ *uap;
1995{
1996	int error;
1997	struct nameidata nd;
1998
1999	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2000	if ((error = namei(&nd)) != 0)
2001		return (error);
2002	NDFREE(&nd, NDF_ONLY_PNBUF);
2003	error = setfflags(td, nd.ni_vp, uap->flags);
2004	vrele(nd.ni_vp);
2005	return error;
2006}
2007
2008/*
2009 * Same as chflags() but doesn't follow symlinks.
2010 */
2011int
2012lchflags(td, uap)
2013	struct thread *td;
2014	register struct lchflags_args /* {
2015		char *path;
2016		int flags;
2017	} */ *uap;
2018{
2019	int error;
2020	struct nameidata nd;
2021
2022	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2023	if ((error = namei(&nd)) != 0)
2024		return (error);
2025	NDFREE(&nd, NDF_ONLY_PNBUF);
2026	error = setfflags(td, nd.ni_vp, uap->flags);
2027	vrele(nd.ni_vp);
2028	return error;
2029}
2030
2031/*
2032 * Change flags of a file given a file descriptor.
2033 */
2034#ifndef _SYS_SYSPROTO_H_
2035struct fchflags_args {
2036	int	fd;
2037	int	flags;
2038};
2039#endif
2040/* ARGSUSED */
2041int
2042fchflags(td, uap)
2043	struct thread *td;
2044	register struct fchflags_args /* {
2045		int fd;
2046		int flags;
2047	} */ *uap;
2048{
2049	struct file *fp;
2050	int error;
2051
2052	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2053		return (error);
2054	error = setfflags(td, fp->f_data, uap->flags);
2055	fdrop(fp, td);
2056	return (error);
2057}
2058
2059/*
2060 * Common implementation code for chmod(), lchmod() and fchmod().
2061 */
2062static int
2063setfmode(td, vp, mode)
2064	struct thread *td;
2065	struct vnode *vp;
2066	int mode;
2067{
2068	int error;
2069	struct mount *mp;
2070	struct vattr vattr;
2071
2072	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2073		return (error);
2074	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2075	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2076	VATTR_NULL(&vattr);
2077	vattr.va_mode = mode & ALLPERMS;
2078#ifdef MAC
2079	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2080	if (error == 0)
2081#endif
2082		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2083	VOP_UNLOCK(vp, 0, td);
2084	vn_finished_write(mp);
2085	return error;
2086}
2087
2088/*
2089 * Change mode of a file given path name.
2090 */
2091#ifndef _SYS_SYSPROTO_H_
2092struct chmod_args {
2093	char	*path;
2094	int	mode;
2095};
2096#endif
2097/* ARGSUSED */
2098int
2099chmod(td, uap)
2100	struct thread *td;
2101	register struct chmod_args /* {
2102		char *path;
2103		int mode;
2104	} */ *uap;
2105{
2106
2107	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2108}
2109
2110int
2111kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2112{
2113	int error;
2114	struct nameidata nd;
2115
2116	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2117	if ((error = namei(&nd)) != 0)
2118		return (error);
2119	NDFREE(&nd, NDF_ONLY_PNBUF);
2120	error = setfmode(td, nd.ni_vp, mode);
2121	vrele(nd.ni_vp);
2122	return error;
2123}
2124
2125/*
2126 * Change mode of a file given path name (don't follow links.)
2127 */
2128#ifndef _SYS_SYSPROTO_H_
2129struct lchmod_args {
2130	char	*path;
2131	int	mode;
2132};
2133#endif
2134/* ARGSUSED */
2135int
2136lchmod(td, uap)
2137	struct thread *td;
2138	register struct lchmod_args /* {
2139		char *path;
2140		int mode;
2141	} */ *uap;
2142{
2143	int error;
2144	struct nameidata nd;
2145
2146	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2147	if ((error = namei(&nd)) != 0)
2148		return (error);
2149	NDFREE(&nd, NDF_ONLY_PNBUF);
2150	error = setfmode(td, nd.ni_vp, uap->mode);
2151	vrele(nd.ni_vp);
2152	return error;
2153}
2154
2155/*
2156 * Change mode of a file given a file descriptor.
2157 */
2158#ifndef _SYS_SYSPROTO_H_
2159struct fchmod_args {
2160	int	fd;
2161	int	mode;
2162};
2163#endif
2164/* ARGSUSED */
2165int
2166fchmod(td, uap)
2167	struct thread *td;
2168	register struct fchmod_args /* {
2169		int fd;
2170		int mode;
2171	} */ *uap;
2172{
2173	struct file *fp;
2174	struct vnode *vp;
2175	int error;
2176
2177	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2178		return (error);
2179	vp = fp->f_data;
2180	error = setfmode(td, fp->f_data, uap->mode);
2181	fdrop(fp, td);
2182	return (error);
2183}
2184
2185/*
2186 * Common implementation for chown(), lchown(), and fchown()
2187 */
2188static int
2189setfown(td, vp, uid, gid)
2190	struct thread *td;
2191	struct vnode *vp;
2192	uid_t uid;
2193	gid_t gid;
2194{
2195	int error;
2196	struct mount *mp;
2197	struct vattr vattr;
2198
2199	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2200		return (error);
2201	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2202	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2203	VATTR_NULL(&vattr);
2204	vattr.va_uid = uid;
2205	vattr.va_gid = gid;
2206#ifdef MAC
2207	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2208	    vattr.va_gid);
2209	if (error == 0)
2210#endif
2211		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2212	VOP_UNLOCK(vp, 0, td);
2213	vn_finished_write(mp);
2214	return error;
2215}
2216
2217/*
2218 * Set ownership given a path name.
2219 */
2220#ifndef _SYS_SYSPROTO_H_
2221struct chown_args {
2222	char	*path;
2223	int	uid;
2224	int	gid;
2225};
2226#endif
2227/* ARGSUSED */
2228int
2229chown(td, uap)
2230	struct thread *td;
2231	register struct chown_args /* {
2232		char *path;
2233		int uid;
2234		int gid;
2235	} */ *uap;
2236{
2237
2238	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2239}
2240
2241int
2242kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2243    int gid)
2244{
2245	int error;
2246	struct nameidata nd;
2247
2248	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2249	if ((error = namei(&nd)) != 0)
2250		return (error);
2251	NDFREE(&nd, NDF_ONLY_PNBUF);
2252	error = setfown(td, nd.ni_vp, uid, gid);
2253	vrele(nd.ni_vp);
2254	return (error);
2255}
2256
2257/*
2258 * Set ownership given a path name, do not cross symlinks.
2259 */
2260#ifndef _SYS_SYSPROTO_H_
2261struct lchown_args {
2262	char	*path;
2263	int	uid;
2264	int	gid;
2265};
2266#endif
2267/* ARGSUSED */
2268int
2269lchown(td, uap)
2270	struct thread *td;
2271	register struct lchown_args /* {
2272		char *path;
2273		int uid;
2274		int gid;
2275	} */ *uap;
2276{
2277
2278	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2279}
2280
2281int
2282kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2283    int gid)
2284{
2285	int error;
2286	struct nameidata nd;
2287
2288	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2289	if ((error = namei(&nd)) != 0)
2290		return (error);
2291	NDFREE(&nd, NDF_ONLY_PNBUF);
2292	error = setfown(td, nd.ni_vp, uid, gid);
2293	vrele(nd.ni_vp);
2294	return (error);
2295}
2296
2297/*
2298 * Set ownership given a file descriptor.
2299 */
2300#ifndef _SYS_SYSPROTO_H_
2301struct fchown_args {
2302	int	fd;
2303	int	uid;
2304	int	gid;
2305};
2306#endif
2307/* ARGSUSED */
2308int
2309fchown(td, uap)
2310	struct thread *td;
2311	register struct fchown_args /* {
2312		int fd;
2313		int uid;
2314		int gid;
2315	} */ *uap;
2316{
2317	struct file *fp;
2318	struct vnode *vp;
2319	int error;
2320
2321	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2322		return (error);
2323	vp = fp->f_data;
2324	error = setfown(td, fp->f_data, uap->uid, uap->gid);
2325	fdrop(fp, td);
2326	return (error);
2327}
2328
2329/*
2330 * Common implementation code for utimes(), lutimes(), and futimes().
2331 */
2332static int
2333getutimes(usrtvp, tvpseg, tsp)
2334	const struct timeval *usrtvp;
2335	enum uio_seg tvpseg;
2336	struct timespec *tsp;
2337{
2338	struct timeval tv[2];
2339	const struct timeval *tvp;
2340	int error;
2341
2342	if (usrtvp == NULL) {
2343		microtime(&tv[0]);
2344		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2345		tsp[1] = tsp[0];
2346	} else {
2347		if (tvpseg == UIO_SYSSPACE) {
2348			tvp = usrtvp;
2349		} else {
2350			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2351				return (error);
2352			tvp = tv;
2353		}
2354
2355		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2356		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2357	}
2358	return 0;
2359}
2360
2361/*
2362 * Common implementation code for utimes(), lutimes(), and futimes().
2363 */
2364static int
2365setutimes(td, vp, ts, numtimes, nullflag)
2366	struct thread *td;
2367	struct vnode *vp;
2368	const struct timespec *ts;
2369	int numtimes;
2370	int nullflag;
2371{
2372	int error, setbirthtime;
2373	struct mount *mp;
2374	struct vattr vattr;
2375
2376	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2377		return (error);
2378	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2379	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2380	setbirthtime = 0;
2381	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2382	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2383		setbirthtime = 1;
2384	VATTR_NULL(&vattr);
2385	vattr.va_atime = ts[0];
2386	vattr.va_mtime = ts[1];
2387	if (setbirthtime)
2388		vattr.va_birthtime = ts[1];
2389	if (numtimes > 2)
2390		vattr.va_birthtime = ts[2];
2391	if (nullflag)
2392		vattr.va_vaflags |= VA_UTIMES_NULL;
2393#ifdef MAC
2394	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2395	    vattr.va_mtime);
2396#endif
2397	if (error == 0)
2398		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2399	VOP_UNLOCK(vp, 0, td);
2400	vn_finished_write(mp);
2401	return error;
2402}
2403
2404/*
2405 * Set the access and modification times of a file.
2406 */
2407#ifndef _SYS_SYSPROTO_H_
2408struct utimes_args {
2409	char	*path;
2410	struct	timeval *tptr;
2411};
2412#endif
2413/* ARGSUSED */
2414int
2415utimes(td, uap)
2416	struct thread *td;
2417	register struct utimes_args /* {
2418		char *path;
2419		struct timeval *tptr;
2420	} */ *uap;
2421{
2422
2423	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2424	    UIO_USERSPACE));
2425}
2426
2427int
2428kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2429    struct timeval *tptr, enum uio_seg tptrseg)
2430{
2431	struct timespec ts[2];
2432	int error;
2433	struct nameidata nd;
2434
2435	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2436		return (error);
2437	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2438	if ((error = namei(&nd)) != 0)
2439		return (error);
2440	NDFREE(&nd, NDF_ONLY_PNBUF);
2441	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2442	vrele(nd.ni_vp);
2443	return (error);
2444}
2445
2446/*
2447 * Set the access and modification times of a file.
2448 */
2449#ifndef _SYS_SYSPROTO_H_
2450struct lutimes_args {
2451	char	*path;
2452	struct	timeval *tptr;
2453};
2454#endif
2455/* ARGSUSED */
2456int
2457lutimes(td, uap)
2458	struct thread *td;
2459	register struct lutimes_args /* {
2460		char *path;
2461		struct timeval *tptr;
2462	} */ *uap;
2463{
2464
2465	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2466	    UIO_USERSPACE));
2467}
2468
2469int
2470kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2471    struct timeval *tptr, enum uio_seg tptrseg)
2472{
2473	struct timespec ts[2];
2474	int error;
2475	struct nameidata nd;
2476
2477	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2478		return (error);
2479	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2480	if ((error = namei(&nd)) != 0)
2481		return (error);
2482	NDFREE(&nd, NDF_ONLY_PNBUF);
2483	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2484	vrele(nd.ni_vp);
2485	return (error);
2486}
2487
2488/*
2489 * Set the access and modification times of a file.
2490 */
2491#ifndef _SYS_SYSPROTO_H_
2492struct futimes_args {
2493	int	fd;
2494	struct	timeval *tptr;
2495};
2496#endif
2497/* ARGSUSED */
2498int
2499futimes(td, uap)
2500	struct thread *td;
2501	register struct futimes_args /* {
2502		int  fd;
2503		struct timeval *tptr;
2504	} */ *uap;
2505{
2506
2507	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2508}
2509
2510int
2511kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2512    enum uio_seg tptrseg)
2513{
2514	struct timespec ts[2];
2515	struct file *fp;
2516	int error;
2517
2518	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2519		return (error);
2520	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2521		return (error);
2522	error = setutimes(td, fp->f_data, ts, 2, tptr == NULL);
2523	fdrop(fp, td);
2524	return (error);
2525}
2526
2527/*
2528 * Truncate a file given its path name.
2529 */
2530#ifndef _SYS_SYSPROTO_H_
2531struct truncate_args {
2532	char	*path;
2533	int	pad;
2534	off_t	length;
2535};
2536#endif
2537/* ARGSUSED */
2538int
2539truncate(td, uap)
2540	struct thread *td;
2541	register struct truncate_args /* {
2542		char *path;
2543		int pad;
2544		off_t length;
2545	} */ *uap;
2546{
2547
2548	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2549}
2550
2551int
2552kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2553{
2554	struct mount *mp;
2555	struct vnode *vp;
2556	struct vattr vattr;
2557	int error;
2558	struct nameidata nd;
2559
2560	if (length < 0)
2561		return(EINVAL);
2562	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2563	if ((error = namei(&nd)) != 0)
2564		return (error);
2565	vp = nd.ni_vp;
2566	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2567		vrele(vp);
2568		return (error);
2569	}
2570	NDFREE(&nd, NDF_ONLY_PNBUF);
2571	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2572	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2573	if (vp->v_type == VDIR)
2574		error = EISDIR;
2575#ifdef MAC
2576	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2577	}
2578#endif
2579	else if ((error = vn_writechk(vp)) == 0 &&
2580	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2581		VATTR_NULL(&vattr);
2582		vattr.va_size = length;
2583		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2584	}
2585	vput(vp);
2586	vn_finished_write(mp);
2587	return (error);
2588}
2589
2590/*
2591 * Truncate a file given a file descriptor.
2592 */
2593#ifndef _SYS_SYSPROTO_H_
2594struct ftruncate_args {
2595	int	fd;
2596	int	pad;
2597	off_t	length;
2598};
2599#endif
2600/* ARGSUSED */
2601int
2602ftruncate(td, uap)
2603	struct thread *td;
2604	register struct ftruncate_args /* {
2605		int fd;
2606		int pad;
2607		off_t length;
2608	} */ *uap;
2609{
2610	struct mount *mp;
2611	struct vattr vattr;
2612	struct vnode *vp;
2613	struct file *fp;
2614	int error;
2615
2616	if (uap->length < 0)
2617		return(EINVAL);
2618	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2619		return (error);
2620	if ((fp->f_flag & FWRITE) == 0) {
2621		fdrop(fp, td);
2622		return (EINVAL);
2623	}
2624	vp = fp->f_data;
2625	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2626		fdrop(fp, td);
2627		return (error);
2628	}
2629	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2630	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2631	if (vp->v_type == VDIR)
2632		error = EISDIR;
2633#ifdef MAC
2634	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2635	    vp))) {
2636	}
2637#endif
2638	else if ((error = vn_writechk(vp)) == 0) {
2639		VATTR_NULL(&vattr);
2640		vattr.va_size = uap->length;
2641		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2642	}
2643	VOP_UNLOCK(vp, 0, td);
2644	vn_finished_write(mp);
2645	fdrop(fp, td);
2646	return (error);
2647}
2648
2649#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2650/*
2651 * Truncate a file given its path name.
2652 */
2653#ifndef _SYS_SYSPROTO_H_
2654struct otruncate_args {
2655	char	*path;
2656	long	length;
2657};
2658#endif
2659/* ARGSUSED */
2660int
2661otruncate(td, uap)
2662	struct thread *td;
2663	register struct otruncate_args /* {
2664		char *path;
2665		long length;
2666	} */ *uap;
2667{
2668	struct truncate_args /* {
2669		char *path;
2670		int pad;
2671		off_t length;
2672	} */ nuap;
2673
2674	nuap.path = uap->path;
2675	nuap.length = uap->length;
2676	return (truncate(td, &nuap));
2677}
2678
2679/*
2680 * Truncate a file given a file descriptor.
2681 */
2682#ifndef _SYS_SYSPROTO_H_
2683struct oftruncate_args {
2684	int	fd;
2685	long	length;
2686};
2687#endif
2688/* ARGSUSED */
2689int
2690oftruncate(td, uap)
2691	struct thread *td;
2692	register struct oftruncate_args /* {
2693		int fd;
2694		long length;
2695	} */ *uap;
2696{
2697	struct ftruncate_args /* {
2698		int fd;
2699		int pad;
2700		off_t length;
2701	} */ nuap;
2702
2703	nuap.fd = uap->fd;
2704	nuap.length = uap->length;
2705	return (ftruncate(td, &nuap));
2706}
2707#endif /* COMPAT_43 || COMPAT_SUNOS */
2708
2709/*
2710 * Sync an open file.
2711 */
2712#ifndef _SYS_SYSPROTO_H_
2713struct fsync_args {
2714	int	fd;
2715};
2716#endif
2717/* ARGSUSED */
2718int
2719fsync(td, uap)
2720	struct thread *td;
2721	struct fsync_args /* {
2722		int fd;
2723	} */ *uap;
2724{
2725	struct vnode *vp;
2726	struct mount *mp;
2727	struct file *fp;
2728	vm_object_t obj;
2729	int error;
2730
2731	GIANT_REQUIRED;
2732
2733	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2734		return (error);
2735	vp = fp->f_data;
2736	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2737		fdrop(fp, td);
2738		return (error);
2739	}
2740	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2741	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2742		vm_object_page_clean(obj, 0, 0, 0);
2743	}
2744	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2745	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2746	    && softdep_fsync_hook != NULL)
2747		error = (*softdep_fsync_hook)(vp);
2748
2749	VOP_UNLOCK(vp, 0, td);
2750	vn_finished_write(mp);
2751	fdrop(fp, td);
2752	return (error);
2753}
2754
2755/*
2756 * Rename files.  Source and destination must either both be directories,
2757 * or both not be directories.  If target is a directory, it must be empty.
2758 */
2759#ifndef _SYS_SYSPROTO_H_
2760struct rename_args {
2761	char	*from;
2762	char	*to;
2763};
2764#endif
2765/* ARGSUSED */
2766int
2767rename(td, uap)
2768	struct thread *td;
2769	register struct rename_args /* {
2770		char *from;
2771		char *to;
2772	} */ *uap;
2773{
2774
2775	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2776}
2777
2778int
2779kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2780{
2781	struct mount *mp = NULL;
2782	struct vnode *tvp, *fvp, *tdvp;
2783	struct nameidata fromnd, tond;
2784	int error;
2785
2786	bwillwrite();
2787#ifdef MAC
2788	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2789	    from, td);
2790#else
2791	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2792#endif
2793	if ((error = namei(&fromnd)) != 0)
2794		return (error);
2795#ifdef MAC
2796	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2797	    fromnd.ni_vp, &fromnd.ni_cnd);
2798	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2799	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2800#endif
2801	fvp = fromnd.ni_vp;
2802	if (error == 0)
2803		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2804	if (error != 0) {
2805		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2806		vrele(fromnd.ni_dvp);
2807		vrele(fvp);
2808		goto out1;
2809	}
2810	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2811	    NOOBJ, pathseg, to, td);
2812	if (fromnd.ni_vp->v_type == VDIR)
2813		tond.ni_cnd.cn_flags |= WILLBEDIR;
2814	if ((error = namei(&tond)) != 0) {
2815		/* Translate error code for rename("dir1", "dir2/."). */
2816		if (error == EISDIR && fvp->v_type == VDIR)
2817			error = EINVAL;
2818		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2819		vrele(fromnd.ni_dvp);
2820		vrele(fvp);
2821		goto out1;
2822	}
2823	tdvp = tond.ni_dvp;
2824	tvp = tond.ni_vp;
2825	if (tvp != NULL) {
2826		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2827			error = ENOTDIR;
2828			goto out;
2829		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2830			error = EISDIR;
2831			goto out;
2832		}
2833	}
2834	if (fvp == tdvp)
2835		error = EINVAL;
2836	/*
2837	 * If the source is the same as the destination (that is, if they
2838	 * are links to the same vnode), then there is nothing to do.
2839	 */
2840	if (fvp == tvp)
2841		error = -1;
2842#ifdef MAC
2843	else
2844		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2845		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2846#endif
2847out:
2848	if (!error) {
2849		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2850		if (fromnd.ni_dvp != tdvp) {
2851			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2852		}
2853		if (tvp) {
2854			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2855		}
2856		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2857				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2858		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2859		NDFREE(&tond, NDF_ONLY_PNBUF);
2860	} else {
2861		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2862		NDFREE(&tond, NDF_ONLY_PNBUF);
2863		if (tdvp == tvp)
2864			vrele(tdvp);
2865		else
2866			vput(tdvp);
2867		if (tvp)
2868			vput(tvp);
2869		vrele(fromnd.ni_dvp);
2870		vrele(fvp);
2871	}
2872	vrele(tond.ni_startdir);
2873	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2874	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2875	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2876	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2877out1:
2878	vn_finished_write(mp);
2879	if (fromnd.ni_startdir)
2880		vrele(fromnd.ni_startdir);
2881	if (error == -1)
2882		return (0);
2883	return (error);
2884}
2885
2886/*
2887 * Make a directory file.
2888 */
2889#ifndef _SYS_SYSPROTO_H_
2890struct mkdir_args {
2891	char	*path;
2892	int	mode;
2893};
2894#endif
2895/* ARGSUSED */
2896int
2897mkdir(td, uap)
2898	struct thread *td;
2899	register struct mkdir_args /* {
2900		char *path;
2901		int mode;
2902	} */ *uap;
2903{
2904
2905	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2906}
2907
2908int
2909kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2910{
2911	struct mount *mp;
2912	struct vnode *vp;
2913	struct vattr vattr;
2914	int error;
2915	struct nameidata nd;
2916
2917restart:
2918	bwillwrite();
2919	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2920	nd.ni_cnd.cn_flags |= WILLBEDIR;
2921	if ((error = namei(&nd)) != 0)
2922		return (error);
2923	vp = nd.ni_vp;
2924	if (vp != NULL) {
2925		NDFREE(&nd, NDF_ONLY_PNBUF);
2926		vrele(vp);
2927		/*
2928		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2929		 * the strange behaviour of leaving the vnode unlocked
2930		 * if the target is the same vnode as the parent.
2931		 */
2932		if (vp == nd.ni_dvp)
2933			vrele(nd.ni_dvp);
2934		else
2935			vput(nd.ni_dvp);
2936		return (EEXIST);
2937	}
2938	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2939		NDFREE(&nd, NDF_ONLY_PNBUF);
2940		vput(nd.ni_dvp);
2941		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2942			return (error);
2943		goto restart;
2944	}
2945	VATTR_NULL(&vattr);
2946	vattr.va_type = VDIR;
2947	FILEDESC_LOCK(td->td_proc->p_fd);
2948	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2949	FILEDESC_UNLOCK(td->td_proc->p_fd);
2950#ifdef MAC
2951	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2952	    &vattr);
2953	if (error)
2954		goto out;
2955#endif
2956	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2957	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2958#ifdef MAC
2959out:
2960#endif
2961	NDFREE(&nd, NDF_ONLY_PNBUF);
2962	vput(nd.ni_dvp);
2963	if (!error)
2964		vput(nd.ni_vp);
2965	vn_finished_write(mp);
2966	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2967	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2968	return (error);
2969}
2970
2971/*
2972 * Remove a directory file.
2973 */
2974#ifndef _SYS_SYSPROTO_H_
2975struct rmdir_args {
2976	char	*path;
2977};
2978#endif
2979/* ARGSUSED */
2980int
2981rmdir(td, uap)
2982	struct thread *td;
2983	struct rmdir_args /* {
2984		char *path;
2985	} */ *uap;
2986{
2987
2988	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2989}
2990
2991int
2992kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2993{
2994	struct mount *mp;
2995	struct vnode *vp;
2996	int error;
2997	struct nameidata nd;
2998
2999restart:
3000	bwillwrite();
3001	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3002	if ((error = namei(&nd)) != 0)
3003		return (error);
3004	vp = nd.ni_vp;
3005	if (vp->v_type != VDIR) {
3006		error = ENOTDIR;
3007		goto out;
3008	}
3009	/*
3010	 * No rmdir "." please.
3011	 */
3012	if (nd.ni_dvp == vp) {
3013		error = EINVAL;
3014		goto out;
3015	}
3016	/*
3017	 * The root of a mounted filesystem cannot be deleted.
3018	 */
3019	if (vp->v_vflag & VV_ROOT) {
3020		error = EBUSY;
3021		goto out;
3022	}
3023#ifdef MAC
3024	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3025	    &nd.ni_cnd);
3026	if (error)
3027		goto out;
3028#endif
3029	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3030		NDFREE(&nd, NDF_ONLY_PNBUF);
3031		if (nd.ni_dvp == vp)
3032			vrele(nd.ni_dvp);
3033		else
3034			vput(nd.ni_dvp);
3035		vput(vp);
3036		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3037			return (error);
3038		goto restart;
3039	}
3040	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3041	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3042	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3043	vn_finished_write(mp);
3044out:
3045	NDFREE(&nd, NDF_ONLY_PNBUF);
3046	if (nd.ni_dvp == vp)
3047		vrele(nd.ni_dvp);
3048	else
3049		vput(nd.ni_dvp);
3050	vput(vp);
3051	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3052	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3053	return (error);
3054}
3055
3056#ifdef COMPAT_43
3057/*
3058 * Read a block of directory entries in a filesystem independent format.
3059 */
3060#ifndef _SYS_SYSPROTO_H_
3061struct ogetdirentries_args {
3062	int	fd;
3063	char	*buf;
3064	u_int	count;
3065	long	*basep;
3066};
3067#endif
/*
 * Compatibility version of getdirentries().  Reads up to uap->count bytes
 * of directory entries from the directory open on uap->fd into the user
 * buffer uap->buf, rewriting the d_type/d_namlen fields of each entry on
 * the way (see the comments in the conversion loop).  The file offset
 * at which the read started is copied out to *uap->basep, and the number
 * of bytes transferred is returned in td->td_retval[0].
 */
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	/* The descriptor must have been opened for reading. */
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_data;
	/* Re-entered whenever the read is redirected to another vnode. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the user buffer that the entries end up in. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets reported in *basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines, mounts with mnt_maxsymlinklen <= 0
	 * are read straight into the user buffer without conversion.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Otherwise read into a temporary kernel buffer, convert
		 * each entry in place, then copy the result out to the
		 * user buffer.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/*
				 * Step to the next record; a zero record
				 * length would loop forever, so it is
				 * reported as EIO.
				 */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: check for union directory handling. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 asks for the read to be retried on the (new) vp. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * At the root of a MNT_UNION mount, switch the open file
		 * over to the covered vnode, restart at offset 0 and read
		 * again.
		 */
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	/* Report the starting offset and the number of bytes transferred. */
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3208#endif /* COMPAT_43 */
3209
3210/*
3211 * Read a block of directory entries in a filesystem independent format.
3212 */
3213#ifndef _SYS_SYSPROTO_H_
3214struct getdirentries_args {
3215	int	fd;
3216	char	*buf;
3217	u_int	count;
3218	long	*basep;
3219};
3220#endif
3221int
3222getdirentries(td, uap)
3223	struct thread *td;
3224	register struct getdirentries_args /* {
3225		int fd;
3226		char *buf;
3227		u_int count;
3228		long *basep;
3229	} */ *uap;
3230{
3231	struct vnode *vp;
3232	struct file *fp;
3233	struct uio auio;
3234	struct iovec aiov;
3235	long loff;
3236	int error, eofflag;
3237
3238	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3239		return (error);
3240	if ((fp->f_flag & FREAD) == 0) {
3241		fdrop(fp, td);
3242		return (EBADF);
3243	}
3244	vp = fp->f_data;
3245unionread:
3246	if (vp->v_type != VDIR) {
3247		fdrop(fp, td);
3248		return (EINVAL);
3249	}
3250	aiov.iov_base = uap->buf;
3251	aiov.iov_len = uap->count;
3252	auio.uio_iov = &aiov;
3253	auio.uio_iovcnt = 1;
3254	auio.uio_rw = UIO_READ;
3255	auio.uio_segflg = UIO_USERSPACE;
3256	auio.uio_td = td;
3257	auio.uio_resid = uap->count;
3258	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3259	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3260	loff = auio.uio_offset = fp->f_offset;
3261#ifdef MAC
3262	error = mac_check_vnode_readdir(td->td_ucred, vp);
3263	if (error == 0)
3264#endif
3265		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3266		    NULL);
3267	fp->f_offset = auio.uio_offset;
3268	VOP_UNLOCK(vp, 0, td);
3269	if (error) {
3270		fdrop(fp, td);
3271		return (error);
3272	}
3273	if (uap->count == auio.uio_resid) {
3274		if (union_dircheckp) {
3275			error = union_dircheckp(td, &vp, fp);
3276			if (error == -1)
3277				goto unionread;
3278			if (error) {
3279				fdrop(fp, td);
3280				return (error);
3281			}
3282		}
3283		/*
3284		 * XXX We could delay dropping the lock above but
3285		 * union_dircheckp complicates things.
3286		 */
3287		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3288		if ((vp->v_vflag & VV_ROOT) &&
3289		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3290			struct vnode *tvp = vp;
3291			vp = vp->v_mount->mnt_vnodecovered;
3292			VREF(vp);
3293			fp->f_data = vp;
3294			fp->f_offset = 0;
3295			vput(tvp);
3296			goto unionread;
3297		}
3298		VOP_UNLOCK(vp, 0, td);
3299	}
3300	if (uap->basep != NULL) {
3301		error = copyout(&loff, uap->basep, sizeof(long));
3302	}
3303	td->td_retval[0] = uap->count - auio.uio_resid;
3304	fdrop(fp, td);
3305	return (error);
3306}
3307#ifndef _SYS_SYSPROTO_H_
3308struct getdents_args {
3309	int fd;
3310	char *buf;
3311	size_t count;
3312};
3313#endif
3314int
3315getdents(td, uap)
3316	struct thread *td;
3317	register struct getdents_args /* {
3318		int fd;
3319		char *buf;
3320		u_int count;
3321	} */ *uap;
3322{
3323	struct getdirentries_args ap;
3324	ap.fd = uap->fd;
3325	ap.buf = uap->buf;
3326	ap.count = uap->count;
3327	ap.basep = NULL;
3328	return getdirentries(td, &ap);
3329}
3330
3331/*
3332 * Set the mode mask for creation of filesystem nodes.
3333 *
3334 * MP SAFE
3335 */
3336#ifndef _SYS_SYSPROTO_H_
3337struct umask_args {
3338	int	newmask;
3339};
3340#endif
3341int
3342umask(td, uap)
3343	struct thread *td;
3344	struct umask_args /* {
3345		int newmask;
3346	} */ *uap;
3347{
3348	register struct filedesc *fdp;
3349
3350	FILEDESC_LOCK(td->td_proc->p_fd);
3351	fdp = td->td_proc->p_fd;
3352	td->td_retval[0] = fdp->fd_cmask;
3353	fdp->fd_cmask = uap->newmask & ALLPERMS;
3354	FILEDESC_UNLOCK(td->td_proc->p_fd);
3355	return (0);
3356}
3357
3358/*
3359 * Void all references to file by ripping underlying filesystem
3360 * away from vnode.
3361 */
3362#ifndef _SYS_SYSPROTO_H_
3363struct revoke_args {
3364	char	*path;
3365};
3366#endif
3367/* ARGSUSED */
3368int
3369revoke(td, uap)
3370	struct thread *td;
3371	register struct revoke_args /* {
3372		char *path;
3373	} */ *uap;
3374{
3375	struct mount *mp;
3376	struct vnode *vp;
3377	struct vattr vattr;
3378	int error;
3379	struct nameidata nd;
3380
3381	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3382	if ((error = namei(&nd)) != 0)
3383		return (error);
3384	vp = nd.ni_vp;
3385	NDFREE(&nd, NDF_ONLY_PNBUF);
3386	if (vp->v_type != VCHR) {
3387		vput(vp);
3388		return (EINVAL);
3389	}
3390#ifdef MAC
3391	error = mac_check_vnode_revoke(td->td_ucred, vp);
3392	if (error) {
3393		vput(vp);
3394		return (error);
3395	}
3396#endif
3397	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3398	if (error) {
3399		vput(vp);
3400		return (error);
3401	}
3402	VOP_UNLOCK(vp, 0, td);
3403	if (td->td_ucred->cr_uid != vattr.va_uid) {
3404		error = suser_cred(td->td_ucred, PRISON_ROOT);
3405		if (error)
3406			goto out;
3407	}
3408	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3409		goto out;
3410	if (vcount(vp) > 1)
3411		VOP_REVOKE(vp, REVOKEALL);
3412	vn_finished_write(mp);
3413out:
3414	vrele(vp);
3415	return (error);
3416}
3417
/*
 * Convert a user file descriptor to a kernel file entry.
 * On success a reference is held on the file entry (via fhold());
 * the caller is responsible for releasing it with fdrop().
 */
3422int
3423getvnode(fdp, fd, fpp)
3424	struct filedesc *fdp;
3425	int fd;
3426	struct file **fpp;
3427{
3428	int error;
3429	struct file *fp;
3430
3431	fp = NULL;
3432	if (fdp == NULL)
3433		error = EBADF;
3434	else {
3435		FILEDESC_LOCK(fdp);
3436		if ((u_int)fd >= fdp->fd_nfiles ||
3437		    (fp = fdp->fd_ofiles[fd]) == NULL)
3438			error = EBADF;
3439		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3440			fp = NULL;
3441			error = EINVAL;
3442		} else {
3443			fhold(fp);
3444			error = 0;
3445		}
3446		FILEDESC_UNLOCK(fdp);
3447	}
3448	*fpp = fp;
3449	return (error);
3450}
3451/*
3452 * Get (NFS) file handle
3453 */
3454#ifndef _SYS_SYSPROTO_H_
3455struct getfh_args {
3456	char	*fname;
3457	fhandle_t *fhp;
3458};
3459#endif
3460int
3461getfh(td, uap)
3462	struct thread *td;
3463	register struct getfh_args *uap;
3464{
3465	struct nameidata nd;
3466	fhandle_t fh;
3467	register struct vnode *vp;
3468	int error;
3469
3470	/*
3471	 * Must be super user
3472	 */
3473	error = suser(td);
3474	if (error)
3475		return (error);
3476	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3477	error = namei(&nd);
3478	if (error)
3479		return (error);
3480	NDFREE(&nd, NDF_ONLY_PNBUF);
3481	vp = nd.ni_vp;
3482	bzero(&fh, sizeof(fh));
3483	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3484	error = VFS_VPTOFH(vp, &fh.fh_fid);
3485	vput(vp);
3486	if (error)
3487		return (error);
3488	error = copyout(&fh, uap->fhp, sizeof (fh));
3489	return (error);
3490}
3491
3492/*
3493 * syscall for the rpc.lockd to use to translate a NFS file handle into
3494 * an open descriptor.
3495 *
3496 * warning: do not remove the suser() call or this becomes one giant
3497 * security hole.
3498 */
3499#ifndef _SYS_SYSPROTO_H_
3500struct fhopen_args {
3501	const struct fhandle *u_fhp;
3502	int flags;
3503};
3504#endif
/*
 * Open the file identified by the file handle at uap->u_fhp with open
 * flags uap->flags.  On success the new descriptor index is returned in
 * td->td_retval[0].  The caller must pass suser(); O_CREAT and opens
 * that request neither read nor write access are rejected with EINVAL.
 */
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	/* Convert the open(2)-style flags into file flags. */
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	/* Symbolic links and sockets cannot be opened this way. */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Build the access mode to check from the requested open flags. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write() and
		 * relocked before the size is set to zero.  If
		 * vn_start_write() fails the vnode is unlocked, hence
		 * vrele() rather than the vput() done at "bad".
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			return (error);
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	/* Allocate the file entry; undo the write count if that fails. */
	if ((error = falloc(td, &nfp, &indx)) != 0) {
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * Hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op
	 */
	fhold(fp);
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/*
		 * The caller asked for a lock as part of the open: request
		 * a write lock for O_EXLOCK, a read lock otherwise, with
		 * start and length both zero.
		 */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* F_WAIT is added unless the open is non-blocking. */
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			FILEDESC_LOCK(fdp);
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
			} else
				FILEDESC_UNLOCK(fdp);
			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			return(error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	/* Regular files without a VM object get one created here. */
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td, td->td_ucred);

	/* Success: unlock, drop the extra file reference, return the index. */
	VOP_UNLOCK(vp, 0, td);
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Common failure exit: the vnode is locked here, so unlock and release. */
	vput(vp);
	return (error);
}
3697
3698/*
3699 * Stat an (NFS) file handle.
3700 */
3701#ifndef _SYS_SYSPROTO_H_
3702struct fhstat_args {
3703	struct fhandle *u_fhp;
3704	struct stat *sb;
3705};
3706#endif
3707int
3708fhstat(td, uap)
3709	struct thread *td;
3710	register struct fhstat_args /* {
3711		struct fhandle *u_fhp;
3712		struct stat *sb;
3713	} */ *uap;
3714{
3715	struct stat sb;
3716	fhandle_t fh;
3717	struct mount *mp;
3718	struct vnode *vp;
3719	int error;
3720
3721	/*
3722	 * Must be super user
3723	 */
3724	error = suser(td);
3725	if (error)
3726		return (error);
3727
3728	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3729	if (error)
3730		return (error);
3731
3732	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3733		return (ESTALE);
3734	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3735		return (error);
3736	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3737	vput(vp);
3738	if (error)
3739		return (error);
3740	error = copyout(&sb, uap->sb, sizeof(sb));
3741	return (error);
3742}
3743
3744/*
3745 * Implement fstatfs() for (NFS) file handles.
3746 */
3747#ifndef _SYS_SYSPROTO_H_
3748struct fhstatfs_args {
3749	struct fhandle *u_fhp;
3750	struct statfs *buf;
3751};
3752#endif
3753int
3754fhstatfs(td, uap)
3755	struct thread *td;
3756	struct fhstatfs_args /* {
3757		struct fhandle *u_fhp;
3758		struct statfs *buf;
3759	} */ *uap;
3760{
3761	struct statfs *sp;
3762	struct mount *mp;
3763	struct vnode *vp;
3764	struct statfs sb;
3765	fhandle_t fh;
3766	int error;
3767
3768	/*
3769	 * Must be super user
3770	 */
3771	error = suser(td);
3772	if (error)
3773		return (error);
3774
3775	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3776		return (error);
3777
3778	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3779		return (ESTALE);
3780	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3781		return (error);
3782	mp = vp->v_mount;
3783	sp = &mp->mnt_stat;
3784	vput(vp);
3785#ifdef MAC
3786	error = mac_check_mount_stat(td->td_ucred, mp);
3787	if (error)
3788		return (error);
3789#endif
3790	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3791		return (error);
3792	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3793	if (suser(td)) {
3794		bcopy(sp, &sb, sizeof(sb));
3795		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3796		sp = &sb;
3797	}
3798	return (copyout(sp, uap->buf, sizeof(*sp)));
3799}
3800
3801/*
3802 * Syscall to push extended attribute configuration information into the
3803 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3804 * a command (int cmd), and attribute name and misc data.  For now, the
3805 * attribute name is left in userspace for consumption by the VFS_op.
3806 * It will probably be changed to be copied into sysspace by the
3807 * syscall in the future, once issues with various consumers of the
3808 * attribute code have raised their hands.
3809 *
3810 * Currently this is used only by UFS Extended Attributes.
3811 */
3812int
3813extattrctl(td, uap)
3814	struct thread *td;
3815	struct extattrctl_args /* {
3816		const char *path;
3817		int cmd;
3818		const char *filename;
3819		int attrnamespace;
3820		const char *attrname;
3821	} */ *uap;
3822{
3823	struct vnode *filename_vp;
3824	struct nameidata nd;
3825	struct mount *mp, *mp_writable;
3826	char attrname[EXTATTR_MAXNAMELEN];
3827	int error;
3828
3829	/*
3830	 * uap->attrname is not always defined.  We check again later when we
3831	 * invoke the VFS call so as to pass in NULL there if needed.
3832	 */
3833	if (uap->attrname != NULL) {
3834		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3835		    NULL);
3836		if (error)
3837			return (error);
3838	}
3839
3840	/*
3841	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3842	 * which VFS_EXTATTRCTL() will later release.
3843	 */
3844	filename_vp = NULL;
3845	if (uap->filename != NULL) {
3846		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3847		    uap->filename, td);
3848		error = namei(&nd);
3849		if (error)
3850			return (error);
3851		filename_vp = nd.ni_vp;
3852		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3853	}
3854
3855	/* uap->path is always defined. */
3856	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3857	error = namei(&nd);
3858	if (error) {
3859		if (filename_vp != NULL)
3860			vput(filename_vp);
3861		return (error);
3862	}
3863	mp = nd.ni_vp->v_mount;
3864	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3865	NDFREE(&nd, 0);
3866	if (error) {
3867		if (filename_vp != NULL)
3868			vput(filename_vp);
3869		return (error);
3870	}
3871
3872	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3873	    uap->attrname != NULL ? attrname : NULL, td);
3874
3875	vn_finished_write(mp_writable);
3876	/*
3877	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3878	 * filename_vp, so vrele it if it is defined.
3879	 */
3880	if (filename_vp != NULL)
3881		vrele(filename_vp);
3882	return (error);
3883}
3884
3885/*-
3886 * Set a named extended attribute on a file or directory
3887 *
3888 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3889 *            kernelspace string pointer "attrname", userspace buffer
3890 *            pointer "data", buffer length "nbytes", thread "td".
3891 * Returns: 0 on success, an error number otherwise
3892 * Locks: none
3893 * References: vp must be a valid reference for the duration of the call
3894 */
3895static int
3896extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3897    void *data, size_t nbytes, struct thread *td)
3898{
3899	struct mount *mp;
3900	struct uio auio;
3901	struct iovec aiov;
3902	ssize_t cnt;
3903	int error;
3904
3905	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3906	if (error)
3907		return (error);
3908	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3909	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3910
3911	aiov.iov_base = data;
3912	aiov.iov_len = nbytes;
3913	auio.uio_iov = &aiov;
3914	auio.uio_iovcnt = 1;
3915	auio.uio_offset = 0;
3916	if (nbytes > INT_MAX) {
3917		error = EINVAL;
3918		goto done;
3919	}
3920	auio.uio_resid = nbytes;
3921	auio.uio_rw = UIO_WRITE;
3922	auio.uio_segflg = UIO_USERSPACE;
3923	auio.uio_td = td;
3924	cnt = nbytes;
3925
3926#ifdef MAC
3927	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3928	    attrname, &auio);
3929	if (error)
3930		goto done;
3931#endif
3932
3933	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3934	    td->td_ucred, td);
3935	cnt -= auio.uio_resid;
3936	td->td_retval[0] = cnt;
3937
3938done:
3939	VOP_UNLOCK(vp, 0, td);
3940	vn_finished_write(mp);
3941	return (error);
3942}
3943
3944int
3945extattr_set_fd(td, uap)
3946	struct thread *td;
3947	struct extattr_set_fd_args /* {
3948		int fd;
3949		int attrnamespace;
3950		const char *attrname;
3951		void *data;
3952		size_t nbytes;
3953	} */ *uap;
3954{
3955	struct file *fp;
3956	char attrname[EXTATTR_MAXNAMELEN];
3957	int error;
3958
3959	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3960	if (error)
3961		return (error);
3962
3963	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3964	if (error)
3965		return (error);
3966
3967	error = extattr_set_vp(fp->f_data, uap->attrnamespace,
3968	    attrname, uap->data, uap->nbytes, td);
3969	fdrop(fp, td);
3970
3971	return (error);
3972}
3973
3974int
3975extattr_set_file(td, uap)
3976	struct thread *td;
3977	struct extattr_set_file_args /* {
3978		const char *path;
3979		int attrnamespace;
3980		const char *attrname;
3981		void *data;
3982		size_t nbytes;
3983	} */ *uap;
3984{
3985	struct nameidata nd;
3986	char attrname[EXTATTR_MAXNAMELEN];
3987	int error;
3988
3989	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3990	if (error)
3991		return (error);
3992
3993	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3994	error = namei(&nd);
3995	if (error)
3996		return (error);
3997	NDFREE(&nd, NDF_ONLY_PNBUF);
3998
3999	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4000	    uap->data, uap->nbytes, td);
4001
4002	vrele(nd.ni_vp);
4003	return (error);
4004}
4005
4006int
4007extattr_set_link(td, uap)
4008	struct thread *td;
4009	struct extattr_set_link_args /* {
4010		const char *path;
4011		int attrnamespace;
4012		const char *attrname;
4013		void *data;
4014		size_t nbytes;
4015	} */ *uap;
4016{
4017	struct nameidata nd;
4018	char attrname[EXTATTR_MAXNAMELEN];
4019	int error;
4020
4021	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4022	if (error)
4023		return (error);
4024
4025	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4026	error = namei(&nd);
4027	if (error)
4028		return (error);
4029	NDFREE(&nd, NDF_ONLY_PNBUF);
4030
4031	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4032	    uap->data, uap->nbytes, td);
4033
4034	vrele(nd.ni_vp);
4035	return (error);
4036}
4037
4038/*-
4039 * Get a named extended attribute on a file or directory
4040 *
4041 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4042 *            kernelspace string pointer "attrname", userspace buffer
4043 *            pointer "data", buffer length "nbytes", thread "td".
4044 * Returns: 0 on success, an error number otherwise
4045 * Locks: none
4046 * References: vp must be a valid reference for the duration of the call
4047 */
4048static int
4049extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4050    void *data, size_t nbytes, struct thread *td)
4051{
4052	struct uio auio, *auiop;
4053	struct iovec aiov;
4054	ssize_t cnt;
4055	size_t size, *sizep;
4056	int error;
4057
4058	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4059	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4060
4061	/*
4062	 * Slightly unusual semantics: if the user provides a NULL data
4063	 * pointer, they don't want to receive the data, just the
4064	 * maximum read length.
4065	 */
4066	auiop = NULL;
4067	sizep = NULL;
4068	cnt = 0;
4069	if (data != NULL) {
4070		aiov.iov_base = data;
4071		aiov.iov_len = nbytes;
4072		auio.uio_iov = &aiov;
4073		auio.uio_offset = 0;
4074		if (nbytes > INT_MAX) {
4075			error = EINVAL;
4076			goto done;
4077		}
4078		auio.uio_resid = nbytes;
4079		auio.uio_rw = UIO_READ;
4080		auio.uio_segflg = UIO_USERSPACE;
4081		auio.uio_td = td;
4082		auiop = &auio;
4083		cnt = nbytes;
4084	} else
4085		sizep = &size;
4086
4087#ifdef MAC
4088	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4089	    attrname, &auio);
4090	if (error)
4091		goto done;
4092#endif
4093
4094	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4095	    td->td_ucred, td);
4096
4097	if (auiop != NULL) {
4098		cnt -= auio.uio_resid;
4099		td->td_retval[0] = cnt;
4100	} else
4101		td->td_retval[0] = size;
4102
4103done:
4104	VOP_UNLOCK(vp, 0, td);
4105	return (error);
4106}
4107
4108int
4109extattr_get_fd(td, uap)
4110	struct thread *td;
4111	struct extattr_get_fd_args /* {
4112		int fd;
4113		int attrnamespace;
4114		const char *attrname;
4115		void *data;
4116		size_t nbytes;
4117	} */ *uap;
4118{
4119	struct file *fp;
4120	char attrname[EXTATTR_MAXNAMELEN];
4121	int error;
4122
4123	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4124	if (error)
4125		return (error);
4126
4127	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4128	if (error)
4129		return (error);
4130
4131	error = extattr_get_vp(fp->f_data, uap->attrnamespace,
4132	    attrname, uap->data, uap->nbytes, td);
4133
4134	fdrop(fp, td);
4135	return (error);
4136}
4137
4138int
4139extattr_get_file(td, uap)
4140	struct thread *td;
4141	struct extattr_get_file_args /* {
4142		const char *path;
4143		int attrnamespace;
4144		const char *attrname;
4145		void *data;
4146		size_t nbytes;
4147	} */ *uap;
4148{
4149	struct nameidata nd;
4150	char attrname[EXTATTR_MAXNAMELEN];
4151	int error;
4152
4153	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4154	if (error)
4155		return (error);
4156
4157	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4158	error = namei(&nd);
4159	if (error)
4160		return (error);
4161	NDFREE(&nd, NDF_ONLY_PNBUF);
4162
4163	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4164	    uap->data, uap->nbytes, td);
4165
4166	vrele(nd.ni_vp);
4167	return (error);
4168}
4169
4170int
4171extattr_get_link(td, uap)
4172	struct thread *td;
4173	struct extattr_get_link_args /* {
4174		const char *path;
4175		int attrnamespace;
4176		const char *attrname;
4177		void *data;
4178		size_t nbytes;
4179	} */ *uap;
4180{
4181	struct nameidata nd;
4182	char attrname[EXTATTR_MAXNAMELEN];
4183	int error;
4184
4185	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4186	if (error)
4187		return (error);
4188
4189	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4190	error = namei(&nd);
4191	if (error)
4192		return (error);
4193	NDFREE(&nd, NDF_ONLY_PNBUF);
4194
4195	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4196	    uap->data, uap->nbytes, td);
4197
4198	vrele(nd.ni_vp);
4199	return (error);
4200}
4201
4202/*
4203 * extattr_delete_vp(): Delete a named extended attribute on a file or
4204 *                      directory
4205 *
4206 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4207 *            kernelspace string pointer "attrname", proc "p"
4208 * Returns: 0 on success, an error number otherwise
4209 * Locks: none
4210 * References: vp must be a valid reference for the duration of the call
4211 */
4212static int
4213extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4214    struct thread *td)
4215{
4216	struct mount *mp;
4217	int error;
4218
4219	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4220	if (error)
4221		return (error);
4222	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4223	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4224
4225#ifdef MAC
4226	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4227	    attrname, NULL);
4228	if (error)
4229		goto done;
4230#endif
4231
4232	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4233	    td);
4234#ifdef MAC
4235done:
4236#endif
4237	VOP_UNLOCK(vp, 0, td);
4238	vn_finished_write(mp);
4239	return (error);
4240}
4241
4242int
4243extattr_delete_fd(td, uap)
4244	struct thread *td;
4245	struct extattr_delete_fd_args /* {
4246		int fd;
4247		int attrnamespace;
4248		const char *attrname;
4249	} */ *uap;
4250{
4251	struct file *fp;
4252	struct vnode *vp;
4253	char attrname[EXTATTR_MAXNAMELEN];
4254	int error;
4255
4256	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4257	if (error)
4258		return (error);
4259
4260	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4261	if (error)
4262		return (error);
4263	vp = fp->f_data;
4264
4265	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4266	fdrop(fp, td);
4267	return (error);
4268}
4269
4270int
4271extattr_delete_file(td, uap)
4272	struct thread *td;
4273	struct extattr_delete_file_args /* {
4274		const char *path;
4275		int attrnamespace;
4276		const char *attrname;
4277	} */ *uap;
4278{
4279	struct nameidata nd;
4280	char attrname[EXTATTR_MAXNAMELEN];
4281	int error;
4282
4283	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4284	if (error)
4285		return(error);
4286
4287	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4288	error = namei(&nd);
4289	if (error)
4290		return(error);
4291	NDFREE(&nd, NDF_ONLY_PNBUF);
4292
4293	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4294	vrele(nd.ni_vp);
4295	return(error);
4296}
4297
4298int
4299extattr_delete_link(td, uap)
4300	struct thread *td;
4301	struct extattr_delete_link_args /* {
4302		const char *path;
4303		int attrnamespace;
4304		const char *attrname;
4305	} */ *uap;
4306{
4307	struct nameidata nd;
4308	char attrname[EXTATTR_MAXNAMELEN];
4309	int error;
4310
4311	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4312	if (error)
4313		return(error);
4314
4315	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4316	error = namei(&nd);
4317	if (error)
4318		return(error);
4319	NDFREE(&nd, NDF_ONLY_PNBUF);
4320
4321	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4322	vrele(nd.ni_vp);
4323	return(error);
4324}
4325