vfs_syscalls.c revision 117222
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 117222 2003-07-04 12:20:27Z phk $");
43
44/* For 4.3 integer FS ID compatibility */
45#include "opt_compat.h"
46#include "opt_mac.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/sysent.h>
53#include <sys/mac.h>
54#include <sys/malloc.h>
55#include <sys/mount.h>
56#include <sys/mutex.h>
57#include <sys/sysproto.h>
58#include <sys/namei.h>
59#include <sys/filedesc.h>
60#include <sys/kernel.h>
61#include <sys/fcntl.h>
62#include <sys/file.h>
63#include <sys/limits.h>
64#include <sys/linker.h>
65#include <sys/stat.h>
66#include <sys/sx.h>
67#include <sys/unistd.h>
68#include <sys/vnode.h>
69#include <sys/proc.h>
70#include <sys/dirent.h>
71#include <sys/extattr.h>
72#include <sys/jail.h>
73#include <sys/syscallsubr.h>
74#include <sys/sysctl.h>
75
76#include <machine/stdarg.h>
77
78#include <vm/vm.h>
79#include <vm/vm_object.h>
80#include <vm/vm_page.h>
81#include <vm/uma.h>
82
/*
 * Prototypes for helper routines that are private to this file.
 */
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
    size_t nbytes, struct thread *td);

/*
 * Globally visible function-pointer hooks.  Nothing in this part of the
 * file assigns them; presumably the union filesystem and soft updates
 * code install them when present -- TODO confirm against those subsystems.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
int (*softdep_fsync_hook)(struct vnode *);

/*
 * The module initialization routine for POSIX asynchronous I/O will
 * set this to the version of AIO that it implements.  (Zero means
 * that it is not implemented.)  This value is used here by pathconf()
 * and in kern_descrip.c by fpathconf().
 */
int async_io_version;
106
107/*
108 * Sync each mounted filesystem.
109 */
110#ifndef _SYS_SYSPROTO_H_
111struct sync_args {
112        int     dummy;
113};
114#endif
115
116#ifdef DEBUG
117static int syncprt = 0;
118SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
119#endif
120
121/* ARGSUSED */
122int
123sync(td, uap)
124	struct thread *td;
125	struct sync_args *uap;
126{
127	struct mount *mp, *nmp;
128	int asyncflag;
129
130	mtx_lock(&mountlist_mtx);
131	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
132		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
133			nmp = TAILQ_NEXT(mp, mnt_list);
134			continue;
135		}
136		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
137		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
138			asyncflag = mp->mnt_flag & MNT_ASYNC;
139			mp->mnt_flag &= ~MNT_ASYNC;
140			vfs_msync(mp, MNT_NOWAIT);
141			VFS_SYNC(mp, MNT_NOWAIT,
142			    ((td != NULL) ? td->td_ucred : NOCRED), td);
143			mp->mnt_flag |= asyncflag;
144			vn_finished_write(mp);
145		}
146		mtx_lock(&mountlist_mtx);
147		nmp = TAILQ_NEXT(mp, mnt_list);
148		vfs_unbusy(mp, td);
149	}
150	mtx_unlock(&mountlist_mtx);
151#if 0
152/*
153 * XXX don't call vfs_bufstats() yet because that routine
154 * was not imported in the Lite2 merge.
155 */
156#ifdef DIAGNOSTIC
157	if (syncprt)
158		vfs_bufstats();
159#endif /* DIAGNOSTIC */
160#endif
161	return (0);
162}
163
164/* XXX PRISON: could be per prison flag */
165static int prison_quotas;
166#if 0
167SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
168#endif
169
170/*
171 * Change filesystem quotas.
172 */
173#ifndef _SYS_SYSPROTO_H_
174struct quotactl_args {
175	char *path;
176	int cmd;
177	int uid;
178	caddr_t arg;
179};
180#endif
181/* ARGSUSED */
182int
183quotactl(td, uap)
184	struct thread *td;
185	register struct quotactl_args /* {
186		char *path;
187		int cmd;
188		int uid;
189		caddr_t arg;
190	} */ *uap;
191{
192	struct mount *mp;
193	int error;
194	struct nameidata nd;
195
196	if (jailed(td->td_ucred) && !prison_quotas)
197		return (EPERM);
198	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
199	if ((error = namei(&nd)) != 0)
200		return (error);
201	NDFREE(&nd, NDF_ONLY_PNBUF);
202	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
203	vrele(nd.ni_vp);
204	if (error)
205		return (error);
206	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
207	vn_finished_write(mp);
208	return (error);
209}
210
211/*
212 * Get filesystem statistics.
213 */
214#ifndef _SYS_SYSPROTO_H_
215struct statfs_args {
216	char *path;
217	struct statfs *buf;
218};
219#endif
220/* ARGSUSED */
221int
222statfs(td, uap)
223	struct thread *td;
224	register struct statfs_args /* {
225		char *path;
226		struct statfs *buf;
227	} */ *uap;
228{
229	register struct mount *mp;
230	register struct statfs *sp;
231	int error;
232	struct nameidata nd;
233	struct statfs sb;
234
235	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
236	if ((error = namei(&nd)) != 0)
237		return (error);
238	mp = nd.ni_vp->v_mount;
239	sp = &mp->mnt_stat;
240	NDFREE(&nd, NDF_ONLY_PNBUF);
241	vrele(nd.ni_vp);
242#ifdef MAC
243	error = mac_check_mount_stat(td->td_ucred, mp);
244	if (error)
245		return (error);
246#endif
247	error = VFS_STATFS(mp, sp, td);
248	if (error)
249		return (error);
250	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
251	if (suser(td)) {
252		bcopy(sp, &sb, sizeof(sb));
253		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
254		sp = &sb;
255	}
256	return (copyout(sp, uap->buf, sizeof(*sp)));
257}
258
259/*
260 * Get filesystem statistics.
261 */
262#ifndef _SYS_SYSPROTO_H_
263struct fstatfs_args {
264	int fd;
265	struct statfs *buf;
266};
267#endif
268/* ARGSUSED */
269int
270fstatfs(td, uap)
271	struct thread *td;
272	register struct fstatfs_args /* {
273		int fd;
274		struct statfs *buf;
275	} */ *uap;
276{
277	struct file *fp;
278	struct mount *mp;
279	register struct statfs *sp;
280	int error;
281	struct statfs sb;
282
283	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
284		return (error);
285	mp = fp->f_vnode->v_mount;
286	fdrop(fp, td);
287	if (mp == NULL)
288		return (EBADF);
289#ifdef MAC
290	error = mac_check_mount_stat(td->td_ucred, mp);
291	if (error)
292		return (error);
293#endif
294	sp = &mp->mnt_stat;
295	error = VFS_STATFS(mp, sp, td);
296	if (error)
297		return (error);
298	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
299	if (suser(td)) {
300		bcopy(sp, &sb, sizeof(sb));
301		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
302		sp = &sb;
303	}
304	return (copyout(sp, uap->buf, sizeof(*sp)));
305}
306
307/*
308 * Get statistics on all filesystems.
309 */
310#ifndef _SYS_SYSPROTO_H_
311struct getfsstat_args {
312	struct statfs *buf;
313	long bufsize;
314	int flags;
315};
316#endif
317int
318getfsstat(td, uap)
319	struct thread *td;
320	register struct getfsstat_args /* {
321		struct statfs *buf;
322		long bufsize;
323		int flags;
324	} */ *uap;
325{
326	register struct mount *mp, *nmp;
327	register struct statfs *sp;
328	caddr_t sfsp;
329	long count, maxcount, error;
330
331	maxcount = uap->bufsize / sizeof(struct statfs);
332	sfsp = (caddr_t)uap->buf;
333	count = 0;
334	mtx_lock(&mountlist_mtx);
335	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
336#ifdef MAC
337		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
338			nmp = TAILQ_NEXT(mp, mnt_list);
339			continue;
340		}
341#endif
342		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
343			nmp = TAILQ_NEXT(mp, mnt_list);
344			continue;
345		}
346		if (sfsp && count < maxcount) {
347			sp = &mp->mnt_stat;
348			/*
349			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
350			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
351			 * overrides MNT_WAIT.
352			 */
353			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
354			    (uap->flags & MNT_WAIT)) &&
355			    (error = VFS_STATFS(mp, sp, td))) {
356				mtx_lock(&mountlist_mtx);
357				nmp = TAILQ_NEXT(mp, mnt_list);
358				vfs_unbusy(mp, td);
359				continue;
360			}
361			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
362			error = copyout(sp, sfsp, sizeof(*sp));
363			if (error) {
364				vfs_unbusy(mp, td);
365				return (error);
366			}
367			sfsp += sizeof(*sp);
368		}
369		count++;
370		mtx_lock(&mountlist_mtx);
371		nmp = TAILQ_NEXT(mp, mnt_list);
372		vfs_unbusy(mp, td);
373	}
374	mtx_unlock(&mountlist_mtx);
375	if (sfsp && count > maxcount)
376		td->td_retval[0] = maxcount;
377	else
378		td->td_retval[0] = count;
379	return (0);
380}
381
/*
 * Change current working directory to a given file descriptor.
 *
 * The descriptor must pass getvnode(), and its vnode must be a directory
 * on which the caller passes VOP_ACCESS(VEXEC) (and the MAC chdir check
 * when MAC is configured).  While something is mounted on the directory,
 * the vnode returned by VFS_ROOT() for that mount is used instead.
 * Returns 0 or an errno value.
 */
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
	int	fd;
};
#endif
/* ARGSUSED */
int
fchdir(td, uap)
	struct thread *td;
	struct fchdir_args /* {
		int fd;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct vnode *vp, *tdp, *vpold;
	struct mount *mp;
	struct file *fp;
	int error;

	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	vp = fp->f_vnode;
	/* Take our own vnode reference so the file can be dropped at once. */
	VREF(vp);
	fdrop(fp, td);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
#ifdef MAC
	/* Empty body on purpose: the condition itself stores the verdict. */
	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
	}
#endif
	else
		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	/*
	 * Descend through anything mounted on vp.  Each step swaps vp for
	 * the vnode VFS_ROOT() hands back; since that vnode is later
	 * vput() or VOP_UNLOCK()ed it is presumably returned locked and
	 * referenced -- TODO confirm against VFS_ROOT().
	 */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		/* Could not busy the mount: re-read v_mountedhere and retry. */
		if (vfs_busy(mp, 0, 0, td))
			continue;
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp, td);
		if (error)
			break;
		vput(vp);
		vp = tdp;
	}
	if (error) {
		vput(vp);
		return (error);
	}
	/* Keep the reference, drop the lock, then swap in the new cwd. */
	VOP_UNLOCK(vp, 0, td);
	FILEDESC_LOCK(fdp);
	vpold = fdp->fd_cdir;
	fdp->fd_cdir = vp;
	FILEDESC_UNLOCK(fdp);
	/* The old directory is released only after the descriptor lock. */
	vrele(vpold);
	return (0);
}
440
441/*
442 * Change current working directory (``.'').
443 */
444#ifndef _SYS_SYSPROTO_H_
445struct chdir_args {
446	char	*path;
447};
448#endif
449/* ARGSUSED */
450int
451chdir(td, uap)
452	struct thread *td;
453	struct chdir_args /* {
454		char *path;
455	} */ *uap;
456{
457
458	return (kern_chdir(td, uap->path, UIO_USERSPACE));
459}
460
461int
462kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
463{
464	register struct filedesc *fdp = td->td_proc->p_fd;
465	int error;
466	struct nameidata nd;
467	struct vnode *vp;
468
469	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
470	if ((error = namei(&nd)) != 0)
471		return (error);
472	if ((error = change_dir(nd.ni_vp, td)) != 0) {
473		vput(nd.ni_vp);
474		NDFREE(&nd, NDF_ONLY_PNBUF);
475		return (error);
476	}
477	VOP_UNLOCK(nd.ni_vp, 0, td);
478	NDFREE(&nd, NDF_ONLY_PNBUF);
479	FILEDESC_LOCK(fdp);
480	vp = fdp->fd_cdir;
481	fdp->fd_cdir = nd.ni_vp;
482	FILEDESC_UNLOCK(fdp);
483	vrele(vp);
484	return (0);
485}
486
487/*
488 * Helper function for raised chroot(2) security function:  Refuse if
489 * any filedescriptors are open directories.
490 */
491static int
492chroot_refuse_vdir_fds(fdp)
493	struct filedesc *fdp;
494{
495	struct vnode *vp;
496	struct file *fp;
497	int fd;
498
499	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
500	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
501		fp = fget_locked(fdp, fd);
502		if (fp == NULL)
503			continue;
504		if (fp->f_type == DTYPE_VNODE) {
505			vp = fp->f_vnode;
506			if (vp->v_type == VDIR)
507				return (EPERM);
508		}
509	}
510	return (0);
511}
512
/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 *
 * The initial value is 1.  The knob is exported read-write as
 * kern.chroot_allow_open_directories and is consulted by change_root().
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");
525
526/*
527 * Change notion of root (``/'') directory.
528 */
529#ifndef _SYS_SYSPROTO_H_
530struct chroot_args {
531	char	*path;
532};
533#endif
534/* ARGSUSED */
535int
536chroot(td, uap)
537	struct thread *td;
538	struct chroot_args /* {
539		char *path;
540	} */ *uap;
541{
542	int error;
543	struct nameidata nd;
544
545	error = suser_cred(td->td_ucred, PRISON_ROOT);
546	if (error)
547		return (error);
548	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
549	mtx_lock(&Giant);
550	error = namei(&nd);
551	if (error)
552		goto error;
553	if ((error = change_dir(nd.ni_vp, td)) != 0)
554		goto e_vunlock;
555#ifdef MAC
556	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
557		goto e_vunlock;
558#endif
559	VOP_UNLOCK(nd.ni_vp, 0, td);
560	error = change_root(nd.ni_vp, td);
561	vrele(nd.ni_vp);
562	NDFREE(&nd, NDF_ONLY_PNBUF);
563	mtx_unlock(&Giant);
564	return (error);
565e_vunlock:
566	vput(nd.ni_vp);
567error:
568	mtx_unlock(&Giant);
569	NDFREE(&nd, NDF_ONLY_PNBUF);
570	return (error);
571}
572
573/*
574 * Common routine for chroot and chdir.  Callers must provide a locked vnode
575 * instance.
576 */
577int
578change_dir(vp, td)
579	struct vnode *vp;
580	struct thread *td;
581{
582	int error;
583
584	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
585	if (vp->v_type != VDIR)
586		return (ENOTDIR);
587#ifdef MAC
588	error = mac_check_vnode_chdir(td->td_ucred, vp);
589	if (error)
590		return (error);
591#endif
592	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
593	return (error);
594}
595
596/*
597 * Common routine for kern_chroot() and jail_attach().  The caller is
598 * responsible for invoking suser() and mac_check_chroot() to authorize this
599 * operation.
600 */
601int
602change_root(vp, td)
603	struct vnode *vp;
604	struct thread *td;
605{
606	struct filedesc *fdp;
607	struct vnode *oldvp;
608	int error;
609
610	mtx_assert(&Giant, MA_OWNED);
611	fdp = td->td_proc->p_fd;
612	FILEDESC_LOCK(fdp);
613	if (chroot_allow_open_directories == 0 ||
614	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
615		error = chroot_refuse_vdir_fds(fdp);
616		if (error) {
617			FILEDESC_UNLOCK(fdp);
618			return (error);
619		}
620	}
621	oldvp = fdp->fd_rdir;
622	fdp->fd_rdir = vp;
623	VREF(fdp->fd_rdir);
624	if (!fdp->fd_jdir) {
625		fdp->fd_jdir = vp;
626		VREF(fdp->fd_jdir);
627	}
628	FILEDESC_UNLOCK(fdp);
629	vrele(oldvp);
630	return (0);
631}
632
633/*
634 * Check permissions, allocate an open file structure,
635 * and call the device open routine if any.
636 */
637#ifndef _SYS_SYSPROTO_H_
638struct open_args {
639	char	*path;
640	int	flags;
641	int	mode;
642};
643#endif
644int
645open(td, uap)
646	struct thread *td;
647	register struct open_args /* {
648		char *path;
649		int flags;
650		int mode;
651	} */ *uap;
652{
653
654	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
655}
656
/*
 * Open (and possibly create or truncate) the file named by `path' and
 * bind it to a newly allocated descriptor.
 *
 *	td	calling thread
 *	path	pathname, located in the address space named by pathseg
 *	flags	open(2) flags; converted with FFLAGS() before use
 *	mode	creation mode; masked with the process's fd_cmask
 *
 * On success the new descriptor index is stored in td->td_retval[0] and
 * 0 is returned; otherwise an errno value is returned.
 *
 * Two file references are in play while this function runs: the one
 * owned by the descriptor table and a private one taken with fhold().
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;

	/* All access-mode bits set at once is not a valid combination. */
	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(flags);
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	fp = nfp;
	/*
	 * NOTE(review): fd_cmask is read here without FILEDESC_LOCK, while
	 * kern_mknod() and kern_mkfifo() take the lock around the same read.
	 */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
	/*
	 * Bump the ref count to prevent another process from closing
	 * the descriptor while we are blocked in vn_open()
	 */
	fhold(fp);
	error = vn_open(&nd, &flags, cmode);
	if (error) {
		/*
		 * release our own reference
		 */
		fdrop(fp, td);

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[indx] == fp) {
			fdp->fd_ofiles[indx] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
		} else
			FILEDESC_UNLOCK(fdp);

		/* A restartable failure is reported to the caller as EINTR. */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * There should be 2 references on the file, one from the descriptor
	 * table, and one for us.
	 *
	 * Handle the case where someone closed the file (via its file
	 * descriptor) while we were blocked.  The end result should look
	 * like opening the file succeeded but it was immediately closed.
	 */
	FILEDESC_LOCK(fdp);
	FILE_LOCK(fp);
	if (fp->f_count == 1) {
		KASSERT(fdp->fd_ofiles[indx] != fp,
		    ("Open file descriptor lost all refs"));
		FILEDESC_UNLOCK(fdp);
		FILE_UNLOCK(fp);
		VOP_UNLOCK(vp, 0, td);
		vn_close(vp, flags & FMASK, fp->f_cred, td);
		fdrop(fp, td);
		td->td_retval[0] = indx;
		return 0;
	}
	/* Bind the vnode to the file while both locks are held. */
	fp->f_vnode = vp;
	fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	fp->f_ops = &vnops;
	fp->f_seqcount = 1;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	FILEDESC_UNLOCK(fdp);
	FILE_UNLOCK(fp);

	/* assert that vn_open created a backing object if one is needed */
	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
		("open: vmio vnode has no backing object after vn_open"));

	VOP_UNLOCK(vp, 0, td);
	/*
	 * O_EXLOCK / O_SHLOCK: request a write or read lock through
	 * VOP_ADVLOCK() with l_start = 0 and l_len = 0.  F_WAIT is added
	 * unless the open is non-blocking.
	 */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	/*
	 * O_TRUNC: set the size to zero with VOP_SETATTR() under the vnode
	 * lock, bracketed by vn_start_write() / vn_finished_write().
	 */
	if (flags & O_TRUNC) {
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/*
	 * Failure after the file was bound: clear the descriptor slot if it
	 * still points at our file (dropping the table's reference), then
	 * drop the private reference as well.
	 */
	FILEDESC_LOCK(fdp);
	if (fdp->fd_ofiles[indx] == fp) {
		fdp->fd_ofiles[indx] = NULL;
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
	} else
		FILEDESC_UNLOCK(fdp);
	fdrop(fp, td);
	return (error);
}
813
#ifdef COMPAT_43
/*
 * ocreat: 4.3BSD-compatible creat().  Implemented as open() with the
 * flags O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{
	struct open_args /* {
		char *path;
		int flags;
		int mode;
	} */ oargs;

	/* Build the equivalent open() request and forward it. */
	oargs.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oargs.path = uap->path;
	oargs.mode = uap->mode;
	return (open(td, &oargs));
}
#endif /* COMPAT_43 */
844
845/*
846 * Create a special file.
847 */
848#ifndef _SYS_SYSPROTO_H_
849struct mknod_args {
850	char	*path;
851	int	mode;
852	int	dev;
853};
854#endif
855/* ARGSUSED */
856int
857mknod(td, uap)
858	struct thread *td;
859	register struct mknod_args /* {
860		char *path;
861		int mode;
862		int dev;
863	} */ *uap;
864{
865
866	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
867}
868
/*
 * Create a special file (or a whiteout entry) named `path'.
 *
 *	td	calling thread
 *	path	pathname, located in the address space named by pathseg
 *	mode	file type (S_IFMT bits) plus permission bits; the
 *		permissions are masked with the process's fd_cmask
 *	dev	device number stored in va_rdev
 *
 * Character and block devices require suser(); every other type requires
 * suser_cred(PRISON_ROOT).  Accepted types are S_IFMT (VBAD), S_IFCHR,
 * S_IFBLK and S_IFWHT; anything else yields EINVAL.  EEXIST is returned
 * if the name already exists.  Returns 0 or an errno value.
 */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error;
	int whiteout = 0;
	struct nameidata nd;

	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = suser(td);
		break;
	default:
		error = suser_cred(td->td_ucred, PRISON_ROOT);
		break;
	}
	if (error)
		return (error);
restart:
	bwillwrite();
	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name is taken; error is carried to the common exit. */
		vrele(vp);
		error = EEXIST;
	} else {
		VATTR_NULL(&vattr);
		FILEDESC_LOCK(td->td_proc->p_fd);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		FILEDESC_UNLOCK(td->td_proc->p_fd);
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	/*
	 * This runs even when error is already set, so that the common
	 * exit below can call vn_finished_write() unconditionally.  If
	 * write access cannot be had immediately, everything is released,
	 * we wait (interruptibly) and start over from the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	if (error == 0 && !whiteout)
		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (!error) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* The new vnode is not handed to the caller. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	/*
	 * NOTE(review): both vnodes have already been released (vput() or
	 * vrele()) by the time these assertions look at them.
	 */
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
	return (error);
}
957
958/*
959 * Create a named pipe.
960 */
961#ifndef _SYS_SYSPROTO_H_
962struct mkfifo_args {
963	char	*path;
964	int	mode;
965};
966#endif
967/* ARGSUSED */
968int
969mkfifo(td, uap)
970	struct thread *td;
971	register struct mkfifo_args /* {
972		char *path;
973		int mode;
974	} */ *uap;
975{
976
977	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
978}
979
980int
981kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
982{
983	struct mount *mp;
984	struct vattr vattr;
985	int error;
986	struct nameidata nd;
987
988restart:
989	bwillwrite();
990	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
991	if ((error = namei(&nd)) != 0)
992		return (error);
993	if (nd.ni_vp != NULL) {
994		NDFREE(&nd, NDF_ONLY_PNBUF);
995		vrele(nd.ni_vp);
996		vput(nd.ni_dvp);
997		return (EEXIST);
998	}
999	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1000		NDFREE(&nd, NDF_ONLY_PNBUF);
1001		vput(nd.ni_dvp);
1002		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1003			return (error);
1004		goto restart;
1005	}
1006	VATTR_NULL(&vattr);
1007	vattr.va_type = VFIFO;
1008	FILEDESC_LOCK(td->td_proc->p_fd);
1009	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1010	FILEDESC_UNLOCK(td->td_proc->p_fd);
1011#ifdef MAC
1012	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1013	    &vattr);
1014	if (error)
1015		goto out;
1016#endif
1017	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1018	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1019	if (error == 0)
1020		vput(nd.ni_vp);
1021#ifdef MAC
1022out:
1023#endif
1024	NDFREE(&nd, NDF_ONLY_PNBUF);
1025	vput(nd.ni_dvp);
1026	vn_finished_write(mp);
1027	return (error);
1028}
1029
1030/*
1031 * Make a hard file link.
1032 */
1033#ifndef _SYS_SYSPROTO_H_
1034struct link_args {
1035	char	*path;
1036	char	*link;
1037};
1038#endif
1039/* ARGSUSED */
1040int
1041link(td, uap)
1042	struct thread *td;
1043	register struct link_args /* {
1044		char *path;
1045		char *link;
1046	} */ *uap;
1047{
1048
1049	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1050}
1051
/*
 * Create the directory entry `link' referring to the existing file
 * `path'.  Both names are located in the address space named by segflg.
 *
 * Returns EPERM if `path' is a directory, EEXIST if `link' already
 * exists, otherwise 0 or the errno value of the failing step.
 */
int
kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	int error;

	bwillwrite();
	/* First lookup: the existing file.  It comes back referenced only. */
	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
		vrele(vp);
		return (error);
	}
	/* Second lookup reuses nd: the new name, with its parent locked. */
	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			vrele(nd.ni_vp);
			error = EEXIST;
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
		    == 0) {
			/* The source vnode is locked only around VOP_LINK(). */
			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
#ifdef MAC
			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
			    vp, &nd.ni_cnd);
			if (error == 0)
#endif
				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_UNLOCK(vp, 0, td);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
	}
	vrele(vp);
	vn_finished_write(mp);
	/*
	 * NOTE(review): these assertions also run when the second namei()
	 * failed, and after ni_dvp has been vput(); confirm that the nd
	 * fields are meaningful on those paths.
	 */
	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
	return (error);
}
1100
1101/*
1102 * Make a symbolic link.
1103 */
1104#ifndef _SYS_SYSPROTO_H_
1105struct symlink_args {
1106	char	*path;
1107	char	*link;
1108};
1109#endif
1110/* ARGSUSED */
1111int
1112symlink(td, uap)
1113	struct thread *td;
1114	register struct symlink_args /* {
1115		char *path;
1116		char *link;
1117	} */ *uap;
1118{
1119
1120	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1121}
1122
1123int
1124kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1125{
1126	struct mount *mp;
1127	struct vattr vattr;
1128	char *syspath;
1129	int error;
1130	struct nameidata nd;
1131
1132	if (segflg == UIO_SYSSPACE) {
1133		syspath = path;
1134	} else {
1135		syspath = uma_zalloc(namei_zone, M_WAITOK);
1136		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1137			goto out;
1138	}
1139restart:
1140	bwillwrite();
1141	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1142	if ((error = namei(&nd)) != 0)
1143		goto out;
1144	if (nd.ni_vp) {
1145		NDFREE(&nd, NDF_ONLY_PNBUF);
1146		vrele(nd.ni_vp);
1147		vput(nd.ni_dvp);
1148		error = EEXIST;
1149		goto out;
1150	}
1151	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1152		NDFREE(&nd, NDF_ONLY_PNBUF);
1153		vput(nd.ni_dvp);
1154		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1155			return (error);
1156		goto restart;
1157	}
1158	VATTR_NULL(&vattr);
1159	FILEDESC_LOCK(td->td_proc->p_fd);
1160	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1161	FILEDESC_UNLOCK(td->td_proc->p_fd);
1162#ifdef MAC
1163	vattr.va_type = VLNK;
1164	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1165	    &vattr);
1166	if (error)
1167		goto out2;
1168#endif
1169	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1170	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1171	if (error == 0)
1172		vput(nd.ni_vp);
1173#ifdef MAC
1174out2:
1175#endif
1176	NDFREE(&nd, NDF_ONLY_PNBUF);
1177	vput(nd.ni_dvp);
1178	vn_finished_write(mp);
1179	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1180	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1181out:
1182	if (segflg != UIO_SYSSPACE)
1183		uma_zfree(namei_zone, syspath);
1184	return (error);
1185}
1186
1187/*
1188 * Delete a whiteout from the filesystem.
1189 */
1190/* ARGSUSED */
1191int
1192undelete(td, uap)
1193	struct thread *td;
1194	register struct undelete_args /* {
1195		char *path;
1196	} */ *uap;
1197{
1198	int error;
1199	struct mount *mp;
1200	struct nameidata nd;
1201
1202restart:
1203	bwillwrite();
1204	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1205	    uap->path, td);
1206	error = namei(&nd);
1207	if (error)
1208		return (error);
1209
1210	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1211		NDFREE(&nd, NDF_ONLY_PNBUF);
1212		if (nd.ni_vp)
1213			vrele(nd.ni_vp);
1214		vput(nd.ni_dvp);
1215		return (EEXIST);
1216	}
1217	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1218		NDFREE(&nd, NDF_ONLY_PNBUF);
1219		vput(nd.ni_dvp);
1220		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1221			return (error);
1222		goto restart;
1223	}
1224	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1225	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1226	NDFREE(&nd, NDF_ONLY_PNBUF);
1227	vput(nd.ni_dvp);
1228	vn_finished_write(mp);
1229	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1230	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1231	return (error);
1232}
1233
1234/*
1235 * Delete a name from the filesystem.
1236 */
1237#ifndef _SYS_SYSPROTO_H_
1238struct unlink_args {
1239	char	*path;
1240};
1241#endif
1242/* ARGSUSED */
1243int
1244unlink(td, uap)
1245	struct thread *td;
1246	struct unlink_args /* {
1247		char *path;
1248	} */ *uap;
1249{
1250
1251	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1252}
1253
1254int
1255kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1256{
1257	struct mount *mp;
1258	struct vnode *vp;
1259	int error;
1260	struct nameidata nd;
1261
1262restart:
1263	bwillwrite();
1264	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1265	if ((error = namei(&nd)) != 0)
1266		return (error);
1267	vp = nd.ni_vp;
1268	if (vp->v_type == VDIR)
1269		error = EPERM;		/* POSIX */
1270	else {
1271		/*
1272		 * The root of a mounted filesystem cannot be deleted.
1273		 *
1274		 * XXX: can this only be a VDIR case?
1275		 */
1276		if (vp->v_vflag & VV_ROOT)
1277			error = EBUSY;
1278	}
1279	if (error == 0) {
1280		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1281			NDFREE(&nd, NDF_ONLY_PNBUF);
1282			if (vp == nd.ni_dvp)
1283				vrele(vp);
1284			else
1285				vput(vp);
1286			vput(nd.ni_dvp);
1287			if ((error = vn_start_write(NULL, &mp,
1288			    V_XSLEEP | PCATCH)) != 0)
1289				return (error);
1290			goto restart;
1291		}
1292#ifdef MAC
1293		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1294		    &nd.ni_cnd);
1295		if (error)
1296			goto out;
1297#endif
1298		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1299		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1300#ifdef MAC
1301out:
1302#endif
1303		vn_finished_write(mp);
1304	}
1305	NDFREE(&nd, NDF_ONLY_PNBUF);
1306	if (vp == nd.ni_dvp)
1307		vrele(vp);
1308	else
1309		vput(vp);
1310	vput(nd.ni_dvp);
1311	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1312	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1313	return (error);
1314}
1315
1316/*
1317 * Reposition read/write file offset.
1318 */
1319#ifndef _SYS_SYSPROTO_H_
1320struct lseek_args {
1321	int	fd;
1322	int	pad;
1323	off_t	offset;
1324	int	whence;
1325};
1326#endif
1327int
1328lseek(td, uap)
1329	struct thread *td;
1330	register struct lseek_args /* {
1331		int fd;
1332		int pad;
1333		off_t offset;
1334		int whence;
1335	} */ *uap;
1336{
1337	struct ucred *cred = td->td_ucred;
1338	struct file *fp;
1339	struct vnode *vp;
1340	struct vattr vattr;
1341	off_t offset;
1342	int error, noneg;
1343
1344	if ((error = fget(td, uap->fd, &fp)) != 0)
1345		return (error);
1346	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1347		fdrop(fp, td);
1348		return (ESPIPE);
1349	}
1350	vp = fp->f_vnode;
1351	noneg = (vp->v_type != VCHR);
1352	offset = uap->offset;
1353	switch (uap->whence) {
1354	case L_INCR:
1355		if (noneg &&
1356		    (fp->f_offset < 0 ||
1357		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1358			error = EOVERFLOW;
1359			break;
1360		}
1361		offset += fp->f_offset;
1362		break;
1363	case L_XTND:
1364		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1365		error = VOP_GETATTR(vp, &vattr, cred, td);
1366		VOP_UNLOCK(vp, 0, td);
1367		if (error)
1368			break;
1369		if (noneg &&
1370		    (vattr.va_size > OFF_MAX ||
1371		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1372			error = EOVERFLOW;
1373			break;
1374		}
1375		offset += vattr.va_size;
1376		break;
1377	case L_SET:
1378		break;
1379	default:
1380		error = EINVAL;
1381	}
1382	if (error == 0 && noneg && offset < 0)
1383		error = EINVAL;
1384	if (error != 0) {
1385		fdrop(fp, td);
1386		return (error);
1387	}
1388	fp->f_offset = offset;
1389	*(off_t *)(td->td_retval) = fp->f_offset;
1390	fdrop(fp, td);
1391	return (0);
1392}
1393
1394#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1395/*
1396 * Reposition read/write file offset.
1397 */
1398#ifndef _SYS_SYSPROTO_H_
1399struct olseek_args {
1400	int	fd;
1401	long	offset;
1402	int	whence;
1403};
1404#endif
1405int
1406olseek(td, uap)
1407	struct thread *td;
1408	register struct olseek_args /* {
1409		int fd;
1410		long offset;
1411		int whence;
1412	} */ *uap;
1413{
1414	struct lseek_args /* {
1415		int fd;
1416		int pad;
1417		off_t offset;
1418		int whence;
1419	} */ nuap;
1420	int error;
1421
1422	nuap.fd = uap->fd;
1423	nuap.offset = uap->offset;
1424	nuap.whence = uap->whence;
1425	error = lseek(td, &nuap);
1426	return (error);
1427}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1429
1430/*
1431 * Check access permissions using passed credentials.
1432 */
1433static int
1434vn_access(vp, user_flags, cred, td)
1435	struct vnode	*vp;
1436	int		user_flags;
1437	struct ucred	*cred;
1438	struct thread	*td;
1439{
1440	int error, flags;
1441
1442	/* Flags == 0 means only check for existence. */
1443	error = 0;
1444	if (user_flags) {
1445		flags = 0;
1446		if (user_flags & R_OK)
1447			flags |= VREAD;
1448		if (user_flags & W_OK)
1449			flags |= VWRITE;
1450		if (user_flags & X_OK)
1451			flags |= VEXEC;
1452#ifdef MAC
1453		error = mac_check_vnode_access(cred, vp, flags);
1454		if (error)
1455			return (error);
1456#endif
1457		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1458			error = VOP_ACCESS(vp, flags, cred, td);
1459	}
1460	return (error);
1461}
1462
1463/*
1464 * Check access permissions using "real" credentials.
1465 */
1466#ifndef _SYS_SYSPROTO_H_
1467struct access_args {
1468	char	*path;
1469	int	flags;
1470};
1471#endif
1472int
1473access(td, uap)
1474	struct thread *td;
1475	register struct access_args /* {
1476		char *path;
1477		int flags;
1478	} */ *uap;
1479{
1480
1481	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1482}
1483
1484int
1485kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1486{
1487	struct ucred *cred, *tmpcred;
1488	register struct vnode *vp;
1489	int error;
1490	struct nameidata nd;
1491
1492	/*
1493	 * Create and modify a temporary credential instead of one that
1494	 * is potentially shared.  This could also mess up socket
1495	 * buffer accounting which can run in an interrupt context.
1496	 *
1497	 * XXX - Depending on how "threads" are finally implemented, it
1498	 * may be better to explicitly pass the credential to namei()
1499	 * rather than to modify the potentially shared process structure.
1500	 */
1501	cred = td->td_ucred;
1502	tmpcred = crdup(cred);
1503	tmpcred->cr_uid = cred->cr_ruid;
1504	tmpcred->cr_groups[0] = cred->cr_rgid;
1505	td->td_ucred = tmpcred;
1506	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1507	if ((error = namei(&nd)) != 0)
1508		goto out1;
1509	vp = nd.ni_vp;
1510
1511	error = vn_access(vp, flags, tmpcred, td);
1512	NDFREE(&nd, NDF_ONLY_PNBUF);
1513	vput(vp);
1514out1:
1515	td->td_ucred = cred;
1516	crfree(tmpcred);
1517	return (error);
1518}
1519
1520/*
1521 * Check access permissions using "effective" credentials.
1522 */
1523#ifndef _SYS_SYSPROTO_H_
1524struct eaccess_args {
1525	char	*path;
1526	int	flags;
1527};
1528#endif
1529int
1530eaccess(td, uap)
1531	struct thread *td;
1532	register struct eaccess_args /* {
1533		char *path;
1534		int flags;
1535	} */ *uap;
1536{
1537	struct nameidata nd;
1538	struct vnode *vp;
1539	int error;
1540
1541	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1542	    uap->path, td);
1543	if ((error = namei(&nd)) != 0)
1544		return (error);
1545	vp = nd.ni_vp;
1546
1547	error = vn_access(vp, uap->flags, td->td_ucred, td);
1548	NDFREE(&nd, NDF_ONLY_PNBUF);
1549	vput(vp);
1550	return (error);
1551}
1552
1553#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1554/*
1555 * Get file status; this version follows links.
1556 */
1557#ifndef _SYS_SYSPROTO_H_
1558struct ostat_args {
1559	char	*path;
1560	struct ostat *ub;
1561};
1562#endif
1563/* ARGSUSED */
1564int
1565ostat(td, uap)
1566	struct thread *td;
1567	register struct ostat_args /* {
1568		char *path;
1569		struct ostat *ub;
1570	} */ *uap;
1571{
1572	struct stat sb;
1573	struct ostat osb;
1574	int error;
1575	struct nameidata nd;
1576
1577	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1578	    uap->path, td);
1579	if ((error = namei(&nd)) != 0)
1580		return (error);
1581	NDFREE(&nd, NDF_ONLY_PNBUF);
1582	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1583	vput(nd.ni_vp);
1584	if (error)
1585		return (error);
1586	cvtstat(&sb, &osb);
1587	error = copyout(&osb, uap->ub, sizeof (osb));
1588	return (error);
1589}
1590
1591/*
1592 * Get file status; this version does not follow links.
1593 */
1594#ifndef _SYS_SYSPROTO_H_
1595struct olstat_args {
1596	char	*path;
1597	struct ostat *ub;
1598};
1599#endif
1600/* ARGSUSED */
1601int
1602olstat(td, uap)
1603	struct thread *td;
1604	register struct olstat_args /* {
1605		char *path;
1606		struct ostat *ub;
1607	} */ *uap;
1608{
1609	struct vnode *vp;
1610	struct stat sb;
1611	struct ostat osb;
1612	int error;
1613	struct nameidata nd;
1614
1615	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1616	    uap->path, td);
1617	if ((error = namei(&nd)) != 0)
1618		return (error);
1619	vp = nd.ni_vp;
1620	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1621	NDFREE(&nd, NDF_ONLY_PNBUF);
1622	vput(vp);
1623	if (error)
1624		return (error);
1625	cvtstat(&sb, &osb);
1626	error = copyout(&osb, uap->ub, sizeof (osb));
1627	return (error);
1628}
1629
1630/*
1631 * Convert from an old to a new stat structure.
1632 */
1633void
1634cvtstat(st, ost)
1635	struct stat *st;
1636	struct ostat *ost;
1637{
1638
1639	ost->st_dev = st->st_dev;
1640	ost->st_ino = st->st_ino;
1641	ost->st_mode = st->st_mode;
1642	ost->st_nlink = st->st_nlink;
1643	ost->st_uid = st->st_uid;
1644	ost->st_gid = st->st_gid;
1645	ost->st_rdev = st->st_rdev;
1646	if (st->st_size < (quad_t)1 << 32)
1647		ost->st_size = st->st_size;
1648	else
1649		ost->st_size = -2;
1650	ost->st_atime = st->st_atime;
1651	ost->st_mtime = st->st_mtime;
1652	ost->st_ctime = st->st_ctime;
1653	ost->st_blksize = st->st_blksize;
1654	ost->st_blocks = st->st_blocks;
1655	ost->st_flags = st->st_flags;
1656	ost->st_gen = st->st_gen;
1657}
1658#endif /* COMPAT_43 || COMPAT_SUNOS */
1659
1660/*
1661 * Get file status; this version follows links.
1662 */
1663#ifndef _SYS_SYSPROTO_H_
1664struct stat_args {
1665	char	*path;
1666	struct stat *ub;
1667};
1668#endif
1669/* ARGSUSED */
1670int
1671stat(td, uap)
1672	struct thread *td;
1673	register struct stat_args /* {
1674		char *path;
1675		struct stat *ub;
1676	} */ *uap;
1677{
1678	struct stat sb;
1679	int error;
1680	struct nameidata nd;
1681
1682#ifdef LOOKUP_SHARED
1683	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1684	    UIO_USERSPACE, uap->path, td);
1685#else
1686	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1687	    uap->path, td);
1688#endif
1689	if ((error = namei(&nd)) != 0)
1690		return (error);
1691	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1692	NDFREE(&nd, NDF_ONLY_PNBUF);
1693	vput(nd.ni_vp);
1694	if (error)
1695		return (error);
1696	error = copyout(&sb, uap->ub, sizeof (sb));
1697	return (error);
1698}
1699
1700/*
1701 * Get file status; this version does not follow links.
1702 */
1703#ifndef _SYS_SYSPROTO_H_
1704struct lstat_args {
1705	char	*path;
1706	struct stat *ub;
1707};
1708#endif
1709/* ARGSUSED */
1710int
1711lstat(td, uap)
1712	struct thread *td;
1713	register struct lstat_args /* {
1714		char *path;
1715		struct stat *ub;
1716	} */ *uap;
1717{
1718	int error;
1719	struct vnode *vp;
1720	struct stat sb;
1721	struct nameidata nd;
1722
1723	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1724	    uap->path, td);
1725	if ((error = namei(&nd)) != 0)
1726		return (error);
1727	vp = nd.ni_vp;
1728	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1729	NDFREE(&nd, NDF_ONLY_PNBUF);
1730	vput(vp);
1731	if (error)
1732		return (error);
1733	error = copyout(&sb, uap->ub, sizeof (sb));
1734	return (error);
1735}
1736
1737/*
1738 * Implementation of the NetBSD stat() function.
1739 * XXX This should probably be collapsed with the FreeBSD version,
1740 * as the differences are only due to vn_stat() clearing spares at
1741 * the end of the structures.  vn_stat could be split to avoid this,
1742 * and thus collapse the following to close to zero code.
1743 */
1744void
1745cvtnstat(sb, nsb)
1746	struct stat *sb;
1747	struct nstat *nsb;
1748{
1749	bzero(nsb, sizeof *nsb);
1750	nsb->st_dev = sb->st_dev;
1751	nsb->st_ino = sb->st_ino;
1752	nsb->st_mode = sb->st_mode;
1753	nsb->st_nlink = sb->st_nlink;
1754	nsb->st_uid = sb->st_uid;
1755	nsb->st_gid = sb->st_gid;
1756	nsb->st_rdev = sb->st_rdev;
1757	nsb->st_atimespec = sb->st_atimespec;
1758	nsb->st_mtimespec = sb->st_mtimespec;
1759	nsb->st_ctimespec = sb->st_ctimespec;
1760	nsb->st_size = sb->st_size;
1761	nsb->st_blocks = sb->st_blocks;
1762	nsb->st_blksize = sb->st_blksize;
1763	nsb->st_flags = sb->st_flags;
1764	nsb->st_gen = sb->st_gen;
1765	nsb->st_birthtimespec = sb->st_birthtimespec;
1766}
1767
1768#ifndef _SYS_SYSPROTO_H_
1769struct nstat_args {
1770	char	*path;
1771	struct nstat *ub;
1772};
1773#endif
1774/* ARGSUSED */
1775int
1776nstat(td, uap)
1777	struct thread *td;
1778	register struct nstat_args /* {
1779		char *path;
1780		struct nstat *ub;
1781	} */ *uap;
1782{
1783	struct stat sb;
1784	struct nstat nsb;
1785	int error;
1786	struct nameidata nd;
1787
1788	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1789	    uap->path, td);
1790	if ((error = namei(&nd)) != 0)
1791		return (error);
1792	NDFREE(&nd, NDF_ONLY_PNBUF);
1793	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1794	vput(nd.ni_vp);
1795	if (error)
1796		return (error);
1797	cvtnstat(&sb, &nsb);
1798	error = copyout(&nsb, uap->ub, sizeof (nsb));
1799	return (error);
1800}
1801
1802/*
1803 * NetBSD lstat.  Get file status; this version does not follow links.
1804 */
1805#ifndef _SYS_SYSPROTO_H_
1806struct lstat_args {
1807	char	*path;
1808	struct stat *ub;
1809};
1810#endif
1811/* ARGSUSED */
1812int
1813nlstat(td, uap)
1814	struct thread *td;
1815	register struct nlstat_args /* {
1816		char *path;
1817		struct nstat *ub;
1818	} */ *uap;
1819{
1820	int error;
1821	struct vnode *vp;
1822	struct stat sb;
1823	struct nstat nsb;
1824	struct nameidata nd;
1825
1826	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1827	    uap->path, td);
1828	if ((error = namei(&nd)) != 0)
1829		return (error);
1830	vp = nd.ni_vp;
1831	NDFREE(&nd, NDF_ONLY_PNBUF);
1832	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1833	vput(vp);
1834	if (error)
1835		return (error);
1836	cvtnstat(&sb, &nsb);
1837	error = copyout(&nsb, uap->ub, sizeof (nsb));
1838	return (error);
1839}
1840
1841/*
1842 * Get configurable pathname variables.
1843 */
1844#ifndef _SYS_SYSPROTO_H_
1845struct pathconf_args {
1846	char	*path;
1847	int	name;
1848};
1849#endif
1850/* ARGSUSED */
1851int
1852pathconf(td, uap)
1853	struct thread *td;
1854	register struct pathconf_args /* {
1855		char *path;
1856		int name;
1857	} */ *uap;
1858{
1859	int error;
1860	struct nameidata nd;
1861
1862	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1863	    uap->path, td);
1864	if ((error = namei(&nd)) != 0)
1865		return (error);
1866	NDFREE(&nd, NDF_ONLY_PNBUF);
1867
1868	/* If asynchronous I/O is available, it works for all files. */
1869	if (uap->name == _PC_ASYNC_IO)
1870		td->td_retval[0] = async_io_version;
1871	else
1872		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1873	vput(nd.ni_vp);
1874	return (error);
1875}
1876
1877/*
1878 * Return target name of a symbolic link.
1879 */
1880#ifndef _SYS_SYSPROTO_H_
1881struct readlink_args {
1882	char	*path;
1883	char	*buf;
1884	int	count;
1885};
1886#endif
1887/* ARGSUSED */
1888int
1889readlink(td, uap)
1890	struct thread *td;
1891	register struct readlink_args /* {
1892		char *path;
1893		char *buf;
1894		int count;
1895	} */ *uap;
1896{
1897
1898	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1899	    UIO_USERSPACE, uap->count));
1900}
1901
1902int
1903kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1904    enum uio_seg bufseg, int count)
1905{
1906	register struct vnode *vp;
1907	struct iovec aiov;
1908	struct uio auio;
1909	int error;
1910	struct nameidata nd;
1911
1912	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1913	if ((error = namei(&nd)) != 0)
1914		return (error);
1915	NDFREE(&nd, NDF_ONLY_PNBUF);
1916	vp = nd.ni_vp;
1917#ifdef MAC
1918	error = mac_check_vnode_readlink(td->td_ucred, vp);
1919	if (error) {
1920		vput(vp);
1921		return (error);
1922	}
1923#endif
1924	if (vp->v_type != VLNK)
1925		error = EINVAL;
1926	else {
1927		aiov.iov_base = buf;
1928		aiov.iov_len = count;
1929		auio.uio_iov = &aiov;
1930		auio.uio_iovcnt = 1;
1931		auio.uio_offset = 0;
1932		auio.uio_rw = UIO_READ;
1933		auio.uio_segflg = bufseg;
1934		auio.uio_td = td;
1935		auio.uio_resid = count;
1936		error = VOP_READLINK(vp, &auio, td->td_ucred);
1937	}
1938	vput(vp);
1939	td->td_retval[0] = count - auio.uio_resid;
1940	return (error);
1941}
1942
1943/*
1944 * Common implementation code for chflags() and fchflags().
1945 */
1946static int
1947setfflags(td, vp, flags)
1948	struct thread *td;
1949	struct vnode *vp;
1950	int flags;
1951{
1952	int error;
1953	struct mount *mp;
1954	struct vattr vattr;
1955
1956	/*
1957	 * Prevent non-root users from setting flags on devices.  When
1958	 * a device is reused, users can retain ownership of the device
1959	 * if they are allowed to set flags and programs assume that
1960	 * chown can't fail when done as root.
1961	 */
1962	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1963		error = suser_cred(td->td_ucred, PRISON_ROOT);
1964		if (error)
1965			return (error);
1966	}
1967
1968	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1969		return (error);
1970	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1971	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1972	VATTR_NULL(&vattr);
1973	vattr.va_flags = flags;
1974#ifdef MAC
1975	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1976	if (error == 0)
1977#endif
1978		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1979	VOP_UNLOCK(vp, 0, td);
1980	vn_finished_write(mp);
1981	return (error);
1982}
1983
1984/*
1985 * Change flags of a file given a path name.
1986 */
1987#ifndef _SYS_SYSPROTO_H_
1988struct chflags_args {
1989	char	*path;
1990	int	flags;
1991};
1992#endif
1993/* ARGSUSED */
1994int
1995chflags(td, uap)
1996	struct thread *td;
1997	register struct chflags_args /* {
1998		char *path;
1999		int flags;
2000	} */ *uap;
2001{
2002	int error;
2003	struct nameidata nd;
2004
2005	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2006	if ((error = namei(&nd)) != 0)
2007		return (error);
2008	NDFREE(&nd, NDF_ONLY_PNBUF);
2009	error = setfflags(td, nd.ni_vp, uap->flags);
2010	vrele(nd.ni_vp);
2011	return error;
2012}
2013
2014/*
2015 * Same as chflags() but doesn't follow symlinks.
2016 */
2017int
2018lchflags(td, uap)
2019	struct thread *td;
2020	register struct lchflags_args /* {
2021		char *path;
2022		int flags;
2023	} */ *uap;
2024{
2025	int error;
2026	struct nameidata nd;
2027
2028	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2029	if ((error = namei(&nd)) != 0)
2030		return (error);
2031	NDFREE(&nd, NDF_ONLY_PNBUF);
2032	error = setfflags(td, nd.ni_vp, uap->flags);
2033	vrele(nd.ni_vp);
2034	return error;
2035}
2036
2037/*
2038 * Change flags of a file given a file descriptor.
2039 */
2040#ifndef _SYS_SYSPROTO_H_
2041struct fchflags_args {
2042	int	fd;
2043	int	flags;
2044};
2045#endif
2046/* ARGSUSED */
2047int
2048fchflags(td, uap)
2049	struct thread *td;
2050	register struct fchflags_args /* {
2051		int fd;
2052		int flags;
2053	} */ *uap;
2054{
2055	struct file *fp;
2056	int error;
2057
2058	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2059		return (error);
2060	error = setfflags(td, fp->f_vnode, uap->flags);
2061	fdrop(fp, td);
2062	return (error);
2063}
2064
2065/*
2066 * Common implementation code for chmod(), lchmod() and fchmod().
2067 */
2068static int
2069setfmode(td, vp, mode)
2070	struct thread *td;
2071	struct vnode *vp;
2072	int mode;
2073{
2074	int error;
2075	struct mount *mp;
2076	struct vattr vattr;
2077
2078	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2079		return (error);
2080	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2081	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2082	VATTR_NULL(&vattr);
2083	vattr.va_mode = mode & ALLPERMS;
2084#ifdef MAC
2085	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2086	if (error == 0)
2087#endif
2088		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2089	VOP_UNLOCK(vp, 0, td);
2090	vn_finished_write(mp);
2091	return error;
2092}
2093
2094/*
2095 * Change mode of a file given path name.
2096 */
2097#ifndef _SYS_SYSPROTO_H_
2098struct chmod_args {
2099	char	*path;
2100	int	mode;
2101};
2102#endif
2103/* ARGSUSED */
2104int
2105chmod(td, uap)
2106	struct thread *td;
2107	register struct chmod_args /* {
2108		char *path;
2109		int mode;
2110	} */ *uap;
2111{
2112
2113	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2114}
2115
2116int
2117kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2118{
2119	int error;
2120	struct nameidata nd;
2121
2122	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2123	if ((error = namei(&nd)) != 0)
2124		return (error);
2125	NDFREE(&nd, NDF_ONLY_PNBUF);
2126	error = setfmode(td, nd.ni_vp, mode);
2127	vrele(nd.ni_vp);
2128	return error;
2129}
2130
2131/*
2132 * Change mode of a file given path name (don't follow links.)
2133 */
2134#ifndef _SYS_SYSPROTO_H_
2135struct lchmod_args {
2136	char	*path;
2137	int	mode;
2138};
2139#endif
2140/* ARGSUSED */
2141int
2142lchmod(td, uap)
2143	struct thread *td;
2144	register struct lchmod_args /* {
2145		char *path;
2146		int mode;
2147	} */ *uap;
2148{
2149	int error;
2150	struct nameidata nd;
2151
2152	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2153	if ((error = namei(&nd)) != 0)
2154		return (error);
2155	NDFREE(&nd, NDF_ONLY_PNBUF);
2156	error = setfmode(td, nd.ni_vp, uap->mode);
2157	vrele(nd.ni_vp);
2158	return error;
2159}
2160
2161/*
2162 * Change mode of a file given a file descriptor.
2163 */
2164#ifndef _SYS_SYSPROTO_H_
2165struct fchmod_args {
2166	int	fd;
2167	int	mode;
2168};
2169#endif
2170/* ARGSUSED */
2171int
2172fchmod(td, uap)
2173	struct thread *td;
2174	register struct fchmod_args /* {
2175		int fd;
2176		int mode;
2177	} */ *uap;
2178{
2179	struct file *fp;
2180	int error;
2181
2182	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2183		return (error);
2184	error = setfmode(td, fp->f_vnode, uap->mode);
2185	fdrop(fp, td);
2186	return (error);
2187}
2188
2189/*
2190 * Common implementation for chown(), lchown(), and fchown()
2191 */
2192static int
2193setfown(td, vp, uid, gid)
2194	struct thread *td;
2195	struct vnode *vp;
2196	uid_t uid;
2197	gid_t gid;
2198{
2199	int error;
2200	struct mount *mp;
2201	struct vattr vattr;
2202
2203	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2204		return (error);
2205	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2206	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2207	VATTR_NULL(&vattr);
2208	vattr.va_uid = uid;
2209	vattr.va_gid = gid;
2210#ifdef MAC
2211	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2212	    vattr.va_gid);
2213	if (error == 0)
2214#endif
2215		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2216	VOP_UNLOCK(vp, 0, td);
2217	vn_finished_write(mp);
2218	return error;
2219}
2220
2221/*
2222 * Set ownership given a path name.
2223 */
2224#ifndef _SYS_SYSPROTO_H_
2225struct chown_args {
2226	char	*path;
2227	int	uid;
2228	int	gid;
2229};
2230#endif
2231/* ARGSUSED */
2232int
2233chown(td, uap)
2234	struct thread *td;
2235	register struct chown_args /* {
2236		char *path;
2237		int uid;
2238		int gid;
2239	} */ *uap;
2240{
2241
2242	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2243}
2244
2245int
2246kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2247    int gid)
2248{
2249	int error;
2250	struct nameidata nd;
2251
2252	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2253	if ((error = namei(&nd)) != 0)
2254		return (error);
2255	NDFREE(&nd, NDF_ONLY_PNBUF);
2256	error = setfown(td, nd.ni_vp, uid, gid);
2257	vrele(nd.ni_vp);
2258	return (error);
2259}
2260
2261/*
2262 * Set ownership given a path name, do not cross symlinks.
2263 */
2264#ifndef _SYS_SYSPROTO_H_
2265struct lchown_args {
2266	char	*path;
2267	int	uid;
2268	int	gid;
2269};
2270#endif
2271/* ARGSUSED */
2272int
2273lchown(td, uap)
2274	struct thread *td;
2275	register struct lchown_args /* {
2276		char *path;
2277		int uid;
2278		int gid;
2279	} */ *uap;
2280{
2281
2282	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2283}
2284
2285int
2286kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2287    int gid)
2288{
2289	int error;
2290	struct nameidata nd;
2291
2292	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2293	if ((error = namei(&nd)) != 0)
2294		return (error);
2295	NDFREE(&nd, NDF_ONLY_PNBUF);
2296	error = setfown(td, nd.ni_vp, uid, gid);
2297	vrele(nd.ni_vp);
2298	return (error);
2299}
2300
2301/*
2302 * Set ownership given a file descriptor.
2303 */
2304#ifndef _SYS_SYSPROTO_H_
2305struct fchown_args {
2306	int	fd;
2307	int	uid;
2308	int	gid;
2309};
2310#endif
2311/* ARGSUSED */
2312int
2313fchown(td, uap)
2314	struct thread *td;
2315	register struct fchown_args /* {
2316		int fd;
2317		int uid;
2318		int gid;
2319	} */ *uap;
2320{
2321	struct file *fp;
2322	int error;
2323
2324	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2325		return (error);
2326	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2327	fdrop(fp, td);
2328	return (error);
2329}
2330
2331/*
2332 * Common implementation code for utimes(), lutimes(), and futimes().
2333 */
2334static int
2335getutimes(usrtvp, tvpseg, tsp)
2336	const struct timeval *usrtvp;
2337	enum uio_seg tvpseg;
2338	struct timespec *tsp;
2339{
2340	struct timeval tv[2];
2341	const struct timeval *tvp;
2342	int error;
2343
2344	if (usrtvp == NULL) {
2345		microtime(&tv[0]);
2346		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2347		tsp[1] = tsp[0];
2348	} else {
2349		if (tvpseg == UIO_SYSSPACE) {
2350			tvp = usrtvp;
2351		} else {
2352			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2353				return (error);
2354			tvp = tv;
2355		}
2356
2357		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2358		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2359	}
2360	return 0;
2361}
2362
2363/*
2364 * Common implementation code for utimes(), lutimes(), and futimes().
2365 */
2366static int
2367setutimes(td, vp, ts, numtimes, nullflag)
2368	struct thread *td;
2369	struct vnode *vp;
2370	const struct timespec *ts;
2371	int numtimes;
2372	int nullflag;
2373{
2374	int error, setbirthtime;
2375	struct mount *mp;
2376	struct vattr vattr;
2377
2378	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2379		return (error);
2380	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2381	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2382	setbirthtime = 0;
2383	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2384	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2385		setbirthtime = 1;
2386	VATTR_NULL(&vattr);
2387	vattr.va_atime = ts[0];
2388	vattr.va_mtime = ts[1];
2389	if (setbirthtime)
2390		vattr.va_birthtime = ts[1];
2391	if (numtimes > 2)
2392		vattr.va_birthtime = ts[2];
2393	if (nullflag)
2394		vattr.va_vaflags |= VA_UTIMES_NULL;
2395#ifdef MAC
2396	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2397	    vattr.va_mtime);
2398#endif
2399	if (error == 0)
2400		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2401	VOP_UNLOCK(vp, 0, td);
2402	vn_finished_write(mp);
2403	return error;
2404}
2405
2406/*
2407 * Set the access and modification times of a file.
2408 */
2409#ifndef _SYS_SYSPROTO_H_
2410struct utimes_args {
2411	char	*path;
2412	struct	timeval *tptr;
2413};
2414#endif
2415/* ARGSUSED */
2416int
2417utimes(td, uap)
2418	struct thread *td;
2419	register struct utimes_args /* {
2420		char *path;
2421		struct timeval *tptr;
2422	} */ *uap;
2423{
2424
2425	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2426	    UIO_USERSPACE));
2427}
2428
2429int
2430kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2431    struct timeval *tptr, enum uio_seg tptrseg)
2432{
2433	struct timespec ts[2];
2434	int error;
2435	struct nameidata nd;
2436
2437	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2438		return (error);
2439	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2440	if ((error = namei(&nd)) != 0)
2441		return (error);
2442	NDFREE(&nd, NDF_ONLY_PNBUF);
2443	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2444	vrele(nd.ni_vp);
2445	return (error);
2446}
2447
2448/*
2449 * Set the access and modification times of a file.
2450 */
2451#ifndef _SYS_SYSPROTO_H_
2452struct lutimes_args {
2453	char	*path;
2454	struct	timeval *tptr;
2455};
2456#endif
2457/* ARGSUSED */
2458int
2459lutimes(td, uap)
2460	struct thread *td;
2461	register struct lutimes_args /* {
2462		char *path;
2463		struct timeval *tptr;
2464	} */ *uap;
2465{
2466
2467	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2468	    UIO_USERSPACE));
2469}
2470
2471int
2472kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2473    struct timeval *tptr, enum uio_seg tptrseg)
2474{
2475	struct timespec ts[2];
2476	int error;
2477	struct nameidata nd;
2478
2479	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2480		return (error);
2481	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2482	if ((error = namei(&nd)) != 0)
2483		return (error);
2484	NDFREE(&nd, NDF_ONLY_PNBUF);
2485	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2486	vrele(nd.ni_vp);
2487	return (error);
2488}
2489
2490/*
2491 * Set the access and modification times of a file.
2492 */
2493#ifndef _SYS_SYSPROTO_H_
2494struct futimes_args {
2495	int	fd;
2496	struct	timeval *tptr;
2497};
2498#endif
2499/* ARGSUSED */
2500int
2501futimes(td, uap)
2502	struct thread *td;
2503	register struct futimes_args /* {
2504		int  fd;
2505		struct timeval *tptr;
2506	} */ *uap;
2507{
2508
2509	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2510}
2511
2512int
2513kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2514    enum uio_seg tptrseg)
2515{
2516	struct timespec ts[2];
2517	struct file *fp;
2518	int error;
2519
2520	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2521		return (error);
2522	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2523		return (error);
2524	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2525	fdrop(fp, td);
2526	return (error);
2527}
2528
2529/*
2530 * Truncate a file given its path name.
2531 */
2532#ifndef _SYS_SYSPROTO_H_
2533struct truncate_args {
2534	char	*path;
2535	int	pad;
2536	off_t	length;
2537};
2538#endif
2539/* ARGSUSED */
2540int
2541truncate(td, uap)
2542	struct thread *td;
2543	register struct truncate_args /* {
2544		char *path;
2545		int pad;
2546		off_t length;
2547	} */ *uap;
2548{
2549
2550	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2551}
2552
2553int
2554kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2555{
2556	struct mount *mp;
2557	struct vnode *vp;
2558	struct vattr vattr;
2559	int error;
2560	struct nameidata nd;
2561
2562	if (length < 0)
2563		return(EINVAL);
2564	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2565	if ((error = namei(&nd)) != 0)
2566		return (error);
2567	vp = nd.ni_vp;
2568	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2569		vrele(vp);
2570		return (error);
2571	}
2572	NDFREE(&nd, NDF_ONLY_PNBUF);
2573	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2574	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2575	if (vp->v_type == VDIR)
2576		error = EISDIR;
2577#ifdef MAC
2578	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2579	}
2580#endif
2581	else if ((error = vn_writechk(vp)) == 0 &&
2582	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2583		VATTR_NULL(&vattr);
2584		vattr.va_size = length;
2585		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2586	}
2587	vput(vp);
2588	vn_finished_write(mp);
2589	return (error);
2590}
2591
2592/*
2593 * Truncate a file given a file descriptor.
2594 */
2595#ifndef _SYS_SYSPROTO_H_
2596struct ftruncate_args {
2597	int	fd;
2598	int	pad;
2599	off_t	length;
2600};
2601#endif
2602/* ARGSUSED */
2603int
2604ftruncate(td, uap)
2605	struct thread *td;
2606	register struct ftruncate_args /* {
2607		int fd;
2608		int pad;
2609		off_t length;
2610	} */ *uap;
2611{
2612	struct mount *mp;
2613	struct vattr vattr;
2614	struct vnode *vp;
2615	struct file *fp;
2616	int error;
2617
2618	if (uap->length < 0)
2619		return(EINVAL);
2620	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2621		return (error);
2622	if ((fp->f_flag & FWRITE) == 0) {
2623		fdrop(fp, td);
2624		return (EINVAL);
2625	}
2626	vp = fp->f_vnode;
2627	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2628		fdrop(fp, td);
2629		return (error);
2630	}
2631	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2632	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2633	if (vp->v_type == VDIR)
2634		error = EISDIR;
2635#ifdef MAC
2636	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2637	    vp))) {
2638	}
2639#endif
2640	else if ((error = vn_writechk(vp)) == 0) {
2641		VATTR_NULL(&vattr);
2642		vattr.va_size = uap->length;
2643		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2644	}
2645	VOP_UNLOCK(vp, 0, td);
2646	vn_finished_write(mp);
2647	fdrop(fp, td);
2648	return (error);
2649}
2650
2651#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2652/*
2653 * Truncate a file given its path name.
2654 */
2655#ifndef _SYS_SYSPROTO_H_
2656struct otruncate_args {
2657	char	*path;
2658	long	length;
2659};
2660#endif
2661/* ARGSUSED */
2662int
2663otruncate(td, uap)
2664	struct thread *td;
2665	register struct otruncate_args /* {
2666		char *path;
2667		long length;
2668	} */ *uap;
2669{
2670	struct truncate_args /* {
2671		char *path;
2672		int pad;
2673		off_t length;
2674	} */ nuap;
2675
2676	nuap.path = uap->path;
2677	nuap.length = uap->length;
2678	return (truncate(td, &nuap));
2679}
2680
2681/*
2682 * Truncate a file given a file descriptor.
2683 */
2684#ifndef _SYS_SYSPROTO_H_
2685struct oftruncate_args {
2686	int	fd;
2687	long	length;
2688};
2689#endif
2690/* ARGSUSED */
2691int
2692oftruncate(td, uap)
2693	struct thread *td;
2694	register struct oftruncate_args /* {
2695		int fd;
2696		long length;
2697	} */ *uap;
2698{
2699	struct ftruncate_args /* {
2700		int fd;
2701		int pad;
2702		off_t length;
2703	} */ nuap;
2704
2705	nuap.fd = uap->fd;
2706	nuap.length = uap->length;
2707	return (ftruncate(td, &nuap));
2708}
2709#endif /* COMPAT_43 || COMPAT_SUNOS */
2710
2711/*
2712 * Sync an open file.
2713 */
2714#ifndef _SYS_SYSPROTO_H_
2715struct fsync_args {
2716	int	fd;
2717};
2718#endif
2719/* ARGSUSED */
2720int
2721fsync(td, uap)
2722	struct thread *td;
2723	struct fsync_args /* {
2724		int fd;
2725	} */ *uap;
2726{
2727	struct vnode *vp;
2728	struct mount *mp;
2729	struct file *fp;
2730	vm_object_t obj;
2731	int error;
2732
2733	GIANT_REQUIRED;
2734
2735	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2736		return (error);
2737	vp = fp->f_vnode;
2738	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2739		fdrop(fp, td);
2740		return (error);
2741	}
2742	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2743	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2744		VM_OBJECT_LOCK(obj);
2745		vm_object_page_clean(obj, 0, 0, 0);
2746		VM_OBJECT_UNLOCK(obj);
2747	}
2748	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2749	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2750	    && softdep_fsync_hook != NULL)
2751		error = (*softdep_fsync_hook)(vp);
2752
2753	VOP_UNLOCK(vp, 0, td);
2754	vn_finished_write(mp);
2755	fdrop(fp, td);
2756	return (error);
2757}
2758
2759/*
2760 * Rename files.  Source and destination must either both be directories,
2761 * or both not be directories.  If target is a directory, it must be empty.
2762 */
2763#ifndef _SYS_SYSPROTO_H_
2764struct rename_args {
2765	char	*from;
2766	char	*to;
2767};
2768#endif
2769/* ARGSUSED */
2770int
2771rename(td, uap)
2772	struct thread *td;
2773	register struct rename_args /* {
2774		char *from;
2775		char *to;
2776	} */ *uap;
2777{
2778
2779	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2780}
2781
2782int
2783kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2784{
2785	struct mount *mp = NULL;
2786	struct vnode *tvp, *fvp, *tdvp;
2787	struct nameidata fromnd, tond;
2788	int error;
2789
2790	bwillwrite();
2791#ifdef MAC
2792	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2793	    from, td);
2794#else
2795	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2796#endif
2797	if ((error = namei(&fromnd)) != 0)
2798		return (error);
2799#ifdef MAC
2800	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2801	    fromnd.ni_vp, &fromnd.ni_cnd);
2802	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2803	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2804#endif
2805	fvp = fromnd.ni_vp;
2806	if (error == 0)
2807		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2808	if (error != 0) {
2809		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2810		vrele(fromnd.ni_dvp);
2811		vrele(fvp);
2812		goto out1;
2813	}
2814	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2815	    NOOBJ, pathseg, to, td);
2816	if (fromnd.ni_vp->v_type == VDIR)
2817		tond.ni_cnd.cn_flags |= WILLBEDIR;
2818	if ((error = namei(&tond)) != 0) {
2819		/* Translate error code for rename("dir1", "dir2/."). */
2820		if (error == EISDIR && fvp->v_type == VDIR)
2821			error = EINVAL;
2822		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2823		vrele(fromnd.ni_dvp);
2824		vrele(fvp);
2825		goto out1;
2826	}
2827	tdvp = tond.ni_dvp;
2828	tvp = tond.ni_vp;
2829	if (tvp != NULL) {
2830		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2831			error = ENOTDIR;
2832			goto out;
2833		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2834			error = EISDIR;
2835			goto out;
2836		}
2837	}
2838	if (fvp == tdvp)
2839		error = EINVAL;
2840	/*
2841	 * If the source is the same as the destination (that is, if they
2842	 * are links to the same vnode), then there is nothing to do.
2843	 */
2844	if (fvp == tvp)
2845		error = -1;
2846#ifdef MAC
2847	else
2848		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2849		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2850#endif
2851out:
2852	if (!error) {
2853		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2854		if (fromnd.ni_dvp != tdvp) {
2855			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2856		}
2857		if (tvp) {
2858			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2859		}
2860		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2861				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2862		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2863		NDFREE(&tond, NDF_ONLY_PNBUF);
2864	} else {
2865		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2866		NDFREE(&tond, NDF_ONLY_PNBUF);
2867		if (tdvp == tvp)
2868			vrele(tdvp);
2869		else
2870			vput(tdvp);
2871		if (tvp)
2872			vput(tvp);
2873		vrele(fromnd.ni_dvp);
2874		vrele(fvp);
2875	}
2876	vrele(tond.ni_startdir);
2877	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2878	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2879	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2880	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2881out1:
2882	vn_finished_write(mp);
2883	if (fromnd.ni_startdir)
2884		vrele(fromnd.ni_startdir);
2885	if (error == -1)
2886		return (0);
2887	return (error);
2888}
2889
2890/*
2891 * Make a directory file.
2892 */
2893#ifndef _SYS_SYSPROTO_H_
2894struct mkdir_args {
2895	char	*path;
2896	int	mode;
2897};
2898#endif
2899/* ARGSUSED */
2900int
2901mkdir(td, uap)
2902	struct thread *td;
2903	register struct mkdir_args /* {
2904		char *path;
2905		int mode;
2906	} */ *uap;
2907{
2908
2909	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2910}
2911
2912int
2913kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2914{
2915	struct mount *mp;
2916	struct vnode *vp;
2917	struct vattr vattr;
2918	int error;
2919	struct nameidata nd;
2920
2921restart:
2922	bwillwrite();
2923	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2924	nd.ni_cnd.cn_flags |= WILLBEDIR;
2925	if ((error = namei(&nd)) != 0)
2926		return (error);
2927	vp = nd.ni_vp;
2928	if (vp != NULL) {
2929		NDFREE(&nd, NDF_ONLY_PNBUF);
2930		vrele(vp);
2931		/*
2932		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2933		 * the strange behaviour of leaving the vnode unlocked
2934		 * if the target is the same vnode as the parent.
2935		 */
2936		if (vp == nd.ni_dvp)
2937			vrele(nd.ni_dvp);
2938		else
2939			vput(nd.ni_dvp);
2940		return (EEXIST);
2941	}
2942	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2943		NDFREE(&nd, NDF_ONLY_PNBUF);
2944		vput(nd.ni_dvp);
2945		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2946			return (error);
2947		goto restart;
2948	}
2949	VATTR_NULL(&vattr);
2950	vattr.va_type = VDIR;
2951	FILEDESC_LOCK(td->td_proc->p_fd);
2952	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2953	FILEDESC_UNLOCK(td->td_proc->p_fd);
2954#ifdef MAC
2955	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2956	    &vattr);
2957	if (error)
2958		goto out;
2959#endif
2960	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2961	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2962#ifdef MAC
2963out:
2964#endif
2965	NDFREE(&nd, NDF_ONLY_PNBUF);
2966	vput(nd.ni_dvp);
2967	if (!error)
2968		vput(nd.ni_vp);
2969	vn_finished_write(mp);
2970	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2971	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2972	return (error);
2973}
2974
2975/*
2976 * Remove a directory file.
2977 */
2978#ifndef _SYS_SYSPROTO_H_
2979struct rmdir_args {
2980	char	*path;
2981};
2982#endif
2983/* ARGSUSED */
2984int
2985rmdir(td, uap)
2986	struct thread *td;
2987	struct rmdir_args /* {
2988		char *path;
2989	} */ *uap;
2990{
2991
2992	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
2993}
2994
2995int
2996kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
2997{
2998	struct mount *mp;
2999	struct vnode *vp;
3000	int error;
3001	struct nameidata nd;
3002
3003restart:
3004	bwillwrite();
3005	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3006	if ((error = namei(&nd)) != 0)
3007		return (error);
3008	vp = nd.ni_vp;
3009	if (vp->v_type != VDIR) {
3010		error = ENOTDIR;
3011		goto out;
3012	}
3013	/*
3014	 * No rmdir "." please.
3015	 */
3016	if (nd.ni_dvp == vp) {
3017		error = EINVAL;
3018		goto out;
3019	}
3020	/*
3021	 * The root of a mounted filesystem cannot be deleted.
3022	 */
3023	if (vp->v_vflag & VV_ROOT) {
3024		error = EBUSY;
3025		goto out;
3026	}
3027#ifdef MAC
3028	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3029	    &nd.ni_cnd);
3030	if (error)
3031		goto out;
3032#endif
3033	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3034		NDFREE(&nd, NDF_ONLY_PNBUF);
3035		if (nd.ni_dvp == vp)
3036			vrele(nd.ni_dvp);
3037		else
3038			vput(nd.ni_dvp);
3039		vput(vp);
3040		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3041			return (error);
3042		goto restart;
3043	}
3044	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3045	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3046	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3047	vn_finished_write(mp);
3048out:
3049	NDFREE(&nd, NDF_ONLY_PNBUF);
3050	if (nd.ni_dvp == vp)
3051		vrele(nd.ni_dvp);
3052	else
3053		vput(nd.ni_dvp);
3054	vput(vp);
3055	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3056	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3057	return (error);
3058}
3059
3060#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant of getdirentries().  Up to uap->count bytes (at most
 * 64KB) of entries are read from the directory open on uap->fd, starting
 * at the file's current offset.  Except on big-endian machines when the
 * mount's mnt_maxsymlinklen is <= 0, the entries are first read into a
 * kernel buffer, their d_type/d_namlen bytes are rewritten to the layout
 * the old interface expects, and only then copied out to uap->buf.
 *
 * On success td->td_retval[0] holds the number of bytes transferred and
 * the offset the read started at is copied out to *uap->basep; unlike
 * getdirentries(), basep is not checked for NULL here.
 *
 * Errors: EINVAL if count exceeds 64KB or the file is not a directory,
 * EBADF if the descriptor is not open for reading, EIO if an entry with
 * a zero record length is encountered, otherwise whatever getvnode(), the
 * MAC check, VOP_READDIR(), uiomove(), union_dircheckp or copyout()
 * report.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/*
	 * Re-entered whenever the union handling at the bottom has switched
	 * vp to a different directory; the user uio is rebuilt each time.
	 */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember where this read starts; it is reported through basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * Big-endian only: when mnt_maxsymlinklen <= 0 the entries are read
	 * straight into the user buffer with no rewriting.
	 */
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer of the same size so
		 * the entries can be patched before the user sees them.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			/* Walk the entries by record length, patching each. */
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* A zero-length record would loop forever. */
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether a union layer lies below. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 asks for the read to be retried on the new vp. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a union mount: repoint the open file at
			 * the covered vnode, restart at offset 0 and release
			 * the vnode read so far.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3213#endif /* COMPAT_43 */
3214
3215/*
3216 * Read a block of directory entries in a filesystem independent format.
3217 */
3218#ifndef _SYS_SYSPROTO_H_
3219struct getdirentries_args {
3220	int	fd;
3221	char	*buf;
3222	u_int	count;
3223	long	*basep;
3224};
3225#endif
3226int
3227getdirentries(td, uap)
3228	struct thread *td;
3229	register struct getdirentries_args /* {
3230		int fd;
3231		char *buf;
3232		u_int count;
3233		long *basep;
3234	} */ *uap;
3235{
3236	struct vnode *vp;
3237	struct file *fp;
3238	struct uio auio;
3239	struct iovec aiov;
3240	long loff;
3241	int error, eofflag;
3242
3243	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3244		return (error);
3245	if ((fp->f_flag & FREAD) == 0) {
3246		fdrop(fp, td);
3247		return (EBADF);
3248	}
3249	vp = fp->f_vnode;
3250unionread:
3251	if (vp->v_type != VDIR) {
3252		fdrop(fp, td);
3253		return (EINVAL);
3254	}
3255	aiov.iov_base = uap->buf;
3256	aiov.iov_len = uap->count;
3257	auio.uio_iov = &aiov;
3258	auio.uio_iovcnt = 1;
3259	auio.uio_rw = UIO_READ;
3260	auio.uio_segflg = UIO_USERSPACE;
3261	auio.uio_td = td;
3262	auio.uio_resid = uap->count;
3263	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3264	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3265	loff = auio.uio_offset = fp->f_offset;
3266#ifdef MAC
3267	error = mac_check_vnode_readdir(td->td_ucred, vp);
3268	if (error == 0)
3269#endif
3270		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3271		    NULL);
3272	fp->f_offset = auio.uio_offset;
3273	VOP_UNLOCK(vp, 0, td);
3274	if (error) {
3275		fdrop(fp, td);
3276		return (error);
3277	}
3278	if (uap->count == auio.uio_resid) {
3279		if (union_dircheckp) {
3280			error = union_dircheckp(td, &vp, fp);
3281			if (error == -1)
3282				goto unionread;
3283			if (error) {
3284				fdrop(fp, td);
3285				return (error);
3286			}
3287		}
3288		/*
3289		 * XXX We could delay dropping the lock above but
3290		 * union_dircheckp complicates things.
3291		 */
3292		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3293		if ((vp->v_vflag & VV_ROOT) &&
3294		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3295			struct vnode *tvp = vp;
3296			vp = vp->v_mount->mnt_vnodecovered;
3297			VREF(vp);
3298			fp->f_vnode = vp;
3299			fp->f_data = vp;
3300			fp->f_offset = 0;
3301			vput(tvp);
3302			goto unionread;
3303		}
3304		VOP_UNLOCK(vp, 0, td);
3305	}
3306	if (uap->basep != NULL) {
3307		error = copyout(&loff, uap->basep, sizeof(long));
3308	}
3309	td->td_retval[0] = uap->count - auio.uio_resid;
3310	fdrop(fp, td);
3311	return (error);
3312}
3313#ifndef _SYS_SYSPROTO_H_
3314struct getdents_args {
3315	int fd;
3316	char *buf;
3317	size_t count;
3318};
3319#endif
3320int
3321getdents(td, uap)
3322	struct thread *td;
3323	register struct getdents_args /* {
3324		int fd;
3325		char *buf;
3326		u_int count;
3327	} */ *uap;
3328{
3329	struct getdirentries_args ap;
3330	ap.fd = uap->fd;
3331	ap.buf = uap->buf;
3332	ap.count = uap->count;
3333	ap.basep = NULL;
3334	return getdirentries(td, &ap);
3335}
3336
3337/*
3338 * Set the mode mask for creation of filesystem nodes.
3339 *
3340 * MP SAFE
3341 */
3342#ifndef _SYS_SYSPROTO_H_
3343struct umask_args {
3344	int	newmask;
3345};
3346#endif
3347int
3348umask(td, uap)
3349	struct thread *td;
3350	struct umask_args /* {
3351		int newmask;
3352	} */ *uap;
3353{
3354	register struct filedesc *fdp;
3355
3356	FILEDESC_LOCK(td->td_proc->p_fd);
3357	fdp = td->td_proc->p_fd;
3358	td->td_retval[0] = fdp->fd_cmask;
3359	fdp->fd_cmask = uap->newmask & ALLPERMS;
3360	FILEDESC_UNLOCK(td->td_proc->p_fd);
3361	return (0);
3362}
3363
3364/*
3365 * Void all references to file by ripping underlying filesystem
3366 * away from vnode.
3367 */
3368#ifndef _SYS_SYSPROTO_H_
3369struct revoke_args {
3370	char	*path;
3371};
3372#endif
3373/* ARGSUSED */
3374int
3375revoke(td, uap)
3376	struct thread *td;
3377	register struct revoke_args /* {
3378		char *path;
3379	} */ *uap;
3380{
3381	struct mount *mp;
3382	struct vnode *vp;
3383	struct vattr vattr;
3384	int error;
3385	struct nameidata nd;
3386
3387	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3388	if ((error = namei(&nd)) != 0)
3389		return (error);
3390	vp = nd.ni_vp;
3391	NDFREE(&nd, NDF_ONLY_PNBUF);
3392	if (vp->v_type != VCHR) {
3393		vput(vp);
3394		return (EINVAL);
3395	}
3396#ifdef MAC
3397	error = mac_check_vnode_revoke(td->td_ucred, vp);
3398	if (error) {
3399		vput(vp);
3400		return (error);
3401	}
3402#endif
3403	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3404	if (error) {
3405		vput(vp);
3406		return (error);
3407	}
3408	VOP_UNLOCK(vp, 0, td);
3409	if (td->td_ucred->cr_uid != vattr.va_uid) {
3410		error = suser_cred(td->td_ucred, PRISON_ROOT);
3411		if (error)
3412			goto out;
3413	}
3414	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3415		goto out;
3416	if (vcount(vp) > 1)
3417		VOP_REVOKE(vp, REVOKEALL);
3418	vn_finished_write(mp);
3419out:
3420	vrele(vp);
3421	return (error);
3422}
3423
3424/*
3425 * Convert a user file descriptor to a kernel file entry.
3426 * The file entry is locked upon returning.
3427 */
3428int
3429getvnode(fdp, fd, fpp)
3430	struct filedesc *fdp;
3431	int fd;
3432	struct file **fpp;
3433{
3434	int error;
3435	struct file *fp;
3436
3437	fp = NULL;
3438	if (fdp == NULL)
3439		error = EBADF;
3440	else {
3441		FILEDESC_LOCK(fdp);
3442		if ((u_int)fd >= fdp->fd_nfiles ||
3443		    (fp = fdp->fd_ofiles[fd]) == NULL)
3444			error = EBADF;
3445		else if (fp->f_vnode == NULL) {
3446			fp = NULL;
3447			error = EINVAL;
3448		} else {
3449			fhold(fp);
3450			error = 0;
3451		}
3452		FILEDESC_UNLOCK(fdp);
3453	}
3454	*fpp = fp;
3455	return (error);
3456}
3457
3458/*
3459 * Get (NFS) file handle
3460 */
3461#ifndef _SYS_SYSPROTO_H_
3462struct getfh_args {
3463	char	*fname;
3464	fhandle_t *fhp;
3465};
3466#endif
3467int
3468getfh(td, uap)
3469	struct thread *td;
3470	register struct getfh_args *uap;
3471{
3472	struct nameidata nd;
3473	fhandle_t fh;
3474	register struct vnode *vp;
3475	int error;
3476
3477	/*
3478	 * Must be super user
3479	 */
3480	error = suser(td);
3481	if (error)
3482		return (error);
3483	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3484	error = namei(&nd);
3485	if (error)
3486		return (error);
3487	NDFREE(&nd, NDF_ONLY_PNBUF);
3488	vp = nd.ni_vp;
3489	bzero(&fh, sizeof(fh));
3490	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3491	error = VFS_VPTOFH(vp, &fh.fh_fid);
3492	vput(vp);
3493	if (error)
3494		return (error);
3495	error = copyout(&fh, uap->fhp, sizeof (fh));
3496	return (error);
3497}
3498
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * The handle is copied in from uap->u_fhp and resolved to a locked vnode
 * through vfs_getvfs() and VFS_FHTOVP().  The remainder follows vn_open():
 * type checks, MAC and access checks, optional truncation, VOP_OPEN(),
 * VM object creation, then allocation of a file/descriptor and optional
 * flock-style locking.  On success the new descriptor index is returned
 * in td->td_retval[0].
 *
 * Errors: EINVAL if neither FREAD nor FWRITE is requested or O_CREAT is
 * set, ESTALE if the handle's filesystem is not found, EMLINK for
 * symbolic links, EOPNOTSUPP for sockets, EISDIR when a directory is
 * opened for write or truncation, otherwise whatever the called routines
 * report.
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Translate the open flags into the access bits to be checked. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write() and locked
		 * again afterwards.  If vn_start_write() fails the vnode is
		 * unlocked at that point, hence vrele() and a direct return
		 * instead of the usual 'goto bad'.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			return (error);
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting only the size attribute to 0. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the writer count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * Hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op
	 */
	fhold(fp);
	/* Turn the freshly allocated file into a vnode-backed file. */
	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: write lock for O_EXLOCK, else read lock. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked across the advisory lock request. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			FILEDESC_LOCK(fdp);
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
			} else
				FILEDESC_UNLOCK(fdp);
			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			return(error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	/* Regular files without a VM object get one created here. */
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td, td->td_ucred);

	VOP_UNLOCK(vp, 0, td);
	/* Drop the extra reference taken with fhold() above. */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* The vnode is locked on every path that jumps here. */
	vput(vp);
	return (error);
}
3705
3706/*
3707 * Stat an (NFS) file handle.
3708 */
3709#ifndef _SYS_SYSPROTO_H_
3710struct fhstat_args {
3711	struct fhandle *u_fhp;
3712	struct stat *sb;
3713};
3714#endif
3715int
3716fhstat(td, uap)
3717	struct thread *td;
3718	register struct fhstat_args /* {
3719		struct fhandle *u_fhp;
3720		struct stat *sb;
3721	} */ *uap;
3722{
3723	struct stat sb;
3724	fhandle_t fh;
3725	struct mount *mp;
3726	struct vnode *vp;
3727	int error;
3728
3729	/*
3730	 * Must be super user
3731	 */
3732	error = suser(td);
3733	if (error)
3734		return (error);
3735
3736	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3737	if (error)
3738		return (error);
3739
3740	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3741		return (ESTALE);
3742	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3743		return (error);
3744	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3745	vput(vp);
3746	if (error)
3747		return (error);
3748	error = copyout(&sb, uap->sb, sizeof(sb));
3749	return (error);
3750}
3751
3752/*
3753 * Implement fstatfs() for (NFS) file handles.
3754 */
3755#ifndef _SYS_SYSPROTO_H_
3756struct fhstatfs_args {
3757	struct fhandle *u_fhp;
3758	struct statfs *buf;
3759};
3760#endif
3761int
3762fhstatfs(td, uap)
3763	struct thread *td;
3764	struct fhstatfs_args /* {
3765		struct fhandle *u_fhp;
3766		struct statfs *buf;
3767	} */ *uap;
3768{
3769	struct statfs *sp;
3770	struct mount *mp;
3771	struct vnode *vp;
3772	struct statfs sb;
3773	fhandle_t fh;
3774	int error;
3775
3776	/*
3777	 * Must be super user
3778	 */
3779	error = suser(td);
3780	if (error)
3781		return (error);
3782
3783	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3784		return (error);
3785
3786	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3787		return (ESTALE);
3788	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3789		return (error);
3790	mp = vp->v_mount;
3791	sp = &mp->mnt_stat;
3792	vput(vp);
3793#ifdef MAC
3794	error = mac_check_mount_stat(td->td_ucred, mp);
3795	if (error)
3796		return (error);
3797#endif
3798	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3799		return (error);
3800	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3801	if (suser(td)) {
3802		bcopy(sp, &sb, sizeof(sb));
3803		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3804		sp = &sb;
3805	}
3806	return (copyout(sp, uap->buf, sizeof(*sp)));
3807}
3808
3809/*
3810 * Syscall to push extended attribute configuration information into the
3811 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3812 * a command (int cmd), and attribute name and misc data.  For now, the
3813 * attribute name is left in userspace for consumption by the VFS_op.
3814 * It will probably be changed to be copied into sysspace by the
3815 * syscall in the future, once issues with various consumers of the
3816 * attribute code have raised their hands.
3817 *
3818 * Currently this is used only by UFS Extended Attributes.
3819 */
3820int
3821extattrctl(td, uap)
3822	struct thread *td;
3823	struct extattrctl_args /* {
3824		const char *path;
3825		int cmd;
3826		const char *filename;
3827		int attrnamespace;
3828		const char *attrname;
3829	} */ *uap;
3830{
3831	struct vnode *filename_vp;
3832	struct nameidata nd;
3833	struct mount *mp, *mp_writable;
3834	char attrname[EXTATTR_MAXNAMELEN];
3835	int error;
3836
3837	/*
3838	 * uap->attrname is not always defined.  We check again later when we
3839	 * invoke the VFS call so as to pass in NULL there if needed.
3840	 */
3841	if (uap->attrname != NULL) {
3842		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3843		    NULL);
3844		if (error)
3845			return (error);
3846	}
3847
3848	/*
3849	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3850	 * which VFS_EXTATTRCTL() will later release.
3851	 */
3852	filename_vp = NULL;
3853	if (uap->filename != NULL) {
3854		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3855		    uap->filename, td);
3856		error = namei(&nd);
3857		if (error)
3858			return (error);
3859		filename_vp = nd.ni_vp;
3860		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3861	}
3862
3863	/* uap->path is always defined. */
3864	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3865	error = namei(&nd);
3866	if (error) {
3867		if (filename_vp != NULL)
3868			vput(filename_vp);
3869		return (error);
3870	}
3871	mp = nd.ni_vp->v_mount;
3872	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3873	NDFREE(&nd, 0);
3874	if (error) {
3875		if (filename_vp != NULL)
3876			vput(filename_vp);
3877		return (error);
3878	}
3879
3880	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3881	    uap->attrname != NULL ? attrname : NULL, td);
3882
3883	vn_finished_write(mp_writable);
3884	/*
3885	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3886	 * filename_vp, so vrele it if it is defined.
3887	 */
3888	if (filename_vp != NULL)
3889		vrele(filename_vp);
3890	return (error);
3891}
3892
3893/*-
3894 * Set a named extended attribute on a file or directory
3895 *
3896 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3897 *            kernelspace string pointer "attrname", userspace buffer
3898 *            pointer "data", buffer length "nbytes", thread "td".
3899 * Returns: 0 on success, an error number otherwise
3900 * Locks: none
3901 * References: vp must be a valid reference for the duration of the call
3902 */
3903static int
3904extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3905    void *data, size_t nbytes, struct thread *td)
3906{
3907	struct mount *mp;
3908	struct uio auio;
3909	struct iovec aiov;
3910	ssize_t cnt;
3911	int error;
3912
3913	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3914	if (error)
3915		return (error);
3916	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3917	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3918
3919	aiov.iov_base = data;
3920	aiov.iov_len = nbytes;
3921	auio.uio_iov = &aiov;
3922	auio.uio_iovcnt = 1;
3923	auio.uio_offset = 0;
3924	if (nbytes > INT_MAX) {
3925		error = EINVAL;
3926		goto done;
3927	}
3928	auio.uio_resid = nbytes;
3929	auio.uio_rw = UIO_WRITE;
3930	auio.uio_segflg = UIO_USERSPACE;
3931	auio.uio_td = td;
3932	cnt = nbytes;
3933
3934#ifdef MAC
3935	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3936	    attrname, &auio);
3937	if (error)
3938		goto done;
3939#endif
3940
3941	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3942	    td->td_ucred, td);
3943	cnt -= auio.uio_resid;
3944	td->td_retval[0] = cnt;
3945
3946done:
3947	VOP_UNLOCK(vp, 0, td);
3948	vn_finished_write(mp);
3949	return (error);
3950}
3951
3952int
3953extattr_set_fd(td, uap)
3954	struct thread *td;
3955	struct extattr_set_fd_args /* {
3956		int fd;
3957		int attrnamespace;
3958		const char *attrname;
3959		void *data;
3960		size_t nbytes;
3961	} */ *uap;
3962{
3963	struct file *fp;
3964	char attrname[EXTATTR_MAXNAMELEN];
3965	int error;
3966
3967	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3968	if (error)
3969		return (error);
3970
3971	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3972	if (error)
3973		return (error);
3974
3975	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
3976	    attrname, uap->data, uap->nbytes, td);
3977	fdrop(fp, td);
3978
3979	return (error);
3980}
3981
3982int
3983extattr_set_file(td, uap)
3984	struct thread *td;
3985	struct extattr_set_file_args /* {
3986		const char *path;
3987		int attrnamespace;
3988		const char *attrname;
3989		void *data;
3990		size_t nbytes;
3991	} */ *uap;
3992{
3993	struct nameidata nd;
3994	char attrname[EXTATTR_MAXNAMELEN];
3995	int error;
3996
3997	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3998	if (error)
3999		return (error);
4000
4001	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4002	error = namei(&nd);
4003	if (error)
4004		return (error);
4005	NDFREE(&nd, NDF_ONLY_PNBUF);
4006
4007	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4008	    uap->data, uap->nbytes, td);
4009
4010	vrele(nd.ni_vp);
4011	return (error);
4012}
4013
4014int
4015extattr_set_link(td, uap)
4016	struct thread *td;
4017	struct extattr_set_link_args /* {
4018		const char *path;
4019		int attrnamespace;
4020		const char *attrname;
4021		void *data;
4022		size_t nbytes;
4023	} */ *uap;
4024{
4025	struct nameidata nd;
4026	char attrname[EXTATTR_MAXNAMELEN];
4027	int error;
4028
4029	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4030	if (error)
4031		return (error);
4032
4033	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4034	error = namei(&nd);
4035	if (error)
4036		return (error);
4037	NDFREE(&nd, NDF_ONLY_PNBUF);
4038
4039	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4040	    uap->data, uap->nbytes, td);
4041
4042	vrele(nd.ni_vp);
4043	return (error);
4044}
4045
4046/*-
4047 * Get a named extended attribute on a file or directory
4048 *
4049 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4050 *            kernelspace string pointer "attrname", userspace buffer
4051 *            pointer "data", buffer length "nbytes", thread "td".
4052 * Returns: 0 on success, an error number otherwise
4053 * Locks: none
4054 * References: vp must be a valid reference for the duration of the call
4055 */
4056static int
4057extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4058    void *data, size_t nbytes, struct thread *td)
4059{
4060	struct uio auio, *auiop;
4061	struct iovec aiov;
4062	ssize_t cnt;
4063	size_t size, *sizep;
4064	int error;
4065
4066	/*
4067	 * XXX: Temporary API compatibility for applications that know
4068	 * about this hack ("" means list), but haven't been updated
4069	 * for the extattr_list_*() system calls yet.  This will go
4070	 * away for FreeBSD 5.3.
4071	 */
4072	if (strlen(attrname) == 0)
4073		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4074
4075	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4076	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4077
4078	/*
4079	 * Slightly unusual semantics: if the user provides a NULL data
4080	 * pointer, they don't want to receive the data, just the
4081	 * maximum read length.
4082	 */
4083	auiop = NULL;
4084	sizep = NULL;
4085	cnt = 0;
4086	if (data != NULL) {
4087		aiov.iov_base = data;
4088		aiov.iov_len = nbytes;
4089		auio.uio_iov = &aiov;
4090		auio.uio_offset = 0;
4091		if (nbytes > INT_MAX) {
4092			error = EINVAL;
4093			goto done;
4094		}
4095		auio.uio_resid = nbytes;
4096		auio.uio_rw = UIO_READ;
4097		auio.uio_segflg = UIO_USERSPACE;
4098		auio.uio_td = td;
4099		auiop = &auio;
4100		cnt = nbytes;
4101	} else
4102		sizep = &size;
4103
4104#ifdef MAC
4105	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4106	    attrname, &auio);
4107	if (error)
4108		goto done;
4109#endif
4110
4111	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4112	    td->td_ucred, td);
4113
4114	if (auiop != NULL) {
4115		cnt -= auio.uio_resid;
4116		td->td_retval[0] = cnt;
4117	} else
4118		td->td_retval[0] = size;
4119
4120done:
4121	VOP_UNLOCK(vp, 0, td);
4122	return (error);
4123}
4124
4125int
4126extattr_get_fd(td, uap)
4127	struct thread *td;
4128	struct extattr_get_fd_args /* {
4129		int fd;
4130		int attrnamespace;
4131		const char *attrname;
4132		void *data;
4133		size_t nbytes;
4134	} */ *uap;
4135{
4136	struct file *fp;
4137	char attrname[EXTATTR_MAXNAMELEN];
4138	int error;
4139
4140	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4141	if (error)
4142		return (error);
4143
4144	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4145	if (error)
4146		return (error);
4147
4148	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4149	    attrname, uap->data, uap->nbytes, td);
4150
4151	fdrop(fp, td);
4152	return (error);
4153}
4154
4155int
4156extattr_get_file(td, uap)
4157	struct thread *td;
4158	struct extattr_get_file_args /* {
4159		const char *path;
4160		int attrnamespace;
4161		const char *attrname;
4162		void *data;
4163		size_t nbytes;
4164	} */ *uap;
4165{
4166	struct nameidata nd;
4167	char attrname[EXTATTR_MAXNAMELEN];
4168	int error;
4169
4170	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4171	if (error)
4172		return (error);
4173
4174	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4175	error = namei(&nd);
4176	if (error)
4177		return (error);
4178	NDFREE(&nd, NDF_ONLY_PNBUF);
4179
4180	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4181	    uap->data, uap->nbytes, td);
4182
4183	vrele(nd.ni_vp);
4184	return (error);
4185}
4186
4187int
4188extattr_get_link(td, uap)
4189	struct thread *td;
4190	struct extattr_get_link_args /* {
4191		const char *path;
4192		int attrnamespace;
4193		const char *attrname;
4194		void *data;
4195		size_t nbytes;
4196	} */ *uap;
4197{
4198	struct nameidata nd;
4199	char attrname[EXTATTR_MAXNAMELEN];
4200	int error;
4201
4202	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4203	if (error)
4204		return (error);
4205
4206	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4207	error = namei(&nd);
4208	if (error)
4209		return (error);
4210	NDFREE(&nd, NDF_ONLY_PNBUF);
4211
4212	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4213	    uap->data, uap->nbytes, td);
4214
4215	vrele(nd.ni_vp);
4216	return (error);
4217}
4218
4219/*
4220 * extattr_delete_vp(): Delete a named extended attribute on a file or
4221 *                      directory
4222 *
4223 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4224 *            kernelspace string pointer "attrname", proc "p"
4225 * Returns: 0 on success, an error number otherwise
4226 * Locks: none
4227 * References: vp must be a valid reference for the duration of the call
4228 */
4229static int
4230extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4231    struct thread *td)
4232{
4233	struct mount *mp;
4234	int error;
4235
4236	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4237	if (error)
4238		return (error);
4239	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4240	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4241
4242#ifdef MAC
4243	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4244	    attrname, NULL);
4245	if (error)
4246		goto done;
4247#endif
4248
4249	error = VOP_RMEXTATTR(vp, attrnamespace, attrname, td->td_ucred, td);
4250	if (error == EOPNOTSUPP)
4251		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4252		    td->td_ucred, td);
4253#ifdef MAC
4254done:
4255#endif
4256	VOP_UNLOCK(vp, 0, td);
4257	vn_finished_write(mp);
4258	return (error);
4259}
4260
4261int
4262extattr_delete_fd(td, uap)
4263	struct thread *td;
4264	struct extattr_delete_fd_args /* {
4265		int fd;
4266		int attrnamespace;
4267		const char *attrname;
4268	} */ *uap;
4269{
4270	struct file *fp;
4271	struct vnode *vp;
4272	char attrname[EXTATTR_MAXNAMELEN];
4273	int error;
4274
4275	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4276	if (error)
4277		return (error);
4278
4279	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4280	if (error)
4281		return (error);
4282	vp = fp->f_vnode;
4283
4284	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4285	fdrop(fp, td);
4286	return (error);
4287}
4288
4289int
4290extattr_delete_file(td, uap)
4291	struct thread *td;
4292	struct extattr_delete_file_args /* {
4293		const char *path;
4294		int attrnamespace;
4295		const char *attrname;
4296	} */ *uap;
4297{
4298	struct nameidata nd;
4299	char attrname[EXTATTR_MAXNAMELEN];
4300	int error;
4301
4302	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4303	if (error)
4304		return(error);
4305
4306	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4307	error = namei(&nd);
4308	if (error)
4309		return(error);
4310	NDFREE(&nd, NDF_ONLY_PNBUF);
4311
4312	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4313	vrele(nd.ni_vp);
4314	return(error);
4315}
4316
4317int
4318extattr_delete_link(td, uap)
4319	struct thread *td;
4320	struct extattr_delete_link_args /* {
4321		const char *path;
4322		int attrnamespace;
4323		const char *attrname;
4324	} */ *uap;
4325{
4326	struct nameidata nd;
4327	char attrname[EXTATTR_MAXNAMELEN];
4328	int error;
4329
4330	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4331	if (error)
4332		return(error);
4333
4334	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4335	error = namei(&nd);
4336	if (error)
4337		return(error);
4338	NDFREE(&nd, NDF_ONLY_PNBUF);
4339
4340	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4341	vrele(nd.ni_vp);
4342	return(error);
4343}
4344
4345/*-
4346 * Retrieve a list of extended attributes on a file or directory.
4347 *
4348 * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4349 *            userspace buffer pointer "data", buffer length "nbytes",
4350 *            thread "td".
4351 * Returns: 0 on success, an error number otherwise
4352 * Locks: none
4353 * References: vp must be a valid reference for the duration of the call
4354 */
4355static int
4356extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4357    size_t nbytes, struct thread *td)
4358{
4359	struct uio auio, *auiop;
4360	size_t size, *sizep;
4361	struct iovec aiov;
4362	ssize_t cnt;
4363	int error;
4364
4365	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4366	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4367
4368	auiop = NULL;
4369	sizep = NULL;
4370	cnt = 0;
4371	if (data != NULL) {
4372		aiov.iov_base = data;
4373		aiov.iov_len = nbytes;
4374		auio.uio_iov = &aiov;
4375		auio.uio_offset = 0;
4376		if (nbytes > INT_MAX) {
4377			error = EINVAL;
4378			goto done;
4379		}
4380		auio.uio_resid = nbytes;
4381		auio.uio_rw = UIO_READ;
4382		auio.uio_segflg = UIO_USERSPACE;
4383		auio.uio_td = td;
4384		auiop = &auio;
4385		cnt = nbytes;
4386	} else
4387		sizep = &size;
4388
4389#ifdef MAC
4390	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4391	    "", &auio);
4392	if (error)
4393		goto done;
4394#endif
4395
4396	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4397	    td->td_ucred, td);
4398
4399	if (auiop != NULL) {
4400		cnt -= auio.uio_resid;
4401		td->td_retval[0] = cnt;
4402	} else
4403		td->td_retval[0] = size;
4404
4405done:
4406	VOP_UNLOCK(vp, 0, td);
4407	return (error);
4408}
4409
4410
4411int
4412extattr_list_fd(td, uap)
4413	struct thread *td;
4414	struct extattr_list_fd_args /* {
4415		int fd;
4416		int attrnamespace;
4417		void *data;
4418		size_t nbytes;
4419	} */ *uap;
4420{
4421	struct file *fp;
4422	int error;
4423
4424	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4425	if (error)
4426		return (error);
4427
4428	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4429	    uap->nbytes, td);
4430
4431	fdrop(fp, td);
4432	return (error);
4433}
4434
4435int
4436extattr_list_file(td, uap)
4437	struct thread*td;
4438	struct extattr_list_file_args /* {
4439		const char *path;
4440		int attrnamespace;
4441		void *data;
4442		size_t nbytes;
4443	} */ *uap;
4444{
4445	struct nameidata nd;
4446	int error;
4447
4448	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4449	error = namei(&nd);
4450	if (error)
4451		return (error);
4452	NDFREE(&nd, NDF_ONLY_PNBUF);
4453
4454	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4455	    uap->nbytes, td);
4456
4457	vrele(nd.ni_vp);
4458	return (error);
4459}
4460
4461int
4462extattr_list_link(td, uap)
4463	struct thread*td;
4464	struct extattr_list_link_args /* {
4465		const char *path;
4466		int attrnamespace;
4467		void *data;
4468		size_t nbytes;
4469	} */ *uap;
4470{
4471	struct nameidata nd;
4472	int error;
4473
4474	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4475	error = namei(&nd);
4476	if (error)
4477		return (error);
4478	NDFREE(&nd, NDF_ONLY_PNBUF);
4479
4480	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4481	    uap->nbytes, td);
4482
4483	vrele(nd.ni_vp);
4484	return (error);
4485}
4486
4487