vfs_syscalls.c revision 118098
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 118098 2003-07-27 20:09:13Z phk $");
43
44/* For 4.3 integer FS ID compatibility */
45#include "opt_compat.h"
46#include "opt_mac.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/sysent.h>
53#include <sys/mac.h>
54#include <sys/malloc.h>
55#include <sys/mount.h>
56#include <sys/mutex.h>
57#include <sys/sysproto.h>
58#include <sys/namei.h>
59#include <sys/filedesc.h>
60#include <sys/kernel.h>
61#include <sys/fcntl.h>
62#include <sys/file.h>
63#include <sys/limits.h>
64#include <sys/linker.h>
65#include <sys/stat.h>
66#include <sys/sx.h>
67#include <sys/unistd.h>
68#include <sys/vnode.h>
69#include <sys/proc.h>
70#include <sys/dirent.h>
71#include <sys/extattr.h>
72#include <sys/jail.h>
73#include <sys/syscallsubr.h>
74#include <sys/sysctl.h>
75
76#include <machine/stdarg.h>
77
78#include <vm/vm.h>
79#include <vm/vm_object.h>
80#include <vm/vm_page.h>
81#include <vm/uma.h>
82
83static int chroot_refuse_vdir_fds(struct filedesc *fdp);
84static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
85static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
86static int setfmode(struct thread *td, struct vnode *, int);
87static int setfflags(struct thread *td, struct vnode *, int);
88static int setutimes(struct thread *td, struct vnode *,
89    const struct timespec *, int, int);
90static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
91    struct thread *td);
92
93static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
94    size_t nbytes, struct thread *td);
95
96int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
97int (*softdep_fsync_hook)(struct vnode *);
98
99/*
100 * The module initialization routine for POSIX asynchronous I/O will
101 * set this to the version of AIO that it implements.  (Zero means
102 * that it is not implemented.)  This value is used here by pathconf()
103 * and in kern_descrip.c by fpathconf().
104 */
105int async_io_version;
106
107/*
108 * Sync each mounted filesystem.
109 */
110#ifndef _SYS_SYSPROTO_H_
111struct sync_args {
112        int     dummy;
113};
114#endif
115
116#ifdef DEBUG
117static int syncprt = 0;
118SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
119#endif
120
121/* ARGSUSED */
122int
123sync(td, uap)
124	struct thread *td;
125	struct sync_args *uap;
126{
127	struct mount *mp, *nmp;
128	int asyncflag;
129
130	mtx_lock(&mountlist_mtx);
131	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
132		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
133			nmp = TAILQ_NEXT(mp, mnt_list);
134			continue;
135		}
136		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
137		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
138			asyncflag = mp->mnt_flag & MNT_ASYNC;
139			mp->mnt_flag &= ~MNT_ASYNC;
140			vfs_msync(mp, MNT_NOWAIT);
141			VFS_SYNC(mp, MNT_NOWAIT,
142			    ((td != NULL) ? td->td_ucred : NOCRED), td);
143			mp->mnt_flag |= asyncflag;
144			vn_finished_write(mp);
145		}
146		mtx_lock(&mountlist_mtx);
147		nmp = TAILQ_NEXT(mp, mnt_list);
148		vfs_unbusy(mp, td);
149	}
150	mtx_unlock(&mountlist_mtx);
151#if 0
152/*
153 * XXX don't call vfs_bufstats() yet because that routine
154 * was not imported in the Lite2 merge.
155 */
156#ifdef DIAGNOSTIC
157	if (syncprt)
158		vfs_bufstats();
159#endif /* DIAGNOSTIC */
160#endif
161	return (0);
162}
163
164/* XXX PRISON: could be per prison flag */
165static int prison_quotas;
166#if 0
167SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
168#endif
169
170/*
171 * Change filesystem quotas.
172 */
173#ifndef _SYS_SYSPROTO_H_
174struct quotactl_args {
175	char *path;
176	int cmd;
177	int uid;
178	caddr_t arg;
179};
180#endif
181/* ARGSUSED */
182int
183quotactl(td, uap)
184	struct thread *td;
185	register struct quotactl_args /* {
186		char *path;
187		int cmd;
188		int uid;
189		caddr_t arg;
190	} */ *uap;
191{
192	struct mount *mp;
193	int error;
194	struct nameidata nd;
195
196	if (jailed(td->td_ucred) && !prison_quotas)
197		return (EPERM);
198	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
199	if ((error = namei(&nd)) != 0)
200		return (error);
201	NDFREE(&nd, NDF_ONLY_PNBUF);
202	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
203	vrele(nd.ni_vp);
204	if (error)
205		return (error);
206	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
207	vn_finished_write(mp);
208	return (error);
209}
210
211/*
212 * Get filesystem statistics.
213 */
214#ifndef _SYS_SYSPROTO_H_
215struct statfs_args {
216	char *path;
217	struct statfs *buf;
218};
219#endif
220/* ARGSUSED */
221int
222statfs(td, uap)
223	struct thread *td;
224	register struct statfs_args /* {
225		char *path;
226		struct statfs *buf;
227	} */ *uap;
228{
229	register struct mount *mp;
230	register struct statfs *sp;
231	int error;
232	struct nameidata nd;
233	struct statfs sb;
234
235	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
236	if ((error = namei(&nd)) != 0)
237		return (error);
238	mp = nd.ni_vp->v_mount;
239	sp = &mp->mnt_stat;
240	NDFREE(&nd, NDF_ONLY_PNBUF);
241	vrele(nd.ni_vp);
242#ifdef MAC
243	error = mac_check_mount_stat(td->td_ucred, mp);
244	if (error)
245		return (error);
246#endif
247	error = VFS_STATFS(mp, sp, td);
248	if (error)
249		return (error);
250	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
251	if (suser(td)) {
252		bcopy(sp, &sb, sizeof(sb));
253		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
254		sp = &sb;
255	}
256	return (copyout(sp, uap->buf, sizeof(*sp)));
257}
258
259/*
260 * Get filesystem statistics.
261 */
262#ifndef _SYS_SYSPROTO_H_
263struct fstatfs_args {
264	int fd;
265	struct statfs *buf;
266};
267#endif
268/* ARGSUSED */
269int
270fstatfs(td, uap)
271	struct thread *td;
272	register struct fstatfs_args /* {
273		int fd;
274		struct statfs *buf;
275	} */ *uap;
276{
277	struct file *fp;
278	struct mount *mp;
279	register struct statfs *sp;
280	int error;
281	struct statfs sb;
282
283	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
284		return (error);
285	mp = fp->f_vnode->v_mount;
286	fdrop(fp, td);
287	if (mp == NULL)
288		return (EBADF);
289#ifdef MAC
290	error = mac_check_mount_stat(td->td_ucred, mp);
291	if (error)
292		return (error);
293#endif
294	sp = &mp->mnt_stat;
295	error = VFS_STATFS(mp, sp, td);
296	if (error)
297		return (error);
298	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
299	if (suser(td)) {
300		bcopy(sp, &sb, sizeof(sb));
301		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
302		sp = &sb;
303	}
304	return (copyout(sp, uap->buf, sizeof(*sp)));
305}
306
307/*
308 * Get statistics on all filesystems.
309 */
310#ifndef _SYS_SYSPROTO_H_
311struct getfsstat_args {
312	struct statfs *buf;
313	long bufsize;
314	int flags;
315};
316#endif
317int
318getfsstat(td, uap)
319	struct thread *td;
320	register struct getfsstat_args /* {
321		struct statfs *buf;
322		long bufsize;
323		int flags;
324	} */ *uap;
325{
326	register struct mount *mp, *nmp;
327	register struct statfs *sp;
328	caddr_t sfsp;
329	long count, maxcount, error;
330
331	maxcount = uap->bufsize / sizeof(struct statfs);
332	sfsp = (caddr_t)uap->buf;
333	count = 0;
334	mtx_lock(&mountlist_mtx);
335	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
336#ifdef MAC
337		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
338			nmp = TAILQ_NEXT(mp, mnt_list);
339			continue;
340		}
341#endif
342		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
343			nmp = TAILQ_NEXT(mp, mnt_list);
344			continue;
345		}
346		if (sfsp && count < maxcount) {
347			sp = &mp->mnt_stat;
348			/*
349			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
350			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
351			 * overrides MNT_WAIT.
352			 */
353			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
354			    (uap->flags & MNT_WAIT)) &&
355			    (error = VFS_STATFS(mp, sp, td))) {
356				mtx_lock(&mountlist_mtx);
357				nmp = TAILQ_NEXT(mp, mnt_list);
358				vfs_unbusy(mp, td);
359				continue;
360			}
361			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
362			error = copyout(sp, sfsp, sizeof(*sp));
363			if (error) {
364				vfs_unbusy(mp, td);
365				return (error);
366			}
367			sfsp += sizeof(*sp);
368		}
369		count++;
370		mtx_lock(&mountlist_mtx);
371		nmp = TAILQ_NEXT(mp, mnt_list);
372		vfs_unbusy(mp, td);
373	}
374	mtx_unlock(&mountlist_mtx);
375	if (sfsp && count > maxcount)
376		td->td_retval[0] = maxcount;
377	else
378		td->td_retval[0] = count;
379	return (0);
380}
381
382/*
383 * Change current working directory to a given file descriptor.
384 */
385#ifndef _SYS_SYSPROTO_H_
386struct fchdir_args {
387	int	fd;
388};
389#endif
390/* ARGSUSED */
391int
392fchdir(td, uap)
393	struct thread *td;
394	struct fchdir_args /* {
395		int fd;
396	} */ *uap;
397{
398	register struct filedesc *fdp = td->td_proc->p_fd;
399	struct vnode *vp, *tdp, *vpold;
400	struct mount *mp;
401	struct file *fp;
402	int error;
403
404	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
405		return (error);
406	vp = fp->f_vnode;
407	VREF(vp);
408	fdrop(fp, td);
409	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
410	if (vp->v_type != VDIR)
411		error = ENOTDIR;
412#ifdef MAC
413	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
414	}
415#endif
416	else
417		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
418	while (!error && (mp = vp->v_mountedhere) != NULL) {
419		if (vfs_busy(mp, 0, 0, td))
420			continue;
421		error = VFS_ROOT(mp, &tdp);
422		vfs_unbusy(mp, td);
423		if (error)
424			break;
425		vput(vp);
426		vp = tdp;
427	}
428	if (error) {
429		vput(vp);
430		return (error);
431	}
432	VOP_UNLOCK(vp, 0, td);
433	FILEDESC_LOCK(fdp);
434	vpold = fdp->fd_cdir;
435	fdp->fd_cdir = vp;
436	FILEDESC_UNLOCK(fdp);
437	vrele(vpold);
438	return (0);
439}
440
441/*
442 * Change current working directory (``.'').
443 */
444#ifndef _SYS_SYSPROTO_H_
445struct chdir_args {
446	char	*path;
447};
448#endif
449/* ARGSUSED */
450int
451chdir(td, uap)
452	struct thread *td;
453	struct chdir_args /* {
454		char *path;
455	} */ *uap;
456{
457
458	return (kern_chdir(td, uap->path, UIO_USERSPACE));
459}
460
461int
462kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
463{
464	register struct filedesc *fdp = td->td_proc->p_fd;
465	int error;
466	struct nameidata nd;
467	struct vnode *vp;
468
469	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
470	if ((error = namei(&nd)) != 0)
471		return (error);
472	if ((error = change_dir(nd.ni_vp, td)) != 0) {
473		vput(nd.ni_vp);
474		NDFREE(&nd, NDF_ONLY_PNBUF);
475		return (error);
476	}
477	VOP_UNLOCK(nd.ni_vp, 0, td);
478	NDFREE(&nd, NDF_ONLY_PNBUF);
479	FILEDESC_LOCK(fdp);
480	vp = fdp->fd_cdir;
481	fdp->fd_cdir = nd.ni_vp;
482	FILEDESC_UNLOCK(fdp);
483	vrele(vp);
484	return (0);
485}
486
487/*
488 * Helper function for raised chroot(2) security function:  Refuse if
489 * any filedescriptors are open directories.
490 */
491static int
492chroot_refuse_vdir_fds(fdp)
493	struct filedesc *fdp;
494{
495	struct vnode *vp;
496	struct file *fp;
497	int fd;
498
499	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
500	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
501		fp = fget_locked(fdp, fd);
502		if (fp == NULL)
503			continue;
504		if (fp->f_type == DTYPE_VNODE) {
505			vp = fp->f_vnode;
506			if (vp->v_type == VDIR)
507				return (EPERM);
508		}
509	}
510	return (0);
511}
512
513/*
514 * This sysctl determines if we will allow a process to chroot(2) if it
515 * has a directory open:
516 *	0: disallowed for all processes.
517 *	1: allowed for processes that were not already chroot(2)'ed.
518 *	2: allowed for all processes.
519 */
520
521static int chroot_allow_open_directories = 1;
522
523SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
524     &chroot_allow_open_directories, 0, "");
525
526/*
527 * Change notion of root (``/'') directory.
528 */
529#ifndef _SYS_SYSPROTO_H_
530struct chroot_args {
531	char	*path;
532};
533#endif
534/* ARGSUSED */
535int
536chroot(td, uap)
537	struct thread *td;
538	struct chroot_args /* {
539		char *path;
540	} */ *uap;
541{
542	int error;
543	struct nameidata nd;
544
545	error = suser_cred(td->td_ucred, PRISON_ROOT);
546	if (error)
547		return (error);
548	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
549	mtx_lock(&Giant);
550	error = namei(&nd);
551	if (error)
552		goto error;
553	if ((error = change_dir(nd.ni_vp, td)) != 0)
554		goto e_vunlock;
555#ifdef MAC
556	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
557		goto e_vunlock;
558#endif
559	VOP_UNLOCK(nd.ni_vp, 0, td);
560	error = change_root(nd.ni_vp, td);
561	vrele(nd.ni_vp);
562	NDFREE(&nd, NDF_ONLY_PNBUF);
563	mtx_unlock(&Giant);
564	return (error);
565e_vunlock:
566	vput(nd.ni_vp);
567error:
568	mtx_unlock(&Giant);
569	NDFREE(&nd, NDF_ONLY_PNBUF);
570	return (error);
571}
572
573/*
574 * Common routine for chroot and chdir.  Callers must provide a locked vnode
575 * instance.
576 */
577int
578change_dir(vp, td)
579	struct vnode *vp;
580	struct thread *td;
581{
582	int error;
583
584	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
585	if (vp->v_type != VDIR)
586		return (ENOTDIR);
587#ifdef MAC
588	error = mac_check_vnode_chdir(td->td_ucred, vp);
589	if (error)
590		return (error);
591#endif
592	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
593	return (error);
594}
595
596/*
597 * Common routine for kern_chroot() and jail_attach().  The caller is
598 * responsible for invoking suser() and mac_check_chroot() to authorize this
599 * operation.
600 */
601int
602change_root(vp, td)
603	struct vnode *vp;
604	struct thread *td;
605{
606	struct filedesc *fdp;
607	struct vnode *oldvp;
608	int error;
609
610	mtx_assert(&Giant, MA_OWNED);
611	fdp = td->td_proc->p_fd;
612	FILEDESC_LOCK(fdp);
613	if (chroot_allow_open_directories == 0 ||
614	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
615		error = chroot_refuse_vdir_fds(fdp);
616		if (error) {
617			FILEDESC_UNLOCK(fdp);
618			return (error);
619		}
620	}
621	oldvp = fdp->fd_rdir;
622	fdp->fd_rdir = vp;
623	VREF(fdp->fd_rdir);
624	if (!fdp->fd_jdir) {
625		fdp->fd_jdir = vp;
626		VREF(fdp->fd_jdir);
627	}
628	FILEDESC_UNLOCK(fdp);
629	vrele(oldvp);
630	return (0);
631}
632
633/*
634 * Check permissions, allocate an open file structure,
635 * and call the device open routine if any.
636 */
637#ifndef _SYS_SYSPROTO_H_
638struct open_args {
639	char	*path;
640	int	flags;
641	int	mode;
642};
643#endif
644int
645open(td, uap)
646	struct thread *td;
647	register struct open_args /* {
648		char *path;
649		int flags;
650		int mode;
651	} */ *uap;
652{
653
654	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
655}
656
657int
658kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
659    int mode)
660{
661	struct proc *p = td->td_proc;
662	struct filedesc *fdp = p->p_fd;
663	struct file *fp;
664	struct vnode *vp;
665	struct vattr vat;
666	struct mount *mp;
667	int cmode;
668	struct file *nfp;
669	int type, indx, error;
670	struct flock lf;
671	struct nameidata nd;
672
673	if ((flags & O_ACCMODE) == O_ACCMODE)
674		return (EINVAL);
675	flags = FFLAGS(flags);
676	error = falloc(td, &nfp, &indx);
677	if (error)
678		return (error);
679	fp = nfp;
680	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
681	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
682	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
683	/*
684	 * Bump the ref count to prevent another process from closing
685	 * the descriptor while we are blocked in vn_open()
686	 */
687	fhold(fp);
688	error = vn_open(&nd, &flags, cmode, indx);
689	if (error) {
690
691		/*
692		 * If the vn_open replaced the method vector, something
693		 * wonderous happened deep below and we just pass it up
694		 * pretending we know what we do.
695		 */
696		if (error == ENXIO && fp->f_ops != &badfileops) {
697			fdrop(fp, td);
698			td->td_retval[0] = indx;
699			return (0);
700		}
701
702		/*
703		 * release our own reference
704		 */
705		fdrop(fp, td);
706
707		/*
708		 * handle special fdopen() case.  bleh.  dupfdopen() is
709		 * responsible for dropping the old contents of ofiles[indx]
710		 * if it succeeds.
711		 */
712		if ((error == ENODEV || error == ENXIO) &&
713		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
714		    (error =
715			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
716			td->td_retval[0] = indx;
717			return (0);
718		}
719		/*
720		 * Clean up the descriptor, but only if another thread hadn't
721		 * replaced or closed it.
722		 */
723		FILEDESC_LOCK(fdp);
724		if (fdp->fd_ofiles[indx] == fp) {
725			fdp->fd_ofiles[indx] = NULL;
726			FILEDESC_UNLOCK(fdp);
727			fdrop(fp, td);
728		} else
729			FILEDESC_UNLOCK(fdp);
730
731		if (error == ERESTART)
732			error = EINTR;
733		return (error);
734	}
735	td->td_dupfd = 0;
736	NDFREE(&nd, NDF_ONLY_PNBUF);
737	vp = nd.ni_vp;
738
739	/*
740	 * There should be 2 references on the file, one from the descriptor
741	 * table, and one for us.
742	 *
743	 * Handle the case where someone closed the file (via its file
744	 * descriptor) while we were blocked.  The end result should look
745	 * like opening the file succeeded but it was immediately closed.
746	 */
747	FILEDESC_LOCK(fdp);
748	FILE_LOCK(fp);
749	if (fp->f_count == 1) {
750		KASSERT(fdp->fd_ofiles[indx] != fp,
751		    ("Open file descriptor lost all refs"));
752		FILEDESC_UNLOCK(fdp);
753		FILE_UNLOCK(fp);
754		VOP_UNLOCK(vp, 0, td);
755		vn_close(vp, flags & FMASK, fp->f_cred, td);
756		fdrop(fp, td);
757		td->td_retval[0] = indx;
758		return 0;
759	}
760	fp->f_vnode = vp;
761	fp->f_data = vp;
762	fp->f_flag = flags & FMASK;
763	fp->f_ops = &vnops;
764	fp->f_seqcount = 1;
765	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
766	FILEDESC_UNLOCK(fdp);
767	FILE_UNLOCK(fp);
768
769	/* assert that vn_open created a backing object if one is needed */
770	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
771		("open: vmio vnode has no backing object after vn_open"));
772
773	VOP_UNLOCK(vp, 0, td);
774	if (flags & (O_EXLOCK | O_SHLOCK)) {
775		lf.l_whence = SEEK_SET;
776		lf.l_start = 0;
777		lf.l_len = 0;
778		if (flags & O_EXLOCK)
779			lf.l_type = F_WRLCK;
780		else
781			lf.l_type = F_RDLCK;
782		type = F_FLOCK;
783		if ((flags & FNONBLOCK) == 0)
784			type |= F_WAIT;
785		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
786			    type)) != 0)
787			goto bad;
788		fp->f_flag |= FHASLOCK;
789	}
790	if (flags & O_TRUNC) {
791		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
792			goto bad;
793		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
794		VATTR_NULL(&vat);
795		vat.va_size = 0;
796		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
797#ifdef MAC
798		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
799		if (error == 0)
800#endif
801			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
802		VOP_UNLOCK(vp, 0, td);
803		vn_finished_write(mp);
804		if (error)
805			goto bad;
806	}
807	/*
808	 * Release our private reference, leaving the one associated with
809	 * the descriptor table intact.
810	 */
811	fdrop(fp, td);
812	td->td_retval[0] = indx;
813	return (0);
814bad:
815	FILEDESC_LOCK(fdp);
816	if (fdp->fd_ofiles[indx] == fp) {
817		fdp->fd_ofiles[indx] = NULL;
818		FILEDESC_UNLOCK(fdp);
819		fdrop(fp, td);
820	} else
821		FILEDESC_UNLOCK(fdp);
822	fdrop(fp, td);
823	return (error);
824}
825
#ifdef COMPAT_43
/*
 * Old creat(2): create a file.  Expressed as open() with
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	struct open_args oa;

	oa.path = uap->path;
	oa.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oa.mode = uap->mode;
	return (open(td, &oa));
}
#endif /* COMPAT_43 */
856
857/*
858 * Create a special file.
859 */
860#ifndef _SYS_SYSPROTO_H_
861struct mknod_args {
862	char	*path;
863	int	mode;
864	int	dev;
865};
866#endif
867/* ARGSUSED */
868int
869mknod(td, uap)
870	struct thread *td;
871	register struct mknod_args /* {
872		char *path;
873		int mode;
874		int dev;
875	} */ *uap;
876{
877
878	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
879}
880
881int
882kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
883    int dev)
884{
885	struct vnode *vp;
886	struct mount *mp;
887	struct vattr vattr;
888	int error;
889	int whiteout = 0;
890	struct nameidata nd;
891
892	switch (mode & S_IFMT) {
893	case S_IFCHR:
894	case S_IFBLK:
895		error = suser(td);
896		break;
897	default:
898		error = suser_cred(td->td_ucred, PRISON_ROOT);
899		break;
900	}
901	if (error)
902		return (error);
903restart:
904	bwillwrite();
905	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
906	if ((error = namei(&nd)) != 0)
907		return (error);
908	vp = nd.ni_vp;
909	if (vp != NULL) {
910		vrele(vp);
911		error = EEXIST;
912	} else {
913		VATTR_NULL(&vattr);
914		FILEDESC_LOCK(td->td_proc->p_fd);
915		vattr.va_mode = (mode & ALLPERMS) &
916		    ~td->td_proc->p_fd->fd_cmask;
917		FILEDESC_UNLOCK(td->td_proc->p_fd);
918		vattr.va_rdev = dev;
919		whiteout = 0;
920
921		switch (mode & S_IFMT) {
922		case S_IFMT:	/* used by badsect to flag bad sectors */
923			vattr.va_type = VBAD;
924			break;
925		case S_IFCHR:
926			vattr.va_type = VCHR;
927			break;
928		case S_IFBLK:
929			vattr.va_type = VBLK;
930			break;
931		case S_IFWHT:
932			whiteout = 1;
933			break;
934		default:
935			error = EINVAL;
936			break;
937		}
938	}
939	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
940		NDFREE(&nd, NDF_ONLY_PNBUF);
941		vput(nd.ni_dvp);
942		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
943			return (error);
944		goto restart;
945	}
946#ifdef MAC
947	if (error == 0 && !whiteout)
948		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
949		    &nd.ni_cnd, &vattr);
950#endif
951	if (!error) {
952		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
953		if (whiteout)
954			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
955		else {
956			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
957						&nd.ni_cnd, &vattr);
958			if (error == 0)
959				vput(nd.ni_vp);
960		}
961	}
962	NDFREE(&nd, NDF_ONLY_PNBUF);
963	vput(nd.ni_dvp);
964	vn_finished_write(mp);
965	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
966	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
967	return (error);
968}
969
970/*
971 * Create a named pipe.
972 */
973#ifndef _SYS_SYSPROTO_H_
974struct mkfifo_args {
975	char	*path;
976	int	mode;
977};
978#endif
979/* ARGSUSED */
980int
981mkfifo(td, uap)
982	struct thread *td;
983	register struct mkfifo_args /* {
984		char *path;
985		int mode;
986	} */ *uap;
987{
988
989	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
990}
991
992int
993kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
994{
995	struct mount *mp;
996	struct vattr vattr;
997	int error;
998	struct nameidata nd;
999
1000restart:
1001	bwillwrite();
1002	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1003	if ((error = namei(&nd)) != 0)
1004		return (error);
1005	if (nd.ni_vp != NULL) {
1006		NDFREE(&nd, NDF_ONLY_PNBUF);
1007		vrele(nd.ni_vp);
1008		vput(nd.ni_dvp);
1009		return (EEXIST);
1010	}
1011	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1012		NDFREE(&nd, NDF_ONLY_PNBUF);
1013		vput(nd.ni_dvp);
1014		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1015			return (error);
1016		goto restart;
1017	}
1018	VATTR_NULL(&vattr);
1019	vattr.va_type = VFIFO;
1020	FILEDESC_LOCK(td->td_proc->p_fd);
1021	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1022	FILEDESC_UNLOCK(td->td_proc->p_fd);
1023#ifdef MAC
1024	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1025	    &vattr);
1026	if (error)
1027		goto out;
1028#endif
1029	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1030	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1031	if (error == 0)
1032		vput(nd.ni_vp);
1033#ifdef MAC
1034out:
1035#endif
1036	NDFREE(&nd, NDF_ONLY_PNBUF);
1037	vput(nd.ni_dvp);
1038	vn_finished_write(mp);
1039	return (error);
1040}
1041
1042/*
1043 * Make a hard file link.
1044 */
1045#ifndef _SYS_SYSPROTO_H_
1046struct link_args {
1047	char	*path;
1048	char	*link;
1049};
1050#endif
1051/* ARGSUSED */
1052int
1053link(td, uap)
1054	struct thread *td;
1055	register struct link_args /* {
1056		char *path;
1057		char *link;
1058	} */ *uap;
1059{
1060
1061	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1062}
1063
1064int
1065kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1066{
1067	struct vnode *vp;
1068	struct mount *mp;
1069	struct nameidata nd;
1070	int error;
1071
1072	bwillwrite();
1073	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1074	if ((error = namei(&nd)) != 0)
1075		return (error);
1076	NDFREE(&nd, NDF_ONLY_PNBUF);
1077	vp = nd.ni_vp;
1078	if (vp->v_type == VDIR) {
1079		vrele(vp);
1080		return (EPERM);		/* POSIX */
1081	}
1082	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1083		vrele(vp);
1084		return (error);
1085	}
1086	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1087	if ((error = namei(&nd)) == 0) {
1088		if (nd.ni_vp != NULL) {
1089			vrele(nd.ni_vp);
1090			error = EEXIST;
1091		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1092		    == 0) {
1093			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1094			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1095#ifdef MAC
1096			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1097			    vp, &nd.ni_cnd);
1098			if (error == 0)
1099#endif
1100				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1101			VOP_UNLOCK(vp, 0, td);
1102		}
1103		NDFREE(&nd, NDF_ONLY_PNBUF);
1104		vput(nd.ni_dvp);
1105	}
1106	vrele(vp);
1107	vn_finished_write(mp);
1108	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1109	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1110	return (error);
1111}
1112
1113/*
1114 * Make a symbolic link.
1115 */
1116#ifndef _SYS_SYSPROTO_H_
1117struct symlink_args {
1118	char	*path;
1119	char	*link;
1120};
1121#endif
1122/* ARGSUSED */
1123int
1124symlink(td, uap)
1125	struct thread *td;
1126	register struct symlink_args /* {
1127		char *path;
1128		char *link;
1129	} */ *uap;
1130{
1131
1132	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1133}
1134
1135int
1136kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1137{
1138	struct mount *mp;
1139	struct vattr vattr;
1140	char *syspath;
1141	int error;
1142	struct nameidata nd;
1143
1144	if (segflg == UIO_SYSSPACE) {
1145		syspath = path;
1146	} else {
1147		syspath = uma_zalloc(namei_zone, M_WAITOK);
1148		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1149			goto out;
1150	}
1151restart:
1152	bwillwrite();
1153	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1154	if ((error = namei(&nd)) != 0)
1155		goto out;
1156	if (nd.ni_vp) {
1157		NDFREE(&nd, NDF_ONLY_PNBUF);
1158		vrele(nd.ni_vp);
1159		vput(nd.ni_dvp);
1160		error = EEXIST;
1161		goto out;
1162	}
1163	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1164		NDFREE(&nd, NDF_ONLY_PNBUF);
1165		vput(nd.ni_dvp);
1166		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1167			return (error);
1168		goto restart;
1169	}
1170	VATTR_NULL(&vattr);
1171	FILEDESC_LOCK(td->td_proc->p_fd);
1172	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1173	FILEDESC_UNLOCK(td->td_proc->p_fd);
1174#ifdef MAC
1175	vattr.va_type = VLNK;
1176	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1177	    &vattr);
1178	if (error)
1179		goto out2;
1180#endif
1181	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1182	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1183	if (error == 0)
1184		vput(nd.ni_vp);
1185#ifdef MAC
1186out2:
1187#endif
1188	NDFREE(&nd, NDF_ONLY_PNBUF);
1189	vput(nd.ni_dvp);
1190	vn_finished_write(mp);
1191	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1192	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1193out:
1194	if (segflg != UIO_SYSSPACE)
1195		uma_zfree(namei_zone, syspath);
1196	return (error);
1197}
1198
1199/*
1200 * Delete a whiteout from the filesystem.
1201 */
1202/* ARGSUSED */
1203int
1204undelete(td, uap)
1205	struct thread *td;
1206	register struct undelete_args /* {
1207		char *path;
1208	} */ *uap;
1209{
1210	int error;
1211	struct mount *mp;
1212	struct nameidata nd;
1213
1214restart:
1215	bwillwrite();
1216	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1217	    uap->path, td);
1218	error = namei(&nd);
1219	if (error)
1220		return (error);
1221
1222	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1223		NDFREE(&nd, NDF_ONLY_PNBUF);
1224		if (nd.ni_vp)
1225			vrele(nd.ni_vp);
1226		vput(nd.ni_dvp);
1227		return (EEXIST);
1228	}
1229	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1230		NDFREE(&nd, NDF_ONLY_PNBUF);
1231		vput(nd.ni_dvp);
1232		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1233			return (error);
1234		goto restart;
1235	}
1236	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1237	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1238	NDFREE(&nd, NDF_ONLY_PNBUF);
1239	vput(nd.ni_dvp);
1240	vn_finished_write(mp);
1241	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1242	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1243	return (error);
1244}
1245
1246/*
1247 * Delete a name from the filesystem.
1248 */
1249#ifndef _SYS_SYSPROTO_H_
1250struct unlink_args {
1251	char	*path;
1252};
1253#endif
1254/* ARGSUSED */
1255int
1256unlink(td, uap)
1257	struct thread *td;
1258	struct unlink_args /* {
1259		char *path;
1260	} */ *uap;
1261{
1262
1263	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1264}
1265
1266int
1267kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1268{
1269	struct mount *mp;
1270	struct vnode *vp;
1271	int error;
1272	struct nameidata nd;
1273
1274restart:
1275	bwillwrite();
1276	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1277	if ((error = namei(&nd)) != 0)
1278		return (error);
1279	vp = nd.ni_vp;
1280	if (vp->v_type == VDIR)
1281		error = EPERM;		/* POSIX */
1282	else {
1283		/*
1284		 * The root of a mounted filesystem cannot be deleted.
1285		 *
1286		 * XXX: can this only be a VDIR case?
1287		 */
1288		if (vp->v_vflag & VV_ROOT)
1289			error = EBUSY;
1290	}
1291	if (error == 0) {
1292		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1293			NDFREE(&nd, NDF_ONLY_PNBUF);
1294			if (vp == nd.ni_dvp)
1295				vrele(vp);
1296			else
1297				vput(vp);
1298			vput(nd.ni_dvp);
1299			if ((error = vn_start_write(NULL, &mp,
1300			    V_XSLEEP | PCATCH)) != 0)
1301				return (error);
1302			goto restart;
1303		}
1304#ifdef MAC
1305		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1306		    &nd.ni_cnd);
1307		if (error)
1308			goto out;
1309#endif
1310		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1311		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1312#ifdef MAC
1313out:
1314#endif
1315		vn_finished_write(mp);
1316	}
1317	NDFREE(&nd, NDF_ONLY_PNBUF);
1318	if (vp == nd.ni_dvp)
1319		vrele(vp);
1320	else
1321		vput(vp);
1322	vput(nd.ni_dvp);
1323	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1324	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1325	return (error);
1326}
1327
1328/*
1329 * Reposition read/write file offset.
1330 */
1331#ifndef _SYS_SYSPROTO_H_
1332struct lseek_args {
1333	int	fd;
1334	int	pad;
1335	off_t	offset;
1336	int	whence;
1337};
1338#endif
1339int
1340lseek(td, uap)
1341	struct thread *td;
1342	register struct lseek_args /* {
1343		int fd;
1344		int pad;
1345		off_t offset;
1346		int whence;
1347	} */ *uap;
1348{
1349	struct ucred *cred = td->td_ucred;
1350	struct file *fp;
1351	struct vnode *vp;
1352	struct vattr vattr;
1353	off_t offset;
1354	int error, noneg;
1355
1356	if ((error = fget(td, uap->fd, &fp)) != 0)
1357		return (error);
1358	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1359		fdrop(fp, td);
1360		return (ESPIPE);
1361	}
1362	vp = fp->f_vnode;
1363	noneg = (vp->v_type != VCHR);
1364	offset = uap->offset;
1365	switch (uap->whence) {
1366	case L_INCR:
1367		if (noneg &&
1368		    (fp->f_offset < 0 ||
1369		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1370			error = EOVERFLOW;
1371			break;
1372		}
1373		offset += fp->f_offset;
1374		break;
1375	case L_XTND:
1376		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1377		error = VOP_GETATTR(vp, &vattr, cred, td);
1378		VOP_UNLOCK(vp, 0, td);
1379		if (error)
1380			break;
1381		if (noneg &&
1382		    (vattr.va_size > OFF_MAX ||
1383		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1384			error = EOVERFLOW;
1385			break;
1386		}
1387		offset += vattr.va_size;
1388		break;
1389	case L_SET:
1390		break;
1391	default:
1392		error = EINVAL;
1393	}
1394	if (error == 0 && noneg && offset < 0)
1395		error = EINVAL;
1396	if (error != 0) {
1397		fdrop(fp, td);
1398		return (error);
1399	}
1400	fp->f_offset = offset;
1401	*(off_t *)(td->td_retval) = fp->f_offset;
1402	fdrop(fp, td);
1403	return (0);
1404}
1405
1406#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1407/*
1408 * Reposition read/write file offset.
1409 */
1410#ifndef _SYS_SYSPROTO_H_
1411struct olseek_args {
1412	int	fd;
1413	long	offset;
1414	int	whence;
1415};
1416#endif
1417int
1418olseek(td, uap)
1419	struct thread *td;
1420	register struct olseek_args /* {
1421		int fd;
1422		long offset;
1423		int whence;
1424	} */ *uap;
1425{
1426	struct lseek_args /* {
1427		int fd;
1428		int pad;
1429		off_t offset;
1430		int whence;
1431	} */ nuap;
1432	int error;
1433
1434	nuap.fd = uap->fd;
1435	nuap.offset = uap->offset;
1436	nuap.whence = uap->whence;
1437	error = lseek(td, &nuap);
1438	return (error);
1439}
1440#endif /* COMPAT_43 */
1441
1442/*
1443 * Check access permissions using passed credentials.
1444 */
1445static int
1446vn_access(vp, user_flags, cred, td)
1447	struct vnode	*vp;
1448	int		user_flags;
1449	struct ucred	*cred;
1450	struct thread	*td;
1451{
1452	int error, flags;
1453
1454	/* Flags == 0 means only check for existence. */
1455	error = 0;
1456	if (user_flags) {
1457		flags = 0;
1458		if (user_flags & R_OK)
1459			flags |= VREAD;
1460		if (user_flags & W_OK)
1461			flags |= VWRITE;
1462		if (user_flags & X_OK)
1463			flags |= VEXEC;
1464#ifdef MAC
1465		error = mac_check_vnode_access(cred, vp, flags);
1466		if (error)
1467			return (error);
1468#endif
1469		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1470			error = VOP_ACCESS(vp, flags, cred, td);
1471	}
1472	return (error);
1473}
1474
1475/*
1476 * Check access permissions using "real" credentials.
1477 */
1478#ifndef _SYS_SYSPROTO_H_
1479struct access_args {
1480	char	*path;
1481	int	flags;
1482};
1483#endif
1484int
1485access(td, uap)
1486	struct thread *td;
1487	register struct access_args /* {
1488		char *path;
1489		int flags;
1490	} */ *uap;
1491{
1492
1493	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1494}
1495
1496int
1497kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1498{
1499	struct ucred *cred, *tmpcred;
1500	register struct vnode *vp;
1501	int error;
1502	struct nameidata nd;
1503
1504	/*
1505	 * Create and modify a temporary credential instead of one that
1506	 * is potentially shared.  This could also mess up socket
1507	 * buffer accounting which can run in an interrupt context.
1508	 *
1509	 * XXX - Depending on how "threads" are finally implemented, it
1510	 * may be better to explicitly pass the credential to namei()
1511	 * rather than to modify the potentially shared process structure.
1512	 */
1513	cred = td->td_ucred;
1514	tmpcred = crdup(cred);
1515	tmpcred->cr_uid = cred->cr_ruid;
1516	tmpcred->cr_groups[0] = cred->cr_rgid;
1517	td->td_ucred = tmpcred;
1518	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1519	if ((error = namei(&nd)) != 0)
1520		goto out1;
1521	vp = nd.ni_vp;
1522
1523	error = vn_access(vp, flags, tmpcred, td);
1524	NDFREE(&nd, NDF_ONLY_PNBUF);
1525	vput(vp);
1526out1:
1527	td->td_ucred = cred;
1528	crfree(tmpcred);
1529	return (error);
1530}
1531
1532/*
1533 * Check access permissions using "effective" credentials.
1534 */
1535#ifndef _SYS_SYSPROTO_H_
1536struct eaccess_args {
1537	char	*path;
1538	int	flags;
1539};
1540#endif
1541int
1542eaccess(td, uap)
1543	struct thread *td;
1544	register struct eaccess_args /* {
1545		char *path;
1546		int flags;
1547	} */ *uap;
1548{
1549	struct nameidata nd;
1550	struct vnode *vp;
1551	int error;
1552
1553	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1554	    uap->path, td);
1555	if ((error = namei(&nd)) != 0)
1556		return (error);
1557	vp = nd.ni_vp;
1558
1559	error = vn_access(vp, uap->flags, td->td_ucred, td);
1560	NDFREE(&nd, NDF_ONLY_PNBUF);
1561	vput(vp);
1562	return (error);
1563}
1564
1565#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1566/*
1567 * Get file status; this version follows links.
1568 */
1569#ifndef _SYS_SYSPROTO_H_
1570struct ostat_args {
1571	char	*path;
1572	struct ostat *ub;
1573};
1574#endif
1575/* ARGSUSED */
1576int
1577ostat(td, uap)
1578	struct thread *td;
1579	register struct ostat_args /* {
1580		char *path;
1581		struct ostat *ub;
1582	} */ *uap;
1583{
1584	struct stat sb;
1585	struct ostat osb;
1586	int error;
1587	struct nameidata nd;
1588
1589	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1590	    uap->path, td);
1591	if ((error = namei(&nd)) != 0)
1592		return (error);
1593	NDFREE(&nd, NDF_ONLY_PNBUF);
1594	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1595	vput(nd.ni_vp);
1596	if (error)
1597		return (error);
1598	cvtstat(&sb, &osb);
1599	error = copyout(&osb, uap->ub, sizeof (osb));
1600	return (error);
1601}
1602
1603/*
1604 * Get file status; this version does not follow links.
1605 */
1606#ifndef _SYS_SYSPROTO_H_
1607struct olstat_args {
1608	char	*path;
1609	struct ostat *ub;
1610};
1611#endif
1612/* ARGSUSED */
1613int
1614olstat(td, uap)
1615	struct thread *td;
1616	register struct olstat_args /* {
1617		char *path;
1618		struct ostat *ub;
1619	} */ *uap;
1620{
1621	struct vnode *vp;
1622	struct stat sb;
1623	struct ostat osb;
1624	int error;
1625	struct nameidata nd;
1626
1627	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1628	    uap->path, td);
1629	if ((error = namei(&nd)) != 0)
1630		return (error);
1631	vp = nd.ni_vp;
1632	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1633	NDFREE(&nd, NDF_ONLY_PNBUF);
1634	vput(vp);
1635	if (error)
1636		return (error);
1637	cvtstat(&sb, &osb);
1638	error = copyout(&osb, uap->ub, sizeof (osb));
1639	return (error);
1640}
1641
1642/*
1643 * Convert from an old to a new stat structure.
1644 */
1645void
1646cvtstat(st, ost)
1647	struct stat *st;
1648	struct ostat *ost;
1649{
1650
1651	ost->st_dev = st->st_dev;
1652	ost->st_ino = st->st_ino;
1653	ost->st_mode = st->st_mode;
1654	ost->st_nlink = st->st_nlink;
1655	ost->st_uid = st->st_uid;
1656	ost->st_gid = st->st_gid;
1657	ost->st_rdev = st->st_rdev;
1658	if (st->st_size < (quad_t)1 << 32)
1659		ost->st_size = st->st_size;
1660	else
1661		ost->st_size = -2;
1662	ost->st_atime = st->st_atime;
1663	ost->st_mtime = st->st_mtime;
1664	ost->st_ctime = st->st_ctime;
1665	ost->st_blksize = st->st_blksize;
1666	ost->st_blocks = st->st_blocks;
1667	ost->st_flags = st->st_flags;
1668	ost->st_gen = st->st_gen;
1669}
1670#endif /* COMPAT_43 || COMPAT_SUNOS */
1671
1672/*
1673 * Get file status; this version follows links.
1674 */
1675#ifndef _SYS_SYSPROTO_H_
1676struct stat_args {
1677	char	*path;
1678	struct stat *ub;
1679};
1680#endif
1681/* ARGSUSED */
1682int
1683stat(td, uap)
1684	struct thread *td;
1685	register struct stat_args /* {
1686		char *path;
1687		struct stat *ub;
1688	} */ *uap;
1689{
1690	struct stat sb;
1691	int error;
1692	struct nameidata nd;
1693
1694#ifdef LOOKUP_SHARED
1695	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1696	    UIO_USERSPACE, uap->path, td);
1697#else
1698	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1699	    uap->path, td);
1700#endif
1701	if ((error = namei(&nd)) != 0)
1702		return (error);
1703	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1704	NDFREE(&nd, NDF_ONLY_PNBUF);
1705	vput(nd.ni_vp);
1706	if (error)
1707		return (error);
1708	error = copyout(&sb, uap->ub, sizeof (sb));
1709	return (error);
1710}
1711
1712/*
1713 * Get file status; this version does not follow links.
1714 */
1715#ifndef _SYS_SYSPROTO_H_
1716struct lstat_args {
1717	char	*path;
1718	struct stat *ub;
1719};
1720#endif
1721/* ARGSUSED */
1722int
1723lstat(td, uap)
1724	struct thread *td;
1725	register struct lstat_args /* {
1726		char *path;
1727		struct stat *ub;
1728	} */ *uap;
1729{
1730	int error;
1731	struct vnode *vp;
1732	struct stat sb;
1733	struct nameidata nd;
1734
1735	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1736	    uap->path, td);
1737	if ((error = namei(&nd)) != 0)
1738		return (error);
1739	vp = nd.ni_vp;
1740	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1741	NDFREE(&nd, NDF_ONLY_PNBUF);
1742	vput(vp);
1743	if (error)
1744		return (error);
1745	error = copyout(&sb, uap->ub, sizeof (sb));
1746	return (error);
1747}
1748
1749/*
1750 * Implementation of the NetBSD stat() function.
1751 * XXX This should probably be collapsed with the FreeBSD version,
1752 * as the differences are only due to vn_stat() clearing spares at
1753 * the end of the structures.  vn_stat could be split to avoid this,
1754 * and thus collapse the following to close to zero code.
1755 */
1756void
1757cvtnstat(sb, nsb)
1758	struct stat *sb;
1759	struct nstat *nsb;
1760{
1761	bzero(nsb, sizeof *nsb);
1762	nsb->st_dev = sb->st_dev;
1763	nsb->st_ino = sb->st_ino;
1764	nsb->st_mode = sb->st_mode;
1765	nsb->st_nlink = sb->st_nlink;
1766	nsb->st_uid = sb->st_uid;
1767	nsb->st_gid = sb->st_gid;
1768	nsb->st_rdev = sb->st_rdev;
1769	nsb->st_atimespec = sb->st_atimespec;
1770	nsb->st_mtimespec = sb->st_mtimespec;
1771	nsb->st_ctimespec = sb->st_ctimespec;
1772	nsb->st_size = sb->st_size;
1773	nsb->st_blocks = sb->st_blocks;
1774	nsb->st_blksize = sb->st_blksize;
1775	nsb->st_flags = sb->st_flags;
1776	nsb->st_gen = sb->st_gen;
1777	nsb->st_birthtimespec = sb->st_birthtimespec;
1778}
1779
1780#ifndef _SYS_SYSPROTO_H_
1781struct nstat_args {
1782	char	*path;
1783	struct nstat *ub;
1784};
1785#endif
1786/* ARGSUSED */
1787int
1788nstat(td, uap)
1789	struct thread *td;
1790	register struct nstat_args /* {
1791		char *path;
1792		struct nstat *ub;
1793	} */ *uap;
1794{
1795	struct stat sb;
1796	struct nstat nsb;
1797	int error;
1798	struct nameidata nd;
1799
1800	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1801	    uap->path, td);
1802	if ((error = namei(&nd)) != 0)
1803		return (error);
1804	NDFREE(&nd, NDF_ONLY_PNBUF);
1805	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1806	vput(nd.ni_vp);
1807	if (error)
1808		return (error);
1809	cvtnstat(&sb, &nsb);
1810	error = copyout(&nsb, uap->ub, sizeof (nsb));
1811	return (error);
1812}
1813
1814/*
1815 * NetBSD lstat.  Get file status; this version does not follow links.
1816 */
1817#ifndef _SYS_SYSPROTO_H_
1818struct lstat_args {
1819	char	*path;
1820	struct stat *ub;
1821};
1822#endif
1823/* ARGSUSED */
1824int
1825nlstat(td, uap)
1826	struct thread *td;
1827	register struct nlstat_args /* {
1828		char *path;
1829		struct nstat *ub;
1830	} */ *uap;
1831{
1832	int error;
1833	struct vnode *vp;
1834	struct stat sb;
1835	struct nstat nsb;
1836	struct nameidata nd;
1837
1838	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1839	    uap->path, td);
1840	if ((error = namei(&nd)) != 0)
1841		return (error);
1842	vp = nd.ni_vp;
1843	NDFREE(&nd, NDF_ONLY_PNBUF);
1844	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1845	vput(vp);
1846	if (error)
1847		return (error);
1848	cvtnstat(&sb, &nsb);
1849	error = copyout(&nsb, uap->ub, sizeof (nsb));
1850	return (error);
1851}
1852
1853/*
1854 * Get configurable pathname variables.
1855 */
1856#ifndef _SYS_SYSPROTO_H_
1857struct pathconf_args {
1858	char	*path;
1859	int	name;
1860};
1861#endif
1862/* ARGSUSED */
1863int
1864pathconf(td, uap)
1865	struct thread *td;
1866	register struct pathconf_args /* {
1867		char *path;
1868		int name;
1869	} */ *uap;
1870{
1871	int error;
1872	struct nameidata nd;
1873
1874	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1875	    uap->path, td);
1876	if ((error = namei(&nd)) != 0)
1877		return (error);
1878	NDFREE(&nd, NDF_ONLY_PNBUF);
1879
1880	/* If asynchronous I/O is available, it works for all files. */
1881	if (uap->name == _PC_ASYNC_IO)
1882		td->td_retval[0] = async_io_version;
1883	else
1884		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1885	vput(nd.ni_vp);
1886	return (error);
1887}
1888
1889/*
1890 * Return target name of a symbolic link.
1891 */
1892#ifndef _SYS_SYSPROTO_H_
1893struct readlink_args {
1894	char	*path;
1895	char	*buf;
1896	int	count;
1897};
1898#endif
1899/* ARGSUSED */
1900int
1901readlink(td, uap)
1902	struct thread *td;
1903	register struct readlink_args /* {
1904		char *path;
1905		char *buf;
1906		int count;
1907	} */ *uap;
1908{
1909
1910	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1911	    UIO_USERSPACE, uap->count));
1912}
1913
1914int
1915kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1916    enum uio_seg bufseg, int count)
1917{
1918	register struct vnode *vp;
1919	struct iovec aiov;
1920	struct uio auio;
1921	int error;
1922	struct nameidata nd;
1923
1924	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1925	if ((error = namei(&nd)) != 0)
1926		return (error);
1927	NDFREE(&nd, NDF_ONLY_PNBUF);
1928	vp = nd.ni_vp;
1929#ifdef MAC
1930	error = mac_check_vnode_readlink(td->td_ucred, vp);
1931	if (error) {
1932		vput(vp);
1933		return (error);
1934	}
1935#endif
1936	if (vp->v_type != VLNK)
1937		error = EINVAL;
1938	else {
1939		aiov.iov_base = buf;
1940		aiov.iov_len = count;
1941		auio.uio_iov = &aiov;
1942		auio.uio_iovcnt = 1;
1943		auio.uio_offset = 0;
1944		auio.uio_rw = UIO_READ;
1945		auio.uio_segflg = bufseg;
1946		auio.uio_td = td;
1947		auio.uio_resid = count;
1948		error = VOP_READLINK(vp, &auio, td->td_ucred);
1949	}
1950	vput(vp);
1951	td->td_retval[0] = count - auio.uio_resid;
1952	return (error);
1953}
1954
1955/*
1956 * Common implementation code for chflags() and fchflags().
1957 */
1958static int
1959setfflags(td, vp, flags)
1960	struct thread *td;
1961	struct vnode *vp;
1962	int flags;
1963{
1964	int error;
1965	struct mount *mp;
1966	struct vattr vattr;
1967
1968	/*
1969	 * Prevent non-root users from setting flags on devices.  When
1970	 * a device is reused, users can retain ownership of the device
1971	 * if they are allowed to set flags and programs assume that
1972	 * chown can't fail when done as root.
1973	 */
1974	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1975		error = suser_cred(td->td_ucred, PRISON_ROOT);
1976		if (error)
1977			return (error);
1978	}
1979
1980	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1981		return (error);
1982	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1983	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1984	VATTR_NULL(&vattr);
1985	vattr.va_flags = flags;
1986#ifdef MAC
1987	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1988	if (error == 0)
1989#endif
1990		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1991	VOP_UNLOCK(vp, 0, td);
1992	vn_finished_write(mp);
1993	return (error);
1994}
1995
1996/*
1997 * Change flags of a file given a path name.
1998 */
1999#ifndef _SYS_SYSPROTO_H_
2000struct chflags_args {
2001	char	*path;
2002	int	flags;
2003};
2004#endif
2005/* ARGSUSED */
2006int
2007chflags(td, uap)
2008	struct thread *td;
2009	register struct chflags_args /* {
2010		char *path;
2011		int flags;
2012	} */ *uap;
2013{
2014	int error;
2015	struct nameidata nd;
2016
2017	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2018	if ((error = namei(&nd)) != 0)
2019		return (error);
2020	NDFREE(&nd, NDF_ONLY_PNBUF);
2021	error = setfflags(td, nd.ni_vp, uap->flags);
2022	vrele(nd.ni_vp);
2023	return error;
2024}
2025
2026/*
2027 * Same as chflags() but doesn't follow symlinks.
2028 */
2029int
2030lchflags(td, uap)
2031	struct thread *td;
2032	register struct lchflags_args /* {
2033		char *path;
2034		int flags;
2035	} */ *uap;
2036{
2037	int error;
2038	struct nameidata nd;
2039
2040	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2041	if ((error = namei(&nd)) != 0)
2042		return (error);
2043	NDFREE(&nd, NDF_ONLY_PNBUF);
2044	error = setfflags(td, nd.ni_vp, uap->flags);
2045	vrele(nd.ni_vp);
2046	return error;
2047}
2048
2049/*
2050 * Change flags of a file given a file descriptor.
2051 */
2052#ifndef _SYS_SYSPROTO_H_
2053struct fchflags_args {
2054	int	fd;
2055	int	flags;
2056};
2057#endif
2058/* ARGSUSED */
2059int
2060fchflags(td, uap)
2061	struct thread *td;
2062	register struct fchflags_args /* {
2063		int fd;
2064		int flags;
2065	} */ *uap;
2066{
2067	struct file *fp;
2068	int error;
2069
2070	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2071		return (error);
2072	error = setfflags(td, fp->f_vnode, uap->flags);
2073	fdrop(fp, td);
2074	return (error);
2075}
2076
2077/*
2078 * Common implementation code for chmod(), lchmod() and fchmod().
2079 */
2080static int
2081setfmode(td, vp, mode)
2082	struct thread *td;
2083	struct vnode *vp;
2084	int mode;
2085{
2086	int error;
2087	struct mount *mp;
2088	struct vattr vattr;
2089
2090	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2091		return (error);
2092	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2093	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2094	VATTR_NULL(&vattr);
2095	vattr.va_mode = mode & ALLPERMS;
2096#ifdef MAC
2097	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2098	if (error == 0)
2099#endif
2100		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2101	VOP_UNLOCK(vp, 0, td);
2102	vn_finished_write(mp);
2103	return error;
2104}
2105
2106/*
2107 * Change mode of a file given path name.
2108 */
2109#ifndef _SYS_SYSPROTO_H_
2110struct chmod_args {
2111	char	*path;
2112	int	mode;
2113};
2114#endif
2115/* ARGSUSED */
2116int
2117chmod(td, uap)
2118	struct thread *td;
2119	register struct chmod_args /* {
2120		char *path;
2121		int mode;
2122	} */ *uap;
2123{
2124
2125	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2126}
2127
2128int
2129kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2130{
2131	int error;
2132	struct nameidata nd;
2133
2134	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2135	if ((error = namei(&nd)) != 0)
2136		return (error);
2137	NDFREE(&nd, NDF_ONLY_PNBUF);
2138	error = setfmode(td, nd.ni_vp, mode);
2139	vrele(nd.ni_vp);
2140	return error;
2141}
2142
2143/*
2144 * Change mode of a file given path name (don't follow links.)
2145 */
2146#ifndef _SYS_SYSPROTO_H_
2147struct lchmod_args {
2148	char	*path;
2149	int	mode;
2150};
2151#endif
2152/* ARGSUSED */
2153int
2154lchmod(td, uap)
2155	struct thread *td;
2156	register struct lchmod_args /* {
2157		char *path;
2158		int mode;
2159	} */ *uap;
2160{
2161	int error;
2162	struct nameidata nd;
2163
2164	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2165	if ((error = namei(&nd)) != 0)
2166		return (error);
2167	NDFREE(&nd, NDF_ONLY_PNBUF);
2168	error = setfmode(td, nd.ni_vp, uap->mode);
2169	vrele(nd.ni_vp);
2170	return error;
2171}
2172
2173/*
2174 * Change mode of a file given a file descriptor.
2175 */
2176#ifndef _SYS_SYSPROTO_H_
2177struct fchmod_args {
2178	int	fd;
2179	int	mode;
2180};
2181#endif
2182/* ARGSUSED */
2183int
2184fchmod(td, uap)
2185	struct thread *td;
2186	register struct fchmod_args /* {
2187		int fd;
2188		int mode;
2189	} */ *uap;
2190{
2191	struct file *fp;
2192	int error;
2193
2194	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2195		return (error);
2196	error = setfmode(td, fp->f_vnode, uap->mode);
2197	fdrop(fp, td);
2198	return (error);
2199}
2200
2201/*
2202 * Common implementation for chown(), lchown(), and fchown()
2203 */
2204static int
2205setfown(td, vp, uid, gid)
2206	struct thread *td;
2207	struct vnode *vp;
2208	uid_t uid;
2209	gid_t gid;
2210{
2211	int error;
2212	struct mount *mp;
2213	struct vattr vattr;
2214
2215	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2216		return (error);
2217	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2218	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2219	VATTR_NULL(&vattr);
2220	vattr.va_uid = uid;
2221	vattr.va_gid = gid;
2222#ifdef MAC
2223	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2224	    vattr.va_gid);
2225	if (error == 0)
2226#endif
2227		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2228	VOP_UNLOCK(vp, 0, td);
2229	vn_finished_write(mp);
2230	return error;
2231}
2232
2233/*
2234 * Set ownership given a path name.
2235 */
2236#ifndef _SYS_SYSPROTO_H_
2237struct chown_args {
2238	char	*path;
2239	int	uid;
2240	int	gid;
2241};
2242#endif
2243/* ARGSUSED */
2244int
2245chown(td, uap)
2246	struct thread *td;
2247	register struct chown_args /* {
2248		char *path;
2249		int uid;
2250		int gid;
2251	} */ *uap;
2252{
2253
2254	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2255}
2256
2257int
2258kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2259    int gid)
2260{
2261	int error;
2262	struct nameidata nd;
2263
2264	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2265	if ((error = namei(&nd)) != 0)
2266		return (error);
2267	NDFREE(&nd, NDF_ONLY_PNBUF);
2268	error = setfown(td, nd.ni_vp, uid, gid);
2269	vrele(nd.ni_vp);
2270	return (error);
2271}
2272
2273/*
2274 * Set ownership given a path name, do not cross symlinks.
2275 */
2276#ifndef _SYS_SYSPROTO_H_
2277struct lchown_args {
2278	char	*path;
2279	int	uid;
2280	int	gid;
2281};
2282#endif
2283/* ARGSUSED */
2284int
2285lchown(td, uap)
2286	struct thread *td;
2287	register struct lchown_args /* {
2288		char *path;
2289		int uid;
2290		int gid;
2291	} */ *uap;
2292{
2293
2294	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2295}
2296
2297int
2298kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2299    int gid)
2300{
2301	int error;
2302	struct nameidata nd;
2303
2304	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2305	if ((error = namei(&nd)) != 0)
2306		return (error);
2307	NDFREE(&nd, NDF_ONLY_PNBUF);
2308	error = setfown(td, nd.ni_vp, uid, gid);
2309	vrele(nd.ni_vp);
2310	return (error);
2311}
2312
2313/*
2314 * Set ownership given a file descriptor.
2315 */
2316#ifndef _SYS_SYSPROTO_H_
2317struct fchown_args {
2318	int	fd;
2319	int	uid;
2320	int	gid;
2321};
2322#endif
2323/* ARGSUSED */
2324int
2325fchown(td, uap)
2326	struct thread *td;
2327	register struct fchown_args /* {
2328		int fd;
2329		int uid;
2330		int gid;
2331	} */ *uap;
2332{
2333	struct file *fp;
2334	int error;
2335
2336	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2337		return (error);
2338	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2339	fdrop(fp, td);
2340	return (error);
2341}
2342
2343/*
2344 * Common implementation code for utimes(), lutimes(), and futimes().
2345 */
2346static int
2347getutimes(usrtvp, tvpseg, tsp)
2348	const struct timeval *usrtvp;
2349	enum uio_seg tvpseg;
2350	struct timespec *tsp;
2351{
2352	struct timeval tv[2];
2353	const struct timeval *tvp;
2354	int error;
2355
2356	if (usrtvp == NULL) {
2357		microtime(&tv[0]);
2358		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2359		tsp[1] = tsp[0];
2360	} else {
2361		if (tvpseg == UIO_SYSSPACE) {
2362			tvp = usrtvp;
2363		} else {
2364			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2365				return (error);
2366			tvp = tv;
2367		}
2368
2369		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2370		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2371	}
2372	return 0;
2373}
2374
2375/*
2376 * Common implementation code for utimes(), lutimes(), and futimes().
2377 */
2378static int
2379setutimes(td, vp, ts, numtimes, nullflag)
2380	struct thread *td;
2381	struct vnode *vp;
2382	const struct timespec *ts;
2383	int numtimes;
2384	int nullflag;
2385{
2386	int error, setbirthtime;
2387	struct mount *mp;
2388	struct vattr vattr;
2389
2390	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2391		return (error);
2392	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2393	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2394	setbirthtime = 0;
2395	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2396	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2397		setbirthtime = 1;
2398	VATTR_NULL(&vattr);
2399	vattr.va_atime = ts[0];
2400	vattr.va_mtime = ts[1];
2401	if (setbirthtime)
2402		vattr.va_birthtime = ts[1];
2403	if (numtimes > 2)
2404		vattr.va_birthtime = ts[2];
2405	if (nullflag)
2406		vattr.va_vaflags |= VA_UTIMES_NULL;
2407#ifdef MAC
2408	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2409	    vattr.va_mtime);
2410#endif
2411	if (error == 0)
2412		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2413	VOP_UNLOCK(vp, 0, td);
2414	vn_finished_write(mp);
2415	return error;
2416}
2417
2418/*
2419 * Set the access and modification times of a file.
2420 */
2421#ifndef _SYS_SYSPROTO_H_
2422struct utimes_args {
2423	char	*path;
2424	struct	timeval *tptr;
2425};
2426#endif
2427/* ARGSUSED */
2428int
2429utimes(td, uap)
2430	struct thread *td;
2431	register struct utimes_args /* {
2432		char *path;
2433		struct timeval *tptr;
2434	} */ *uap;
2435{
2436
2437	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2438	    UIO_USERSPACE));
2439}
2440
2441int
2442kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2443    struct timeval *tptr, enum uio_seg tptrseg)
2444{
2445	struct timespec ts[2];
2446	int error;
2447	struct nameidata nd;
2448
2449	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2450		return (error);
2451	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2452	if ((error = namei(&nd)) != 0)
2453		return (error);
2454	NDFREE(&nd, NDF_ONLY_PNBUF);
2455	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2456	vrele(nd.ni_vp);
2457	return (error);
2458}
2459
2460/*
2461 * Set the access and modification times of a file.
2462 */
2463#ifndef _SYS_SYSPROTO_H_
2464struct lutimes_args {
2465	char	*path;
2466	struct	timeval *tptr;
2467};
2468#endif
2469/* ARGSUSED */
2470int
2471lutimes(td, uap)
2472	struct thread *td;
2473	register struct lutimes_args /* {
2474		char *path;
2475		struct timeval *tptr;
2476	} */ *uap;
2477{
2478
2479	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2480	    UIO_USERSPACE));
2481}
2482
2483int
2484kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2485    struct timeval *tptr, enum uio_seg tptrseg)
2486{
2487	struct timespec ts[2];
2488	int error;
2489	struct nameidata nd;
2490
2491	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2492		return (error);
2493	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2494	if ((error = namei(&nd)) != 0)
2495		return (error);
2496	NDFREE(&nd, NDF_ONLY_PNBUF);
2497	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2498	vrele(nd.ni_vp);
2499	return (error);
2500}
2501
2502/*
2503 * Set the access and modification times of a file.
2504 */
2505#ifndef _SYS_SYSPROTO_H_
2506struct futimes_args {
2507	int	fd;
2508	struct	timeval *tptr;
2509};
2510#endif
2511/* ARGSUSED */
2512int
2513futimes(td, uap)
2514	struct thread *td;
2515	register struct futimes_args /* {
2516		int  fd;
2517		struct timeval *tptr;
2518	} */ *uap;
2519{
2520
2521	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2522}
2523
2524int
2525kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2526    enum uio_seg tptrseg)
2527{
2528	struct timespec ts[2];
2529	struct file *fp;
2530	int error;
2531
2532	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2533		return (error);
2534	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2535		return (error);
2536	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2537	fdrop(fp, td);
2538	return (error);
2539}
2540
2541/*
2542 * Truncate a file given its path name.
2543 */
2544#ifndef _SYS_SYSPROTO_H_
2545struct truncate_args {
2546	char	*path;
2547	int	pad;
2548	off_t	length;
2549};
2550#endif
2551/* ARGSUSED */
2552int
2553truncate(td, uap)
2554	struct thread *td;
2555	register struct truncate_args /* {
2556		char *path;
2557		int pad;
2558		off_t length;
2559	} */ *uap;
2560{
2561
2562	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2563}
2564
2565int
2566kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2567{
2568	struct mount *mp;
2569	struct vnode *vp;
2570	struct vattr vattr;
2571	int error;
2572	struct nameidata nd;
2573
2574	if (length < 0)
2575		return(EINVAL);
2576	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2577	if ((error = namei(&nd)) != 0)
2578		return (error);
2579	vp = nd.ni_vp;
2580	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2581		vrele(vp);
2582		return (error);
2583	}
2584	NDFREE(&nd, NDF_ONLY_PNBUF);
2585	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2586	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2587	if (vp->v_type == VDIR)
2588		error = EISDIR;
2589#ifdef MAC
2590	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2591	}
2592#endif
2593	else if ((error = vn_writechk(vp)) == 0 &&
2594	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2595		VATTR_NULL(&vattr);
2596		vattr.va_size = length;
2597		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2598	}
2599	vput(vp);
2600	vn_finished_write(mp);
2601	return (error);
2602}
2603
2604/*
2605 * Truncate a file given a file descriptor.
2606 */
2607#ifndef _SYS_SYSPROTO_H_
2608struct ftruncate_args {
2609	int	fd;
2610	int	pad;
2611	off_t	length;
2612};
2613#endif
2614/* ARGSUSED */
2615int
2616ftruncate(td, uap)
2617	struct thread *td;
2618	register struct ftruncate_args /* {
2619		int fd;
2620		int pad;
2621		off_t length;
2622	} */ *uap;
2623{
2624	struct mount *mp;
2625	struct vattr vattr;
2626	struct vnode *vp;
2627	struct file *fp;
2628	int error;
2629
2630	if (uap->length < 0)
2631		return(EINVAL);
2632	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2633		return (error);
2634	if ((fp->f_flag & FWRITE) == 0) {
2635		fdrop(fp, td);
2636		return (EINVAL);
2637	}
2638	vp = fp->f_vnode;
2639	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2640		fdrop(fp, td);
2641		return (error);
2642	}
2643	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2644	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2645	if (vp->v_type == VDIR)
2646		error = EISDIR;
2647#ifdef MAC
2648	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2649	    vp))) {
2650	}
2651#endif
2652	else if ((error = vn_writechk(vp)) == 0) {
2653		VATTR_NULL(&vattr);
2654		vattr.va_size = uap->length;
2655		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2656	}
2657	VOP_UNLOCK(vp, 0, td);
2658	vn_finished_write(mp);
2659	fdrop(fp, td);
2660	return (error);
2661}
2662
2663#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2664/*
2665 * Truncate a file given its path name.
2666 */
2667#ifndef _SYS_SYSPROTO_H_
2668struct otruncate_args {
2669	char	*path;
2670	long	length;
2671};
2672#endif
2673/* ARGSUSED */
2674int
2675otruncate(td, uap)
2676	struct thread *td;
2677	register struct otruncate_args /* {
2678		char *path;
2679		long length;
2680	} */ *uap;
2681{
2682	struct truncate_args /* {
2683		char *path;
2684		int pad;
2685		off_t length;
2686	} */ nuap;
2687
2688	nuap.path = uap->path;
2689	nuap.length = uap->length;
2690	return (truncate(td, &nuap));
2691}
2692
2693/*
2694 * Truncate a file given a file descriptor.
2695 */
2696#ifndef _SYS_SYSPROTO_H_
2697struct oftruncate_args {
2698	int	fd;
2699	long	length;
2700};
2701#endif
2702/* ARGSUSED */
2703int
2704oftruncate(td, uap)
2705	struct thread *td;
2706	register struct oftruncate_args /* {
2707		int fd;
2708		long length;
2709	} */ *uap;
2710{
2711	struct ftruncate_args /* {
2712		int fd;
2713		int pad;
2714		off_t length;
2715	} */ nuap;
2716
2717	nuap.fd = uap->fd;
2718	nuap.length = uap->length;
2719	return (ftruncate(td, &nuap));
2720}
2721#endif /* COMPAT_43 || COMPAT_SUNOS */
2722
2723/*
2724 * Sync an open file.
2725 */
2726#ifndef _SYS_SYSPROTO_H_
2727struct fsync_args {
2728	int	fd;
2729};
2730#endif
2731/* ARGSUSED */
2732int
2733fsync(td, uap)
2734	struct thread *td;
2735	struct fsync_args /* {
2736		int fd;
2737	} */ *uap;
2738{
2739	struct vnode *vp;
2740	struct mount *mp;
2741	struct file *fp;
2742	vm_object_t obj;
2743	int error;
2744
2745	GIANT_REQUIRED;
2746
2747	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2748		return (error);
2749	vp = fp->f_vnode;
2750	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2751		fdrop(fp, td);
2752		return (error);
2753	}
2754	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2755	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2756		VM_OBJECT_LOCK(obj);
2757		vm_object_page_clean(obj, 0, 0, 0);
2758		VM_OBJECT_UNLOCK(obj);
2759	}
2760	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2761	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2762	    && softdep_fsync_hook != NULL)
2763		error = (*softdep_fsync_hook)(vp);
2764
2765	VOP_UNLOCK(vp, 0, td);
2766	vn_finished_write(mp);
2767	fdrop(fp, td);
2768	return (error);
2769}
2770
2771/*
2772 * Rename files.  Source and destination must either both be directories,
2773 * or both not be directories.  If target is a directory, it must be empty.
2774 */
2775#ifndef _SYS_SYSPROTO_H_
2776struct rename_args {
2777	char	*from;
2778	char	*to;
2779};
2780#endif
2781/* ARGSUSED */
2782int
2783rename(td, uap)
2784	struct thread *td;
2785	register struct rename_args /* {
2786		char *from;
2787		char *to;
2788	} */ *uap;
2789{
2790
2791	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2792}
2793
2794int
2795kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2796{
2797	struct mount *mp = NULL;
2798	struct vnode *tvp, *fvp, *tdvp;
2799	struct nameidata fromnd, tond;
2800	int error;
2801
2802	bwillwrite();
2803#ifdef MAC
2804	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2805	    from, td);
2806#else
2807	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2808#endif
2809	if ((error = namei(&fromnd)) != 0)
2810		return (error);
2811#ifdef MAC
2812	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2813	    fromnd.ni_vp, &fromnd.ni_cnd);
2814	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2815	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2816#endif
2817	fvp = fromnd.ni_vp;
2818	if (error == 0)
2819		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2820	if (error != 0) {
2821		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2822		vrele(fromnd.ni_dvp);
2823		vrele(fvp);
2824		goto out1;
2825	}
2826	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2827	    NOOBJ, pathseg, to, td);
2828	if (fromnd.ni_vp->v_type == VDIR)
2829		tond.ni_cnd.cn_flags |= WILLBEDIR;
2830	if ((error = namei(&tond)) != 0) {
2831		/* Translate error code for rename("dir1", "dir2/."). */
2832		if (error == EISDIR && fvp->v_type == VDIR)
2833			error = EINVAL;
2834		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2835		vrele(fromnd.ni_dvp);
2836		vrele(fvp);
2837		goto out1;
2838	}
2839	tdvp = tond.ni_dvp;
2840	tvp = tond.ni_vp;
2841	if (tvp != NULL) {
2842		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2843			error = ENOTDIR;
2844			goto out;
2845		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2846			error = EISDIR;
2847			goto out;
2848		}
2849	}
2850	if (fvp == tdvp)
2851		error = EINVAL;
2852	/*
2853	 * If the source is the same as the destination (that is, if they
2854	 * are links to the same vnode), then there is nothing to do.
2855	 */
2856	if (fvp == tvp)
2857		error = -1;
2858#ifdef MAC
2859	else
2860		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2861		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2862#endif
2863out:
2864	if (!error) {
2865		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2866		if (fromnd.ni_dvp != tdvp) {
2867			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2868		}
2869		if (tvp) {
2870			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2871		}
2872		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2873				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2874		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2875		NDFREE(&tond, NDF_ONLY_PNBUF);
2876	} else {
2877		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2878		NDFREE(&tond, NDF_ONLY_PNBUF);
2879		if (tdvp == tvp)
2880			vrele(tdvp);
2881		else
2882			vput(tdvp);
2883		if (tvp)
2884			vput(tvp);
2885		vrele(fromnd.ni_dvp);
2886		vrele(fvp);
2887	}
2888	vrele(tond.ni_startdir);
2889	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2890	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2891	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2892	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2893out1:
2894	vn_finished_write(mp);
2895	if (fromnd.ni_startdir)
2896		vrele(fromnd.ni_startdir);
2897	if (error == -1)
2898		return (0);
2899	return (error);
2900}
2901
2902/*
2903 * Make a directory file.
2904 */
2905#ifndef _SYS_SYSPROTO_H_
2906struct mkdir_args {
2907	char	*path;
2908	int	mode;
2909};
2910#endif
2911/* ARGSUSED */
2912int
2913mkdir(td, uap)
2914	struct thread *td;
2915	register struct mkdir_args /* {
2916		char *path;
2917		int mode;
2918	} */ *uap;
2919{
2920
2921	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2922}
2923
2924int
2925kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2926{
2927	struct mount *mp;
2928	struct vnode *vp;
2929	struct vattr vattr;
2930	int error;
2931	struct nameidata nd;
2932
2933restart:
2934	bwillwrite();
2935	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2936	nd.ni_cnd.cn_flags |= WILLBEDIR;
2937	if ((error = namei(&nd)) != 0)
2938		return (error);
2939	vp = nd.ni_vp;
2940	if (vp != NULL) {
2941		NDFREE(&nd, NDF_ONLY_PNBUF);
2942		vrele(vp);
2943		/*
2944		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2945		 * the strange behaviour of leaving the vnode unlocked
2946		 * if the target is the same vnode as the parent.
2947		 */
2948		if (vp == nd.ni_dvp)
2949			vrele(nd.ni_dvp);
2950		else
2951			vput(nd.ni_dvp);
2952		return (EEXIST);
2953	}
2954	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2955		NDFREE(&nd, NDF_ONLY_PNBUF);
2956		vput(nd.ni_dvp);
2957		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2958			return (error);
2959		goto restart;
2960	}
2961	VATTR_NULL(&vattr);
2962	vattr.va_type = VDIR;
2963	FILEDESC_LOCK(td->td_proc->p_fd);
2964	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2965	FILEDESC_UNLOCK(td->td_proc->p_fd);
2966#ifdef MAC
2967	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2968	    &vattr);
2969	if (error)
2970		goto out;
2971#endif
2972	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2973	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2974#ifdef MAC
2975out:
2976#endif
2977	NDFREE(&nd, NDF_ONLY_PNBUF);
2978	vput(nd.ni_dvp);
2979	if (!error)
2980		vput(nd.ni_vp);
2981	vn_finished_write(mp);
2982	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2983	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2984	return (error);
2985}
2986
2987/*
2988 * Remove a directory file.
2989 */
2990#ifndef _SYS_SYSPROTO_H_
2991struct rmdir_args {
2992	char	*path;
2993};
2994#endif
2995/* ARGSUSED */
2996int
2997rmdir(td, uap)
2998	struct thread *td;
2999	struct rmdir_args /* {
3000		char *path;
3001	} */ *uap;
3002{
3003
3004	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3005}
3006
3007int
3008kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3009{
3010	struct mount *mp;
3011	struct vnode *vp;
3012	int error;
3013	struct nameidata nd;
3014
3015restart:
3016	bwillwrite();
3017	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3018	if ((error = namei(&nd)) != 0)
3019		return (error);
3020	vp = nd.ni_vp;
3021	if (vp->v_type != VDIR) {
3022		error = ENOTDIR;
3023		goto out;
3024	}
3025	/*
3026	 * No rmdir "." please.
3027	 */
3028	if (nd.ni_dvp == vp) {
3029		error = EINVAL;
3030		goto out;
3031	}
3032	/*
3033	 * The root of a mounted filesystem cannot be deleted.
3034	 */
3035	if (vp->v_vflag & VV_ROOT) {
3036		error = EBUSY;
3037		goto out;
3038	}
3039#ifdef MAC
3040	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3041	    &nd.ni_cnd);
3042	if (error)
3043		goto out;
3044#endif
3045	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3046		NDFREE(&nd, NDF_ONLY_PNBUF);
3047		if (nd.ni_dvp == vp)
3048			vrele(nd.ni_dvp);
3049		else
3050			vput(nd.ni_dvp);
3051		vput(vp);
3052		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3053			return (error);
3054		goto restart;
3055	}
3056	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3057	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3058	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3059	vn_finished_write(mp);
3060out:
3061	NDFREE(&nd, NDF_ONLY_PNBUF);
3062	if (nd.ni_dvp == vp)
3063		vrele(nd.ni_dvp);
3064	else
3065		vput(nd.ni_dvp);
3066	vput(vp);
3067	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3068	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3069	return (error);
3070}
3071
3072#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * Compatibility variant.  The descriptor must be open for reading
 * (EBADF otherwise) and refer to a directory (EINVAL otherwise); counts
 * above 64K are rejected with EINVAL.  Except for the direct-read case
 * on non-little-endian builds, entries are read into a temporary kernel
 * buffer, the d_type/d_namlen bytes of each entry are rewritten as the
 * inline comments describe, and the result is copied to the user buffer.
 *
 * On success the file offset as it was before the read is copied out to
 * *basep and td_retval[0] is set to the number of bytes transferred.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/*
	 * Re-entered from the union handling below after vp (and possibly
	 * fp's vnode and offset) has been switched to another directory.
	 */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as a single user-space iovec. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what gets reported in *basep. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/*
		 * Non-little-endian builds only: when mnt_maxsymlinklen is
		 * not positive, read straight into the user buffer with no
		 * per-entry rewriting.
		 */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer of `count' bytes:
		 * kuio starts as a copy of auio and is then redirected to
		 * kiov/dirbuf in system space.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Walk the entries actually returned, in place. */
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/*
				 * A zero record length would never advance;
				 * treat it as an I/O error and stop.
				 */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/*
			 * Copy out only if the walk ran to the end, i.e. it
			 * was not cut short by the EIO case above.
			 */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether another layer should be read. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			/*
			 * The hook may replace vp; -1 asks for the read to
			 * be retried, any other non-zero value is an error.
			 */
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * At the root of a MNT_UNION mount, switch the open file to
		 * the covered vnode, restart at offset 0 and read again.
		 */
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	/*
	 * The byte count is stored in td_retval[0] even when this copyout
	 * fails; the copyout error is what gets returned.
	 */
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3225#endif /* COMPAT_43 */
3226
3227/*
3228 * Read a block of directory entries in a filesystem independent format.
3229 */
3230#ifndef _SYS_SYSPROTO_H_
3231struct getdirentries_args {
3232	int	fd;
3233	char	*buf;
3234	u_int	count;
3235	long	*basep;
3236};
3237#endif
3238int
3239getdirentries(td, uap)
3240	struct thread *td;
3241	register struct getdirentries_args /* {
3242		int fd;
3243		char *buf;
3244		u_int count;
3245		long *basep;
3246	} */ *uap;
3247{
3248	struct vnode *vp;
3249	struct file *fp;
3250	struct uio auio;
3251	struct iovec aiov;
3252	long loff;
3253	int error, eofflag;
3254
3255	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3256		return (error);
3257	if ((fp->f_flag & FREAD) == 0) {
3258		fdrop(fp, td);
3259		return (EBADF);
3260	}
3261	vp = fp->f_vnode;
3262unionread:
3263	if (vp->v_type != VDIR) {
3264		fdrop(fp, td);
3265		return (EINVAL);
3266	}
3267	aiov.iov_base = uap->buf;
3268	aiov.iov_len = uap->count;
3269	auio.uio_iov = &aiov;
3270	auio.uio_iovcnt = 1;
3271	auio.uio_rw = UIO_READ;
3272	auio.uio_segflg = UIO_USERSPACE;
3273	auio.uio_td = td;
3274	auio.uio_resid = uap->count;
3275	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3276	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3277	loff = auio.uio_offset = fp->f_offset;
3278#ifdef MAC
3279	error = mac_check_vnode_readdir(td->td_ucred, vp);
3280	if (error == 0)
3281#endif
3282		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3283		    NULL);
3284	fp->f_offset = auio.uio_offset;
3285	VOP_UNLOCK(vp, 0, td);
3286	if (error) {
3287		fdrop(fp, td);
3288		return (error);
3289	}
3290	if (uap->count == auio.uio_resid) {
3291		if (union_dircheckp) {
3292			error = union_dircheckp(td, &vp, fp);
3293			if (error == -1)
3294				goto unionread;
3295			if (error) {
3296				fdrop(fp, td);
3297				return (error);
3298			}
3299		}
3300		/*
3301		 * XXX We could delay dropping the lock above but
3302		 * union_dircheckp complicates things.
3303		 */
3304		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3305		if ((vp->v_vflag & VV_ROOT) &&
3306		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3307			struct vnode *tvp = vp;
3308			vp = vp->v_mount->mnt_vnodecovered;
3309			VREF(vp);
3310			fp->f_vnode = vp;
3311			fp->f_data = vp;
3312			fp->f_offset = 0;
3313			vput(tvp);
3314			goto unionread;
3315		}
3316		VOP_UNLOCK(vp, 0, td);
3317	}
3318	if (uap->basep != NULL) {
3319		error = copyout(&loff, uap->basep, sizeof(long));
3320	}
3321	td->td_retval[0] = uap->count - auio.uio_resid;
3322	fdrop(fp, td);
3323	return (error);
3324}
3325#ifndef _SYS_SYSPROTO_H_
3326struct getdents_args {
3327	int fd;
3328	char *buf;
3329	size_t count;
3330};
3331#endif
3332int
3333getdents(td, uap)
3334	struct thread *td;
3335	register struct getdents_args /* {
3336		int fd;
3337		char *buf;
3338		u_int count;
3339	} */ *uap;
3340{
3341	struct getdirentries_args ap;
3342	ap.fd = uap->fd;
3343	ap.buf = uap->buf;
3344	ap.count = uap->count;
3345	ap.basep = NULL;
3346	return getdirentries(td, &ap);
3347}
3348
3349/*
3350 * Set the mode mask for creation of filesystem nodes.
3351 *
3352 * MP SAFE
3353 */
3354#ifndef _SYS_SYSPROTO_H_
3355struct umask_args {
3356	int	newmask;
3357};
3358#endif
3359int
3360umask(td, uap)
3361	struct thread *td;
3362	struct umask_args /* {
3363		int newmask;
3364	} */ *uap;
3365{
3366	register struct filedesc *fdp;
3367
3368	FILEDESC_LOCK(td->td_proc->p_fd);
3369	fdp = td->td_proc->p_fd;
3370	td->td_retval[0] = fdp->fd_cmask;
3371	fdp->fd_cmask = uap->newmask & ALLPERMS;
3372	FILEDESC_UNLOCK(td->td_proc->p_fd);
3373	return (0);
3374}
3375
3376/*
3377 * Void all references to file by ripping underlying filesystem
3378 * away from vnode.
3379 */
3380#ifndef _SYS_SYSPROTO_H_
3381struct revoke_args {
3382	char	*path;
3383};
3384#endif
3385/* ARGSUSED */
3386int
3387revoke(td, uap)
3388	struct thread *td;
3389	register struct revoke_args /* {
3390		char *path;
3391	} */ *uap;
3392{
3393	struct mount *mp;
3394	struct vnode *vp;
3395	struct vattr vattr;
3396	int error;
3397	struct nameidata nd;
3398
3399	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3400	if ((error = namei(&nd)) != 0)
3401		return (error);
3402	vp = nd.ni_vp;
3403	NDFREE(&nd, NDF_ONLY_PNBUF);
3404	if (vp->v_type != VCHR) {
3405		vput(vp);
3406		return (EINVAL);
3407	}
3408#ifdef MAC
3409	error = mac_check_vnode_revoke(td->td_ucred, vp);
3410	if (error) {
3411		vput(vp);
3412		return (error);
3413	}
3414#endif
3415	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3416	if (error) {
3417		vput(vp);
3418		return (error);
3419	}
3420	VOP_UNLOCK(vp, 0, td);
3421	if (td->td_ucred->cr_uid != vattr.va_uid) {
3422		error = suser_cred(td->td_ucred, PRISON_ROOT);
3423		if (error)
3424			goto out;
3425	}
3426	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3427		goto out;
3428	if (vcount(vp) > 1)
3429		VOP_REVOKE(vp, REVOKEALL);
3430	vn_finished_write(mp);
3431out:
3432	vrele(vp);
3433	return (error);
3434}
3435
3436/*
3437 * Convert a user file descriptor to a kernel file entry.
3438 * The file entry is locked upon returning.
3439 */
3440int
3441getvnode(fdp, fd, fpp)
3442	struct filedesc *fdp;
3443	int fd;
3444	struct file **fpp;
3445{
3446	int error;
3447	struct file *fp;
3448
3449	fp = NULL;
3450	if (fdp == NULL)
3451		error = EBADF;
3452	else {
3453		FILEDESC_LOCK(fdp);
3454		if ((u_int)fd >= fdp->fd_nfiles ||
3455		    (fp = fdp->fd_ofiles[fd]) == NULL)
3456			error = EBADF;
3457		else if (fp->f_vnode == NULL) {
3458			fp = NULL;
3459			error = EINVAL;
3460		} else {
3461			fhold(fp);
3462			error = 0;
3463		}
3464		FILEDESC_UNLOCK(fdp);
3465	}
3466	*fpp = fp;
3467	return (error);
3468}
3469
3470/*
3471 * Get (NFS) file handle
3472 */
3473#ifndef _SYS_SYSPROTO_H_
3474struct getfh_args {
3475	char	*fname;
3476	fhandle_t *fhp;
3477};
3478#endif
3479int
3480getfh(td, uap)
3481	struct thread *td;
3482	register struct getfh_args *uap;
3483{
3484	struct nameidata nd;
3485	fhandle_t fh;
3486	register struct vnode *vp;
3487	int error;
3488
3489	/*
3490	 * Must be super user
3491	 */
3492	error = suser(td);
3493	if (error)
3494		return (error);
3495	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3496	error = namei(&nd);
3497	if (error)
3498		return (error);
3499	NDFREE(&nd, NDF_ONLY_PNBUF);
3500	vp = nd.ni_vp;
3501	bzero(&fh, sizeof(fh));
3502	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3503	error = VFS_VPTOFH(vp, &fh.fh_fid);
3504	vput(vp);
3505	if (error)
3506		return (error);
3507	error = copyout(&fh, uap->fhp, sizeof (fh));
3508	return (error);
3509}
3510
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * The handle is copied in from uap->u_fhp, resolved to a mount with
 * vfs_getvfs() (ESTALE if none) and to a locked vnode with
 * VFS_FHTOVP().  The open flags must request reading and/or writing and
 * must not include O_CREAT (EINVAL otherwise).  On success the new
 * descriptor index is returned in td_retval[0].
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Build the VOP_ACCESS() mode bits from the open flags. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode lock is dropped while waiting in
		 * vn_start_write() and retaken afterwards; on failure only
		 * the reference is left, hence vrele() rather than 'bad'.
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			return (error);
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate: set only va_size, to zero. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the write count taken just above before bailing out. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	fp = nfp;

	/*
	 * Hold an extra reference to avoid having fp ripped out
	 * from under us while we block in the lock op
	 */
	fhold(fp);
	/* Attach the vnode to the new file entry. */
	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: write lock for O_EXLOCK, else read lock. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked across the advisory lock request. */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			FILEDESC_LOCK(fdp);
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
			} else
				FILEDESC_UNLOCK(fdp);
			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			return(error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	/* Regular file that still has no VM object: create one now. */
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td, td->td_ucred);

	VOP_UNLOCK(vp, 0, td);
	/* Drop the extra reference taken with fhold() above. */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Common failure exit while the vnode is locked and referenced. */
	vput(vp);
	return (error);
}
3717
3718/*
3719 * Stat an (NFS) file handle.
3720 */
3721#ifndef _SYS_SYSPROTO_H_
3722struct fhstat_args {
3723	struct fhandle *u_fhp;
3724	struct stat *sb;
3725};
3726#endif
3727int
3728fhstat(td, uap)
3729	struct thread *td;
3730	register struct fhstat_args /* {
3731		struct fhandle *u_fhp;
3732		struct stat *sb;
3733	} */ *uap;
3734{
3735	struct stat sb;
3736	fhandle_t fh;
3737	struct mount *mp;
3738	struct vnode *vp;
3739	int error;
3740
3741	/*
3742	 * Must be super user
3743	 */
3744	error = suser(td);
3745	if (error)
3746		return (error);
3747
3748	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3749	if (error)
3750		return (error);
3751
3752	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3753		return (ESTALE);
3754	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3755		return (error);
3756	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3757	vput(vp);
3758	if (error)
3759		return (error);
3760	error = copyout(&sb, uap->sb, sizeof(sb));
3761	return (error);
3762}
3763
3764/*
3765 * Implement fstatfs() for (NFS) file handles.
3766 */
3767#ifndef _SYS_SYSPROTO_H_
3768struct fhstatfs_args {
3769	struct fhandle *u_fhp;
3770	struct statfs *buf;
3771};
3772#endif
3773int
3774fhstatfs(td, uap)
3775	struct thread *td;
3776	struct fhstatfs_args /* {
3777		struct fhandle *u_fhp;
3778		struct statfs *buf;
3779	} */ *uap;
3780{
3781	struct statfs *sp;
3782	struct mount *mp;
3783	struct vnode *vp;
3784	struct statfs sb;
3785	fhandle_t fh;
3786	int error;
3787
3788	/*
3789	 * Must be super user
3790	 */
3791	error = suser(td);
3792	if (error)
3793		return (error);
3794
3795	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3796		return (error);
3797
3798	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3799		return (ESTALE);
3800	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3801		return (error);
3802	mp = vp->v_mount;
3803	sp = &mp->mnt_stat;
3804	vput(vp);
3805#ifdef MAC
3806	error = mac_check_mount_stat(td->td_ucred, mp);
3807	if (error)
3808		return (error);
3809#endif
3810	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3811		return (error);
3812	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3813	if (suser(td)) {
3814		bcopy(sp, &sb, sizeof(sb));
3815		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3816		sp = &sb;
3817	}
3818	return (copyout(sp, uap->buf, sizeof(*sp)));
3819}
3820
3821/*
3822 * Syscall to push extended attribute configuration information into the
3823 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3824 * a command (int cmd), and attribute name and misc data.  For now, the
3825 * attribute name is left in userspace for consumption by the VFS_op.
3826 * It will probably be changed to be copied into sysspace by the
3827 * syscall in the future, once issues with various consumers of the
3828 * attribute code have raised their hands.
3829 *
3830 * Currently this is used only by UFS Extended Attributes.
3831 */
3832int
3833extattrctl(td, uap)
3834	struct thread *td;
3835	struct extattrctl_args /* {
3836		const char *path;
3837		int cmd;
3838		const char *filename;
3839		int attrnamespace;
3840		const char *attrname;
3841	} */ *uap;
3842{
3843	struct vnode *filename_vp;
3844	struct nameidata nd;
3845	struct mount *mp, *mp_writable;
3846	char attrname[EXTATTR_MAXNAMELEN];
3847	int error;
3848
3849	/*
3850	 * uap->attrname is not always defined.  We check again later when we
3851	 * invoke the VFS call so as to pass in NULL there if needed.
3852	 */
3853	if (uap->attrname != NULL) {
3854		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3855		    NULL);
3856		if (error)
3857			return (error);
3858	}
3859
3860	/*
3861	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3862	 * which VFS_EXTATTRCTL() will later release.
3863	 */
3864	filename_vp = NULL;
3865	if (uap->filename != NULL) {
3866		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3867		    uap->filename, td);
3868		error = namei(&nd);
3869		if (error)
3870			return (error);
3871		filename_vp = nd.ni_vp;
3872		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3873	}
3874
3875	/* uap->path is always defined. */
3876	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3877	error = namei(&nd);
3878	if (error) {
3879		if (filename_vp != NULL)
3880			vput(filename_vp);
3881		return (error);
3882	}
3883	mp = nd.ni_vp->v_mount;
3884	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3885	NDFREE(&nd, 0);
3886	if (error) {
3887		if (filename_vp != NULL)
3888			vput(filename_vp);
3889		return (error);
3890	}
3891
3892	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3893	    uap->attrname != NULL ? attrname : NULL, td);
3894
3895	vn_finished_write(mp_writable);
3896	/*
3897	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3898	 * filename_vp, so vrele it if it is defined.
3899	 */
3900	if (filename_vp != NULL)
3901		vrele(filename_vp);
3902	return (error);
3903}
3904
3905/*-
3906 * Set a named extended attribute on a file or directory
3907 *
3908 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3909 *            kernelspace string pointer "attrname", userspace buffer
3910 *            pointer "data", buffer length "nbytes", thread "td".
3911 * Returns: 0 on success, an error number otherwise
3912 * Locks: none
3913 * References: vp must be a valid reference for the duration of the call
3914 */
3915static int
3916extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3917    void *data, size_t nbytes, struct thread *td)
3918{
3919	struct mount *mp;
3920	struct uio auio;
3921	struct iovec aiov;
3922	ssize_t cnt;
3923	int error;
3924
3925	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3926	if (error)
3927		return (error);
3928	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3929	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3930
3931	aiov.iov_base = data;
3932	aiov.iov_len = nbytes;
3933	auio.uio_iov = &aiov;
3934	auio.uio_iovcnt = 1;
3935	auio.uio_offset = 0;
3936	if (nbytes > INT_MAX) {
3937		error = EINVAL;
3938		goto done;
3939	}
3940	auio.uio_resid = nbytes;
3941	auio.uio_rw = UIO_WRITE;
3942	auio.uio_segflg = UIO_USERSPACE;
3943	auio.uio_td = td;
3944	cnt = nbytes;
3945
3946#ifdef MAC
3947	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3948	    attrname, &auio);
3949	if (error)
3950		goto done;
3951#endif
3952
3953	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3954	    td->td_ucred, td);
3955	cnt -= auio.uio_resid;
3956	td->td_retval[0] = cnt;
3957
3958done:
3959	VOP_UNLOCK(vp, 0, td);
3960	vn_finished_write(mp);
3961	return (error);
3962}
3963
3964int
3965extattr_set_fd(td, uap)
3966	struct thread *td;
3967	struct extattr_set_fd_args /* {
3968		int fd;
3969		int attrnamespace;
3970		const char *attrname;
3971		void *data;
3972		size_t nbytes;
3973	} */ *uap;
3974{
3975	struct file *fp;
3976	char attrname[EXTATTR_MAXNAMELEN];
3977	int error;
3978
3979	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3980	if (error)
3981		return (error);
3982
3983	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
3984	if (error)
3985		return (error);
3986
3987	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
3988	    attrname, uap->data, uap->nbytes, td);
3989	fdrop(fp, td);
3990
3991	return (error);
3992}
3993
3994int
3995extattr_set_file(td, uap)
3996	struct thread *td;
3997	struct extattr_set_file_args /* {
3998		const char *path;
3999		int attrnamespace;
4000		const char *attrname;
4001		void *data;
4002		size_t nbytes;
4003	} */ *uap;
4004{
4005	struct nameidata nd;
4006	char attrname[EXTATTR_MAXNAMELEN];
4007	int error;
4008
4009	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4010	if (error)
4011		return (error);
4012
4013	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4014	error = namei(&nd);
4015	if (error)
4016		return (error);
4017	NDFREE(&nd, NDF_ONLY_PNBUF);
4018
4019	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4020	    uap->data, uap->nbytes, td);
4021
4022	vrele(nd.ni_vp);
4023	return (error);
4024}
4025
4026int
4027extattr_set_link(td, uap)
4028	struct thread *td;
4029	struct extattr_set_link_args /* {
4030		const char *path;
4031		int attrnamespace;
4032		const char *attrname;
4033		void *data;
4034		size_t nbytes;
4035	} */ *uap;
4036{
4037	struct nameidata nd;
4038	char attrname[EXTATTR_MAXNAMELEN];
4039	int error;
4040
4041	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4042	if (error)
4043		return (error);
4044
4045	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4046	error = namei(&nd);
4047	if (error)
4048		return (error);
4049	NDFREE(&nd, NDF_ONLY_PNBUF);
4050
4051	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4052	    uap->data, uap->nbytes, td);
4053
4054	vrele(nd.ni_vp);
4055	return (error);
4056}
4057
4058/*-
4059 * Get a named extended attribute on a file or directory
4060 *
4061 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4062 *            kernelspace string pointer "attrname", userspace buffer
4063 *            pointer "data", buffer length "nbytes", thread "td".
4064 * Returns: 0 on success, an error number otherwise
4065 * Locks: none
4066 * References: vp must be a valid reference for the duration of the call
4067 */
4068static int
4069extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4070    void *data, size_t nbytes, struct thread *td)
4071{
4072	struct uio auio, *auiop;
4073	struct iovec aiov;
4074	ssize_t cnt;
4075	size_t size, *sizep;
4076	int error;
4077
4078	/*
4079	 * XXX: Temporary API compatibility for applications that know
4080	 * about this hack ("" means list), but haven't been updated
4081	 * for the extattr_list_*() system calls yet.  This will go
4082	 * away for FreeBSD 5.3.
4083	 */
4084	if (strlen(attrname) == 0)
4085		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4086
4087	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4088	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4089
4090	/*
4091	 * Slightly unusual semantics: if the user provides a NULL data
4092	 * pointer, they don't want to receive the data, just the
4093	 * maximum read length.
4094	 */
4095	auiop = NULL;
4096	sizep = NULL;
4097	cnt = 0;
4098	if (data != NULL) {
4099		aiov.iov_base = data;
4100		aiov.iov_len = nbytes;
4101		auio.uio_iov = &aiov;
4102		auio.uio_offset = 0;
4103		if (nbytes > INT_MAX) {
4104			error = EINVAL;
4105			goto done;
4106		}
4107		auio.uio_resid = nbytes;
4108		auio.uio_rw = UIO_READ;
4109		auio.uio_segflg = UIO_USERSPACE;
4110		auio.uio_td = td;
4111		auiop = &auio;
4112		cnt = nbytes;
4113	} else
4114		sizep = &size;
4115
4116#ifdef MAC
4117	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4118	    attrname, &auio);
4119	if (error)
4120		goto done;
4121#endif
4122
4123	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4124	    td->td_ucred, td);
4125
4126	if (auiop != NULL) {
4127		cnt -= auio.uio_resid;
4128		td->td_retval[0] = cnt;
4129	} else
4130		td->td_retval[0] = size;
4131
4132done:
4133	VOP_UNLOCK(vp, 0, td);
4134	return (error);
4135}
4136
4137int
4138extattr_get_fd(td, uap)
4139	struct thread *td;
4140	struct extattr_get_fd_args /* {
4141		int fd;
4142		int attrnamespace;
4143		const char *attrname;
4144		void *data;
4145		size_t nbytes;
4146	} */ *uap;
4147{
4148	struct file *fp;
4149	char attrname[EXTATTR_MAXNAMELEN];
4150	int error;
4151
4152	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4153	if (error)
4154		return (error);
4155
4156	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4157	if (error)
4158		return (error);
4159
4160	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4161	    attrname, uap->data, uap->nbytes, td);
4162
4163	fdrop(fp, td);
4164	return (error);
4165}
4166
4167int
4168extattr_get_file(td, uap)
4169	struct thread *td;
4170	struct extattr_get_file_args /* {
4171		const char *path;
4172		int attrnamespace;
4173		const char *attrname;
4174		void *data;
4175		size_t nbytes;
4176	} */ *uap;
4177{
4178	struct nameidata nd;
4179	char attrname[EXTATTR_MAXNAMELEN];
4180	int error;
4181
4182	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4183	if (error)
4184		return (error);
4185
4186	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4187	error = namei(&nd);
4188	if (error)
4189		return (error);
4190	NDFREE(&nd, NDF_ONLY_PNBUF);
4191
4192	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4193	    uap->data, uap->nbytes, td);
4194
4195	vrele(nd.ni_vp);
4196	return (error);
4197}
4198
4199int
4200extattr_get_link(td, uap)
4201	struct thread *td;
4202	struct extattr_get_link_args /* {
4203		const char *path;
4204		int attrnamespace;
4205		const char *attrname;
4206		void *data;
4207		size_t nbytes;
4208	} */ *uap;
4209{
4210	struct nameidata nd;
4211	char attrname[EXTATTR_MAXNAMELEN];
4212	int error;
4213
4214	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4215	if (error)
4216		return (error);
4217
4218	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4219	error = namei(&nd);
4220	if (error)
4221		return (error);
4222	NDFREE(&nd, NDF_ONLY_PNBUF);
4223
4224	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4225	    uap->data, uap->nbytes, td);
4226
4227	vrele(nd.ni_vp);
4228	return (error);
4229}
4230
4231/*
4232 * extattr_delete_vp(): Delete a named extended attribute on a file or
4233 *                      directory
4234 *
4235 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4236 *            kernelspace string pointer "attrname", proc "p"
4237 * Returns: 0 on success, an error number otherwise
4238 * Locks: none
4239 * References: vp must be a valid reference for the duration of the call
4240 */
4241static int
4242extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4243    struct thread *td)
4244{
4245	struct mount *mp;
4246	int error;
4247
4248	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4249	if (error)
4250		return (error);
4251	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4252	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4253
4254#ifdef MAC
4255	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4256	    attrname, NULL);
4257	if (error)
4258		goto done;
4259#endif
4260
4261	error = VOP_RMEXTATTR(vp, attrnamespace, attrname, td->td_ucred, td);
4262	if (error == EOPNOTSUPP)
4263		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4264		    td->td_ucred, td);
4265#ifdef MAC
4266done:
4267#endif
4268	VOP_UNLOCK(vp, 0, td);
4269	vn_finished_write(mp);
4270	return (error);
4271}
4272
4273int
4274extattr_delete_fd(td, uap)
4275	struct thread *td;
4276	struct extattr_delete_fd_args /* {
4277		int fd;
4278		int attrnamespace;
4279		const char *attrname;
4280	} */ *uap;
4281{
4282	struct file *fp;
4283	struct vnode *vp;
4284	char attrname[EXTATTR_MAXNAMELEN];
4285	int error;
4286
4287	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4288	if (error)
4289		return (error);
4290
4291	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4292	if (error)
4293		return (error);
4294	vp = fp->f_vnode;
4295
4296	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4297	fdrop(fp, td);
4298	return (error);
4299}
4300
4301int
4302extattr_delete_file(td, uap)
4303	struct thread *td;
4304	struct extattr_delete_file_args /* {
4305		const char *path;
4306		int attrnamespace;
4307		const char *attrname;
4308	} */ *uap;
4309{
4310	struct nameidata nd;
4311	char attrname[EXTATTR_MAXNAMELEN];
4312	int error;
4313
4314	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4315	if (error)
4316		return(error);
4317
4318	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4319	error = namei(&nd);
4320	if (error)
4321		return(error);
4322	NDFREE(&nd, NDF_ONLY_PNBUF);
4323
4324	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4325	vrele(nd.ni_vp);
4326	return(error);
4327}
4328
4329int
4330extattr_delete_link(td, uap)
4331	struct thread *td;
4332	struct extattr_delete_link_args /* {
4333		const char *path;
4334		int attrnamespace;
4335		const char *attrname;
4336	} */ *uap;
4337{
4338	struct nameidata nd;
4339	char attrname[EXTATTR_MAXNAMELEN];
4340	int error;
4341
4342	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4343	if (error)
4344		return(error);
4345
4346	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4347	error = namei(&nd);
4348	if (error)
4349		return(error);
4350	NDFREE(&nd, NDF_ONLY_PNBUF);
4351
4352	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4353	vrele(nd.ni_vp);
4354	return(error);
4355}
4356
4357/*-
4358 * Retrieve a list of extended attributes on a file or directory.
4359 *
4360 * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4361 *            userspace buffer pointer "data", buffer length "nbytes",
4362 *            thread "td".
4363 * Returns: 0 on success, an error number otherwise
4364 * Locks: none
4365 * References: vp must be a valid reference for the duration of the call
4366 */
4367static int
4368extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4369    size_t nbytes, struct thread *td)
4370{
4371	struct uio auio, *auiop;
4372	size_t size, *sizep;
4373	struct iovec aiov;
4374	ssize_t cnt;
4375	int error;
4376
4377	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4378	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4379
4380	auiop = NULL;
4381	sizep = NULL;
4382	cnt = 0;
4383	if (data != NULL) {
4384		aiov.iov_base = data;
4385		aiov.iov_len = nbytes;
4386		auio.uio_iov = &aiov;
4387		auio.uio_offset = 0;
4388		if (nbytes > INT_MAX) {
4389			error = EINVAL;
4390			goto done;
4391		}
4392		auio.uio_resid = nbytes;
4393		auio.uio_rw = UIO_READ;
4394		auio.uio_segflg = UIO_USERSPACE;
4395		auio.uio_td = td;
4396		auiop = &auio;
4397		cnt = nbytes;
4398	} else
4399		sizep = &size;
4400
4401#ifdef MAC
4402	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4403	    "", &auio);
4404	if (error)
4405		goto done;
4406#endif
4407
4408	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4409	    td->td_ucred, td);
4410
4411	if (auiop != NULL) {
4412		cnt -= auio.uio_resid;
4413		td->td_retval[0] = cnt;
4414	} else
4415		td->td_retval[0] = size;
4416
4417done:
4418	VOP_UNLOCK(vp, 0, td);
4419	return (error);
4420}
4421
4422
4423int
4424extattr_list_fd(td, uap)
4425	struct thread *td;
4426	struct extattr_list_fd_args /* {
4427		int fd;
4428		int attrnamespace;
4429		void *data;
4430		size_t nbytes;
4431	} */ *uap;
4432{
4433	struct file *fp;
4434	int error;
4435
4436	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4437	if (error)
4438		return (error);
4439
4440	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4441	    uap->nbytes, td);
4442
4443	fdrop(fp, td);
4444	return (error);
4445}
4446
4447int
4448extattr_list_file(td, uap)
4449	struct thread*td;
4450	struct extattr_list_file_args /* {
4451		const char *path;
4452		int attrnamespace;
4453		void *data;
4454		size_t nbytes;
4455	} */ *uap;
4456{
4457	struct nameidata nd;
4458	int error;
4459
4460	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4461	error = namei(&nd);
4462	if (error)
4463		return (error);
4464	NDFREE(&nd, NDF_ONLY_PNBUF);
4465
4466	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4467	    uap->nbytes, td);
4468
4469	vrele(nd.ni_vp);
4470	return (error);
4471}
4472
4473int
4474extattr_list_link(td, uap)
4475	struct thread*td;
4476	struct extattr_list_link_args /* {
4477		const char *path;
4478		int attrnamespace;
4479		void *data;
4480		size_t nbytes;
4481	} */ *uap;
4482{
4483	struct nameidata nd;
4484	int error;
4485
4486	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4487	error = namei(&nd);
4488	if (error)
4489		return (error);
4490	NDFREE(&nd, NDF_ONLY_PNBUF);
4491
4492	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4493	    uap->nbytes, td);
4494
4495	vrele(nd.ni_vp);
4496	return (error);
4497}
4498
4499