vfs_extattr.c revision 119198
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/sys/kern/vfs_extattr.c 119198 2003-08-21 13:53:01Z rwatson $");
43
44/* For 4.3 integer FS ID compatibility */
45#include "opt_compat.h"
46#include "opt_mac.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/sysent.h>
53#include <sys/mac.h>
54#include <sys/malloc.h>
55#include <sys/mount.h>
56#include <sys/mutex.h>
57#include <sys/sysproto.h>
58#include <sys/namei.h>
59#include <sys/filedesc.h>
60#include <sys/kernel.h>
61#include <sys/fcntl.h>
62#include <sys/file.h>
63#include <sys/limits.h>
64#include <sys/linker.h>
65#include <sys/stat.h>
66#include <sys/sx.h>
67#include <sys/unistd.h>
68#include <sys/vnode.h>
69#include <sys/proc.h>
70#include <sys/dirent.h>
71#include <sys/extattr.h>
72#include <sys/jail.h>
73#include <sys/syscallsubr.h>
74#include <sys/sysctl.h>
75
76#include <machine/stdarg.h>
77
78#include <vm/vm.h>
79#include <vm/vm_object.h>
80#include <vm/vm_page.h>
81#include <vm/uma.h>
82
83static int chroot_refuse_vdir_fds(struct filedesc *fdp);
84static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
85static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
86static int setfmode(struct thread *td, struct vnode *, int);
87static int setfflags(struct thread *td, struct vnode *, int);
88static int setutimes(struct thread *td, struct vnode *,
89    const struct timespec *, int, int);
90static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
91    struct thread *td);
92
93static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
94    size_t nbytes, struct thread *td);
95
96int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
97int (*softdep_fsync_hook)(struct vnode *);
98
99/*
100 * The module initialization routine for POSIX asynchronous I/O will
101 * set this to the version of AIO that it implements.  (Zero means
102 * that it is not implemented.)  This value is used here by pathconf()
103 * and in kern_descrip.c by fpathconf().
104 */
105int async_io_version;
106
107/*
108 * Sync each mounted filesystem.
109 */
110#ifndef _SYS_SYSPROTO_H_
111struct sync_args {
112        int     dummy;
113};
114#endif
115
116#ifdef DEBUG
117static int syncprt = 0;
118SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
119#endif
120
121/* ARGSUSED */
122int
123sync(td, uap)
124	struct thread *td;
125	struct sync_args *uap;
126{
127	struct mount *mp, *nmp;
128	int asyncflag;
129
130	mtx_lock(&mountlist_mtx);
131	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
132		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
133			nmp = TAILQ_NEXT(mp, mnt_list);
134			continue;
135		}
136		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
137		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
138			asyncflag = mp->mnt_flag & MNT_ASYNC;
139			mp->mnt_flag &= ~MNT_ASYNC;
140			vfs_msync(mp, MNT_NOWAIT);
141			VFS_SYNC(mp, MNT_NOWAIT,
142			    ((td != NULL) ? td->td_ucred : NOCRED), td);
143			mp->mnt_flag |= asyncflag;
144			vn_finished_write(mp);
145		}
146		mtx_lock(&mountlist_mtx);
147		nmp = TAILQ_NEXT(mp, mnt_list);
148		vfs_unbusy(mp, td);
149	}
150	mtx_unlock(&mountlist_mtx);
151#if 0
152/*
153 * XXX don't call vfs_bufstats() yet because that routine
154 * was not imported in the Lite2 merge.
155 */
156#ifdef DIAGNOSTIC
157	if (syncprt)
158		vfs_bufstats();
159#endif /* DIAGNOSTIC */
160#endif
161	return (0);
162}
163
164/* XXX PRISON: could be per prison flag */
165static int prison_quotas;
166#if 0
167SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
168#endif
169
170/*
171 * Change filesystem quotas.
172 */
173#ifndef _SYS_SYSPROTO_H_
174struct quotactl_args {
175	char *path;
176	int cmd;
177	int uid;
178	caddr_t arg;
179};
180#endif
181/* ARGSUSED */
182int
183quotactl(td, uap)
184	struct thread *td;
185	register struct quotactl_args /* {
186		char *path;
187		int cmd;
188		int uid;
189		caddr_t arg;
190	} */ *uap;
191{
192	struct mount *mp;
193	int error;
194	struct nameidata nd;
195
196	if (jailed(td->td_ucred) && !prison_quotas)
197		return (EPERM);
198	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
199	if ((error = namei(&nd)) != 0)
200		return (error);
201	NDFREE(&nd, NDF_ONLY_PNBUF);
202	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
203	vrele(nd.ni_vp);
204	if (error)
205		return (error);
206	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
207	vn_finished_write(mp);
208	return (error);
209}
210
211/*
212 * Get filesystem statistics.
213 */
214#ifndef _SYS_SYSPROTO_H_
215struct statfs_args {
216	char *path;
217	struct statfs *buf;
218};
219#endif
220/* ARGSUSED */
221int
222statfs(td, uap)
223	struct thread *td;
224	register struct statfs_args /* {
225		char *path;
226		struct statfs *buf;
227	} */ *uap;
228{
229	register struct mount *mp;
230	register struct statfs *sp;
231	int error;
232	struct nameidata nd;
233	struct statfs sb;
234
235	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
236	if ((error = namei(&nd)) != 0)
237		return (error);
238	mp = nd.ni_vp->v_mount;
239	sp = &mp->mnt_stat;
240	NDFREE(&nd, NDF_ONLY_PNBUF);
241	vrele(nd.ni_vp);
242#ifdef MAC
243	error = mac_check_mount_stat(td->td_ucred, mp);
244	if (error)
245		return (error);
246#endif
247	error = VFS_STATFS(mp, sp, td);
248	if (error)
249		return (error);
250	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
251	if (suser(td)) {
252		bcopy(sp, &sb, sizeof(sb));
253		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
254		sp = &sb;
255	}
256	return (copyout(sp, uap->buf, sizeof(*sp)));
257}
258
259/*
260 * Get filesystem statistics.
261 */
262#ifndef _SYS_SYSPROTO_H_
263struct fstatfs_args {
264	int fd;
265	struct statfs *buf;
266};
267#endif
268/* ARGSUSED */
269int
270fstatfs(td, uap)
271	struct thread *td;
272	register struct fstatfs_args /* {
273		int fd;
274		struct statfs *buf;
275	} */ *uap;
276{
277	struct file *fp;
278	struct mount *mp;
279	register struct statfs *sp;
280	int error;
281	struct statfs sb;
282
283	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
284		return (error);
285	mp = fp->f_vnode->v_mount;
286	fdrop(fp, td);
287	if (mp == NULL)
288		return (EBADF);
289#ifdef MAC
290	error = mac_check_mount_stat(td->td_ucred, mp);
291	if (error)
292		return (error);
293#endif
294	sp = &mp->mnt_stat;
295	error = VFS_STATFS(mp, sp, td);
296	if (error)
297		return (error);
298	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
299	if (suser(td)) {
300		bcopy(sp, &sb, sizeof(sb));
301		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
302		sp = &sb;
303	}
304	return (copyout(sp, uap->buf, sizeof(*sp)));
305}
306
307/*
308 * Get statistics on all filesystems.
309 */
310#ifndef _SYS_SYSPROTO_H_
311struct getfsstat_args {
312	struct statfs *buf;
313	long bufsize;
314	int flags;
315};
316#endif
317int
318getfsstat(td, uap)
319	struct thread *td;
320	register struct getfsstat_args /* {
321		struct statfs *buf;
322		long bufsize;
323		int flags;
324	} */ *uap;
325{
326	register struct mount *mp, *nmp;
327	register struct statfs *sp;
328	caddr_t sfsp;
329	long count, maxcount, error;
330
331	maxcount = uap->bufsize / sizeof(struct statfs);
332	sfsp = (caddr_t)uap->buf;
333	count = 0;
334	mtx_lock(&mountlist_mtx);
335	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
336#ifdef MAC
337		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
338			nmp = TAILQ_NEXT(mp, mnt_list);
339			continue;
340		}
341#endif
342		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
343			nmp = TAILQ_NEXT(mp, mnt_list);
344			continue;
345		}
346		if (sfsp && count < maxcount) {
347			sp = &mp->mnt_stat;
348			/*
349			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
350			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
351			 * overrides MNT_WAIT.
352			 */
353			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
354			    (uap->flags & MNT_WAIT)) &&
355			    (error = VFS_STATFS(mp, sp, td))) {
356				mtx_lock(&mountlist_mtx);
357				nmp = TAILQ_NEXT(mp, mnt_list);
358				vfs_unbusy(mp, td);
359				continue;
360			}
361			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
362			error = copyout(sp, sfsp, sizeof(*sp));
363			if (error) {
364				vfs_unbusy(mp, td);
365				return (error);
366			}
367			sfsp += sizeof(*sp);
368		}
369		count++;
370		mtx_lock(&mountlist_mtx);
371		nmp = TAILQ_NEXT(mp, mnt_list);
372		vfs_unbusy(mp, td);
373	}
374	mtx_unlock(&mountlist_mtx);
375	if (sfsp && count > maxcount)
376		td->td_retval[0] = maxcount;
377	else
378		td->td_retval[0] = count;
379	return (0);
380}
381
382/*
383 * Change current working directory to a given file descriptor.
384 */
385#ifndef _SYS_SYSPROTO_H_
386struct fchdir_args {
387	int	fd;
388};
389#endif
390/* ARGSUSED */
391int
392fchdir(td, uap)
393	struct thread *td;
394	struct fchdir_args /* {
395		int fd;
396	} */ *uap;
397{
398	register struct filedesc *fdp = td->td_proc->p_fd;
399	struct vnode *vp, *tdp, *vpold;
400	struct mount *mp;
401	struct file *fp;
402	int error;
403
404	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
405		return (error);
406	vp = fp->f_vnode;
407	VREF(vp);
408	fdrop(fp, td);
409	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
410	if (vp->v_type != VDIR)
411		error = ENOTDIR;
412#ifdef MAC
413	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
414	}
415#endif
416	else
417		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
418	while (!error && (mp = vp->v_mountedhere) != NULL) {
419		if (vfs_busy(mp, 0, 0, td))
420			continue;
421		error = VFS_ROOT(mp, &tdp);
422		vfs_unbusy(mp, td);
423		if (error)
424			break;
425		vput(vp);
426		vp = tdp;
427	}
428	if (error) {
429		vput(vp);
430		return (error);
431	}
432	VOP_UNLOCK(vp, 0, td);
433	FILEDESC_LOCK(fdp);
434	vpold = fdp->fd_cdir;
435	fdp->fd_cdir = vp;
436	FILEDESC_UNLOCK(fdp);
437	vrele(vpold);
438	return (0);
439}
440
441/*
442 * Change current working directory (``.'').
443 */
444#ifndef _SYS_SYSPROTO_H_
445struct chdir_args {
446	char	*path;
447};
448#endif
449/* ARGSUSED */
450int
451chdir(td, uap)
452	struct thread *td;
453	struct chdir_args /* {
454		char *path;
455	} */ *uap;
456{
457
458	return (kern_chdir(td, uap->path, UIO_USERSPACE));
459}
460
461int
462kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
463{
464	register struct filedesc *fdp = td->td_proc->p_fd;
465	int error;
466	struct nameidata nd;
467	struct vnode *vp;
468
469	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
470	if ((error = namei(&nd)) != 0)
471		return (error);
472	if ((error = change_dir(nd.ni_vp, td)) != 0) {
473		vput(nd.ni_vp);
474		NDFREE(&nd, NDF_ONLY_PNBUF);
475		return (error);
476	}
477	VOP_UNLOCK(nd.ni_vp, 0, td);
478	NDFREE(&nd, NDF_ONLY_PNBUF);
479	FILEDESC_LOCK(fdp);
480	vp = fdp->fd_cdir;
481	fdp->fd_cdir = nd.ni_vp;
482	FILEDESC_UNLOCK(fdp);
483	vrele(vp);
484	return (0);
485}
486
487/*
488 * Helper function for raised chroot(2) security function:  Refuse if
489 * any filedescriptors are open directories.
490 */
491static int
492chroot_refuse_vdir_fds(fdp)
493	struct filedesc *fdp;
494{
495	struct vnode *vp;
496	struct file *fp;
497	int fd;
498
499	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
500	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
501		fp = fget_locked(fdp, fd);
502		if (fp == NULL)
503			continue;
504		if (fp->f_type == DTYPE_VNODE) {
505			vp = fp->f_vnode;
506			if (vp->v_type == VDIR)
507				return (EPERM);
508		}
509	}
510	return (0);
511}
512
513/*
514 * This sysctl determines if we will allow a process to chroot(2) if it
515 * has a directory open:
516 *	0: disallowed for all processes.
517 *	1: allowed for processes that were not already chroot(2)'ed.
518 *	2: allowed for all processes.
519 */
520
521static int chroot_allow_open_directories = 1;
522
523SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
524     &chroot_allow_open_directories, 0, "");
525
526/*
527 * Change notion of root (``/'') directory.
528 */
529#ifndef _SYS_SYSPROTO_H_
530struct chroot_args {
531	char	*path;
532};
533#endif
534/* ARGSUSED */
535int
536chroot(td, uap)
537	struct thread *td;
538	struct chroot_args /* {
539		char *path;
540	} */ *uap;
541{
542	int error;
543	struct nameidata nd;
544
545	error = suser_cred(td->td_ucred, PRISON_ROOT);
546	if (error)
547		return (error);
548	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
549	mtx_lock(&Giant);
550	error = namei(&nd);
551	if (error)
552		goto error;
553	if ((error = change_dir(nd.ni_vp, td)) != 0)
554		goto e_vunlock;
555#ifdef MAC
556	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
557		goto e_vunlock;
558#endif
559	VOP_UNLOCK(nd.ni_vp, 0, td);
560	error = change_root(nd.ni_vp, td);
561	vrele(nd.ni_vp);
562	NDFREE(&nd, NDF_ONLY_PNBUF);
563	mtx_unlock(&Giant);
564	return (error);
565e_vunlock:
566	vput(nd.ni_vp);
567error:
568	mtx_unlock(&Giant);
569	NDFREE(&nd, NDF_ONLY_PNBUF);
570	return (error);
571}
572
573/*
574 * Common routine for chroot and chdir.  Callers must provide a locked vnode
575 * instance.
576 */
577int
578change_dir(vp, td)
579	struct vnode *vp;
580	struct thread *td;
581{
582	int error;
583
584	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
585	if (vp->v_type != VDIR)
586		return (ENOTDIR);
587#ifdef MAC
588	error = mac_check_vnode_chdir(td->td_ucred, vp);
589	if (error)
590		return (error);
591#endif
592	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
593	return (error);
594}
595
596/*
597 * Common routine for kern_chroot() and jail_attach().  The caller is
598 * responsible for invoking suser() and mac_check_chroot() to authorize this
599 * operation.
600 */
601int
602change_root(vp, td)
603	struct vnode *vp;
604	struct thread *td;
605{
606	struct filedesc *fdp;
607	struct vnode *oldvp;
608	int error;
609
610	mtx_assert(&Giant, MA_OWNED);
611	fdp = td->td_proc->p_fd;
612	FILEDESC_LOCK(fdp);
613	if (chroot_allow_open_directories == 0 ||
614	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
615		error = chroot_refuse_vdir_fds(fdp);
616		if (error) {
617			FILEDESC_UNLOCK(fdp);
618			return (error);
619		}
620	}
621	oldvp = fdp->fd_rdir;
622	fdp->fd_rdir = vp;
623	VREF(fdp->fd_rdir);
624	if (!fdp->fd_jdir) {
625		fdp->fd_jdir = vp;
626		VREF(fdp->fd_jdir);
627	}
628	FILEDESC_UNLOCK(fdp);
629	vrele(oldvp);
630	return (0);
631}
632
633/*
634 * Check permissions, allocate an open file structure,
635 * and call the device open routine if any.
636 */
637#ifndef _SYS_SYSPROTO_H_
638struct open_args {
639	char	*path;
640	int	flags;
641	int	mode;
642};
643#endif
644int
645open(td, uap)
646	struct thread *td;
647	register struct open_args /* {
648		char *path;
649		int flags;
650		int mode;
651	} */ *uap;
652{
653
654	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
655}
656
657int
658kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
659    int mode)
660{
661	struct proc *p = td->td_proc;
662	struct filedesc *fdp = p->p_fd;
663	struct file *fp;
664	struct vnode *vp;
665	struct vattr vat;
666	struct mount *mp;
667	int cmode;
668	struct file *nfp;
669	int type, indx, error;
670	struct flock lf;
671	struct nameidata nd;
672
673	if ((flags & O_ACCMODE) == O_ACCMODE)
674		return (EINVAL);
675	flags = FFLAGS(flags);
676	error = falloc(td, &nfp, &indx);
677	if (error)
678		return (error);
679	fp = nfp;
680	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
681	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
682	td->td_dupfd = -1;		/* XXX check for fdopen */
683	/*
684	 * Bump the ref count to prevent another process from closing
685	 * the descriptor while we are blocked in vn_open()
686	 */
687	fhold(fp);
688	error = vn_open(&nd, &flags, cmode, indx);
689	if (error) {
690
691		/*
692		 * If the vn_open replaced the method vector, something
693		 * wonderous happened deep below and we just pass it up
694		 * pretending we know what we do.
695		 */
696		if (error == ENXIO && fp->f_ops != &badfileops) {
697			fdrop(fp, td);
698			td->td_retval[0] = indx;
699			return (0);
700		}
701
702		/*
703		 * release our own reference
704		 */
705		fdrop(fp, td);
706
707		/*
708		 * handle special fdopen() case.  bleh.  dupfdopen() is
709		 * responsible for dropping the old contents of ofiles[indx]
710		 * if it succeeds.
711		 */
712		if ((error == ENODEV || error == ENXIO) &&
713		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
714		    (error =
715			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
716			td->td_retval[0] = indx;
717			return (0);
718		}
719		/*
720		 * Clean up the descriptor, but only if another thread hadn't
721		 * replaced or closed it.
722		 */
723		FILEDESC_LOCK(fdp);
724		if (fdp->fd_ofiles[indx] == fp) {
725			fdp->fd_ofiles[indx] = NULL;
726			FILEDESC_UNLOCK(fdp);
727			fdrop(fp, td);
728		} else
729			FILEDESC_UNLOCK(fdp);
730
731		if (error == ERESTART)
732			error = EINTR;
733		return (error);
734	}
735	td->td_dupfd = 0;
736	NDFREE(&nd, NDF_ONLY_PNBUF);
737	vp = nd.ni_vp;
738
739	/*
740	 * There should be 2 references on the file, one from the descriptor
741	 * table, and one for us.
742	 *
743	 * Handle the case where someone closed the file (via its file
744	 * descriptor) while we were blocked.  The end result should look
745	 * like opening the file succeeded but it was immediately closed.
746	 */
747	FILEDESC_LOCK(fdp);
748	FILE_LOCK(fp);
749	if (fp->f_count == 1) {
750		KASSERT(fdp->fd_ofiles[indx] != fp,
751		    ("Open file descriptor lost all refs"));
752		FILEDESC_UNLOCK(fdp);
753		FILE_UNLOCK(fp);
754		VOP_UNLOCK(vp, 0, td);
755		vn_close(vp, flags & FMASK, fp->f_cred, td);
756		fdrop(fp, td);
757		td->td_retval[0] = indx;
758		return 0;
759	}
760	fp->f_vnode = vp;
761	fp->f_data = vp;
762	fp->f_flag = flags & FMASK;
763	fp->f_ops = &vnops;
764	fp->f_seqcount = 1;
765	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
766	FILEDESC_UNLOCK(fdp);
767	FILE_UNLOCK(fp);
768
769	/* assert that vn_open created a backing object if one is needed */
770	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
771		("open: vmio vnode has no backing object after vn_open"));
772
773	VOP_UNLOCK(vp, 0, td);
774	if (flags & (O_EXLOCK | O_SHLOCK)) {
775		lf.l_whence = SEEK_SET;
776		lf.l_start = 0;
777		lf.l_len = 0;
778		if (flags & O_EXLOCK)
779			lf.l_type = F_WRLCK;
780		else
781			lf.l_type = F_RDLCK;
782		type = F_FLOCK;
783		if ((flags & FNONBLOCK) == 0)
784			type |= F_WAIT;
785		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
786			    type)) != 0)
787			goto bad;
788		fp->f_flag |= FHASLOCK;
789	}
790	if (flags & O_TRUNC) {
791		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
792			goto bad;
793		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
794		VATTR_NULL(&vat);
795		vat.va_size = 0;
796		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
797#ifdef MAC
798		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
799		if (error == 0)
800#endif
801			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
802		VOP_UNLOCK(vp, 0, td);
803		vn_finished_write(mp);
804		if (error)
805			goto bad;
806	}
807	/*
808	 * Release our private reference, leaving the one associated with
809	 * the descriptor table intact.
810	 */
811	fdrop(fp, td);
812	td->td_retval[0] = indx;
813	return (0);
814bad:
815	FILEDESC_LOCK(fdp);
816	if (fdp->fd_ofiles[indx] == fp) {
817		fdp->fd_ofiles[indx] = NULL;
818		FILEDESC_UNLOCK(fdp);
819		fdrop(fp, td);
820	} else
821		FILEDESC_UNLOCK(fdp);
822	fdrop(fp, td);
823	return (error);
824}
825
#ifdef COMPAT_43
/*
 * Old creat(): create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Implemented as open() with O_WRONLY | O_CREAT | O_TRUNC.
 */
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		char *path;
		int mode;
	} */ *uap;
{
	struct open_args /* {
		char *path;
		int flags;
		int mode;
	} */ oargs;

	oargs.flags = O_WRONLY | O_CREAT | O_TRUNC;
	oargs.path = uap->path;
	oargs.mode = uap->mode;
	return (open(td, &oargs));
}
#endif /* COMPAT_43 */
856
857/*
858 * Create a special file.
859 */
860#ifndef _SYS_SYSPROTO_H_
861struct mknod_args {
862	char	*path;
863	int	mode;
864	int	dev;
865};
866#endif
867/* ARGSUSED */
868int
869mknod(td, uap)
870	struct thread *td;
871	register struct mknod_args /* {
872		char *path;
873		int mode;
874		int dev;
875	} */ *uap;
876{
877
878	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
879}
880
881int
882kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
883    int dev)
884{
885	struct vnode *vp;
886	struct mount *mp;
887	struct vattr vattr;
888	int error;
889	int whiteout = 0;
890	struct nameidata nd;
891
892	switch (mode & S_IFMT) {
893	case S_IFCHR:
894	case S_IFBLK:
895		error = suser(td);
896		break;
897	default:
898		error = suser_cred(td->td_ucred, PRISON_ROOT);
899		break;
900	}
901	if (error)
902		return (error);
903restart:
904	bwillwrite();
905	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
906	if ((error = namei(&nd)) != 0)
907		return (error);
908	vp = nd.ni_vp;
909	if (vp != NULL) {
910		NDFREE(&nd, NDF_ONLY_PNBUF);
911		vrele(vp);
912		if (vp == nd.ni_dvp)
913			vrele(nd.ni_dvp);
914		else
915			vput(nd.ni_dvp);
916		return (EEXIST);
917	} else {
918		VATTR_NULL(&vattr);
919		FILEDESC_LOCK(td->td_proc->p_fd);
920		vattr.va_mode = (mode & ALLPERMS) &
921		    ~td->td_proc->p_fd->fd_cmask;
922		FILEDESC_UNLOCK(td->td_proc->p_fd);
923		vattr.va_rdev = dev;
924		whiteout = 0;
925
926		switch (mode & S_IFMT) {
927		case S_IFMT:	/* used by badsect to flag bad sectors */
928			vattr.va_type = VBAD;
929			break;
930		case S_IFCHR:
931			vattr.va_type = VCHR;
932			break;
933		case S_IFBLK:
934			vattr.va_type = VBLK;
935			break;
936		case S_IFWHT:
937			whiteout = 1;
938			break;
939		default:
940			error = EINVAL;
941			break;
942		}
943	}
944	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
945		NDFREE(&nd, NDF_ONLY_PNBUF);
946		vput(nd.ni_dvp);
947		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
948			return (error);
949		goto restart;
950	}
951#ifdef MAC
952	if (error == 0 && !whiteout)
953		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
954		    &nd.ni_cnd, &vattr);
955#endif
956	if (!error) {
957		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
958		if (whiteout)
959			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
960		else {
961			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
962						&nd.ni_cnd, &vattr);
963			if (error == 0)
964				vput(nd.ni_vp);
965		}
966	}
967	NDFREE(&nd, NDF_ONLY_PNBUF);
968	vput(nd.ni_dvp);
969	vn_finished_write(mp);
970	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
971	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
972	return (error);
973}
974
975/*
976 * Create a named pipe.
977 */
978#ifndef _SYS_SYSPROTO_H_
979struct mkfifo_args {
980	char	*path;
981	int	mode;
982};
983#endif
984/* ARGSUSED */
985int
986mkfifo(td, uap)
987	struct thread *td;
988	register struct mkfifo_args /* {
989		char *path;
990		int mode;
991	} */ *uap;
992{
993
994	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
995}
996
997int
998kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
999{
1000	struct mount *mp;
1001	struct vattr vattr;
1002	int error;
1003	struct nameidata nd;
1004
1005restart:
1006	bwillwrite();
1007	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1008	if ((error = namei(&nd)) != 0)
1009		return (error);
1010	if (nd.ni_vp != NULL) {
1011		NDFREE(&nd, NDF_ONLY_PNBUF);
1012		vrele(nd.ni_vp);
1013		if (nd.ni_vp == nd.ni_dvp)
1014			vrele(nd.ni_dvp);
1015		else
1016			vput(nd.ni_dvp);
1017		return (EEXIST);
1018	}
1019	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1020		NDFREE(&nd, NDF_ONLY_PNBUF);
1021		vput(nd.ni_dvp);
1022		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1023			return (error);
1024		goto restart;
1025	}
1026	VATTR_NULL(&vattr);
1027	vattr.va_type = VFIFO;
1028	FILEDESC_LOCK(td->td_proc->p_fd);
1029	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1030	FILEDESC_UNLOCK(td->td_proc->p_fd);
1031#ifdef MAC
1032	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1033	    &vattr);
1034	if (error)
1035		goto out;
1036#endif
1037	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1038	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1039	if (error == 0)
1040		vput(nd.ni_vp);
1041#ifdef MAC
1042out:
1043#endif
1044	NDFREE(&nd, NDF_ONLY_PNBUF);
1045	vput(nd.ni_dvp);
1046	vn_finished_write(mp);
1047	return (error);
1048}
1049
1050/*
1051 * Make a hard file link.
1052 */
1053#ifndef _SYS_SYSPROTO_H_
1054struct link_args {
1055	char	*path;
1056	char	*link;
1057};
1058#endif
1059/* ARGSUSED */
1060int
1061link(td, uap)
1062	struct thread *td;
1063	register struct link_args /* {
1064		char *path;
1065		char *link;
1066	} */ *uap;
1067{
1068
1069	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1070}
1071
1072int
1073kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1074{
1075	struct vnode *vp;
1076	struct mount *mp;
1077	struct nameidata nd;
1078	int error;
1079
1080	bwillwrite();
1081	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1082	if ((error = namei(&nd)) != 0)
1083		return (error);
1084	NDFREE(&nd, NDF_ONLY_PNBUF);
1085	vp = nd.ni_vp;
1086	if (vp->v_type == VDIR) {
1087		vrele(vp);
1088		return (EPERM);		/* POSIX */
1089	}
1090	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1091		vrele(vp);
1092		return (error);
1093	}
1094	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1095	if ((error = namei(&nd)) == 0) {
1096		if (nd.ni_vp != NULL) {
1097			vrele(nd.ni_vp);
1098			if (nd.ni_dvp == nd.ni_vp)
1099				vrele(nd.ni_dvp);
1100			else
1101				vput(nd.ni_dvp);
1102			error = EEXIST;
1103		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1104		    == 0) {
1105			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1106			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1107#ifdef MAC
1108			error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp,
1109			    vp, &nd.ni_cnd);
1110			if (error == 0)
1111#endif
1112				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1113			VOP_UNLOCK(vp, 0, td);
1114			vput(nd.ni_dvp);
1115		}
1116		NDFREE(&nd, NDF_ONLY_PNBUF);
1117	}
1118	vrele(vp);
1119	vn_finished_write(mp);
1120	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1121	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1122	return (error);
1123}
1124
1125/*
1126 * Make a symbolic link.
1127 */
1128#ifndef _SYS_SYSPROTO_H_
1129struct symlink_args {
1130	char	*path;
1131	char	*link;
1132};
1133#endif
1134/* ARGSUSED */
1135int
1136symlink(td, uap)
1137	struct thread *td;
1138	register struct symlink_args /* {
1139		char *path;
1140		char *link;
1141	} */ *uap;
1142{
1143
1144	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1145}
1146
1147int
1148kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1149{
1150	struct mount *mp;
1151	struct vattr vattr;
1152	char *syspath;
1153	int error;
1154	struct nameidata nd;
1155
1156	if (segflg == UIO_SYSSPACE) {
1157		syspath = path;
1158	} else {
1159		syspath = uma_zalloc(namei_zone, M_WAITOK);
1160		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1161			goto out;
1162	}
1163restart:
1164	bwillwrite();
1165	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1166	if ((error = namei(&nd)) != 0)
1167		goto out;
1168	if (nd.ni_vp) {
1169		NDFREE(&nd, NDF_ONLY_PNBUF);
1170		vrele(nd.ni_vp);
1171		if (nd.ni_vp == nd.ni_dvp)
1172			vrele(nd.ni_dvp);
1173		else
1174			vput(nd.ni_dvp);
1175		error = EEXIST;
1176		goto out;
1177	}
1178	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1179		NDFREE(&nd, NDF_ONLY_PNBUF);
1180		vput(nd.ni_dvp);
1181		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1182			return (error);
1183		goto restart;
1184	}
1185	VATTR_NULL(&vattr);
1186	FILEDESC_LOCK(td->td_proc->p_fd);
1187	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1188	FILEDESC_UNLOCK(td->td_proc->p_fd);
1189#ifdef MAC
1190	vattr.va_type = VLNK;
1191	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1192	    &vattr);
1193	if (error)
1194		goto out2;
1195#endif
1196	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1197	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1198	if (error == 0)
1199		vput(nd.ni_vp);
1200#ifdef MAC
1201out2:
1202#endif
1203	NDFREE(&nd, NDF_ONLY_PNBUF);
1204	vput(nd.ni_dvp);
1205	vn_finished_write(mp);
1206	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1207	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1208out:
1209	if (segflg != UIO_SYSSPACE)
1210		uma_zfree(namei_zone, syspath);
1211	return (error);
1212}
1213
1214/*
1215 * Delete a whiteout from the filesystem.
1216 */
1217/* ARGSUSED */
1218int
1219undelete(td, uap)
1220	struct thread *td;
1221	register struct undelete_args /* {
1222		char *path;
1223	} */ *uap;
1224{
1225	int error;
1226	struct mount *mp;
1227	struct nameidata nd;
1228
1229restart:
1230	bwillwrite();
1231	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1232	    uap->path, td);
1233	error = namei(&nd);
1234	if (error)
1235		return (error);
1236
1237	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1238		NDFREE(&nd, NDF_ONLY_PNBUF);
1239		if (nd.ni_vp)
1240			vrele(nd.ni_vp);
1241		if (nd.ni_vp == nd.ni_dvp)
1242			vrele(nd.ni_dvp);
1243		else
1244			vput(nd.ni_dvp);
1245		return (EEXIST);
1246	}
1247	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1248		NDFREE(&nd, NDF_ONLY_PNBUF);
1249		vput(nd.ni_dvp);
1250		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1251			return (error);
1252		goto restart;
1253	}
1254	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1255	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1256	NDFREE(&nd, NDF_ONLY_PNBUF);
1257	vput(nd.ni_dvp);
1258	vn_finished_write(mp);
1259	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1260	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1261	return (error);
1262}
1263
1264/*
1265 * Delete a name from the filesystem.
1266 */
1267#ifndef _SYS_SYSPROTO_H_
1268struct unlink_args {
1269	char	*path;
1270};
1271#endif
1272/* ARGSUSED */
1273int
1274unlink(td, uap)
1275	struct thread *td;
1276	struct unlink_args /* {
1277		char *path;
1278	} */ *uap;
1279{
1280
1281	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1282}
1283
1284int
1285kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1286{
1287	struct mount *mp;
1288	struct vnode *vp;
1289	int error;
1290	struct nameidata nd;
1291
1292restart:
1293	bwillwrite();
1294	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1295	if ((error = namei(&nd)) != 0)
1296		return (error);
1297	vp = nd.ni_vp;
1298	if (vp->v_type == VDIR)
1299		error = EPERM;		/* POSIX */
1300	else {
1301		/*
1302		 * The root of a mounted filesystem cannot be deleted.
1303		 *
1304		 * XXX: can this only be a VDIR case?
1305		 */
1306		if (vp->v_vflag & VV_ROOT)
1307			error = EBUSY;
1308	}
1309	if (error == 0) {
1310		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1311			NDFREE(&nd, NDF_ONLY_PNBUF);
1312			if (vp == nd.ni_dvp)
1313				vrele(vp);
1314			else
1315				vput(vp);
1316			vput(nd.ni_dvp);
1317			if ((error = vn_start_write(NULL, &mp,
1318			    V_XSLEEP | PCATCH)) != 0)
1319				return (error);
1320			goto restart;
1321		}
1322#ifdef MAC
1323		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1324		    &nd.ni_cnd);
1325		if (error)
1326			goto out;
1327#endif
1328		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1329		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1330#ifdef MAC
1331out:
1332#endif
1333		vn_finished_write(mp);
1334	}
1335	NDFREE(&nd, NDF_ONLY_PNBUF);
1336	if (vp == nd.ni_dvp)
1337		vrele(vp);
1338	else
1339		vput(vp);
1340	vput(nd.ni_dvp);
1341	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1342	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1343	return (error);
1344}
1345
1346/*
1347 * Reposition read/write file offset.
1348 */
1349#ifndef _SYS_SYSPROTO_H_
1350struct lseek_args {
1351	int	fd;
1352	int	pad;
1353	off_t	offset;
1354	int	whence;
1355};
1356#endif
1357int
1358lseek(td, uap)
1359	struct thread *td;
1360	register struct lseek_args /* {
1361		int fd;
1362		int pad;
1363		off_t offset;
1364		int whence;
1365	} */ *uap;
1366{
1367	struct ucred *cred = td->td_ucred;
1368	struct file *fp;
1369	struct vnode *vp;
1370	struct vattr vattr;
1371	off_t offset;
1372	int error, noneg;
1373
1374	if ((error = fget(td, uap->fd, &fp)) != 0)
1375		return (error);
1376	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1377		fdrop(fp, td);
1378		return (ESPIPE);
1379	}
1380	vp = fp->f_vnode;
1381	noneg = (vp->v_type != VCHR);
1382	offset = uap->offset;
1383	switch (uap->whence) {
1384	case L_INCR:
1385		if (noneg &&
1386		    (fp->f_offset < 0 ||
1387		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1388			error = EOVERFLOW;
1389			break;
1390		}
1391		offset += fp->f_offset;
1392		break;
1393	case L_XTND:
1394		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1395		error = VOP_GETATTR(vp, &vattr, cred, td);
1396		VOP_UNLOCK(vp, 0, td);
1397		if (error)
1398			break;
1399		if (noneg &&
1400		    (vattr.va_size > OFF_MAX ||
1401		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1402			error = EOVERFLOW;
1403			break;
1404		}
1405		offset += vattr.va_size;
1406		break;
1407	case L_SET:
1408		break;
1409	default:
1410		error = EINVAL;
1411	}
1412	if (error == 0 && noneg && offset < 0)
1413		error = EINVAL;
1414	if (error != 0) {
1415		fdrop(fp, td);
1416		return (error);
1417	}
1418	fp->f_offset = offset;
1419	*(off_t *)(td->td_retval) = fp->f_offset;
1420	fdrop(fp, td);
1421	return (0);
1422}
1423
1424#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1425/*
1426 * Reposition read/write file offset.
1427 */
1428#ifndef _SYS_SYSPROTO_H_
1429struct olseek_args {
1430	int	fd;
1431	long	offset;
1432	int	whence;
1433};
1434#endif
1435int
1436olseek(td, uap)
1437	struct thread *td;
1438	register struct olseek_args /* {
1439		int fd;
1440		long offset;
1441		int whence;
1442	} */ *uap;
1443{
1444	struct lseek_args /* {
1445		int fd;
1446		int pad;
1447		off_t offset;
1448		int whence;
1449	} */ nuap;
1450	int error;
1451
1452	nuap.fd = uap->fd;
1453	nuap.offset = uap->offset;
1454	nuap.whence = uap->whence;
1455	error = lseek(td, &nuap);
1456	return (error);
1457}
1458#endif /* COMPAT_43 */
1459
1460/*
1461 * Check access permissions using passed credentials.
1462 */
1463static int
1464vn_access(vp, user_flags, cred, td)
1465	struct vnode	*vp;
1466	int		user_flags;
1467	struct ucred	*cred;
1468	struct thread	*td;
1469{
1470	int error, flags;
1471
1472	/* Flags == 0 means only check for existence. */
1473	error = 0;
1474	if (user_flags) {
1475		flags = 0;
1476		if (user_flags & R_OK)
1477			flags |= VREAD;
1478		if (user_flags & W_OK)
1479			flags |= VWRITE;
1480		if (user_flags & X_OK)
1481			flags |= VEXEC;
1482#ifdef MAC
1483		error = mac_check_vnode_access(cred, vp, flags);
1484		if (error)
1485			return (error);
1486#endif
1487		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1488			error = VOP_ACCESS(vp, flags, cred, td);
1489	}
1490	return (error);
1491}
1492
1493/*
1494 * Check access permissions using "real" credentials.
1495 */
1496#ifndef _SYS_SYSPROTO_H_
1497struct access_args {
1498	char	*path;
1499	int	flags;
1500};
1501#endif
1502int
1503access(td, uap)
1504	struct thread *td;
1505	register struct access_args /* {
1506		char *path;
1507		int flags;
1508	} */ *uap;
1509{
1510
1511	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1512}
1513
1514int
1515kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1516{
1517	struct ucred *cred, *tmpcred;
1518	register struct vnode *vp;
1519	int error;
1520	struct nameidata nd;
1521
1522	/*
1523	 * Create and modify a temporary credential instead of one that
1524	 * is potentially shared.  This could also mess up socket
1525	 * buffer accounting which can run in an interrupt context.
1526	 *
1527	 * XXX - Depending on how "threads" are finally implemented, it
1528	 * may be better to explicitly pass the credential to namei()
1529	 * rather than to modify the potentially shared process structure.
1530	 */
1531	cred = td->td_ucred;
1532	tmpcred = crdup(cred);
1533	tmpcred->cr_uid = cred->cr_ruid;
1534	tmpcred->cr_groups[0] = cred->cr_rgid;
1535	td->td_ucred = tmpcred;
1536	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1537	if ((error = namei(&nd)) != 0)
1538		goto out1;
1539	vp = nd.ni_vp;
1540
1541	error = vn_access(vp, flags, tmpcred, td);
1542	NDFREE(&nd, NDF_ONLY_PNBUF);
1543	vput(vp);
1544out1:
1545	td->td_ucred = cred;
1546	crfree(tmpcred);
1547	return (error);
1548}
1549
1550/*
1551 * Check access permissions using "effective" credentials.
1552 */
1553#ifndef _SYS_SYSPROTO_H_
1554struct eaccess_args {
1555	char	*path;
1556	int	flags;
1557};
1558#endif
1559int
1560eaccess(td, uap)
1561	struct thread *td;
1562	register struct eaccess_args /* {
1563		char *path;
1564		int flags;
1565	} */ *uap;
1566{
1567	struct nameidata nd;
1568	struct vnode *vp;
1569	int error;
1570
1571	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1572	    uap->path, td);
1573	if ((error = namei(&nd)) != 0)
1574		return (error);
1575	vp = nd.ni_vp;
1576
1577	error = vn_access(vp, uap->flags, td->td_ucred, td);
1578	NDFREE(&nd, NDF_ONLY_PNBUF);
1579	vput(vp);
1580	return (error);
1581}
1582
1583#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1584/*
1585 * Get file status; this version follows links.
1586 */
1587#ifndef _SYS_SYSPROTO_H_
1588struct ostat_args {
1589	char	*path;
1590	struct ostat *ub;
1591};
1592#endif
1593/* ARGSUSED */
1594int
1595ostat(td, uap)
1596	struct thread *td;
1597	register struct ostat_args /* {
1598		char *path;
1599		struct ostat *ub;
1600	} */ *uap;
1601{
1602	struct stat sb;
1603	struct ostat osb;
1604	int error;
1605	struct nameidata nd;
1606
1607	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1608	    uap->path, td);
1609	if ((error = namei(&nd)) != 0)
1610		return (error);
1611	NDFREE(&nd, NDF_ONLY_PNBUF);
1612	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1613	vput(nd.ni_vp);
1614	if (error)
1615		return (error);
1616	cvtstat(&sb, &osb);
1617	error = copyout(&osb, uap->ub, sizeof (osb));
1618	return (error);
1619}
1620
1621/*
1622 * Get file status; this version does not follow links.
1623 */
1624#ifndef _SYS_SYSPROTO_H_
1625struct olstat_args {
1626	char	*path;
1627	struct ostat *ub;
1628};
1629#endif
1630/* ARGSUSED */
1631int
1632olstat(td, uap)
1633	struct thread *td;
1634	register struct olstat_args /* {
1635		char *path;
1636		struct ostat *ub;
1637	} */ *uap;
1638{
1639	struct vnode *vp;
1640	struct stat sb;
1641	struct ostat osb;
1642	int error;
1643	struct nameidata nd;
1644
1645	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1646	    uap->path, td);
1647	if ((error = namei(&nd)) != 0)
1648		return (error);
1649	vp = nd.ni_vp;
1650	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1651	NDFREE(&nd, NDF_ONLY_PNBUF);
1652	vput(vp);
1653	if (error)
1654		return (error);
1655	cvtstat(&sb, &osb);
1656	error = copyout(&osb, uap->ub, sizeof (osb));
1657	return (error);
1658}
1659
1660/*
1661 * Convert from an old to a new stat structure.
1662 */
1663void
1664cvtstat(st, ost)
1665	struct stat *st;
1666	struct ostat *ost;
1667{
1668
1669	ost->st_dev = st->st_dev;
1670	ost->st_ino = st->st_ino;
1671	ost->st_mode = st->st_mode;
1672	ost->st_nlink = st->st_nlink;
1673	ost->st_uid = st->st_uid;
1674	ost->st_gid = st->st_gid;
1675	ost->st_rdev = st->st_rdev;
1676	if (st->st_size < (quad_t)1 << 32)
1677		ost->st_size = st->st_size;
1678	else
1679		ost->st_size = -2;
1680	ost->st_atime = st->st_atime;
1681	ost->st_mtime = st->st_mtime;
1682	ost->st_ctime = st->st_ctime;
1683	ost->st_blksize = st->st_blksize;
1684	ost->st_blocks = st->st_blocks;
1685	ost->st_flags = st->st_flags;
1686	ost->st_gen = st->st_gen;
1687}
1688#endif /* COMPAT_43 || COMPAT_SUNOS */
1689
1690/*
1691 * Get file status; this version follows links.
1692 */
1693#ifndef _SYS_SYSPROTO_H_
1694struct stat_args {
1695	char	*path;
1696	struct stat *ub;
1697};
1698#endif
1699/* ARGSUSED */
1700int
1701stat(td, uap)
1702	struct thread *td;
1703	register struct stat_args /* {
1704		char *path;
1705		struct stat *ub;
1706	} */ *uap;
1707{
1708	struct stat sb;
1709	int error;
1710	struct nameidata nd;
1711
1712#ifdef LOOKUP_SHARED
1713	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1714	    UIO_USERSPACE, uap->path, td);
1715#else
1716	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1717	    uap->path, td);
1718#endif
1719	if ((error = namei(&nd)) != 0)
1720		return (error);
1721	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1722	NDFREE(&nd, NDF_ONLY_PNBUF);
1723	vput(nd.ni_vp);
1724	if (error)
1725		return (error);
1726	error = copyout(&sb, uap->ub, sizeof (sb));
1727	return (error);
1728}
1729
1730/*
1731 * Get file status; this version does not follow links.
1732 */
1733#ifndef _SYS_SYSPROTO_H_
1734struct lstat_args {
1735	char	*path;
1736	struct stat *ub;
1737};
1738#endif
1739/* ARGSUSED */
1740int
1741lstat(td, uap)
1742	struct thread *td;
1743	register struct lstat_args /* {
1744		char *path;
1745		struct stat *ub;
1746	} */ *uap;
1747{
1748	int error;
1749	struct vnode *vp;
1750	struct stat sb;
1751	struct nameidata nd;
1752
1753	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1754	    uap->path, td);
1755	if ((error = namei(&nd)) != 0)
1756		return (error);
1757	vp = nd.ni_vp;
1758	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1759	NDFREE(&nd, NDF_ONLY_PNBUF);
1760	vput(vp);
1761	if (error)
1762		return (error);
1763	error = copyout(&sb, uap->ub, sizeof (sb));
1764	return (error);
1765}
1766
1767/*
1768 * Implementation of the NetBSD stat() function.
1769 * XXX This should probably be collapsed with the FreeBSD version,
1770 * as the differences are only due to vn_stat() clearing spares at
1771 * the end of the structures.  vn_stat could be split to avoid this,
1772 * and thus collapse the following to close to zero code.
1773 */
1774void
1775cvtnstat(sb, nsb)
1776	struct stat *sb;
1777	struct nstat *nsb;
1778{
1779	bzero(nsb, sizeof *nsb);
1780	nsb->st_dev = sb->st_dev;
1781	nsb->st_ino = sb->st_ino;
1782	nsb->st_mode = sb->st_mode;
1783	nsb->st_nlink = sb->st_nlink;
1784	nsb->st_uid = sb->st_uid;
1785	nsb->st_gid = sb->st_gid;
1786	nsb->st_rdev = sb->st_rdev;
1787	nsb->st_atimespec = sb->st_atimespec;
1788	nsb->st_mtimespec = sb->st_mtimespec;
1789	nsb->st_ctimespec = sb->st_ctimespec;
1790	nsb->st_size = sb->st_size;
1791	nsb->st_blocks = sb->st_blocks;
1792	nsb->st_blksize = sb->st_blksize;
1793	nsb->st_flags = sb->st_flags;
1794	nsb->st_gen = sb->st_gen;
1795	nsb->st_birthtimespec = sb->st_birthtimespec;
1796}
1797
1798#ifndef _SYS_SYSPROTO_H_
1799struct nstat_args {
1800	char	*path;
1801	struct nstat *ub;
1802};
1803#endif
1804/* ARGSUSED */
1805int
1806nstat(td, uap)
1807	struct thread *td;
1808	register struct nstat_args /* {
1809		char *path;
1810		struct nstat *ub;
1811	} */ *uap;
1812{
1813	struct stat sb;
1814	struct nstat nsb;
1815	int error;
1816	struct nameidata nd;
1817
1818	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1819	    uap->path, td);
1820	if ((error = namei(&nd)) != 0)
1821		return (error);
1822	NDFREE(&nd, NDF_ONLY_PNBUF);
1823	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1824	vput(nd.ni_vp);
1825	if (error)
1826		return (error);
1827	cvtnstat(&sb, &nsb);
1828	error = copyout(&nsb, uap->ub, sizeof (nsb));
1829	return (error);
1830}
1831
1832/*
1833 * NetBSD lstat.  Get file status; this version does not follow links.
1834 */
1835#ifndef _SYS_SYSPROTO_H_
1836struct lstat_args {
1837	char	*path;
1838	struct stat *ub;
1839};
1840#endif
1841/* ARGSUSED */
1842int
1843nlstat(td, uap)
1844	struct thread *td;
1845	register struct nlstat_args /* {
1846		char *path;
1847		struct nstat *ub;
1848	} */ *uap;
1849{
1850	int error;
1851	struct vnode *vp;
1852	struct stat sb;
1853	struct nstat nsb;
1854	struct nameidata nd;
1855
1856	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1857	    uap->path, td);
1858	if ((error = namei(&nd)) != 0)
1859		return (error);
1860	vp = nd.ni_vp;
1861	NDFREE(&nd, NDF_ONLY_PNBUF);
1862	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1863	vput(vp);
1864	if (error)
1865		return (error);
1866	cvtnstat(&sb, &nsb);
1867	error = copyout(&nsb, uap->ub, sizeof (nsb));
1868	return (error);
1869}
1870
1871/*
1872 * Get configurable pathname variables.
1873 */
1874#ifndef _SYS_SYSPROTO_H_
1875struct pathconf_args {
1876	char	*path;
1877	int	name;
1878};
1879#endif
1880/* ARGSUSED */
1881int
1882pathconf(td, uap)
1883	struct thread *td;
1884	register struct pathconf_args /* {
1885		char *path;
1886		int name;
1887	} */ *uap;
1888{
1889	int error;
1890	struct nameidata nd;
1891
1892	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1893	    uap->path, td);
1894	if ((error = namei(&nd)) != 0)
1895		return (error);
1896	NDFREE(&nd, NDF_ONLY_PNBUF);
1897
1898	/* If asynchronous I/O is available, it works for all files. */
1899	if (uap->name == _PC_ASYNC_IO)
1900		td->td_retval[0] = async_io_version;
1901	else
1902		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
1903	vput(nd.ni_vp);
1904	return (error);
1905}
1906
1907/*
1908 * Return target name of a symbolic link.
1909 */
1910#ifndef _SYS_SYSPROTO_H_
1911struct readlink_args {
1912	char	*path;
1913	char	*buf;
1914	int	count;
1915};
1916#endif
1917/* ARGSUSED */
1918int
1919readlink(td, uap)
1920	struct thread *td;
1921	register struct readlink_args /* {
1922		char *path;
1923		char *buf;
1924		int count;
1925	} */ *uap;
1926{
1927
1928	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
1929	    UIO_USERSPACE, uap->count));
1930}
1931
1932int
1933kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
1934    enum uio_seg bufseg, int count)
1935{
1936	register struct vnode *vp;
1937	struct iovec aiov;
1938	struct uio auio;
1939	int error;
1940	struct nameidata nd;
1941
1942	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1943	if ((error = namei(&nd)) != 0)
1944		return (error);
1945	NDFREE(&nd, NDF_ONLY_PNBUF);
1946	vp = nd.ni_vp;
1947#ifdef MAC
1948	error = mac_check_vnode_readlink(td->td_ucred, vp);
1949	if (error) {
1950		vput(vp);
1951		return (error);
1952	}
1953#endif
1954	if (vp->v_type != VLNK)
1955		error = EINVAL;
1956	else {
1957		aiov.iov_base = buf;
1958		aiov.iov_len = count;
1959		auio.uio_iov = &aiov;
1960		auio.uio_iovcnt = 1;
1961		auio.uio_offset = 0;
1962		auio.uio_rw = UIO_READ;
1963		auio.uio_segflg = bufseg;
1964		auio.uio_td = td;
1965		auio.uio_resid = count;
1966		error = VOP_READLINK(vp, &auio, td->td_ucred);
1967	}
1968	vput(vp);
1969	td->td_retval[0] = count - auio.uio_resid;
1970	return (error);
1971}
1972
1973/*
1974 * Common implementation code for chflags() and fchflags().
1975 */
1976static int
1977setfflags(td, vp, flags)
1978	struct thread *td;
1979	struct vnode *vp;
1980	int flags;
1981{
1982	int error;
1983	struct mount *mp;
1984	struct vattr vattr;
1985
1986	/*
1987	 * Prevent non-root users from setting flags on devices.  When
1988	 * a device is reused, users can retain ownership of the device
1989	 * if they are allowed to set flags and programs assume that
1990	 * chown can't fail when done as root.
1991	 */
1992	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1993		error = suser_cred(td->td_ucred, PRISON_ROOT);
1994		if (error)
1995			return (error);
1996	}
1997
1998	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1999		return (error);
2000	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2001	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2002	VATTR_NULL(&vattr);
2003	vattr.va_flags = flags;
2004#ifdef MAC
2005	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2006	if (error == 0)
2007#endif
2008		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2009	VOP_UNLOCK(vp, 0, td);
2010	vn_finished_write(mp);
2011	return (error);
2012}
2013
2014/*
2015 * Change flags of a file given a path name.
2016 */
2017#ifndef _SYS_SYSPROTO_H_
2018struct chflags_args {
2019	char	*path;
2020	int	flags;
2021};
2022#endif
2023/* ARGSUSED */
2024int
2025chflags(td, uap)
2026	struct thread *td;
2027	register struct chflags_args /* {
2028		char *path;
2029		int flags;
2030	} */ *uap;
2031{
2032	int error;
2033	struct nameidata nd;
2034
2035	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2036	if ((error = namei(&nd)) != 0)
2037		return (error);
2038	NDFREE(&nd, NDF_ONLY_PNBUF);
2039	error = setfflags(td, nd.ni_vp, uap->flags);
2040	vrele(nd.ni_vp);
2041	return error;
2042}
2043
2044/*
2045 * Same as chflags() but doesn't follow symlinks.
2046 */
2047int
2048lchflags(td, uap)
2049	struct thread *td;
2050	register struct lchflags_args /* {
2051		char *path;
2052		int flags;
2053	} */ *uap;
2054{
2055	int error;
2056	struct nameidata nd;
2057
2058	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2059	if ((error = namei(&nd)) != 0)
2060		return (error);
2061	NDFREE(&nd, NDF_ONLY_PNBUF);
2062	error = setfflags(td, nd.ni_vp, uap->flags);
2063	vrele(nd.ni_vp);
2064	return error;
2065}
2066
2067/*
2068 * Change flags of a file given a file descriptor.
2069 */
2070#ifndef _SYS_SYSPROTO_H_
2071struct fchflags_args {
2072	int	fd;
2073	int	flags;
2074};
2075#endif
2076/* ARGSUSED */
2077int
2078fchflags(td, uap)
2079	struct thread *td;
2080	register struct fchflags_args /* {
2081		int fd;
2082		int flags;
2083	} */ *uap;
2084{
2085	struct file *fp;
2086	int error;
2087
2088	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2089		return (error);
2090	error = setfflags(td, fp->f_vnode, uap->flags);
2091	fdrop(fp, td);
2092	return (error);
2093}
2094
2095/*
2096 * Common implementation code for chmod(), lchmod() and fchmod().
2097 */
2098static int
2099setfmode(td, vp, mode)
2100	struct thread *td;
2101	struct vnode *vp;
2102	int mode;
2103{
2104	int error;
2105	struct mount *mp;
2106	struct vattr vattr;
2107
2108	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2109		return (error);
2110	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2111	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2112	VATTR_NULL(&vattr);
2113	vattr.va_mode = mode & ALLPERMS;
2114#ifdef MAC
2115	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2116	if (error == 0)
2117#endif
2118		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2119	VOP_UNLOCK(vp, 0, td);
2120	vn_finished_write(mp);
2121	return error;
2122}
2123
2124/*
2125 * Change mode of a file given path name.
2126 */
2127#ifndef _SYS_SYSPROTO_H_
2128struct chmod_args {
2129	char	*path;
2130	int	mode;
2131};
2132#endif
2133/* ARGSUSED */
2134int
2135chmod(td, uap)
2136	struct thread *td;
2137	register struct chmod_args /* {
2138		char *path;
2139		int mode;
2140	} */ *uap;
2141{
2142
2143	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2144}
2145
2146int
2147kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2148{
2149	int error;
2150	struct nameidata nd;
2151
2152	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2153	if ((error = namei(&nd)) != 0)
2154		return (error);
2155	NDFREE(&nd, NDF_ONLY_PNBUF);
2156	error = setfmode(td, nd.ni_vp, mode);
2157	vrele(nd.ni_vp);
2158	return error;
2159}
2160
2161/*
2162 * Change mode of a file given path name (don't follow links.)
2163 */
2164#ifndef _SYS_SYSPROTO_H_
2165struct lchmod_args {
2166	char	*path;
2167	int	mode;
2168};
2169#endif
2170/* ARGSUSED */
2171int
2172lchmod(td, uap)
2173	struct thread *td;
2174	register struct lchmod_args /* {
2175		char *path;
2176		int mode;
2177	} */ *uap;
2178{
2179	int error;
2180	struct nameidata nd;
2181
2182	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2183	if ((error = namei(&nd)) != 0)
2184		return (error);
2185	NDFREE(&nd, NDF_ONLY_PNBUF);
2186	error = setfmode(td, nd.ni_vp, uap->mode);
2187	vrele(nd.ni_vp);
2188	return error;
2189}
2190
2191/*
2192 * Change mode of a file given a file descriptor.
2193 */
2194#ifndef _SYS_SYSPROTO_H_
2195struct fchmod_args {
2196	int	fd;
2197	int	mode;
2198};
2199#endif
2200/* ARGSUSED */
2201int
2202fchmod(td, uap)
2203	struct thread *td;
2204	register struct fchmod_args /* {
2205		int fd;
2206		int mode;
2207	} */ *uap;
2208{
2209	struct file *fp;
2210	int error;
2211
2212	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2213		return (error);
2214	error = setfmode(td, fp->f_vnode, uap->mode);
2215	fdrop(fp, td);
2216	return (error);
2217}
2218
2219/*
2220 * Common implementation for chown(), lchown(), and fchown()
2221 */
2222static int
2223setfown(td, vp, uid, gid)
2224	struct thread *td;
2225	struct vnode *vp;
2226	uid_t uid;
2227	gid_t gid;
2228{
2229	int error;
2230	struct mount *mp;
2231	struct vattr vattr;
2232
2233	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2234		return (error);
2235	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2236	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2237	VATTR_NULL(&vattr);
2238	vattr.va_uid = uid;
2239	vattr.va_gid = gid;
2240#ifdef MAC
2241	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2242	    vattr.va_gid);
2243	if (error == 0)
2244#endif
2245		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2246	VOP_UNLOCK(vp, 0, td);
2247	vn_finished_write(mp);
2248	return error;
2249}
2250
2251/*
2252 * Set ownership given a path name.
2253 */
2254#ifndef _SYS_SYSPROTO_H_
2255struct chown_args {
2256	char	*path;
2257	int	uid;
2258	int	gid;
2259};
2260#endif
2261/* ARGSUSED */
2262int
2263chown(td, uap)
2264	struct thread *td;
2265	register struct chown_args /* {
2266		char *path;
2267		int uid;
2268		int gid;
2269	} */ *uap;
2270{
2271
2272	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2273}
2274
2275int
2276kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2277    int gid)
2278{
2279	int error;
2280	struct nameidata nd;
2281
2282	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2283	if ((error = namei(&nd)) != 0)
2284		return (error);
2285	NDFREE(&nd, NDF_ONLY_PNBUF);
2286	error = setfown(td, nd.ni_vp, uid, gid);
2287	vrele(nd.ni_vp);
2288	return (error);
2289}
2290
2291/*
2292 * Set ownership given a path name, do not cross symlinks.
2293 */
2294#ifndef _SYS_SYSPROTO_H_
2295struct lchown_args {
2296	char	*path;
2297	int	uid;
2298	int	gid;
2299};
2300#endif
2301/* ARGSUSED */
2302int
2303lchown(td, uap)
2304	struct thread *td;
2305	register struct lchown_args /* {
2306		char *path;
2307		int uid;
2308		int gid;
2309	} */ *uap;
2310{
2311
2312	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2313}
2314
2315int
2316kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2317    int gid)
2318{
2319	int error;
2320	struct nameidata nd;
2321
2322	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2323	if ((error = namei(&nd)) != 0)
2324		return (error);
2325	NDFREE(&nd, NDF_ONLY_PNBUF);
2326	error = setfown(td, nd.ni_vp, uid, gid);
2327	vrele(nd.ni_vp);
2328	return (error);
2329}
2330
2331/*
2332 * Set ownership given a file descriptor.
2333 */
2334#ifndef _SYS_SYSPROTO_H_
2335struct fchown_args {
2336	int	fd;
2337	int	uid;
2338	int	gid;
2339};
2340#endif
2341/* ARGSUSED */
2342int
2343fchown(td, uap)
2344	struct thread *td;
2345	register struct fchown_args /* {
2346		int fd;
2347		int uid;
2348		int gid;
2349	} */ *uap;
2350{
2351	struct file *fp;
2352	int error;
2353
2354	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2355		return (error);
2356	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2357	fdrop(fp, td);
2358	return (error);
2359}
2360
2361/*
2362 * Common implementation code for utimes(), lutimes(), and futimes().
2363 */
2364static int
2365getutimes(usrtvp, tvpseg, tsp)
2366	const struct timeval *usrtvp;
2367	enum uio_seg tvpseg;
2368	struct timespec *tsp;
2369{
2370	struct timeval tv[2];
2371	const struct timeval *tvp;
2372	int error;
2373
2374	if (usrtvp == NULL) {
2375		microtime(&tv[0]);
2376		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2377		tsp[1] = tsp[0];
2378	} else {
2379		if (tvpseg == UIO_SYSSPACE) {
2380			tvp = usrtvp;
2381		} else {
2382			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2383				return (error);
2384			tvp = tv;
2385		}
2386
2387		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2388		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2389	}
2390	return 0;
2391}
2392
2393/*
2394 * Common implementation code for utimes(), lutimes(), and futimes().
2395 */
2396static int
2397setutimes(td, vp, ts, numtimes, nullflag)
2398	struct thread *td;
2399	struct vnode *vp;
2400	const struct timespec *ts;
2401	int numtimes;
2402	int nullflag;
2403{
2404	int error, setbirthtime;
2405	struct mount *mp;
2406	struct vattr vattr;
2407
2408	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2409		return (error);
2410	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2411	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2412	setbirthtime = 0;
2413	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2414	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2415		setbirthtime = 1;
2416	VATTR_NULL(&vattr);
2417	vattr.va_atime = ts[0];
2418	vattr.va_mtime = ts[1];
2419	if (setbirthtime)
2420		vattr.va_birthtime = ts[1];
2421	if (numtimes > 2)
2422		vattr.va_birthtime = ts[2];
2423	if (nullflag)
2424		vattr.va_vaflags |= VA_UTIMES_NULL;
2425#ifdef MAC
2426	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2427	    vattr.va_mtime);
2428#endif
2429	if (error == 0)
2430		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2431	VOP_UNLOCK(vp, 0, td);
2432	vn_finished_write(mp);
2433	return error;
2434}
2435
2436/*
2437 * Set the access and modification times of a file.
2438 */
2439#ifndef _SYS_SYSPROTO_H_
2440struct utimes_args {
2441	char	*path;
2442	struct	timeval *tptr;
2443};
2444#endif
2445/* ARGSUSED */
2446int
2447utimes(td, uap)
2448	struct thread *td;
2449	register struct utimes_args /* {
2450		char *path;
2451		struct timeval *tptr;
2452	} */ *uap;
2453{
2454
2455	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2456	    UIO_USERSPACE));
2457}
2458
2459int
2460kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2461    struct timeval *tptr, enum uio_seg tptrseg)
2462{
2463	struct timespec ts[2];
2464	int error;
2465	struct nameidata nd;
2466
2467	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2468		return (error);
2469	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2470	if ((error = namei(&nd)) != 0)
2471		return (error);
2472	NDFREE(&nd, NDF_ONLY_PNBUF);
2473	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2474	vrele(nd.ni_vp);
2475	return (error);
2476}
2477
2478/*
2479 * Set the access and modification times of a file.
2480 */
2481#ifndef _SYS_SYSPROTO_H_
2482struct lutimes_args {
2483	char	*path;
2484	struct	timeval *tptr;
2485};
2486#endif
2487/* ARGSUSED */
2488int
2489lutimes(td, uap)
2490	struct thread *td;
2491	register struct lutimes_args /* {
2492		char *path;
2493		struct timeval *tptr;
2494	} */ *uap;
2495{
2496
2497	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2498	    UIO_USERSPACE));
2499}
2500
2501int
2502kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2503    struct timeval *tptr, enum uio_seg tptrseg)
2504{
2505	struct timespec ts[2];
2506	int error;
2507	struct nameidata nd;
2508
2509	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2510		return (error);
2511	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2512	if ((error = namei(&nd)) != 0)
2513		return (error);
2514	NDFREE(&nd, NDF_ONLY_PNBUF);
2515	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2516	vrele(nd.ni_vp);
2517	return (error);
2518}
2519
2520/*
2521 * Set the access and modification times of a file.
2522 */
2523#ifndef _SYS_SYSPROTO_H_
2524struct futimes_args {
2525	int	fd;
2526	struct	timeval *tptr;
2527};
2528#endif
2529/* ARGSUSED */
2530int
2531futimes(td, uap)
2532	struct thread *td;
2533	register struct futimes_args /* {
2534		int  fd;
2535		struct timeval *tptr;
2536	} */ *uap;
2537{
2538
2539	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2540}
2541
2542int
2543kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2544    enum uio_seg tptrseg)
2545{
2546	struct timespec ts[2];
2547	struct file *fp;
2548	int error;
2549
2550	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2551		return (error);
2552	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2553		return (error);
2554	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2555	fdrop(fp, td);
2556	return (error);
2557}
2558
2559/*
2560 * Truncate a file given its path name.
2561 */
2562#ifndef _SYS_SYSPROTO_H_
2563struct truncate_args {
2564	char	*path;
2565	int	pad;
2566	off_t	length;
2567};
2568#endif
2569/* ARGSUSED */
2570int
2571truncate(td, uap)
2572	struct thread *td;
2573	register struct truncate_args /* {
2574		char *path;
2575		int pad;
2576		off_t length;
2577	} */ *uap;
2578{
2579
2580	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2581}
2582
2583int
2584kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2585{
2586	struct mount *mp;
2587	struct vnode *vp;
2588	struct vattr vattr;
2589	int error;
2590	struct nameidata nd;
2591
2592	if (length < 0)
2593		return(EINVAL);
2594	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2595	if ((error = namei(&nd)) != 0)
2596		return (error);
2597	vp = nd.ni_vp;
2598	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2599		vrele(vp);
2600		return (error);
2601	}
2602	NDFREE(&nd, NDF_ONLY_PNBUF);
2603	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2604	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2605	if (vp->v_type == VDIR)
2606		error = EISDIR;
2607#ifdef MAC
2608	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2609	}
2610#endif
2611	else if ((error = vn_writechk(vp)) == 0 &&
2612	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2613		VATTR_NULL(&vattr);
2614		vattr.va_size = length;
2615		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2616	}
2617	vput(vp);
2618	vn_finished_write(mp);
2619	return (error);
2620}
2621
2622/*
2623 * Truncate a file given a file descriptor.
2624 */
2625#ifndef _SYS_SYSPROTO_H_
2626struct ftruncate_args {
2627	int	fd;
2628	int	pad;
2629	off_t	length;
2630};
2631#endif
2632/* ARGSUSED */
2633int
2634ftruncate(td, uap)
2635	struct thread *td;
2636	register struct ftruncate_args /* {
2637		int fd;
2638		int pad;
2639		off_t length;
2640	} */ *uap;
2641{
2642	struct mount *mp;
2643	struct vattr vattr;
2644	struct vnode *vp;
2645	struct file *fp;
2646	int error;
2647
2648	if (uap->length < 0)
2649		return(EINVAL);
2650	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2651		return (error);
2652	if ((fp->f_flag & FWRITE) == 0) {
2653		fdrop(fp, td);
2654		return (EINVAL);
2655	}
2656	vp = fp->f_vnode;
2657	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2658		fdrop(fp, td);
2659		return (error);
2660	}
2661	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2662	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2663	if (vp->v_type == VDIR)
2664		error = EISDIR;
2665#ifdef MAC
2666	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2667	    vp))) {
2668	}
2669#endif
2670	else if ((error = vn_writechk(vp)) == 0) {
2671		VATTR_NULL(&vattr);
2672		vattr.va_size = uap->length;
2673		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2674	}
2675	VOP_UNLOCK(vp, 0, td);
2676	vn_finished_write(mp);
2677	fdrop(fp, td);
2678	return (error);
2679}
2680
2681#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2682/*
2683 * Truncate a file given its path name.
2684 */
2685#ifndef _SYS_SYSPROTO_H_
2686struct otruncate_args {
2687	char	*path;
2688	long	length;
2689};
2690#endif
2691/* ARGSUSED */
2692int
2693otruncate(td, uap)
2694	struct thread *td;
2695	register struct otruncate_args /* {
2696		char *path;
2697		long length;
2698	} */ *uap;
2699{
2700	struct truncate_args /* {
2701		char *path;
2702		int pad;
2703		off_t length;
2704	} */ nuap;
2705
2706	nuap.path = uap->path;
2707	nuap.length = uap->length;
2708	return (truncate(td, &nuap));
2709}
2710
2711/*
2712 * Truncate a file given a file descriptor.
2713 */
2714#ifndef _SYS_SYSPROTO_H_
2715struct oftruncate_args {
2716	int	fd;
2717	long	length;
2718};
2719#endif
2720/* ARGSUSED */
2721int
2722oftruncate(td, uap)
2723	struct thread *td;
2724	register struct oftruncate_args /* {
2725		int fd;
2726		long length;
2727	} */ *uap;
2728{
2729	struct ftruncate_args /* {
2730		int fd;
2731		int pad;
2732		off_t length;
2733	} */ nuap;
2734
2735	nuap.fd = uap->fd;
2736	nuap.length = uap->length;
2737	return (ftruncate(td, &nuap));
2738}
2739#endif /* COMPAT_43 || COMPAT_SUNOS */
2740
2741/*
2742 * Sync an open file.
2743 */
2744#ifndef _SYS_SYSPROTO_H_
2745struct fsync_args {
2746	int	fd;
2747};
2748#endif
2749/* ARGSUSED */
2750int
2751fsync(td, uap)
2752	struct thread *td;
2753	struct fsync_args /* {
2754		int fd;
2755	} */ *uap;
2756{
2757	struct vnode *vp;
2758	struct mount *mp;
2759	struct file *fp;
2760	vm_object_t obj;
2761	int error;
2762
2763	GIANT_REQUIRED;
2764
2765	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2766		return (error);
2767	vp = fp->f_vnode;
2768	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2769		fdrop(fp, td);
2770		return (error);
2771	}
2772	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2773	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2774		VM_OBJECT_LOCK(obj);
2775		vm_object_page_clean(obj, 0, 0, 0);
2776		VM_OBJECT_UNLOCK(obj);
2777	}
2778	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2779	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2780	    && softdep_fsync_hook != NULL)
2781		error = (*softdep_fsync_hook)(vp);
2782
2783	VOP_UNLOCK(vp, 0, td);
2784	vn_finished_write(mp);
2785	fdrop(fp, td);
2786	return (error);
2787}
2788
2789/*
2790 * Rename files.  Source and destination must either both be directories,
2791 * or both not be directories.  If target is a directory, it must be empty.
2792 */
2793#ifndef _SYS_SYSPROTO_H_
2794struct rename_args {
2795	char	*from;
2796	char	*to;
2797};
2798#endif
2799/* ARGSUSED */
2800int
2801rename(td, uap)
2802	struct thread *td;
2803	register struct rename_args /* {
2804		char *from;
2805		char *to;
2806	} */ *uap;
2807{
2808
2809	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
2810}
2811
2812int
2813kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
2814{
2815	struct mount *mp = NULL;
2816	struct vnode *tvp, *fvp, *tdvp;
2817	struct nameidata fromnd, tond;
2818	int error;
2819
2820	bwillwrite();
2821#ifdef MAC
2822	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
2823	    from, td);
2824#else
2825	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
2826#endif
2827	if ((error = namei(&fromnd)) != 0)
2828		return (error);
2829#ifdef MAC
2830	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
2831	    fromnd.ni_vp, &fromnd.ni_cnd);
2832	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
2833	VOP_UNLOCK(fromnd.ni_vp, 0, td);
2834#endif
2835	fvp = fromnd.ni_vp;
2836	if (error == 0)
2837		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
2838	if (error != 0) {
2839		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2840		vrele(fromnd.ni_dvp);
2841		vrele(fvp);
2842		goto out1;
2843	}
2844	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
2845	    NOOBJ, pathseg, to, td);
2846	if (fromnd.ni_vp->v_type == VDIR)
2847		tond.ni_cnd.cn_flags |= WILLBEDIR;
2848	if ((error = namei(&tond)) != 0) {
2849		/* Translate error code for rename("dir1", "dir2/."). */
2850		if (error == EISDIR && fvp->v_type == VDIR)
2851			error = EINVAL;
2852		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2853		vrele(fromnd.ni_dvp);
2854		vrele(fvp);
2855		goto out1;
2856	}
2857	tdvp = tond.ni_dvp;
2858	tvp = tond.ni_vp;
2859	if (tvp != NULL) {
2860		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2861			error = ENOTDIR;
2862			goto out;
2863		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2864			error = EISDIR;
2865			goto out;
2866		}
2867	}
2868	if (fvp == tdvp)
2869		error = EINVAL;
2870	/*
2871	 * If the source is the same as the destination (that is, if they
2872	 * are links to the same vnode), then there is nothing to do.
2873	 */
2874	if (fvp == tvp)
2875		error = -1;
2876#ifdef MAC
2877	else
2878		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
2879		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
2880#endif
2881out:
2882	if (!error) {
2883		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2884		if (fromnd.ni_dvp != tdvp) {
2885			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2886		}
2887		if (tvp) {
2888			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2889		}
2890		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2891				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2892		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2893		NDFREE(&tond, NDF_ONLY_PNBUF);
2894	} else {
2895		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2896		NDFREE(&tond, NDF_ONLY_PNBUF);
2897		if (tdvp == tvp)
2898			vrele(tdvp);
2899		else
2900			vput(tdvp);
2901		if (tvp)
2902			vput(tvp);
2903		vrele(fromnd.ni_dvp);
2904		vrele(fvp);
2905	}
2906	vrele(tond.ni_startdir);
2907	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2908	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2909	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2910	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2911out1:
2912	vn_finished_write(mp);
2913	if (fromnd.ni_startdir)
2914		vrele(fromnd.ni_startdir);
2915	if (error == -1)
2916		return (0);
2917	return (error);
2918}
2919
2920/*
2921 * Make a directory file.
2922 */
2923#ifndef _SYS_SYSPROTO_H_
2924struct mkdir_args {
2925	char	*path;
2926	int	mode;
2927};
2928#endif
2929/* ARGSUSED */
2930int
2931mkdir(td, uap)
2932	struct thread *td;
2933	register struct mkdir_args /* {
2934		char *path;
2935		int mode;
2936	} */ *uap;
2937{
2938
2939	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
2940}
2941
2942int
2943kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
2944{
2945	struct mount *mp;
2946	struct vnode *vp;
2947	struct vattr vattr;
2948	int error;
2949	struct nameidata nd;
2950
2951restart:
2952	bwillwrite();
2953	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2954	nd.ni_cnd.cn_flags |= WILLBEDIR;
2955	if ((error = namei(&nd)) != 0)
2956		return (error);
2957	vp = nd.ni_vp;
2958	if (vp != NULL) {
2959		NDFREE(&nd, NDF_ONLY_PNBUF);
2960		vrele(vp);
2961		/*
2962		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2963		 * the strange behaviour of leaving the vnode unlocked
2964		 * if the target is the same vnode as the parent.
2965		 */
2966		if (vp == nd.ni_dvp)
2967			vrele(nd.ni_dvp);
2968		else
2969			vput(nd.ni_dvp);
2970		return (EEXIST);
2971	}
2972	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2973		NDFREE(&nd, NDF_ONLY_PNBUF);
2974		vput(nd.ni_dvp);
2975		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2976			return (error);
2977		goto restart;
2978	}
2979	VATTR_NULL(&vattr);
2980	vattr.va_type = VDIR;
2981	FILEDESC_LOCK(td->td_proc->p_fd);
2982	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2983	FILEDESC_UNLOCK(td->td_proc->p_fd);
2984#ifdef MAC
2985	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
2986	    &vattr);
2987	if (error)
2988		goto out;
2989#endif
2990	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2991	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2992#ifdef MAC
2993out:
2994#endif
2995	NDFREE(&nd, NDF_ONLY_PNBUF);
2996	vput(nd.ni_dvp);
2997	if (!error)
2998		vput(nd.ni_vp);
2999	vn_finished_write(mp);
3000	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3001	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3002	return (error);
3003}
3004
3005/*
3006 * Remove a directory file.
3007 */
3008#ifndef _SYS_SYSPROTO_H_
3009struct rmdir_args {
3010	char	*path;
3011};
3012#endif
3013/* ARGSUSED */
3014int
3015rmdir(td, uap)
3016	struct thread *td;
3017	struct rmdir_args /* {
3018		char *path;
3019	} */ *uap;
3020{
3021
3022	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3023}
3024
3025int
3026kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3027{
3028	struct mount *mp;
3029	struct vnode *vp;
3030	int error;
3031	struct nameidata nd;
3032
3033restart:
3034	bwillwrite();
3035	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3036	if ((error = namei(&nd)) != 0)
3037		return (error);
3038	vp = nd.ni_vp;
3039	if (vp->v_type != VDIR) {
3040		error = ENOTDIR;
3041		goto out;
3042	}
3043	/*
3044	 * No rmdir "." please.
3045	 */
3046	if (nd.ni_dvp == vp) {
3047		error = EINVAL;
3048		goto out;
3049	}
3050	/*
3051	 * The root of a mounted filesystem cannot be deleted.
3052	 */
3053	if (vp->v_vflag & VV_ROOT) {
3054		error = EBUSY;
3055		goto out;
3056	}
3057#ifdef MAC
3058	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3059	    &nd.ni_cnd);
3060	if (error)
3061		goto out;
3062#endif
3063	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3064		NDFREE(&nd, NDF_ONLY_PNBUF);
3065		if (nd.ni_dvp == vp)
3066			vrele(nd.ni_dvp);
3067		else
3068			vput(nd.ni_dvp);
3069		vput(vp);
3070		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3071			return (error);
3072		goto restart;
3073	}
3074	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3075	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3076	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3077	vn_finished_write(mp);
3078out:
3079	NDFREE(&nd, NDF_ONLY_PNBUF);
3080	if (nd.ni_dvp == vp)
3081		vrele(nd.ni_dvp);
3082	else
3083		vput(nd.ni_dvp);
3084	vput(vp);
3085	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3086	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3087	return (error);
3088}
3089
3090#ifdef COMPAT_43
3091/*
3092 * Read a block of directory entries in a filesystem independent format.
3093 */
3094#ifndef _SYS_SYSPROTO_H_
3095struct ogetdirentries_args {
3096	int	fd;
3097	char	*buf;
3098	u_int	count;
3099	long	*basep;
3100};
3101#endif
3102int
3103ogetdirentries(td, uap)
3104	struct thread *td;
3105	register struct ogetdirentries_args /* {
3106		int fd;
3107		char *buf;
3108		u_int count;
3109		long *basep;
3110	} */ *uap;
3111{
3112	struct vnode *vp;
3113	struct file *fp;
3114	struct uio auio, kuio;
3115	struct iovec aiov, kiov;
3116	struct dirent *dp, *edp;
3117	caddr_t dirbuf;
3118	int error, eofflag, readcnt;
3119	long loff;
3120
3121	/* XXX arbitrary sanity limit on `count'. */
3122	if (uap->count > 64 * 1024)
3123		return (EINVAL);
3124	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3125		return (error);
3126	if ((fp->f_flag & FREAD) == 0) {
3127		fdrop(fp, td);
3128		return (EBADF);
3129	}
3130	vp = fp->f_vnode;
3131unionread:
3132	if (vp->v_type != VDIR) {
3133		fdrop(fp, td);
3134		return (EINVAL);
3135	}
3136	aiov.iov_base = uap->buf;
3137	aiov.iov_len = uap->count;
3138	auio.uio_iov = &aiov;
3139	auio.uio_iovcnt = 1;
3140	auio.uio_rw = UIO_READ;
3141	auio.uio_segflg = UIO_USERSPACE;
3142	auio.uio_td = td;
3143	auio.uio_resid = uap->count;
3144	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3145	loff = auio.uio_offset = fp->f_offset;
3146#ifdef MAC
3147	error = mac_check_vnode_readdir(td->td_ucred, vp);
3148	if (error) {
3149		VOP_UNLOCK(vp, 0, td);
3150		fdrop(fp, td);
3151		return (error);
3152	}
3153#endif
3154#	if (BYTE_ORDER != LITTLE_ENDIAN)
3155		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3156			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3157			    NULL, NULL);
3158			fp->f_offset = auio.uio_offset;
3159		} else
3160#	endif
3161	{
3162		kuio = auio;
3163		kuio.uio_iov = &kiov;
3164		kuio.uio_segflg = UIO_SYSSPACE;
3165		kiov.iov_len = uap->count;
3166		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3167		kiov.iov_base = dirbuf;
3168		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3169			    NULL, NULL);
3170		fp->f_offset = kuio.uio_offset;
3171		if (error == 0) {
3172			readcnt = uap->count - kuio.uio_resid;
3173			edp = (struct dirent *)&dirbuf[readcnt];
3174			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3175#				if (BYTE_ORDER == LITTLE_ENDIAN)
3176					/*
3177					 * The expected low byte of
3178					 * dp->d_namlen is our dp->d_type.
3179					 * The high MBZ byte of dp->d_namlen
3180					 * is our dp->d_namlen.
3181					 */
3182					dp->d_type = dp->d_namlen;
3183					dp->d_namlen = 0;
3184#				else
3185					/*
3186					 * The dp->d_type is the high byte
3187					 * of the expected dp->d_namlen,
3188					 * so must be zero'ed.
3189					 */
3190					dp->d_type = 0;
3191#				endif
3192				if (dp->d_reclen > 0) {
3193					dp = (struct dirent *)
3194					    ((char *)dp + dp->d_reclen);
3195				} else {
3196					error = EIO;
3197					break;
3198				}
3199			}
3200			if (dp >= edp)
3201				error = uiomove(dirbuf, readcnt, &auio);
3202		}
3203		FREE(dirbuf, M_TEMP);
3204	}
3205	VOP_UNLOCK(vp, 0, td);
3206	if (error) {
3207		fdrop(fp, td);
3208		return (error);
3209	}
3210	if (uap->count == auio.uio_resid) {
3211		if (union_dircheckp) {
3212			error = union_dircheckp(td, &vp, fp);
3213			if (error == -1)
3214				goto unionread;
3215			if (error) {
3216				fdrop(fp, td);
3217				return (error);
3218			}
3219		}
3220		/*
3221		 * XXX We could delay dropping the lock above but
3222		 * union_dircheckp complicates things.
3223		 */
3224		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3225		if ((vp->v_vflag & VV_ROOT) &&
3226		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3227			struct vnode *tvp = vp;
3228			vp = vp->v_mount->mnt_vnodecovered;
3229			VREF(vp);
3230			fp->f_vnode = vp;
3231			fp->f_data = vp;
3232			fp->f_offset = 0;
3233			vput(tvp);
3234			goto unionread;
3235		}
3236		VOP_UNLOCK(vp, 0, td);
3237	}
3238	error = copyout(&loff, uap->basep, sizeof(long));
3239	fdrop(fp, td);
3240	td->td_retval[0] = uap->count - auio.uio_resid;
3241	return (error);
3242}
3243#endif /* COMPAT_43 */
3244
3245/*
3246 * Read a block of directory entries in a filesystem independent format.
3247 */
3248#ifndef _SYS_SYSPROTO_H_
3249struct getdirentries_args {
3250	int	fd;
3251	char	*buf;
3252	u_int	count;
3253	long	*basep;
3254};
3255#endif
3256int
3257getdirentries(td, uap)
3258	struct thread *td;
3259	register struct getdirentries_args /* {
3260		int fd;
3261		char *buf;
3262		u_int count;
3263		long *basep;
3264	} */ *uap;
3265{
3266	struct vnode *vp;
3267	struct file *fp;
3268	struct uio auio;
3269	struct iovec aiov;
3270	long loff;
3271	int error, eofflag;
3272
3273	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3274		return (error);
3275	if ((fp->f_flag & FREAD) == 0) {
3276		fdrop(fp, td);
3277		return (EBADF);
3278	}
3279	vp = fp->f_vnode;
3280unionread:
3281	if (vp->v_type != VDIR) {
3282		fdrop(fp, td);
3283		return (EINVAL);
3284	}
3285	aiov.iov_base = uap->buf;
3286	aiov.iov_len = uap->count;
3287	auio.uio_iov = &aiov;
3288	auio.uio_iovcnt = 1;
3289	auio.uio_rw = UIO_READ;
3290	auio.uio_segflg = UIO_USERSPACE;
3291	auio.uio_td = td;
3292	auio.uio_resid = uap->count;
3293	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3294	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3295	loff = auio.uio_offset = fp->f_offset;
3296#ifdef MAC
3297	error = mac_check_vnode_readdir(td->td_ucred, vp);
3298	if (error == 0)
3299#endif
3300		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3301		    NULL);
3302	fp->f_offset = auio.uio_offset;
3303	VOP_UNLOCK(vp, 0, td);
3304	if (error) {
3305		fdrop(fp, td);
3306		return (error);
3307	}
3308	if (uap->count == auio.uio_resid) {
3309		if (union_dircheckp) {
3310			error = union_dircheckp(td, &vp, fp);
3311			if (error == -1)
3312				goto unionread;
3313			if (error) {
3314				fdrop(fp, td);
3315				return (error);
3316			}
3317		}
3318		/*
3319		 * XXX We could delay dropping the lock above but
3320		 * union_dircheckp complicates things.
3321		 */
3322		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3323		if ((vp->v_vflag & VV_ROOT) &&
3324		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3325			struct vnode *tvp = vp;
3326			vp = vp->v_mount->mnt_vnodecovered;
3327			VREF(vp);
3328			fp->f_vnode = vp;
3329			fp->f_data = vp;
3330			fp->f_offset = 0;
3331			vput(tvp);
3332			goto unionread;
3333		}
3334		VOP_UNLOCK(vp, 0, td);
3335	}
3336	if (uap->basep != NULL) {
3337		error = copyout(&loff, uap->basep, sizeof(long));
3338	}
3339	td->td_retval[0] = uap->count - auio.uio_resid;
3340	fdrop(fp, td);
3341	return (error);
3342}
3343#ifndef _SYS_SYSPROTO_H_
3344struct getdents_args {
3345	int fd;
3346	char *buf;
3347	size_t count;
3348};
3349#endif
3350int
3351getdents(td, uap)
3352	struct thread *td;
3353	register struct getdents_args /* {
3354		int fd;
3355		char *buf;
3356		u_int count;
3357	} */ *uap;
3358{
3359	struct getdirentries_args ap;
3360	ap.fd = uap->fd;
3361	ap.buf = uap->buf;
3362	ap.count = uap->count;
3363	ap.basep = NULL;
3364	return getdirentries(td, &ap);
3365}
3366
3367/*
3368 * Set the mode mask for creation of filesystem nodes.
3369 *
3370 * MP SAFE
3371 */
3372#ifndef _SYS_SYSPROTO_H_
3373struct umask_args {
3374	int	newmask;
3375};
3376#endif
3377int
3378umask(td, uap)
3379	struct thread *td;
3380	struct umask_args /* {
3381		int newmask;
3382	} */ *uap;
3383{
3384	register struct filedesc *fdp;
3385
3386	FILEDESC_LOCK(td->td_proc->p_fd);
3387	fdp = td->td_proc->p_fd;
3388	td->td_retval[0] = fdp->fd_cmask;
3389	fdp->fd_cmask = uap->newmask & ALLPERMS;
3390	FILEDESC_UNLOCK(td->td_proc->p_fd);
3391	return (0);
3392}
3393
3394/*
3395 * Void all references to file by ripping underlying filesystem
3396 * away from vnode.
3397 */
3398#ifndef _SYS_SYSPROTO_H_
3399struct revoke_args {
3400	char	*path;
3401};
3402#endif
3403/* ARGSUSED */
3404int
3405revoke(td, uap)
3406	struct thread *td;
3407	register struct revoke_args /* {
3408		char *path;
3409	} */ *uap;
3410{
3411	struct mount *mp;
3412	struct vnode *vp;
3413	struct vattr vattr;
3414	int error;
3415	struct nameidata nd;
3416
3417	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3418	if ((error = namei(&nd)) != 0)
3419		return (error);
3420	vp = nd.ni_vp;
3421	NDFREE(&nd, NDF_ONLY_PNBUF);
3422	if (vp->v_type != VCHR) {
3423		vput(vp);
3424		return (EINVAL);
3425	}
3426#ifdef MAC
3427	error = mac_check_vnode_revoke(td->td_ucred, vp);
3428	if (error) {
3429		vput(vp);
3430		return (error);
3431	}
3432#endif
3433	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3434	if (error) {
3435		vput(vp);
3436		return (error);
3437	}
3438	VOP_UNLOCK(vp, 0, td);
3439	if (td->td_ucred->cr_uid != vattr.va_uid) {
3440		error = suser_cred(td->td_ucred, PRISON_ROOT);
3441		if (error)
3442			goto out;
3443	}
3444	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3445		goto out;
3446	if (vcount(vp) > 1)
3447		VOP_REVOKE(vp, REVOKEALL);
3448	vn_finished_write(mp);
3449out:
3450	vrele(vp);
3451	return (error);
3452}
3453
/*
 * Convert a user file descriptor to a kernel file entry.
 * On success the file entry is returned with an extra reference taken via
 * fhold(); the caller is expected to release it with fdrop().
 */
3458int
3459getvnode(fdp, fd, fpp)
3460	struct filedesc *fdp;
3461	int fd;
3462	struct file **fpp;
3463{
3464	int error;
3465	struct file *fp;
3466
3467	fp = NULL;
3468	if (fdp == NULL)
3469		error = EBADF;
3470	else {
3471		FILEDESC_LOCK(fdp);
3472		if ((u_int)fd >= fdp->fd_nfiles ||
3473		    (fp = fdp->fd_ofiles[fd]) == NULL)
3474			error = EBADF;
3475		else if (fp->f_vnode == NULL) {
3476			fp = NULL;
3477			error = EINVAL;
3478		} else {
3479			fhold(fp);
3480			error = 0;
3481		}
3482		FILEDESC_UNLOCK(fdp);
3483	}
3484	*fpp = fp;
3485	return (error);
3486}
3487
3488/*
3489 * Get (NFS) file handle
3490 */
3491#ifndef _SYS_SYSPROTO_H_
3492struct getfh_args {
3493	char	*fname;
3494	fhandle_t *fhp;
3495};
3496#endif
3497int
3498getfh(td, uap)
3499	struct thread *td;
3500	register struct getfh_args *uap;
3501{
3502	struct nameidata nd;
3503	fhandle_t fh;
3504	register struct vnode *vp;
3505	int error;
3506
3507	/*
3508	 * Must be super user
3509	 */
3510	error = suser(td);
3511	if (error)
3512		return (error);
3513	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3514	error = namei(&nd);
3515	if (error)
3516		return (error);
3517	NDFREE(&nd, NDF_ONLY_PNBUF);
3518	vp = nd.ni_vp;
3519	bzero(&fh, sizeof(fh));
3520	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3521	error = VFS_VPTOFH(vp, &fh.fh_fid);
3522	vput(vp);
3523	if (error)
3524		return (error);
3525	error = copyout(&fh, uap->fhp, sizeof (fh));
3526	return (error);
3527}
3528
/*
 * System call used by rpc.lockd to translate an NFS file handle into
 * an open descriptor.
 *
 * Warning: do not remove the suser() call, or this becomes one giant
 * security hole.
 */
3536#ifndef _SYS_SYSPROTO_H_
3537struct fhopen_args {
3538	const struct fhandle *u_fhp;
3539	int flags;
3540};
3541#endif
3542int
3543fhopen(td, uap)
3544	struct thread *td;
3545	struct fhopen_args /* {
3546		const struct fhandle *u_fhp;
3547		int flags;
3548	} */ *uap;
3549{
3550	struct proc *p = td->td_proc;
3551	struct mount *mp;
3552	struct vnode *vp;
3553	struct fhandle fhp;
3554	struct vattr vat;
3555	struct vattr *vap = &vat;
3556	struct flock lf;
3557	struct file *fp;
3558	register struct filedesc *fdp = p->p_fd;
3559	int fmode, mode, error, type;
3560	struct file *nfp;
3561	int indx;
3562
3563	/*
3564	 * Must be super user
3565	 */
3566	error = suser(td);
3567	if (error)
3568		return (error);
3569
3570	fmode = FFLAGS(uap->flags);
3571	/* why not allow a non-read/write open for our lockd? */
3572	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3573		return (EINVAL);
3574	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3575	if (error)
3576		return(error);
3577	/* find the mount point */
3578	mp = vfs_getvfs(&fhp.fh_fsid);
3579	if (mp == NULL)
3580		return (ESTALE);
3581	/* now give me my vnode, it gets returned to me locked */
3582	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3583	if (error)
3584		return (error);
3585 	/*
3586	 * from now on we have to make sure not
3587	 * to forget about the vnode
3588	 * any error that causes an abort must vput(vp)
3589	 * just set error = err and 'goto bad;'.
3590	 */
3591
3592	/*
3593	 * from vn_open
3594	 */
3595	if (vp->v_type == VLNK) {
3596		error = EMLINK;
3597		goto bad;
3598	}
3599	if (vp->v_type == VSOCK) {
3600		error = EOPNOTSUPP;
3601		goto bad;
3602	}
3603	mode = 0;
3604	if (fmode & (FWRITE | O_TRUNC)) {
3605		if (vp->v_type == VDIR) {
3606			error = EISDIR;
3607			goto bad;
3608		}
3609		error = vn_writechk(vp);
3610		if (error)
3611			goto bad;
3612		mode |= VWRITE;
3613	}
3614	if (fmode & FREAD)
3615		mode |= VREAD;
3616	if (fmode & O_APPEND)
3617		mode |= VAPPEND;
3618#ifdef MAC
3619	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3620	if (error)
3621		goto bad;
3622#endif
3623	if (mode) {
3624		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3625		if (error)
3626			goto bad;
3627	}
3628	if (fmode & O_TRUNC) {
3629		VOP_UNLOCK(vp, 0, td);				/* XXX */
3630		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3631			vrele(vp);
3632			return (error);
3633		}
3634		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3635		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3636#ifdef MAC
3637		/*
3638		 * We don't yet have fp->f_cred, so use td->td_ucred, which
3639		 * should be right.
3640		 */
3641		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
3642		if (error == 0) {
3643#endif
3644			VATTR_NULL(vap);
3645			vap->va_size = 0;
3646			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3647#ifdef MAC
3648		}
3649#endif
3650		vn_finished_write(mp);
3651		if (error)
3652			goto bad;
3653	}
3654	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
3655	if (error)
3656		goto bad;
3657	/*
3658	 * Make sure that a VM object is created for VMIO support.
3659	 */
3660	if (vn_canvmio(vp) == TRUE) {
3661		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3662			goto bad;
3663	}
3664	if (fmode & FWRITE)
3665		vp->v_writecount++;
3666
3667	/*
3668	 * end of vn_open code
3669	 */
3670
3671	if ((error = falloc(td, &nfp, &indx)) != 0) {
3672		if (fmode & FWRITE)
3673			vp->v_writecount--;
3674		goto bad;
3675	}
3676	fp = nfp;
3677
3678	/*
3679	 * Hold an extra reference to avoid having fp ripped out
3680	 * from under us while we block in the lock op
3681	 */
3682	fhold(fp);
3683	nfp->f_vnode = vp;
3684	nfp->f_data = vp;
3685	nfp->f_flag = fmode & FMASK;
3686	nfp->f_ops = &vnops;
3687	nfp->f_type = DTYPE_VNODE;
3688	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3689		lf.l_whence = SEEK_SET;
3690		lf.l_start = 0;
3691		lf.l_len = 0;
3692		if (fmode & O_EXLOCK)
3693			lf.l_type = F_WRLCK;
3694		else
3695			lf.l_type = F_RDLCK;
3696		type = F_FLOCK;
3697		if ((fmode & FNONBLOCK) == 0)
3698			type |= F_WAIT;
3699		VOP_UNLOCK(vp, 0, td);
3700		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3701			    type)) != 0) {
3702			/*
3703			 * The lock request failed.  Normally close the
3704			 * descriptor but handle the case where someone might
3705			 * have dup()d or close()d it when we weren't looking.
3706			 */
3707			FILEDESC_LOCK(fdp);
3708			if (fdp->fd_ofiles[indx] == fp) {
3709				fdp->fd_ofiles[indx] = NULL;
3710				FILEDESC_UNLOCK(fdp);
3711				fdrop(fp, td);
3712			} else
3713				FILEDESC_UNLOCK(fdp);
3714			/*
3715			 * release our private reference
3716			 */
3717			fdrop(fp, td);
3718			return(error);
3719		}
3720		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3721		fp->f_flag |= FHASLOCK;
3722	}
3723	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3724		vfs_object_create(vp, td, td->td_ucred);
3725
3726	VOP_UNLOCK(vp, 0, td);
3727	fdrop(fp, td);
3728	td->td_retval[0] = indx;
3729	return (0);
3730
3731bad:
3732	vput(vp);
3733	return (error);
3734}
3735
3736/*
3737 * Stat an (NFS) file handle.
3738 */
3739#ifndef _SYS_SYSPROTO_H_
3740struct fhstat_args {
3741	struct fhandle *u_fhp;
3742	struct stat *sb;
3743};
3744#endif
3745int
3746fhstat(td, uap)
3747	struct thread *td;
3748	register struct fhstat_args /* {
3749		struct fhandle *u_fhp;
3750		struct stat *sb;
3751	} */ *uap;
3752{
3753	struct stat sb;
3754	fhandle_t fh;
3755	struct mount *mp;
3756	struct vnode *vp;
3757	int error;
3758
3759	/*
3760	 * Must be super user
3761	 */
3762	error = suser(td);
3763	if (error)
3764		return (error);
3765
3766	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
3767	if (error)
3768		return (error);
3769
3770	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3771		return (ESTALE);
3772	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3773		return (error);
3774	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
3775	vput(vp);
3776	if (error)
3777		return (error);
3778	error = copyout(&sb, uap->sb, sizeof(sb));
3779	return (error);
3780}
3781
3782/*
3783 * Implement fstatfs() for (NFS) file handles.
3784 */
3785#ifndef _SYS_SYSPROTO_H_
3786struct fhstatfs_args {
3787	struct fhandle *u_fhp;
3788	struct statfs *buf;
3789};
3790#endif
3791int
3792fhstatfs(td, uap)
3793	struct thread *td;
3794	struct fhstatfs_args /* {
3795		struct fhandle *u_fhp;
3796		struct statfs *buf;
3797	} */ *uap;
3798{
3799	struct statfs *sp;
3800	struct mount *mp;
3801	struct vnode *vp;
3802	struct statfs sb;
3803	fhandle_t fh;
3804	int error;
3805
3806	/*
3807	 * Must be super user
3808	 */
3809	error = suser(td);
3810	if (error)
3811		return (error);
3812
3813	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
3814		return (error);
3815
3816	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3817		return (ESTALE);
3818	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3819		return (error);
3820	mp = vp->v_mount;
3821	sp = &mp->mnt_stat;
3822	vput(vp);
3823#ifdef MAC
3824	error = mac_check_mount_stat(td->td_ucred, mp);
3825	if (error)
3826		return (error);
3827#endif
3828	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3829		return (error);
3830	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3831	if (suser(td)) {
3832		bcopy(sp, &sb, sizeof(sb));
3833		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3834		sp = &sb;
3835	}
3836	return (copyout(sp, uap->buf, sizeof(*sp)));
3837}
3838
3839/*
3840 * Syscall to push extended attribute configuration information into the
3841 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3842 * a command (int cmd), and attribute name and misc data.  For now, the
3843 * attribute name is left in userspace for consumption by the VFS_op.
3844 * It will probably be changed to be copied into sysspace by the
3845 * syscall in the future, once issues with various consumers of the
3846 * attribute code have raised their hands.
3847 *
3848 * Currently this is used only by UFS Extended Attributes.
3849 */
3850int
3851extattrctl(td, uap)
3852	struct thread *td;
3853	struct extattrctl_args /* {
3854		const char *path;
3855		int cmd;
3856		const char *filename;
3857		int attrnamespace;
3858		const char *attrname;
3859	} */ *uap;
3860{
3861	struct vnode *filename_vp;
3862	struct nameidata nd;
3863	struct mount *mp, *mp_writable;
3864	char attrname[EXTATTR_MAXNAMELEN];
3865	int error;
3866
3867	/*
3868	 * uap->attrname is not always defined.  We check again later when we
3869	 * invoke the VFS call so as to pass in NULL there if needed.
3870	 */
3871	if (uap->attrname != NULL) {
3872		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3873		    NULL);
3874		if (error)
3875			return (error);
3876	}
3877
3878	/*
3879	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3880	 * which VFS_EXTATTRCTL() will later release.
3881	 */
3882	filename_vp = NULL;
3883	if (uap->filename != NULL) {
3884		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3885		    uap->filename, td);
3886		error = namei(&nd);
3887		if (error)
3888			return (error);
3889		filename_vp = nd.ni_vp;
3890		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3891	}
3892
3893	/* uap->path is always defined. */
3894	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3895	error = namei(&nd);
3896	if (error) {
3897		if (filename_vp != NULL)
3898			vput(filename_vp);
3899		return (error);
3900	}
3901	mp = nd.ni_vp->v_mount;
3902	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3903	NDFREE(&nd, 0);
3904	if (error) {
3905		if (filename_vp != NULL)
3906			vput(filename_vp);
3907		return (error);
3908	}
3909
3910	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3911	    uap->attrname != NULL ? attrname : NULL, td);
3912
3913	vn_finished_write(mp_writable);
3914	/*
3915	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3916	 * filename_vp, so vrele it if it is defined.
3917	 */
3918	if (filename_vp != NULL)
3919		vrele(filename_vp);
3920	return (error);
3921}
3922
3923/*-
3924 * Set a named extended attribute on a file or directory
3925 *
3926 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3927 *            kernelspace string pointer "attrname", userspace buffer
3928 *            pointer "data", buffer length "nbytes", thread "td".
3929 * Returns: 0 on success, an error number otherwise
3930 * Locks: none
3931 * References: vp must be a valid reference for the duration of the call
3932 */
3933static int
3934extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3935    void *data, size_t nbytes, struct thread *td)
3936{
3937	struct mount *mp;
3938	struct uio auio;
3939	struct iovec aiov;
3940	ssize_t cnt;
3941	int error;
3942
3943	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
3944	if (error)
3945		return (error);
3946	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3947	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3948
3949	aiov.iov_base = data;
3950	aiov.iov_len = nbytes;
3951	auio.uio_iov = &aiov;
3952	auio.uio_iovcnt = 1;
3953	auio.uio_offset = 0;
3954	if (nbytes > INT_MAX) {
3955		error = EINVAL;
3956		goto done;
3957	}
3958	auio.uio_resid = nbytes;
3959	auio.uio_rw = UIO_WRITE;
3960	auio.uio_segflg = UIO_USERSPACE;
3961	auio.uio_td = td;
3962	cnt = nbytes;
3963
3964#ifdef MAC
3965	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3966	    attrname, &auio);
3967	if (error)
3968		goto done;
3969#endif
3970
3971	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3972	    td->td_ucred, td);
3973	cnt -= auio.uio_resid;
3974	td->td_retval[0] = cnt;
3975
3976done:
3977	VOP_UNLOCK(vp, 0, td);
3978	vn_finished_write(mp);
3979	return (error);
3980}
3981
3982int
3983extattr_set_fd(td, uap)
3984	struct thread *td;
3985	struct extattr_set_fd_args /* {
3986		int fd;
3987		int attrnamespace;
3988		const char *attrname;
3989		void *data;
3990		size_t nbytes;
3991	} */ *uap;
3992{
3993	struct file *fp;
3994	char attrname[EXTATTR_MAXNAMELEN];
3995	int error;
3996
3997	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3998	if (error)
3999		return (error);
4000
4001	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4002	if (error)
4003		return (error);
4004
4005	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4006	    attrname, uap->data, uap->nbytes, td);
4007	fdrop(fp, td);
4008
4009	return (error);
4010}
4011
4012int
4013extattr_set_file(td, uap)
4014	struct thread *td;
4015	struct extattr_set_file_args /* {
4016		const char *path;
4017		int attrnamespace;
4018		const char *attrname;
4019		void *data;
4020		size_t nbytes;
4021	} */ *uap;
4022{
4023	struct nameidata nd;
4024	char attrname[EXTATTR_MAXNAMELEN];
4025	int error;
4026
4027	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4028	if (error)
4029		return (error);
4030
4031	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4032	error = namei(&nd);
4033	if (error)
4034		return (error);
4035	NDFREE(&nd, NDF_ONLY_PNBUF);
4036
4037	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4038	    uap->data, uap->nbytes, td);
4039
4040	vrele(nd.ni_vp);
4041	return (error);
4042}
4043
4044int
4045extattr_set_link(td, uap)
4046	struct thread *td;
4047	struct extattr_set_link_args /* {
4048		const char *path;
4049		int attrnamespace;
4050		const char *attrname;
4051		void *data;
4052		size_t nbytes;
4053	} */ *uap;
4054{
4055	struct nameidata nd;
4056	char attrname[EXTATTR_MAXNAMELEN];
4057	int error;
4058
4059	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4060	if (error)
4061		return (error);
4062
4063	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4064	error = namei(&nd);
4065	if (error)
4066		return (error);
4067	NDFREE(&nd, NDF_ONLY_PNBUF);
4068
4069	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4070	    uap->data, uap->nbytes, td);
4071
4072	vrele(nd.ni_vp);
4073	return (error);
4074}
4075
4076/*-
4077 * Get a named extended attribute on a file or directory
4078 *
4079 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4080 *            kernelspace string pointer "attrname", userspace buffer
4081 *            pointer "data", buffer length "nbytes", thread "td".
4082 * Returns: 0 on success, an error number otherwise
4083 * Locks: none
4084 * References: vp must be a valid reference for the duration of the call
4085 */
4086static int
4087extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4088    void *data, size_t nbytes, struct thread *td)
4089{
4090	struct uio auio, *auiop;
4091	struct iovec aiov;
4092	ssize_t cnt;
4093	size_t size, *sizep;
4094	int error;
4095
4096	/*
4097	 * XXX: Temporary API compatibility for applications that know
4098	 * about this hack ("" means list), but haven't been updated
4099	 * for the extattr_list_*() system calls yet.  This will go
4100	 * away for FreeBSD 5.3.
4101	 */
4102	if (strlen(attrname) == 0)
4103		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4104
4105	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4106	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4107
4108	/*
4109	 * Slightly unusual semantics: if the user provides a NULL data
4110	 * pointer, they don't want to receive the data, just the
4111	 * maximum read length.
4112	 */
4113	auiop = NULL;
4114	sizep = NULL;
4115	cnt = 0;
4116	if (data != NULL) {
4117		aiov.iov_base = data;
4118		aiov.iov_len = nbytes;
4119		auio.uio_iov = &aiov;
4120		auio.uio_offset = 0;
4121		if (nbytes > INT_MAX) {
4122			error = EINVAL;
4123			goto done;
4124		}
4125		auio.uio_resid = nbytes;
4126		auio.uio_rw = UIO_READ;
4127		auio.uio_segflg = UIO_USERSPACE;
4128		auio.uio_td = td;
4129		auiop = &auio;
4130		cnt = nbytes;
4131	} else
4132		sizep = &size;
4133
4134#ifdef MAC
4135	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4136	    attrname, &auio);
4137	if (error)
4138		goto done;
4139#endif
4140
4141	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4142	    td->td_ucred, td);
4143
4144	if (auiop != NULL) {
4145		cnt -= auio.uio_resid;
4146		td->td_retval[0] = cnt;
4147	} else
4148		td->td_retval[0] = size;
4149
4150done:
4151	VOP_UNLOCK(vp, 0, td);
4152	return (error);
4153}
4154
4155int
4156extattr_get_fd(td, uap)
4157	struct thread *td;
4158	struct extattr_get_fd_args /* {
4159		int fd;
4160		int attrnamespace;
4161		const char *attrname;
4162		void *data;
4163		size_t nbytes;
4164	} */ *uap;
4165{
4166	struct file *fp;
4167	char attrname[EXTATTR_MAXNAMELEN];
4168	int error;
4169
4170	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4171	if (error)
4172		return (error);
4173
4174	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4175	if (error)
4176		return (error);
4177
4178	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4179	    attrname, uap->data, uap->nbytes, td);
4180
4181	fdrop(fp, td);
4182	return (error);
4183}
4184
4185int
4186extattr_get_file(td, uap)
4187	struct thread *td;
4188	struct extattr_get_file_args /* {
4189		const char *path;
4190		int attrnamespace;
4191		const char *attrname;
4192		void *data;
4193		size_t nbytes;
4194	} */ *uap;
4195{
4196	struct nameidata nd;
4197	char attrname[EXTATTR_MAXNAMELEN];
4198	int error;
4199
4200	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4201	if (error)
4202		return (error);
4203
4204	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4205	error = namei(&nd);
4206	if (error)
4207		return (error);
4208	NDFREE(&nd, NDF_ONLY_PNBUF);
4209
4210	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4211	    uap->data, uap->nbytes, td);
4212
4213	vrele(nd.ni_vp);
4214	return (error);
4215}
4216
4217int
4218extattr_get_link(td, uap)
4219	struct thread *td;
4220	struct extattr_get_link_args /* {
4221		const char *path;
4222		int attrnamespace;
4223		const char *attrname;
4224		void *data;
4225		size_t nbytes;
4226	} */ *uap;
4227{
4228	struct nameidata nd;
4229	char attrname[EXTATTR_MAXNAMELEN];
4230	int error;
4231
4232	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4233	if (error)
4234		return (error);
4235
4236	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4237	error = namei(&nd);
4238	if (error)
4239		return (error);
4240	NDFREE(&nd, NDF_ONLY_PNBUF);
4241
4242	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4243	    uap->data, uap->nbytes, td);
4244
4245	vrele(nd.ni_vp);
4246	return (error);
4247}
4248
4249/*
4250 * extattr_delete_vp(): Delete a named extended attribute on a file or
4251 *                      directory
4252 *
4253 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4254 *            kernelspace string pointer "attrname", proc "p"
4255 * Returns: 0 on success, an error number otherwise
4256 * Locks: none
4257 * References: vp must be a valid reference for the duration of the call
4258 */
4259static int
4260extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4261    struct thread *td)
4262{
4263	struct mount *mp;
4264	int error;
4265
4266	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4267	if (error)
4268		return (error);
4269	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4270	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4271
4272#ifdef MAC
4273	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4274	    attrname);
4275	if (error)
4276		goto done;
4277#endif
4278
4279	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4280	    td);
4281	if (error == EOPNOTSUPP)
4282		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4283		    td->td_ucred, td);
4284#ifdef MAC
4285done:
4286#endif
4287	VOP_UNLOCK(vp, 0, td);
4288	vn_finished_write(mp);
4289	return (error);
4290}
4291
4292int
4293extattr_delete_fd(td, uap)
4294	struct thread *td;
4295	struct extattr_delete_fd_args /* {
4296		int fd;
4297		int attrnamespace;
4298		const char *attrname;
4299	} */ *uap;
4300{
4301	struct file *fp;
4302	struct vnode *vp;
4303	char attrname[EXTATTR_MAXNAMELEN];
4304	int error;
4305
4306	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4307	if (error)
4308		return (error);
4309
4310	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4311	if (error)
4312		return (error);
4313	vp = fp->f_vnode;
4314
4315	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4316	fdrop(fp, td);
4317	return (error);
4318}
4319
4320int
4321extattr_delete_file(td, uap)
4322	struct thread *td;
4323	struct extattr_delete_file_args /* {
4324		const char *path;
4325		int attrnamespace;
4326		const char *attrname;
4327	} */ *uap;
4328{
4329	struct nameidata nd;
4330	char attrname[EXTATTR_MAXNAMELEN];
4331	int error;
4332
4333	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4334	if (error)
4335		return(error);
4336
4337	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4338	error = namei(&nd);
4339	if (error)
4340		return(error);
4341	NDFREE(&nd, NDF_ONLY_PNBUF);
4342
4343	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4344	vrele(nd.ni_vp);
4345	return(error);
4346}
4347
4348int
4349extattr_delete_link(td, uap)
4350	struct thread *td;
4351	struct extattr_delete_link_args /* {
4352		const char *path;
4353		int attrnamespace;
4354		const char *attrname;
4355	} */ *uap;
4356{
4357	struct nameidata nd;
4358	char attrname[EXTATTR_MAXNAMELEN];
4359	int error;
4360
4361	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4362	if (error)
4363		return(error);
4364
4365	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4366	error = namei(&nd);
4367	if (error)
4368		return(error);
4369	NDFREE(&nd, NDF_ONLY_PNBUF);
4370
4371	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4372	vrele(nd.ni_vp);
4373	return(error);
4374}
4375
4376/*-
4377 * Retrieve a list of extended attributes on a file or directory.
4378 *
4379 * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4380 *            userspace buffer pointer "data", buffer length "nbytes",
4381 *            thread "td".
4382 * Returns: 0 on success, an error number otherwise
4383 * Locks: none
4384 * References: vp must be a valid reference for the duration of the call
4385 */
4386static int
4387extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4388    size_t nbytes, struct thread *td)
4389{
4390	struct uio auio, *auiop;
4391	size_t size, *sizep;
4392	struct iovec aiov;
4393	ssize_t cnt;
4394	int error;
4395
4396	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4397	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4398
4399	auiop = NULL;
4400	sizep = NULL;
4401	cnt = 0;
4402	if (data != NULL) {
4403		aiov.iov_base = data;
4404		aiov.iov_len = nbytes;
4405		auio.uio_iov = &aiov;
4406		auio.uio_offset = 0;
4407		if (nbytes > INT_MAX) {
4408			error = EINVAL;
4409			goto done;
4410		}
4411		auio.uio_resid = nbytes;
4412		auio.uio_rw = UIO_READ;
4413		auio.uio_segflg = UIO_USERSPACE;
4414		auio.uio_td = td;
4415		auiop = &auio;
4416		cnt = nbytes;
4417	} else
4418		sizep = &size;
4419
4420#ifdef MAC
4421	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4422	if (error)
4423		goto done;
4424#endif
4425
4426	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4427	    td->td_ucred, td);
4428
4429	if (auiop != NULL) {
4430		cnt -= auio.uio_resid;
4431		td->td_retval[0] = cnt;
4432	} else
4433		td->td_retval[0] = size;
4434
4435done:
4436	VOP_UNLOCK(vp, 0, td);
4437	return (error);
4438}
4439
4440
4441int
4442extattr_list_fd(td, uap)
4443	struct thread *td;
4444	struct extattr_list_fd_args /* {
4445		int fd;
4446		int attrnamespace;
4447		void *data;
4448		size_t nbytes;
4449	} */ *uap;
4450{
4451	struct file *fp;
4452	int error;
4453
4454	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4455	if (error)
4456		return (error);
4457
4458	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4459	    uap->nbytes, td);
4460
4461	fdrop(fp, td);
4462	return (error);
4463}
4464
4465int
4466extattr_list_file(td, uap)
4467	struct thread*td;
4468	struct extattr_list_file_args /* {
4469		const char *path;
4470		int attrnamespace;
4471		void *data;
4472		size_t nbytes;
4473	} */ *uap;
4474{
4475	struct nameidata nd;
4476	int error;
4477
4478	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4479	error = namei(&nd);
4480	if (error)
4481		return (error);
4482	NDFREE(&nd, NDF_ONLY_PNBUF);
4483
4484	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4485	    uap->nbytes, td);
4486
4487	vrele(nd.ni_vp);
4488	return (error);
4489}
4490
4491int
4492extattr_list_link(td, uap)
4493	struct thread*td;
4494	struct extattr_list_link_args /* {
4495		const char *path;
4496		int attrnamespace;
4497		void *data;
4498		size_t nbytes;
4499	} */ *uap;
4500{
4501	struct nameidata nd;
4502	int error;
4503
4504	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4505	error = namei(&nd);
4506	if (error)
4507		return (error);
4508	NDFREE(&nd, NDF_ONLY_PNBUF);
4509
4510	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4511	    uap->nbytes, td);
4512
4513	vrele(nd.ni_vp);
4514	return (error);
4515}
4516
4517