vfs_extattr.c revision 101160
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 * $FreeBSD: head/sys/kern/vfs_extattr.c 101160 2002-08-01 15:37:12Z rwatson $
40 */
41
42/* For 4.3 integer FS ID compatibility */
43#include "opt_compat.h"
44#include "opt_mac.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/sysent.h>
51#include <sys/malloc.h>
52#include <sys/mac.h>
53#include <sys/mount.h>
54#include <sys/mutex.h>
55#include <sys/sysproto.h>
56#include <sys/namei.h>
57#include <sys/filedesc.h>
58#include <sys/kernel.h>
59#include <sys/fcntl.h>
60#include <sys/file.h>
61#include <sys/linker.h>
62#include <sys/stat.h>
63#include <sys/sx.h>
64#include <sys/unistd.h>
65#include <sys/vnode.h>
66#include <sys/proc.h>
67#include <sys/dirent.h>
68#include <sys/extattr.h>
69#include <sys/jail.h>
70#include <sys/sysctl.h>
71
72#include <machine/limits.h>
73#include <machine/stdarg.h>
74
75#include <vm/vm.h>
76#include <vm/vm_object.h>
77#include <vm/vm_page.h>
78#include <vm/uma.h>
79
/*
 * Forward declarations of helpers that are private to this file.
 */
static int change_dir(struct nameidata *ndp, struct thread *td);
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);

/*
 * Global function-pointer hooks.  They are not assigned in the visible
 * part of this file (presumably installed by other kernel code -- confirm).
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
int (*softdep_fsync_hook)(struct vnode *);
93
94/*
95 * Sync each mounted filesystem.
96 */
97#ifndef _SYS_SYSPROTO_H_
98struct sync_args {
99        int     dummy;
100};
101#endif
102
103#ifdef DEBUG
104static int syncprt = 0;
105SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
106#endif
107
108/* ARGSUSED */
109int
110sync(td, uap)
111	struct thread *td;
112	struct sync_args *uap;
113{
114	struct mount *mp, *nmp;
115	int asyncflag;
116
117	mtx_lock(&mountlist_mtx);
118	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
119		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
120			nmp = TAILQ_NEXT(mp, mnt_list);
121			continue;
122		}
123		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
124		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
125			asyncflag = mp->mnt_flag & MNT_ASYNC;
126			mp->mnt_flag &= ~MNT_ASYNC;
127			vfs_msync(mp, MNT_NOWAIT);
128			VFS_SYNC(mp, MNT_NOWAIT,
129			    ((td != NULL) ? td->td_ucred : NOCRED), td);
130			mp->mnt_flag |= asyncflag;
131			vn_finished_write(mp);
132		}
133		mtx_lock(&mountlist_mtx);
134		nmp = TAILQ_NEXT(mp, mnt_list);
135		vfs_unbusy(mp, td);
136	}
137	mtx_unlock(&mountlist_mtx);
138#if 0
139/*
140 * XXX don't call vfs_bufstats() yet because that routine
141 * was not imported in the Lite2 merge.
142 */
143#ifdef DIAGNOSTIC
144	if (syncprt)
145		vfs_bufstats();
146#endif /* DIAGNOSTIC */
147#endif
148	return (0);
149}
150
151/* XXX PRISON: could be per prison flag */
152static int prison_quotas;
153#if 0
154SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
155#endif
156
157/*
158 * Change filesystem quotas.
159 */
160#ifndef _SYS_SYSPROTO_H_
161struct quotactl_args {
162	char *path;
163	int cmd;
164	int uid;
165	caddr_t arg;
166};
167#endif
168/* ARGSUSED */
169int
170quotactl(td, uap)
171	struct thread *td;
172	register struct quotactl_args /* {
173		syscallarg(char *) path;
174		syscallarg(int) cmd;
175		syscallarg(int) uid;
176		syscallarg(caddr_t) arg;
177	} */ *uap;
178{
179	struct mount *mp;
180	int error;
181	struct nameidata nd;
182
183	if (jailed(td->td_ucred) && !prison_quotas)
184		return (EPERM);
185	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
186	if ((error = namei(&nd)) != 0)
187		return (error);
188	NDFREE(&nd, NDF_ONLY_PNBUF);
189	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
190	vrele(nd.ni_vp);
191	if (error)
192		return (error);
193	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
194	    SCARG(uap, arg), td);
195	vn_finished_write(mp);
196	return (error);
197}
198
199/*
200 * Get filesystem statistics.
201 */
202#ifndef _SYS_SYSPROTO_H_
203struct statfs_args {
204	char *path;
205	struct statfs *buf;
206};
207#endif
208/* ARGSUSED */
209int
210statfs(td, uap)
211	struct thread *td;
212	register struct statfs_args /* {
213		syscallarg(char *) path;
214		syscallarg(struct statfs *) buf;
215	} */ *uap;
216{
217	register struct mount *mp;
218	register struct statfs *sp;
219	int error;
220	struct nameidata nd;
221	struct statfs sb;
222
223	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
224	if ((error = namei(&nd)) != 0)
225		return (error);
226	mp = nd.ni_vp->v_mount;
227	sp = &mp->mnt_stat;
228	NDFREE(&nd, NDF_ONLY_PNBUF);
229	vrele(nd.ni_vp);
230#ifdef MAC
231	error = mac_check_mount_stat(td->td_ucred, mp);
232	if (error)
233		return (error);
234#endif
235	error = VFS_STATFS(mp, sp, td);
236	if (error)
237		return (error);
238	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
239	if (suser(td)) {
240		bcopy(sp, &sb, sizeof(sb));
241		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
242		sp = &sb;
243	}
244	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
245}
246
247/*
248 * Get filesystem statistics.
249 */
250#ifndef _SYS_SYSPROTO_H_
251struct fstatfs_args {
252	int fd;
253	struct statfs *buf;
254};
255#endif
256/* ARGSUSED */
257int
258fstatfs(td, uap)
259	struct thread *td;
260	register struct fstatfs_args /* {
261		syscallarg(int) fd;
262		syscallarg(struct statfs *) buf;
263	} */ *uap;
264{
265	struct file *fp;
266	struct mount *mp;
267	register struct statfs *sp;
268	int error;
269	struct statfs sb;
270
271	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
272		return (error);
273	mp = ((struct vnode *)fp->f_data)->v_mount;
274	fdrop(fp, td);
275	if (mp == NULL)
276		return (EBADF);
277#ifdef MAC
278	error = mac_check_mount_stat(td->td_ucred, mp);
279	if (error)
280		return (error);
281#endif
282	sp = &mp->mnt_stat;
283	error = VFS_STATFS(mp, sp, td);
284	if (error)
285		return (error);
286	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
287	if (suser(td)) {
288		bcopy(sp, &sb, sizeof(sb));
289		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
290		sp = &sb;
291	}
292	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
293}
294
295/*
296 * Get statistics on all filesystems.
297 */
298#ifndef _SYS_SYSPROTO_H_
299struct getfsstat_args {
300	struct statfs *buf;
301	long bufsize;
302	int flags;
303};
304#endif
305int
306getfsstat(td, uap)
307	struct thread *td;
308	register struct getfsstat_args /* {
309		syscallarg(struct statfs *) buf;
310		syscallarg(long) bufsize;
311		syscallarg(int) flags;
312	} */ *uap;
313{
314	register struct mount *mp, *nmp;
315	register struct statfs *sp;
316	caddr_t sfsp;
317	long count, maxcount, error;
318
319	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
320	sfsp = (caddr_t)SCARG(uap, buf);
321	count = 0;
322	mtx_lock(&mountlist_mtx);
323	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
324#ifdef MAC
325		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
326			nmp = TAILQ_NEXT(mp, mnt_list);
327			continue;
328		}
329#endif
330		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
331			nmp = TAILQ_NEXT(mp, mnt_list);
332			continue;
333		}
334		if (sfsp && count < maxcount) {
335			sp = &mp->mnt_stat;
336			/*
337			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
338			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
339			 * overrides MNT_WAIT.
340			 */
341			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
342			    (SCARG(uap, flags) & MNT_WAIT)) &&
343			    (error = VFS_STATFS(mp, sp, td))) {
344				mtx_lock(&mountlist_mtx);
345				nmp = TAILQ_NEXT(mp, mnt_list);
346				vfs_unbusy(mp, td);
347				continue;
348			}
349			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
350			error = copyout(sp, sfsp, sizeof(*sp));
351			if (error) {
352				vfs_unbusy(mp, td);
353				return (error);
354			}
355			sfsp += sizeof(*sp);
356		}
357		count++;
358		mtx_lock(&mountlist_mtx);
359		nmp = TAILQ_NEXT(mp, mnt_list);
360		vfs_unbusy(mp, td);
361	}
362	mtx_unlock(&mountlist_mtx);
363	if (sfsp && count > maxcount)
364		td->td_retval[0] = maxcount;
365	else
366		td->td_retval[0] = count;
367	return (0);
368}
369
370/*
371 * Change current working directory to a given file descriptor.
372 */
373#ifndef _SYS_SYSPROTO_H_
374struct fchdir_args {
375	int	fd;
376};
377#endif
378/* ARGSUSED */
379int
380fchdir(td, uap)
381	struct thread *td;
382	struct fchdir_args /* {
383		syscallarg(int) fd;
384	} */ *uap;
385{
386	register struct filedesc *fdp = td->td_proc->p_fd;
387	struct vnode *vp, *tdp, *vpold;
388	struct mount *mp;
389	struct file *fp;
390	int error;
391
392	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
393		return (error);
394	vp = (struct vnode *)fp->f_data;
395	VREF(vp);
396	fdrop(fp, td);
397	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
398	if (vp->v_type != VDIR)
399		error = ENOTDIR;
400#ifdef MAC
401	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
402	}
403#endif
404	else
405		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
406	while (!error && (mp = vp->v_mountedhere) != NULL) {
407		if (vfs_busy(mp, 0, 0, td))
408			continue;
409		error = VFS_ROOT(mp, &tdp);
410		vfs_unbusy(mp, td);
411		if (error)
412			break;
413		vput(vp);
414		vp = tdp;
415	}
416	if (error) {
417		vput(vp);
418		return (error);
419	}
420	VOP_UNLOCK(vp, 0, td);
421	FILEDESC_LOCK(fdp);
422	vpold = fdp->fd_cdir;
423	fdp->fd_cdir = vp;
424	FILEDESC_UNLOCK(fdp);
425	vrele(vpold);
426	return (0);
427}
428
429/*
430 * Change current working directory (``.'').
431 */
432#ifndef _SYS_SYSPROTO_H_
433struct chdir_args {
434	char	*path;
435};
436#endif
437/* ARGSUSED */
438int
439chdir(td, uap)
440	struct thread *td;
441	struct chdir_args /* {
442		syscallarg(char *) path;
443	} */ *uap;
444{
445	register struct filedesc *fdp = td->td_proc->p_fd;
446	int error;
447	struct nameidata nd;
448	struct vnode *vp;
449
450	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
451	    SCARG(uap, path), td);
452	if ((error = change_dir(&nd, td)) != 0)
453		return (error);
454	NDFREE(&nd, NDF_ONLY_PNBUF);
455	FILEDESC_LOCK(fdp);
456	vp = fdp->fd_cdir;
457	fdp->fd_cdir = nd.ni_vp;
458	FILEDESC_UNLOCK(fdp);
459	vrele(vp);
460	return (0);
461}
462
463/*
464 * Helper function for raised chroot(2) security function:  Refuse if
465 * any filedescriptors are open directories.
466 */
467static int
468chroot_refuse_vdir_fds(fdp)
469	struct filedesc *fdp;
470{
471	struct vnode *vp;
472	struct file *fp;
473	int fd;
474
475	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
476	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
477		fp = fget_locked(fdp, fd);
478		if (fp == NULL)
479			continue;
480		if (fp->f_type == DTYPE_VNODE) {
481			vp = (struct vnode *)fp->f_data;
482			if (vp->v_type == VDIR)
483				return (EPERM);
484		}
485	}
486	return (0);
487}
488
489/*
490 * This sysctl determines if we will allow a process to chroot(2) if it
491 * has a directory open:
492 *	0: disallowed for all processes.
493 *	1: allowed for processes that were not already chroot(2)'ed.
494 *	2: allowed for all processes.
495 */
496
497static int chroot_allow_open_directories = 1;
498
499SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
500     &chroot_allow_open_directories, 0, "");
501
502/*
503 * Change notion of root (``/'') directory.
504 */
505#ifndef _SYS_SYSPROTO_H_
506struct chroot_args {
507	char	*path;
508};
509#endif
510/* ARGSUSED */
511int
512chroot(td, uap)
513	struct thread *td;
514	struct chroot_args /* {
515		syscallarg(char *) path;
516	} */ *uap;
517{
518	register struct filedesc *fdp = td->td_proc->p_fd;
519	int error;
520	struct nameidata nd;
521	struct vnode *vp;
522
523	error = suser_cred(td->td_ucred, PRISON_ROOT);
524	if (error)
525		return (error);
526	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
527	    SCARG(uap, path), td);
528	mtx_lock(&Giant);
529	if ((error = change_dir(&nd, td)) != 0)
530		goto error;
531#ifdef MAC
532	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
533		goto error;
534#endif
535	FILEDESC_LOCK(fdp);
536	if (chroot_allow_open_directories == 0 ||
537	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
538		error = chroot_refuse_vdir_fds(fdp);
539		if (error)
540			goto error_unlock;
541	}
542	vp = fdp->fd_rdir;
543	fdp->fd_rdir = nd.ni_vp;
544	if (!fdp->fd_jdir) {
545		fdp->fd_jdir = nd.ni_vp;
546                VREF(fdp->fd_jdir);
547	}
548	FILEDESC_UNLOCK(fdp);
549	NDFREE(&nd, NDF_ONLY_PNBUF);
550	vrele(vp);
551	mtx_unlock(&Giant);
552	return (0);
553error_unlock:
554	FILEDESC_UNLOCK(fdp);
555error:
556	mtx_unlock(&Giant);
557	NDFREE(&nd, 0);
558	return (error);
559}
560
561/*
562 * Common routine for chroot and chdir.
563 */
564static int
565change_dir(ndp, td)
566	register struct nameidata *ndp;
567	struct thread *td;
568{
569	struct vnode *vp;
570	int error;
571
572	error = namei(ndp);
573	if (error)
574		return (error);
575	vp = ndp->ni_vp;
576	if (vp->v_type != VDIR)
577		error = ENOTDIR;
578#ifdef MAC
579	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
580	}
581#endif
582	else
583		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
584	if (error)
585		vput(vp);
586	else
587		VOP_UNLOCK(vp, 0, td);
588	return (error);
589}
590
591/*
592 * Check permissions, allocate an open file structure,
593 * and call the device open routine if any.
594 */
595#ifndef _SYS_SYSPROTO_H_
596struct open_args {
597	char	*path;
598	int	flags;
599	int	mode;
600};
601#endif
602int
603open(td, uap)
604	struct thread *td;
605	register struct open_args /* {
606		syscallarg(char *) path;
607		syscallarg(int) flags;
608		syscallarg(int) mode;
609	} */ *uap;
610{
611	struct proc *p = td->td_proc;
612	struct filedesc *fdp = p->p_fd;
613	struct file *fp;
614	struct vnode *vp;
615	struct vattr vat;
616	struct mount *mp;
617	int cmode, flags, oflags;
618	struct file *nfp;
619	int type, indx, error;
620	struct flock lf;
621	struct nameidata nd;
622
623	oflags = SCARG(uap, flags);
624	if ((oflags & O_ACCMODE) == O_ACCMODE)
625		return (EINVAL);
626	flags = FFLAGS(oflags);
627	error = falloc(td, &nfp, &indx);
628	if (error)
629		return (error);
630	fp = nfp;
631	FILEDESC_LOCK(fdp);
632	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
633	FILEDESC_UNLOCK(fdp);
634	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
635	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
636	/*
637	 * Bump the ref count to prevent another process from closing
638	 * the descriptor while we are blocked in vn_open()
639	 */
640	fhold(fp);
641	error = vn_open(&nd, &flags, cmode);
642	if (error) {
643		/*
644		 * release our own reference
645		 */
646		fdrop(fp, td);
647
648		/*
649		 * handle special fdopen() case.  bleh.  dupfdopen() is
650		 * responsible for dropping the old contents of ofiles[indx]
651		 * if it succeeds.
652		 */
653		if ((error == ENODEV || error == ENXIO) &&
654		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
655		    (error =
656			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
657			td->td_retval[0] = indx;
658			return (0);
659		}
660		/*
661		 * Clean up the descriptor, but only if another thread hadn't
662		 * replaced or closed it.
663		 */
664		FILEDESC_LOCK(fdp);
665		if (fdp->fd_ofiles[indx] == fp) {
666			fdp->fd_ofiles[indx] = NULL;
667			FILEDESC_UNLOCK(fdp);
668			fdrop(fp, td);
669		} else
670			FILEDESC_UNLOCK(fdp);
671
672		if (error == ERESTART)
673			error = EINTR;
674		return (error);
675	}
676	td->td_dupfd = 0;
677	NDFREE(&nd, NDF_ONLY_PNBUF);
678	vp = nd.ni_vp;
679
680	/*
681	 * There should be 2 references on the file, one from the descriptor
682	 * table, and one for us.
683	 *
684	 * Handle the case where someone closed the file (via its file
685	 * descriptor) while we were blocked.  The end result should look
686	 * like opening the file succeeded but it was immediately closed.
687	 */
688	FILEDESC_LOCK(fdp);
689	FILE_LOCK(fp);
690	if (fp->f_count == 1) {
691		KASSERT(fdp->fd_ofiles[indx] != fp,
692		    ("Open file descriptor lost all refs"));
693		FILEDESC_UNLOCK(fdp);
694		FILE_UNLOCK(fp);
695		VOP_UNLOCK(vp, 0, td);
696		vn_close(vp, flags & FMASK, fp->f_cred, td);
697		fdrop(fp, td);
698		td->td_retval[0] = indx;
699		return 0;
700	}
701
702	/* assert that vn_open created a backing object if one is needed */
703	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
704		("open: vmio vnode has no backing object after vn_open"));
705
706	fp->f_data = vp;
707	fp->f_flag = flags & FMASK;
708	fp->f_ops = &vnops;
709	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
710	FILEDESC_UNLOCK(fdp);
711	FILE_UNLOCK(fp);
712	VOP_UNLOCK(vp, 0, td);
713	if (flags & (O_EXLOCK | O_SHLOCK)) {
714		lf.l_whence = SEEK_SET;
715		lf.l_start = 0;
716		lf.l_len = 0;
717		if (flags & O_EXLOCK)
718			lf.l_type = F_WRLCK;
719		else
720			lf.l_type = F_RDLCK;
721		type = F_FLOCK;
722		if ((flags & FNONBLOCK) == 0)
723			type |= F_WAIT;
724		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
725			    type)) != 0)
726			goto bad;
727		fp->f_flag |= FHASLOCK;
728	}
729	if (flags & O_TRUNC) {
730		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
731			goto bad;
732		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
733		VATTR_NULL(&vat);
734		vat.va_size = 0;
735		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
736#ifdef MAC
737		error = mac_check_vnode_op(td->td_ucred, vp,
738		    MAC_OP_VNODE_WRITE);
739		if (error == 0)
740#endif
741			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
742		VOP_UNLOCK(vp, 0, td);
743		vn_finished_write(mp);
744		if (error)
745			goto bad;
746	}
747	/*
748	 * Release our private reference, leaving the one associated with
749	 * the descriptor table intact.
750	 */
751	fdrop(fp, td);
752	td->td_retval[0] = indx;
753	return (0);
754bad:
755	FILEDESC_LOCK(fdp);
756	if (fdp->fd_ofiles[indx] == fp) {
757		fdp->fd_ofiles[indx] = NULL;
758		FILEDESC_UNLOCK(fdp);
759		fdrop(fp, td);
760	} else
761		FILEDESC_UNLOCK(fdp);
762	return (error);
763}
764
#ifdef COMPAT_43
/*
 * Old creat(2) interface: create a file by calling open() with
 * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		syscallarg(char *) path;
		syscallarg(int) mode;
	} */ *uap;
{
	struct open_args /* {
		syscallarg(char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */ open_uap;

	/* Build the equivalent open() argument block. */
	SCARG(&open_uap, path) = SCARG(uap, path);
	SCARG(&open_uap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&open_uap, mode) = SCARG(uap, mode);
	return (open(td, &open_uap));
}
#endif /* COMPAT_43 */
795
796/*
797 * Create a special file.
798 */
799#ifndef _SYS_SYSPROTO_H_
800struct mknod_args {
801	char	*path;
802	int	mode;
803	int	dev;
804};
805#endif
806/* ARGSUSED */
807int
808mknod(td, uap)
809	struct thread *td;
810	register struct mknod_args /* {
811		syscallarg(char *) path;
812		syscallarg(int) mode;
813		syscallarg(int) dev;
814	} */ *uap;
815{
816	struct vnode *vp;
817	struct mount *mp;
818	struct vattr vattr;
819	int error;
820	int whiteout = 0;
821	struct nameidata nd;
822
823	switch (SCARG(uap, mode) & S_IFMT) {
824	case S_IFCHR:
825	case S_IFBLK:
826		error = suser(td);
827		break;
828	default:
829		error = suser_cred(td->td_ucred, PRISON_ROOT);
830		break;
831	}
832	if (error)
833		return (error);
834restart:
835	bwillwrite();
836	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, UIO_USERSPACE,
837	    SCARG(uap, path), td);
838	if ((error = namei(&nd)) != 0)
839		return (error);
840	vp = nd.ni_vp;
841	if (vp != NULL) {
842		vrele(vp);
843		error = EEXIST;
844	} else {
845		VATTR_NULL(&vattr);
846		FILEDESC_LOCK(td->td_proc->p_fd);
847		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
848		FILEDESC_UNLOCK(td->td_proc->p_fd);
849		vattr.va_rdev = SCARG(uap, dev);
850		whiteout = 0;
851
852		switch (SCARG(uap, mode) & S_IFMT) {
853		case S_IFMT:	/* used by badsect to flag bad sectors */
854			vattr.va_type = VBAD;
855			break;
856		case S_IFCHR:
857			vattr.va_type = VCHR;
858			break;
859		case S_IFBLK:
860			vattr.va_type = VBLK;
861			break;
862		case S_IFWHT:
863			whiteout = 1;
864			break;
865		default:
866			error = EINVAL;
867			break;
868		}
869	}
870	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
871		NDFREE(&nd, NDF_ONLY_PNBUF);
872		vput(nd.ni_dvp);
873		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
874			return (error);
875		goto restart;
876	}
877	if (!error) {
878		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
879		if (whiteout)
880			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
881		else {
882			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
883						&nd.ni_cnd, &vattr);
884			if (error == 0)
885				vput(nd.ni_vp);
886		}
887	}
888	NDFREE(&nd, NDF_ONLY_PNBUF);
889	vput(nd.ni_dvp);
890	vn_finished_write(mp);
891	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
892	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
893	return (error);
894}
895
896/*
897 * Create a named pipe.
898 */
899#ifndef _SYS_SYSPROTO_H_
900struct mkfifo_args {
901	char	*path;
902	int	mode;
903};
904#endif
905/* ARGSUSED */
906int
907mkfifo(td, uap)
908	struct thread *td;
909	register struct mkfifo_args /* {
910		syscallarg(char *) path;
911		syscallarg(int) mode;
912	} */ *uap;
913{
914	struct mount *mp;
915	struct vattr vattr;
916	int error;
917	struct nameidata nd;
918
919restart:
920	bwillwrite();
921	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, UIO_USERSPACE,
922	    SCARG(uap, path), td);
923	if ((error = namei(&nd)) != 0)
924		return (error);
925	if (nd.ni_vp != NULL) {
926		NDFREE(&nd, NDF_ONLY_PNBUF);
927		vrele(nd.ni_vp);
928		vput(nd.ni_dvp);
929		return (EEXIST);
930	}
931	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
932		NDFREE(&nd, NDF_ONLY_PNBUF);
933		vput(nd.ni_dvp);
934		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
935			return (error);
936		goto restart;
937	}
938	VATTR_NULL(&vattr);
939	vattr.va_type = VFIFO;
940	FILEDESC_LOCK(td->td_proc->p_fd);
941	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
942	FILEDESC_UNLOCK(td->td_proc->p_fd);
943	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
944	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
945	if (error == 0)
946		vput(nd.ni_vp);
947	NDFREE(&nd, NDF_ONLY_PNBUF);
948	vput(nd.ni_dvp);
949	vn_finished_write(mp);
950	return (error);
951}
952
953/*
954 * Make a hard file link.
955 */
956#ifndef _SYS_SYSPROTO_H_
957struct link_args {
958	char	*path;
959	char	*link;
960};
961#endif
962/* ARGSUSED */
963int
964link(td, uap)
965	struct thread *td;
966	register struct link_args /* {
967		syscallarg(char *) path;
968		syscallarg(char *) link;
969	} */ *uap;
970{
971	struct vnode *vp;
972	struct mount *mp;
973	struct nameidata nd;
974	int error;
975
976	bwillwrite();
977	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
978	if ((error = namei(&nd)) != 0)
979		return (error);
980	NDFREE(&nd, NDF_ONLY_PNBUF);
981	vp = nd.ni_vp;
982	if (vp->v_type == VDIR) {
983		vrele(vp);
984		return (EPERM);		/* POSIX */
985	}
986	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
987		vrele(vp);
988		return (error);
989	}
990	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, UIO_USERSPACE,
991	    SCARG(uap, link), td);
992	if ((error = namei(&nd)) == 0) {
993		if (nd.ni_vp != NULL) {
994			vrele(nd.ni_vp);
995			error = EEXIST;
996		} else {
997			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
998			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
999			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1000		}
1001		NDFREE(&nd, NDF_ONLY_PNBUF);
1002		vput(nd.ni_dvp);
1003	}
1004	vrele(vp);
1005	vn_finished_write(mp);
1006	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1007	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1008	return (error);
1009}
1010
1011/*
1012 * Make a symbolic link.
1013 */
1014#ifndef _SYS_SYSPROTO_H_
1015struct symlink_args {
1016	char	*path;
1017	char	*link;
1018};
1019#endif
1020/* ARGSUSED */
1021int
1022symlink(td, uap)
1023	struct thread *td;
1024	register struct symlink_args /* {
1025		syscallarg(char *) path;
1026		syscallarg(char *) link;
1027	} */ *uap;
1028{
1029	struct mount *mp;
1030	struct vattr vattr;
1031	char *path;
1032	int error;
1033	struct nameidata nd;
1034
1035	path = uma_zalloc(namei_zone, M_WAITOK);
1036	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
1037		goto out;
1038restart:
1039	bwillwrite();
1040	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, UIO_USERSPACE,
1041	    SCARG(uap, link), td);
1042	if ((error = namei(&nd)) != 0)
1043		goto out;
1044	if (nd.ni_vp) {
1045		NDFREE(&nd, NDF_ONLY_PNBUF);
1046		vrele(nd.ni_vp);
1047		vput(nd.ni_dvp);
1048		error = EEXIST;
1049		goto out;
1050	}
1051	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1052		NDFREE(&nd, NDF_ONLY_PNBUF);
1053		vput(nd.ni_dvp);
1054		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1055			return (error);
1056		goto restart;
1057	}
1058	VATTR_NULL(&vattr);
1059	FILEDESC_LOCK(td->td_proc->p_fd);
1060	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1061	FILEDESC_UNLOCK(td->td_proc->p_fd);
1062	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1063	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1064	NDFREE(&nd, NDF_ONLY_PNBUF);
1065	if (error == 0)
1066		vput(nd.ni_vp);
1067	vput(nd.ni_dvp);
1068	vn_finished_write(mp);
1069	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1070	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1071out:
1072	uma_zfree(namei_zone, path);
1073	return (error);
1074}
1075
1076/*
1077 * Delete a whiteout from the filesystem.
1078 */
1079/* ARGSUSED */
1080int
1081undelete(td, uap)
1082	struct thread *td;
1083	register struct undelete_args /* {
1084		syscallarg(char *) path;
1085	} */ *uap;
1086{
1087	int error;
1088	struct mount *mp;
1089	struct nameidata nd;
1090
1091restart:
1092	bwillwrite();
1093	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1094	    SCARG(uap, path), td);
1095	error = namei(&nd);
1096	if (error)
1097		return (error);
1098
1099	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1100		NDFREE(&nd, NDF_ONLY_PNBUF);
1101		if (nd.ni_vp)
1102			vrele(nd.ni_vp);
1103		vput(nd.ni_dvp);
1104		return (EEXIST);
1105	}
1106	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1107		NDFREE(&nd, NDF_ONLY_PNBUF);
1108		vput(nd.ni_dvp);
1109		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1110			return (error);
1111		goto restart;
1112	}
1113	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1114	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1115	NDFREE(&nd, NDF_ONLY_PNBUF);
1116	vput(nd.ni_dvp);
1117	vn_finished_write(mp);
1118	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1119	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1120	return (error);
1121}
1122
1123/*
1124 * Delete a name from the filesystem.
1125 */
1126#ifndef _SYS_SYSPROTO_H_
1127struct unlink_args {
1128	char	*path;
1129};
1130#endif
1131/* ARGSUSED */
1132int
1133unlink(td, uap)
1134	struct thread *td;
1135	struct unlink_args /* {
1136		syscallarg(char *) path;
1137	} */ *uap;
1138{
1139	struct mount *mp;
1140	struct vnode *vp;
1141	int error;
1142	struct nameidata nd;
1143
1144restart:
1145	bwillwrite();
1146	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1147	if ((error = namei(&nd)) != 0)
1148		return (error);
1149	vp = nd.ni_vp;
1150	if (vp->v_type == VDIR)
1151		error = EPERM;		/* POSIX */
1152	else {
1153		/*
1154		 * The root of a mounted filesystem cannot be deleted.
1155		 *
1156		 * XXX: can this only be a VDIR case?
1157		 */
1158		if (vp->v_flag & VROOT)
1159			error = EBUSY;
1160	}
1161	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1162		NDFREE(&nd, NDF_ONLY_PNBUF);
1163		vrele(vp);
1164		vput(nd.ni_dvp);
1165		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1166			return (error);
1167		goto restart;
1168	}
1169	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1170	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1171	if (!error) {
1172		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1173		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1174	}
1175	NDFREE(&nd, NDF_ONLY_PNBUF);
1176	vput(nd.ni_dvp);
1177	vput(vp);
1178	vn_finished_write(mp);
1179	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1180	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1181	return (error);
1182}
1183
1184/*
1185 * Reposition read/write file offset.
1186 */
1187#ifndef _SYS_SYSPROTO_H_
1188struct lseek_args {
1189	int	fd;
1190	int	pad;
1191	off_t	offset;
1192	int	whence;
1193};
1194#endif
1195int
1196lseek(td, uap)
1197	struct thread *td;
1198	register struct lseek_args /* {
1199		syscallarg(int) fd;
1200		syscallarg(int) pad;
1201		syscallarg(off_t) offset;
1202		syscallarg(int) whence;
1203	} */ *uap;
1204{
1205	struct ucred *cred = td->td_ucred;
1206	struct file *fp;
1207	struct vnode *vp;
1208	struct vattr vattr;
1209	off_t offset;
1210	int error, noneg;
1211
1212	if ((error = fget(td, uap->fd, &fp)) != 0)
1213		return (error);
1214	if (fp->f_type != DTYPE_VNODE) {
1215		fdrop(fp, td);
1216		return (ESPIPE);
1217	}
1218	vp = (struct vnode *)fp->f_data;
1219	noneg = (vp->v_type != VCHR);
1220	offset = SCARG(uap, offset);
1221	switch (SCARG(uap, whence)) {
1222	case L_INCR:
1223		if (noneg &&
1224		    (fp->f_offset < 0 ||
1225		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
1226			return (EOVERFLOW);
1227		offset += fp->f_offset;
1228		break;
1229	case L_XTND:
1230		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1231		error = VOP_GETATTR(vp, &vattr, cred, td);
1232		VOP_UNLOCK(vp, 0, td);
1233		if (error)
1234			return (error);
1235		if (noneg &&
1236		    (vattr.va_size > OFF_MAX ||
1237		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
1238			return (EOVERFLOW);
1239		offset += vattr.va_size;
1240		break;
1241	case L_SET:
1242		break;
1243	default:
1244		fdrop(fp, td);
1245		return (EINVAL);
1246	}
1247	if (noneg && offset < 0)
1248		return (EINVAL);
1249	fp->f_offset = offset;
1250	*(off_t *)(td->td_retval) = fp->f_offset;
1251	fdrop(fp, td);
1252	return (0);
1253}
1254
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Reposition read/write file offset (old interface: the offset is
 * passed as a long instead of an off_t).
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
/*
 * Repack the old-style arguments into a struct lseek_args and hand
 * them to lseek(); its return value is passed back unchanged.
 */
int
olseek(td, uap)
	struct thread *td;
	struct olseek_args /* {
		syscallarg(int) fd;
		syscallarg(long) offset;
		syscallarg(int) whence;
	} */ *uap;
{
	struct lseek_args /* {
		syscallarg(int) fd;
		syscallarg(int) pad;
		syscallarg(off_t) offset;
		syscallarg(int) whence;
	} */ newargs;

	/* Only fd, offset and whence are filled in; pad is left alone. */
	SCARG(&newargs, fd) = SCARG(uap, fd);
	SCARG(&newargs, offset) = SCARG(uap, offset);
	SCARG(&newargs, whence) = SCARG(uap, whence);
	return (lseek(td, &newargs));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
1290
1291/*
1292 * Check access permissions using passed credentials.
1293 */
1294static int
1295vn_access(vp, user_flags, cred, td)
1296	struct vnode	*vp;
1297	int		user_flags;
1298	struct ucred	*cred;
1299	struct thread	*td;
1300{
1301	int error, flags;
1302
1303	/* Flags == 0 means only check for existence. */
1304	error = 0;
1305	if (user_flags) {
1306		flags = 0;
1307		if (user_flags & R_OK)
1308			flags |= VREAD;
1309		if (user_flags & W_OK)
1310			flags |= VWRITE;
1311		if (user_flags & X_OK)
1312			flags |= VEXEC;
1313#ifdef MAC
1314		error = mac_check_vnode_access(cred, vp, flags);
1315		if (error)
1316			return (error);
1317#endif
1318		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1319			error = VOP_ACCESS(vp, flags, cred, td);
1320	}
1321	return (error);
1322}
1323
1324/*
1325 * Check access permissions using "real" credentials.
1326 */
1327#ifndef _SYS_SYSPROTO_H_
1328struct access_args {
1329	char	*path;
1330	int	flags;
1331};
1332#endif
1333int
1334access(td, uap)
1335	struct thread *td;
1336	register struct access_args /* {
1337		syscallarg(char *) path;
1338		syscallarg(int) flags;
1339	} */ *uap;
1340{
1341	struct ucred *cred, *tmpcred;
1342	register struct vnode *vp;
1343	int error;
1344	struct nameidata nd;
1345
1346	/*
1347	 * Create and modify a temporary credential instead of one that
1348	 * is potentially shared.  This could also mess up socket
1349	 * buffer accounting which can run in an interrupt context.
1350	 *
1351	 * XXX - Depending on how "threads" are finally implemented, it
1352	 * may be better to explicitly pass the credential to namei()
1353	 * rather than to modify the potentially shared process structure.
1354	 */
1355	cred = td->td_ucred;
1356	tmpcred = crdup(cred);
1357	tmpcred->cr_uid = cred->cr_ruid;
1358	tmpcred->cr_groups[0] = cred->cr_rgid;
1359	td->td_ucred = tmpcred;
1360	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1361	    SCARG(uap, path), td);
1362	if ((error = namei(&nd)) != 0)
1363		goto out1;
1364	vp = nd.ni_vp;
1365
1366	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
1367	NDFREE(&nd, NDF_ONLY_PNBUF);
1368	vput(vp);
1369out1:
1370	td->td_ucred = cred;
1371	crfree(tmpcred);
1372	return (error);
1373}
1374
1375/*
1376 * Check access permissions using "effective" credentials.
1377 */
1378#ifndef _SYS_SYSPROTO_H_
1379struct eaccess_args {
1380	char	*path;
1381	int	flags;
1382};
1383#endif
1384int
1385eaccess(td, uap)
1386	struct thread *td;
1387	register struct eaccess_args /* {
1388		syscallarg(char *) path;
1389		syscallarg(int) flags;
1390	} */ *uap;
1391{
1392	struct nameidata nd;
1393	struct vnode *vp;
1394	int error;
1395
1396	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1397	    SCARG(uap, path), td);
1398	if ((error = namei(&nd)) != 0)
1399		return (error);
1400	vp = nd.ni_vp;
1401
1402	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
1403	NDFREE(&nd, NDF_ONLY_PNBUF);
1404	vput(vp);
1405	return (error);
1406}
1407
1408#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1409/*
1410 * Get file status; this version follows links.
1411 */
1412#ifndef _SYS_SYSPROTO_H_
1413struct ostat_args {
1414	char	*path;
1415	struct ostat *ub;
1416};
1417#endif
1418/* ARGSUSED */
1419int
1420ostat(td, uap)
1421	struct thread *td;
1422	register struct ostat_args /* {
1423		syscallarg(char *) path;
1424		syscallarg(struct ostat *) ub;
1425	} */ *uap;
1426{
1427	struct stat sb;
1428	struct ostat osb;
1429	int error;
1430	struct nameidata nd;
1431
1432	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1433	    SCARG(uap, path), td);
1434	if ((error = namei(&nd)) != 0)
1435		return (error);
1436	NDFREE(&nd, NDF_ONLY_PNBUF);
1437	error = vn_stat(nd.ni_vp, &sb, td);
1438	vput(nd.ni_vp);
1439	if (error)
1440		return (error);
1441	cvtstat(&sb, &osb);
1442	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1443	return (error);
1444}
1445
1446/*
1447 * Get file status; this version does not follow links.
1448 */
1449#ifndef _SYS_SYSPROTO_H_
1450struct olstat_args {
1451	char	*path;
1452	struct ostat *ub;
1453};
1454#endif
1455/* ARGSUSED */
1456int
1457olstat(td, uap)
1458	struct thread *td;
1459	register struct olstat_args /* {
1460		syscallarg(char *) path;
1461		syscallarg(struct ostat *) ub;
1462	} */ *uap;
1463{
1464	struct vnode *vp;
1465	struct stat sb;
1466	struct ostat osb;
1467	int error;
1468	struct nameidata nd;
1469
1470	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1471	    SCARG(uap, path), td);
1472	if ((error = namei(&nd)) != 0)
1473		return (error);
1474	vp = nd.ni_vp;
1475	error = vn_stat(vp, &sb, td);
1476	NDFREE(&nd, NDF_ONLY_PNBUF);
1477	vput(vp);
1478	if (error)
1479		return (error);
1480	cvtstat(&sb, &osb);
1481	error = copyout(&osb, SCARG(uap, ub), sizeof (osb));
1482	return (error);
1483}
1484
1485/*
1486 * Convert from an old to a new stat structure.
1487 */
1488void
1489cvtstat(st, ost)
1490	struct stat *st;
1491	struct ostat *ost;
1492{
1493
1494	ost->st_dev = st->st_dev;
1495	ost->st_ino = st->st_ino;
1496	ost->st_mode = st->st_mode;
1497	ost->st_nlink = st->st_nlink;
1498	ost->st_uid = st->st_uid;
1499	ost->st_gid = st->st_gid;
1500	ost->st_rdev = st->st_rdev;
1501	if (st->st_size < (quad_t)1 << 32)
1502		ost->st_size = st->st_size;
1503	else
1504		ost->st_size = -2;
1505	ost->st_atime = st->st_atime;
1506	ost->st_mtime = st->st_mtime;
1507	ost->st_ctime = st->st_ctime;
1508	ost->st_blksize = st->st_blksize;
1509	ost->st_blocks = st->st_blocks;
1510	ost->st_flags = st->st_flags;
1511	ost->st_gen = st->st_gen;
1512}
1513#endif /* COMPAT_43 || COMPAT_SUNOS */
1514
1515/*
1516 * Get file status; this version follows links.
1517 */
1518#ifndef _SYS_SYSPROTO_H_
1519struct stat_args {
1520	char	*path;
1521	struct stat *ub;
1522};
1523#endif
1524/* ARGSUSED */
1525int
1526stat(td, uap)
1527	struct thread *td;
1528	register struct stat_args /* {
1529		syscallarg(char *) path;
1530		syscallarg(struct stat *) ub;
1531	} */ *uap;
1532{
1533	struct stat sb;
1534	int error;
1535	struct nameidata nd;
1536
1537#ifdef LOOKUP_SHARED
1538	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
1539	    UIO_USERSPACE, SCARG(uap, path), td);
1540#else
1541	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1542	    SCARG(uap, path), td);
1543#endif
1544	if ((error = namei(&nd)) != 0)
1545		return (error);
1546	error = vn_stat(nd.ni_vp, &sb, td);
1547	NDFREE(&nd, NDF_ONLY_PNBUF);
1548	vput(nd.ni_vp);
1549	if (error)
1550		return (error);
1551	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1552	return (error);
1553}
1554
1555/*
1556 * Get file status; this version does not follow links.
1557 */
1558#ifndef _SYS_SYSPROTO_H_
1559struct lstat_args {
1560	char	*path;
1561	struct stat *ub;
1562};
1563#endif
1564/* ARGSUSED */
1565int
1566lstat(td, uap)
1567	struct thread *td;
1568	register struct lstat_args /* {
1569		syscallarg(char *) path;
1570		syscallarg(struct stat *) ub;
1571	} */ *uap;
1572{
1573	int error;
1574	struct vnode *vp;
1575	struct stat sb;
1576	struct nameidata nd;
1577
1578	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1579	    SCARG(uap, path), td);
1580	if ((error = namei(&nd)) != 0)
1581		return (error);
1582	vp = nd.ni_vp;
1583	error = vn_stat(vp, &sb, td);
1584	NDFREE(&nd, NDF_ONLY_PNBUF);
1585	vput(vp);
1586	if (error)
1587		return (error);
1588	error = copyout(&sb, SCARG(uap, ub), sizeof (sb));
1589	return (error);
1590}
1591
1592/*
1593 * Implementation of the NetBSD stat() function.
1594 * XXX This should probably be collapsed with the FreeBSD version,
1595 * as the differences are only due to vn_stat() clearing spares at
1596 * the end of the structures.  vn_stat could be split to avoid this,
1597 * and thus collapse the following to close to zero code.
1598 */
1599void
1600cvtnstat(sb, nsb)
1601	struct stat *sb;
1602	struct nstat *nsb;
1603{
1604	bzero(nsb, sizeof *nsb);
1605	nsb->st_dev = sb->st_dev;
1606	nsb->st_ino = sb->st_ino;
1607	nsb->st_mode = sb->st_mode;
1608	nsb->st_nlink = sb->st_nlink;
1609	nsb->st_uid = sb->st_uid;
1610	nsb->st_gid = sb->st_gid;
1611	nsb->st_rdev = sb->st_rdev;
1612	nsb->st_atimespec = sb->st_atimespec;
1613	nsb->st_mtimespec = sb->st_mtimespec;
1614	nsb->st_ctimespec = sb->st_ctimespec;
1615	nsb->st_size = sb->st_size;
1616	nsb->st_blocks = sb->st_blocks;
1617	nsb->st_blksize = sb->st_blksize;
1618	nsb->st_flags = sb->st_flags;
1619	nsb->st_gen = sb->st_gen;
1620	nsb->st_birthtimespec = sb->st_birthtimespec;
1621}
1622
1623#ifndef _SYS_SYSPROTO_H_
1624struct nstat_args {
1625	char	*path;
1626	struct nstat *ub;
1627};
1628#endif
1629/* ARGSUSED */
1630int
1631nstat(td, uap)
1632	struct thread *td;
1633	register struct nstat_args /* {
1634		syscallarg(char *) path;
1635		syscallarg(struct nstat *) ub;
1636	} */ *uap;
1637{
1638	struct stat sb;
1639	struct nstat nsb;
1640	int error;
1641	struct nameidata nd;
1642
1643	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1644	    SCARG(uap, path), td);
1645	if ((error = namei(&nd)) != 0)
1646		return (error);
1647	NDFREE(&nd, NDF_ONLY_PNBUF);
1648	error = vn_stat(nd.ni_vp, &sb, td);
1649	vput(nd.ni_vp);
1650	if (error)
1651		return (error);
1652	cvtnstat(&sb, &nsb);
1653	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1654	return (error);
1655}
1656
1657/*
1658 * NetBSD lstat.  Get file status; this version does not follow links.
1659 */
1660#ifndef _SYS_SYSPROTO_H_
1661struct lstat_args {
1662	char	*path;
1663	struct stat *ub;
1664};
1665#endif
1666/* ARGSUSED */
1667int
1668nlstat(td, uap)
1669	struct thread *td;
1670	register struct nlstat_args /* {
1671		syscallarg(char *) path;
1672		syscallarg(struct nstat *) ub;
1673	} */ *uap;
1674{
1675	int error;
1676	struct vnode *vp;
1677	struct stat sb;
1678	struct nstat nsb;
1679	struct nameidata nd;
1680
1681	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1682	    SCARG(uap, path), td);
1683	if ((error = namei(&nd)) != 0)
1684		return (error);
1685	vp = nd.ni_vp;
1686	NDFREE(&nd, NDF_ONLY_PNBUF);
1687	error = vn_stat(vp, &sb, td);
1688	vput(vp);
1689	if (error)
1690		return (error);
1691	cvtnstat(&sb, &nsb);
1692	error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb));
1693	return (error);
1694}
1695
1696/*
1697 * Get configurable pathname variables.
1698 */
1699#ifndef _SYS_SYSPROTO_H_
1700struct pathconf_args {
1701	char	*path;
1702	int	name;
1703};
1704#endif
1705/* ARGSUSED */
1706int
1707pathconf(td, uap)
1708	struct thread *td;
1709	register struct pathconf_args /* {
1710		syscallarg(char *) path;
1711		syscallarg(int) name;
1712	} */ *uap;
1713{
1714	int error;
1715	struct nameidata nd;
1716
1717	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1718	    SCARG(uap, path), td);
1719	if ((error = namei(&nd)) != 0)
1720		return (error);
1721	NDFREE(&nd, NDF_ONLY_PNBUF);
1722	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
1723	vput(nd.ni_vp);
1724	return (error);
1725}
1726
1727/*
1728 * Return target name of a symbolic link.
1729 */
1730#ifndef _SYS_SYSPROTO_H_
1731struct readlink_args {
1732	char	*path;
1733	char	*buf;
1734	int	count;
1735};
1736#endif
1737/* ARGSUSED */
1738int
1739readlink(td, uap)
1740	struct thread *td;
1741	register struct readlink_args /* {
1742		syscallarg(char *) path;
1743		syscallarg(char *) buf;
1744		syscallarg(int) count;
1745	} */ *uap;
1746{
1747	register struct vnode *vp;
1748	struct iovec aiov;
1749	struct uio auio;
1750	int error;
1751	struct nameidata nd;
1752
1753	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1754	    SCARG(uap, path), td);
1755	if ((error = namei(&nd)) != 0)
1756		return (error);
1757	NDFREE(&nd, NDF_ONLY_PNBUF);
1758	vp = nd.ni_vp;
1759#ifdef MAC
1760	error = mac_check_vnode_readlink(td->td_ucred, vp);
1761	if (error) {
1762		vput(vp);
1763		return (error);
1764	}
1765#endif
1766	if (vp->v_type != VLNK)
1767		error = EINVAL;
1768	else {
1769		aiov.iov_base = SCARG(uap, buf);
1770		aiov.iov_len = SCARG(uap, count);
1771		auio.uio_iov = &aiov;
1772		auio.uio_iovcnt = 1;
1773		auio.uio_offset = 0;
1774		auio.uio_rw = UIO_READ;
1775		auio.uio_segflg = UIO_USERSPACE;
1776		auio.uio_td = td;
1777		auio.uio_resid = SCARG(uap, count);
1778		error = VOP_READLINK(vp, &auio, td->td_ucred);
1779	}
1780	vput(vp);
1781	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
1782	return (error);
1783}
1784
1785/*
1786 * Common implementation code for chflags() and fchflags().
1787 */
1788static int
1789setfflags(td, vp, flags)
1790	struct thread *td;
1791	struct vnode *vp;
1792	int flags;
1793{
1794	int error;
1795	struct mount *mp;
1796	struct vattr vattr;
1797
1798	/*
1799	 * Prevent non-root users from setting flags on devices.  When
1800	 * a device is reused, users can retain ownership of the device
1801	 * if they are allowed to set flags and programs assume that
1802	 * chown can't fail when done as root.
1803	 */
1804	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1805		error = suser_cred(td->td_ucred, PRISON_ROOT);
1806		if (error)
1807			return (error);
1808	}
1809
1810	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1811		return (error);
1812	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1813	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1814#ifdef MAC
1815	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
1816	if (error == 0) {
1817#endif
1818		VATTR_NULL(&vattr);
1819		vattr.va_flags = flags;
1820		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1821#ifdef MAC
1822	}
1823#endif
1824	VOP_UNLOCK(vp, 0, td);
1825	vn_finished_write(mp);
1826	return (error);
1827}
1828
1829/*
1830 * Change flags of a file given a path name.
1831 */
1832#ifndef _SYS_SYSPROTO_H_
1833struct chflags_args {
1834	char	*path;
1835	int	flags;
1836};
1837#endif
1838/* ARGSUSED */
1839int
1840chflags(td, uap)
1841	struct thread *td;
1842	register struct chflags_args /* {
1843		syscallarg(char *) path;
1844		syscallarg(int) flags;
1845	} */ *uap;
1846{
1847	int error;
1848	struct nameidata nd;
1849
1850	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1851	if ((error = namei(&nd)) != 0)
1852		return (error);
1853	NDFREE(&nd, NDF_ONLY_PNBUF);
1854	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
1855	vrele(nd.ni_vp);
1856	return error;
1857}
1858
1859/*
1860 * Same as chflags() but doesn't follow symlinks.
1861 */
1862int
1863lchflags(td, uap)
1864	struct thread *td;
1865	register struct lchflags_args /* {
1866		syscallarg(char *) path;
1867		syscallarg(int) flags;
1868	} */ *uap;
1869{
1870	int error;
1871	struct nameidata nd;
1872
1873	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1874	if ((error = namei(&nd)) != 0)
1875		return (error);
1876	NDFREE(&nd, NDF_ONLY_PNBUF);
1877	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
1878	vrele(nd.ni_vp);
1879	return error;
1880}
1881
1882/*
1883 * Change flags of a file given a file descriptor.
1884 */
1885#ifndef _SYS_SYSPROTO_H_
1886struct fchflags_args {
1887	int	fd;
1888	int	flags;
1889};
1890#endif
1891/* ARGSUSED */
1892int
1893fchflags(td, uap)
1894	struct thread *td;
1895	register struct fchflags_args /* {
1896		syscallarg(int) fd;
1897		syscallarg(int) flags;
1898	} */ *uap;
1899{
1900	struct file *fp;
1901	int error;
1902
1903	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
1904		return (error);
1905	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
1906	fdrop(fp, td);
1907	return (error);
1908}
1909
1910/*
1911 * Common implementation code for chmod(), lchmod() and fchmod().
1912 */
1913static int
1914setfmode(td, vp, mode)
1915	struct thread *td;
1916	struct vnode *vp;
1917	int mode;
1918{
1919	int error;
1920	struct mount *mp;
1921	struct vattr vattr;
1922
1923	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1924		return (error);
1925	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1926	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1927	VATTR_NULL(&vattr);
1928	vattr.va_mode = mode & ALLPERMS;
1929#ifdef MAC
1930	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
1931	if (error == 0)
1932#endif
1933		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
1934	VOP_UNLOCK(vp, 0, td);
1935	vn_finished_write(mp);
1936	return error;
1937}
1938
1939/*
1940 * Change mode of a file given path name.
1941 */
1942#ifndef _SYS_SYSPROTO_H_
1943struct chmod_args {
1944	char	*path;
1945	int	mode;
1946};
1947#endif
1948/* ARGSUSED */
1949int
1950chmod(td, uap)
1951	struct thread *td;
1952	register struct chmod_args /* {
1953		syscallarg(char *) path;
1954		syscallarg(int) mode;
1955	} */ *uap;
1956{
1957	int error;
1958	struct nameidata nd;
1959
1960	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1961	if ((error = namei(&nd)) != 0)
1962		return (error);
1963	NDFREE(&nd, NDF_ONLY_PNBUF);
1964	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
1965	vrele(nd.ni_vp);
1966	return error;
1967}
1968
1969/*
1970 * Change mode of a file given path name (don't follow links.)
1971 */
1972#ifndef _SYS_SYSPROTO_H_
1973struct lchmod_args {
1974	char	*path;
1975	int	mode;
1976};
1977#endif
1978/* ARGSUSED */
1979int
1980lchmod(td, uap)
1981	struct thread *td;
1982	register struct lchmod_args /* {
1983		syscallarg(char *) path;
1984		syscallarg(int) mode;
1985	} */ *uap;
1986{
1987	int error;
1988	struct nameidata nd;
1989
1990	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1991	if ((error = namei(&nd)) != 0)
1992		return (error);
1993	NDFREE(&nd, NDF_ONLY_PNBUF);
1994	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
1995	vrele(nd.ni_vp);
1996	return error;
1997}
1998
1999/*
2000 * Change mode of a file given a file descriptor.
2001 */
2002#ifndef _SYS_SYSPROTO_H_
2003struct fchmod_args {
2004	int	fd;
2005	int	mode;
2006};
2007#endif
2008/* ARGSUSED */
2009int
2010fchmod(td, uap)
2011	struct thread *td;
2012	register struct fchmod_args /* {
2013		syscallarg(int) fd;
2014		syscallarg(int) mode;
2015	} */ *uap;
2016{
2017	struct file *fp;
2018	struct vnode *vp;
2019	int error;
2020
2021	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2022		return (error);
2023	vp = (struct vnode *)fp->f_data;
2024	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
2025	fdrop(fp, td);
2026	return (error);
2027}
2028
2029/*
2030 * Common implementation for chown(), lchown(), and fchown()
2031 */
2032static int
2033setfown(td, vp, uid, gid)
2034	struct thread *td;
2035	struct vnode *vp;
2036	uid_t uid;
2037	gid_t gid;
2038{
2039	int error;
2040	struct mount *mp;
2041	struct vattr vattr;
2042
2043	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2044		return (error);
2045	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2046	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2047	VATTR_NULL(&vattr);
2048	vattr.va_uid = uid;
2049	vattr.va_gid = gid;
2050#ifdef MAC
2051	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2052	    vattr.va_gid);
2053	if (error == 0)
2054#endif
2055		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2056	VOP_UNLOCK(vp, 0, td);
2057	vn_finished_write(mp);
2058	return error;
2059}
2060
2061/*
2062 * Set ownership given a path name.
2063 */
2064#ifndef _SYS_SYSPROTO_H_
2065struct chown_args {
2066	char	*path;
2067	int	uid;
2068	int	gid;
2069};
2070#endif
2071/* ARGSUSED */
2072int
2073chown(td, uap)
2074	struct thread *td;
2075	register struct chown_args /* {
2076		syscallarg(char *) path;
2077		syscallarg(int) uid;
2078		syscallarg(int) gid;
2079	} */ *uap;
2080{
2081	int error;
2082	struct nameidata nd;
2083
2084	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2085	if ((error = namei(&nd)) != 0)
2086		return (error);
2087	NDFREE(&nd, NDF_ONLY_PNBUF);
2088	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2089	vrele(nd.ni_vp);
2090	return (error);
2091}
2092
2093/*
2094 * Set ownership given a path name, do not cross symlinks.
2095 */
2096#ifndef _SYS_SYSPROTO_H_
2097struct lchown_args {
2098	char	*path;
2099	int	uid;
2100	int	gid;
2101};
2102#endif
2103/* ARGSUSED */
2104int
2105lchown(td, uap)
2106	struct thread *td;
2107	register struct lchown_args /* {
2108		syscallarg(char *) path;
2109		syscallarg(int) uid;
2110		syscallarg(int) gid;
2111	} */ *uap;
2112{
2113	int error;
2114	struct nameidata nd;
2115
2116	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2117	if ((error = namei(&nd)) != 0)
2118		return (error);
2119	NDFREE(&nd, NDF_ONLY_PNBUF);
2120	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
2121	vrele(nd.ni_vp);
2122	return (error);
2123}
2124
2125/*
2126 * Set ownership given a file descriptor.
2127 */
2128#ifndef _SYS_SYSPROTO_H_
2129struct fchown_args {
2130	int	fd;
2131	int	uid;
2132	int	gid;
2133};
2134#endif
2135/* ARGSUSED */
2136int
2137fchown(td, uap)
2138	struct thread *td;
2139	register struct fchown_args /* {
2140		syscallarg(int) fd;
2141		syscallarg(int) uid;
2142		syscallarg(int) gid;
2143	} */ *uap;
2144{
2145	struct file *fp;
2146	struct vnode *vp;
2147	int error;
2148
2149	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2150		return (error);
2151	vp = (struct vnode *)fp->f_data;
2152	error = setfown(td, (struct vnode *)fp->f_data,
2153		SCARG(uap, uid), SCARG(uap, gid));
2154	fdrop(fp, td);
2155	return (error);
2156}
2157
2158/*
2159 * Common implementation code for utimes(), lutimes(), and futimes().
2160 */
2161static int
2162getutimes(usrtvp, tsp)
2163	const struct timeval *usrtvp;
2164	struct timespec *tsp;
2165{
2166	struct timeval tv[2];
2167	int error;
2168
2169	if (usrtvp == NULL) {
2170		microtime(&tv[0]);
2171		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2172		tsp[1] = tsp[0];
2173	} else {
2174		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
2175			return (error);
2176		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2177		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
2178	}
2179	return 0;
2180}
2181
2182/*
2183 * Common implementation code for utimes(), lutimes(), and futimes().
2184 */
2185static int
2186setutimes(td, vp, ts, numtimes, nullflag)
2187	struct thread *td;
2188	struct vnode *vp;
2189	const struct timespec *ts;
2190	int numtimes;
2191	int nullflag;
2192{
2193	int error, setbirthtime;
2194	struct mount *mp;
2195	struct vattr vattr;
2196
2197	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2198		return (error);
2199	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2200	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2201	setbirthtime = 0;
2202	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2203	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2204		setbirthtime = 1;
2205	VATTR_NULL(&vattr);
2206	vattr.va_atime = ts[0];
2207	vattr.va_mtime = ts[1];
2208	if (setbirthtime)
2209		vattr.va_birthtime = ts[1];
2210	if (numtimes > 2)
2211		vattr.va_birthtime = ts[2];
2212	if (nullflag)
2213		vattr.va_vaflags |= VA_UTIMES_NULL;
2214#ifdef MAC
2215	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2216	    vattr.va_mtime);
2217	if (error == 0)
2218#endif
2219		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2220	VOP_UNLOCK(vp, 0, td);
2221	vn_finished_write(mp);
2222	return error;
2223}
2224
2225/*
2226 * Set the access and modification times of a file.
2227 */
2228#ifndef _SYS_SYSPROTO_H_
2229struct utimes_args {
2230	char	*path;
2231	struct	timeval *tptr;
2232};
2233#endif
2234/* ARGSUSED */
2235int
2236utimes(td, uap)
2237	struct thread *td;
2238	register struct utimes_args /* {
2239		syscallarg(char *) path;
2240		syscallarg(struct timeval *) tptr;
2241	} */ *uap;
2242{
2243	struct timespec ts[2];
2244	struct timeval *usrtvp;
2245	int error;
2246	struct nameidata nd;
2247
2248	usrtvp = SCARG(uap, tptr);
2249	if ((error = getutimes(usrtvp, ts)) != 0)
2250		return (error);
2251	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2252	if ((error = namei(&nd)) != 0)
2253		return (error);
2254	NDFREE(&nd, NDF_ONLY_PNBUF);
2255	error = setutimes(td, nd.ni_vp, ts, 2, usrtvp == NULL);
2256	vrele(nd.ni_vp);
2257	return (error);
2258}
2259
2260/*
2261 * Set the access and modification times of a file.
2262 */
2263#ifndef _SYS_SYSPROTO_H_
2264struct lutimes_args {
2265	char	*path;
2266	struct	timeval *tptr;
2267};
2268#endif
2269/* ARGSUSED */
2270int
2271lutimes(td, uap)
2272	struct thread *td;
2273	register struct lutimes_args /* {
2274		syscallarg(char *) path;
2275		syscallarg(struct timeval *) tptr;
2276	} */ *uap;
2277{
2278	struct timespec ts[2];
2279	struct timeval *usrtvp;
2280	int error;
2281	struct nameidata nd;
2282
2283	usrtvp = SCARG(uap, tptr);
2284	if ((error = getutimes(usrtvp, ts)) != 0)
2285		return (error);
2286	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2287	if ((error = namei(&nd)) != 0)
2288		return (error);
2289	NDFREE(&nd, NDF_ONLY_PNBUF);
2290	error = setutimes(td, nd.ni_vp, ts, 2, usrtvp == NULL);
2291	vrele(nd.ni_vp);
2292	return (error);
2293}
2294
2295/*
2296 * Set the access and modification times of a file.
2297 */
2298#ifndef _SYS_SYSPROTO_H_
2299struct futimes_args {
2300	int	fd;
2301	struct	timeval *tptr;
2302};
2303#endif
2304/* ARGSUSED */
2305int
2306futimes(td, uap)
2307	struct thread *td;
2308	register struct futimes_args /* {
2309		syscallarg(int ) fd;
2310		syscallarg(struct timeval *) tptr;
2311	} */ *uap;
2312{
2313	struct timespec ts[2];
2314	struct file *fp;
2315	struct timeval *usrtvp;
2316	int error;
2317
2318	usrtvp = SCARG(uap, tptr);
2319	if ((error = getutimes(usrtvp, ts)) != 0)
2320		return (error);
2321	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2322		return (error);
2323	error = setutimes(td, (struct vnode *)fp->f_data, ts, 2, usrtvp==NULL);
2324	fdrop(fp, td);
2325	return (error);
2326}
2327
2328/*
2329 * Truncate a file given its path name.
2330 */
2331#ifndef _SYS_SYSPROTO_H_
2332struct truncate_args {
2333	char	*path;
2334	int	pad;
2335	off_t	length;
2336};
2337#endif
2338/* ARGSUSED */
2339int
2340truncate(td, uap)
2341	struct thread *td;
2342	register struct truncate_args /* {
2343		syscallarg(char *) path;
2344		syscallarg(int) pad;
2345		syscallarg(off_t) length;
2346	} */ *uap;
2347{
2348	struct mount *mp;
2349	struct vnode *vp;
2350	struct vattr vattr;
2351	int error;
2352	struct nameidata nd;
2353
2354	if (uap->length < 0)
2355		return(EINVAL);
2356	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2357	if ((error = namei(&nd)) != 0)
2358		return (error);
2359	vp = nd.ni_vp;
2360	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2361		vrele(vp);
2362		return (error);
2363	}
2364	NDFREE(&nd, NDF_ONLY_PNBUF);
2365	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2366	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2367	if (vp->v_type == VDIR)
2368		error = EISDIR;
2369#ifdef MAC
2370	else if ((error = mac_check_vnode_op(td->td_ucred, vp,
2371	    MAC_OP_VNODE_WRITE))) {}
2372#endif
2373	else if ((error = vn_writechk(vp)) == 0 &&
2374	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2375		VATTR_NULL(&vattr);
2376		vattr.va_size = SCARG(uap, length);
2377		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2378	}
2379	vput(vp);
2380	vn_finished_write(mp);
2381	return (error);
2382}
2383
2384/*
2385 * Truncate a file given a file descriptor.
2386 */
2387#ifndef _SYS_SYSPROTO_H_
2388struct ftruncate_args {
2389	int	fd;
2390	int	pad;
2391	off_t	length;
2392};
2393#endif
2394/* ARGSUSED */
2395int
2396ftruncate(td, uap)
2397	struct thread *td;
2398	register struct ftruncate_args /* {
2399		syscallarg(int) fd;
2400		syscallarg(int) pad;
2401		syscallarg(off_t) length;
2402	} */ *uap;
2403{
2404	struct mount *mp;
2405	struct vattr vattr;
2406	struct vnode *vp;
2407	struct file *fp;
2408	int error;
2409
2410	if (uap->length < 0)
2411		return(EINVAL);
2412	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2413		return (error);
2414	if ((fp->f_flag & FWRITE) == 0) {
2415		fdrop(fp, td);
2416		return (EINVAL);
2417	}
2418	vp = (struct vnode *)fp->f_data;
2419	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2420		fdrop(fp, td);
2421		return (error);
2422	}
2423	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2424	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2425	if (vp->v_type == VDIR)
2426		error = EISDIR;
2427#ifdef MAC
2428	else if ((error = mac_check_vnode_op(td->td_ucred, vp,
2429	    MAC_OP_VNODE_WRITE))) {}
2430#endif
2431	else if ((error = vn_writechk(vp)) == 0) {
2432		VATTR_NULL(&vattr);
2433		vattr.va_size = SCARG(uap, length);
2434		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2435	}
2436	VOP_UNLOCK(vp, 0, td);
2437	vn_finished_write(mp);
2438	fdrop(fp, td);
2439	return (error);
2440}
2441
2442#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2443/*
2444 * Truncate a file given its path name.
2445 */
2446#ifndef _SYS_SYSPROTO_H_
2447struct otruncate_args {
2448	char	*path;
2449	long	length;
2450};
2451#endif
2452/* ARGSUSED */
2453int
2454otruncate(td, uap)
2455	struct thread *td;
2456	register struct otruncate_args /* {
2457		syscallarg(char *) path;
2458		syscallarg(long) length;
2459	} */ *uap;
2460{
2461	struct truncate_args /* {
2462		syscallarg(char *) path;
2463		syscallarg(int) pad;
2464		syscallarg(off_t) length;
2465	} */ nuap;
2466
2467	SCARG(&nuap, path) = SCARG(uap, path);
2468	SCARG(&nuap, length) = SCARG(uap, length);
2469	return (truncate(td, &nuap));
2470}
2471
2472/*
2473 * Truncate a file given a file descriptor.
2474 */
2475#ifndef _SYS_SYSPROTO_H_
2476struct oftruncate_args {
2477	int	fd;
2478	long	length;
2479};
2480#endif
2481/* ARGSUSED */
2482int
2483oftruncate(td, uap)
2484	struct thread *td;
2485	register struct oftruncate_args /* {
2486		syscallarg(int) fd;
2487		syscallarg(long) length;
2488	} */ *uap;
2489{
2490	struct ftruncate_args /* {
2491		syscallarg(int) fd;
2492		syscallarg(int) pad;
2493		syscallarg(off_t) length;
2494	} */ nuap;
2495
2496	SCARG(&nuap, fd) = SCARG(uap, fd);
2497	SCARG(&nuap, length) = SCARG(uap, length);
2498	return (ftruncate(td, &nuap));
2499}
2500#endif /* COMPAT_43 || COMPAT_SUNOS */
2501
2502/*
2503 * Sync an open file.
2504 */
2505#ifndef _SYS_SYSPROTO_H_
2506struct fsync_args {
2507	int	fd;
2508};
2509#endif
2510/* ARGSUSED */
2511int
2512fsync(td, uap)
2513	struct thread *td;
2514	struct fsync_args /* {
2515		syscallarg(int) fd;
2516	} */ *uap;
2517{
2518	struct vnode *vp;
2519	struct mount *mp;
2520	struct file *fp;
2521	vm_object_t obj;
2522	int error;
2523
2524	GIANT_REQUIRED;
2525
2526	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2527		return (error);
2528	vp = (struct vnode *)fp->f_data;
2529	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2530		fdrop(fp, td);
2531		return (error);
2532	}
2533	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2534	if (VOP_GETVOBJECT(vp, &obj) == 0) {
2535		vm_object_page_clean(obj, 0, 0, 0);
2536	}
2537	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
2538	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
2539	    && softdep_fsync_hook != NULL)
2540		error = (*softdep_fsync_hook)(vp);
2541
2542	VOP_UNLOCK(vp, 0, td);
2543	vn_finished_write(mp);
2544	fdrop(fp, td);
2545	return (error);
2546}
2547
2548/*
2549 * Rename files.  Source and destination must either both be directories,
2550 * or both not be directories.  If target is a directory, it must be empty.
2551 */
2552#ifndef _SYS_SYSPROTO_H_
2553struct rename_args {
2554	char	*from;
2555	char	*to;
2556};
2557#endif
2558/* ARGSUSED */
2559int
2560rename(td, uap)
2561	struct thread *td;
2562	register struct rename_args /* {
2563		syscallarg(char *) from;
2564		syscallarg(char *) to;
2565	} */ *uap;
2566{
2567	struct mount *mp;
2568	struct vnode *tvp, *fvp, *tdvp;
2569	struct nameidata fromnd, tond;
2570	int error;
2571
2572	bwillwrite();
2573	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
2574	    SCARG(uap, from), td);
2575	if ((error = namei(&fromnd)) != 0)
2576		return (error);
2577	fvp = fromnd.ni_vp;
2578	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
2579		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2580		vrele(fromnd.ni_dvp);
2581		vrele(fvp);
2582		goto out1;
2583	}
2584	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
2585	    UIO_USERSPACE, SCARG(uap, to), td);
2586	if (fromnd.ni_vp->v_type == VDIR)
2587		tond.ni_cnd.cn_flags |= WILLBEDIR;
2588	if ((error = namei(&tond)) != 0) {
2589		/* Translate error code for rename("dir1", "dir2/."). */
2590		if (error == EISDIR && fvp->v_type == VDIR)
2591			error = EINVAL;
2592		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2593		vrele(fromnd.ni_dvp);
2594		vrele(fvp);
2595		goto out1;
2596	}
2597	tdvp = tond.ni_dvp;
2598	tvp = tond.ni_vp;
2599	if (tvp != NULL) {
2600		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2601			error = ENOTDIR;
2602			goto out;
2603		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2604			error = EISDIR;
2605			goto out;
2606		}
2607	}
2608	if (fvp == tdvp)
2609		error = EINVAL;
2610	/*
2611	 * If source is the same as the destination (that is the
2612	 * same inode number with the same name in the same directory),
2613	 * then there is nothing to do.
2614	 */
2615	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2616	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2617	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2618	      fromnd.ni_cnd.cn_namelen))
2619		error = -1;
2620out:
2621	if (!error) {
2622		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
2623		if (fromnd.ni_dvp != tdvp) {
2624			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2625		}
2626		if (tvp) {
2627			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
2628		}
2629		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2630				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2631		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2632		NDFREE(&tond, NDF_ONLY_PNBUF);
2633	} else {
2634		NDFREE(&fromnd, NDF_ONLY_PNBUF);
2635		NDFREE(&tond, NDF_ONLY_PNBUF);
2636		if (tdvp == tvp)
2637			vrele(tdvp);
2638		else
2639			vput(tdvp);
2640		if (tvp)
2641			vput(tvp);
2642		vrele(fromnd.ni_dvp);
2643		vrele(fvp);
2644	}
2645	vrele(tond.ni_startdir);
2646	vn_finished_write(mp);
2647	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
2648	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
2649	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
2650	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
2651out1:
2652	if (fromnd.ni_startdir)
2653		vrele(fromnd.ni_startdir);
2654	if (error == -1)
2655		return (0);
2656	return (error);
2657}
2658
2659/*
2660 * Make a directory file.
2661 */
2662#ifndef _SYS_SYSPROTO_H_
2663struct mkdir_args {
2664	char	*path;
2665	int	mode;
2666};
2667#endif
2668/* ARGSUSED */
2669int
2670mkdir(td, uap)
2671	struct thread *td;
2672	register struct mkdir_args /* {
2673		syscallarg(char *) path;
2674		syscallarg(int) mode;
2675	} */ *uap;
2676{
2677
2678	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
2679}
2680
2681int
2682vn_mkdir(path, mode, segflg, td)
2683	char *path;
2684	int mode;
2685	enum uio_seg segflg;
2686	struct thread *td;
2687{
2688	struct mount *mp;
2689	struct vnode *vp;
2690	struct vattr vattr;
2691	int error;
2692	struct nameidata nd;
2693
2694restart:
2695	bwillwrite();
2696	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
2697	nd.ni_cnd.cn_flags |= WILLBEDIR;
2698	if ((error = namei(&nd)) != 0)
2699		return (error);
2700	vp = nd.ni_vp;
2701	if (vp != NULL) {
2702		NDFREE(&nd, NDF_ONLY_PNBUF);
2703		vrele(vp);
2704		/*
2705		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
2706		 * the strange behaviour of leaving the vnode unlocked
2707		 * if the target is the same vnode as the parent.
2708		 */
2709		if (vp == nd.ni_dvp)
2710			vrele(nd.ni_dvp);
2711		else
2712			vput(nd.ni_dvp);
2713		return (EEXIST);
2714	}
2715	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2716		NDFREE(&nd, NDF_ONLY_PNBUF);
2717		vput(nd.ni_dvp);
2718		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2719			return (error);
2720		goto restart;
2721	}
2722	VATTR_NULL(&vattr);
2723	vattr.va_type = VDIR;
2724	FILEDESC_LOCK(td->td_proc->p_fd);
2725	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
2726	FILEDESC_UNLOCK(td->td_proc->p_fd);
2727	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2728	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2729	NDFREE(&nd, NDF_ONLY_PNBUF);
2730	vput(nd.ni_dvp);
2731	if (!error)
2732		vput(nd.ni_vp);
2733	vn_finished_write(mp);
2734	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
2735	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
2736	return (error);
2737}
2738
2739/*
2740 * Remove a directory file.
2741 */
2742#ifndef _SYS_SYSPROTO_H_
2743struct rmdir_args {
2744	char	*path;
2745};
2746#endif
2747/* ARGSUSED */
2748int
2749rmdir(td, uap)
2750	struct thread *td;
2751	struct rmdir_args /* {
2752		syscallarg(char *) path;
2753	} */ *uap;
2754{
2755	struct mount *mp;
2756	struct vnode *vp;
2757	int error;
2758	struct nameidata nd;
2759
2760restart:
2761	bwillwrite();
2762	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2763	    SCARG(uap, path), td);
2764	if ((error = namei(&nd)) != 0)
2765		return (error);
2766	vp = nd.ni_vp;
2767	if (vp->v_type != VDIR) {
2768		error = ENOTDIR;
2769		goto out;
2770	}
2771	/*
2772	 * No rmdir "." please.
2773	 */
2774	if (nd.ni_dvp == vp) {
2775		error = EINVAL;
2776		goto out;
2777	}
2778	/*
2779	 * The root of a mounted filesystem cannot be deleted.
2780	 */
2781	if (vp->v_flag & VROOT) {
2782		error = EBUSY;
2783		goto out;
2784	}
2785	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2786		NDFREE(&nd, NDF_ONLY_PNBUF);
2787		if (nd.ni_dvp == vp)
2788			vrele(nd.ni_dvp);
2789		else
2790			vput(nd.ni_dvp);
2791		vput(vp);
2792		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2793			return (error);
2794		goto restart;
2795	}
2796	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2797	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2798	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2799	vn_finished_write(mp);
2800out:
2801	NDFREE(&nd, NDF_ONLY_PNBUF);
2802	if (nd.ni_dvp == vp)
2803		vrele(nd.ni_dvp);
2804	else
2805		vput(nd.ni_dvp);
2806	vput(vp);
2807	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
2808	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
2809	return (error);
2810}
2811
2812#ifdef COMPAT_43
2813/*
2814 * Read a block of directory entries in a filesystem independent format.
2815 */
2816#ifndef _SYS_SYSPROTO_H_
2817struct ogetdirentries_args {
2818	int	fd;
2819	char	*buf;
2820	u_int	count;
2821	long	*basep;
2822};
2823#endif
2824int
2825ogetdirentries(td, uap)
2826	struct thread *td;
2827	register struct ogetdirentries_args /* {
2828		syscallarg(int) fd;
2829		syscallarg(char *) buf;
2830		syscallarg(u_int) count;
2831		syscallarg(long *) basep;
2832	} */ *uap;
2833{
2834	struct vnode *vp;
2835	struct file *fp;
2836	struct uio auio, kuio;
2837	struct iovec aiov, kiov;
2838	struct dirent *dp, *edp;
2839	caddr_t dirbuf;
2840	int error, eofflag, readcnt;
2841	long loff;
2842
2843	/* XXX arbitrary sanity limit on `count'. */
2844	if (SCARG(uap, count) > 64 * 1024)
2845		return (EINVAL);
2846	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2847		return (error);
2848	if ((fp->f_flag & FREAD) == 0) {
2849		fdrop(fp, td);
2850		return (EBADF);
2851	}
2852	vp = (struct vnode *)fp->f_data;
2853unionread:
2854	if (vp->v_type != VDIR) {
2855		fdrop(fp, td);
2856		return (EINVAL);
2857	}
2858	aiov.iov_base = SCARG(uap, buf);
2859	aiov.iov_len = SCARG(uap, count);
2860	auio.uio_iov = &aiov;
2861	auio.uio_iovcnt = 1;
2862	auio.uio_rw = UIO_READ;
2863	auio.uio_segflg = UIO_USERSPACE;
2864	auio.uio_td = td;
2865	auio.uio_resid = SCARG(uap, count);
2866	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2867	loff = auio.uio_offset = fp->f_offset;
2868#	if (BYTE_ORDER != LITTLE_ENDIAN)
2869		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
2870			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
2871			    NULL, NULL);
2872			fp->f_offset = auio.uio_offset;
2873		} else
2874#	endif
2875	{
2876		kuio = auio;
2877		kuio.uio_iov = &kiov;
2878		kuio.uio_segflg = UIO_SYSSPACE;
2879		kiov.iov_len = SCARG(uap, count);
2880		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
2881		kiov.iov_base = dirbuf;
2882		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
2883			    NULL, NULL);
2884		fp->f_offset = kuio.uio_offset;
2885		if (error == 0) {
2886			readcnt = SCARG(uap, count) - kuio.uio_resid;
2887			edp = (struct dirent *)&dirbuf[readcnt];
2888			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
2889#				if (BYTE_ORDER == LITTLE_ENDIAN)
2890					/*
2891					 * The expected low byte of
2892					 * dp->d_namlen is our dp->d_type.
2893					 * The high MBZ byte of dp->d_namlen
2894					 * is our dp->d_namlen.
2895					 */
2896					dp->d_type = dp->d_namlen;
2897					dp->d_namlen = 0;
2898#				else
2899					/*
2900					 * The dp->d_type is the high byte
2901					 * of the expected dp->d_namlen,
2902					 * so must be zero'ed.
2903					 */
2904					dp->d_type = 0;
2905#				endif
2906				if (dp->d_reclen > 0) {
2907					dp = (struct dirent *)
2908					    ((char *)dp + dp->d_reclen);
2909				} else {
2910					error = EIO;
2911					break;
2912				}
2913			}
2914			if (dp >= edp)
2915				error = uiomove(dirbuf, readcnt, &auio);
2916		}
2917		FREE(dirbuf, M_TEMP);
2918	}
2919	VOP_UNLOCK(vp, 0, td);
2920	if (error) {
2921		fdrop(fp, td);
2922		return (error);
2923	}
2924	if (SCARG(uap, count) == auio.uio_resid) {
2925		if (union_dircheckp) {
2926			error = union_dircheckp(td, &vp, fp);
2927			if (error == -1)
2928				goto unionread;
2929			if (error) {
2930				fdrop(fp, td);
2931				return (error);
2932			}
2933		}
2934		if ((vp->v_flag & VROOT) &&
2935		    (vp->v_mount->mnt_flag & MNT_UNION)) {
2936			struct vnode *tvp = vp;
2937			vp = vp->v_mount->mnt_vnodecovered;
2938			VREF(vp);
2939			fp->f_data = vp;
2940			fp->f_offset = 0;
2941			vrele(tvp);
2942			goto unionread;
2943		}
2944	}
2945	error = copyout(&loff, SCARG(uap, basep), sizeof(long));
2946	fdrop(fp, td);
2947	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2948	return (error);
2949}
2950#endif /* COMPAT_43 */
2951
2952/*
2953 * Read a block of directory entries in a filesystem independent format.
2954 */
2955#ifndef _SYS_SYSPROTO_H_
2956struct getdirentries_args {
2957	int	fd;
2958	char	*buf;
2959	u_int	count;
2960	long	*basep;
2961};
2962#endif
2963int
2964getdirentries(td, uap)
2965	struct thread *td;
2966	register struct getdirentries_args /* {
2967		syscallarg(int) fd;
2968		syscallarg(char *) buf;
2969		syscallarg(u_int) count;
2970		syscallarg(long *) basep;
2971	} */ *uap;
2972{
2973	struct vnode *vp;
2974	struct file *fp;
2975	struct uio auio;
2976	struct iovec aiov;
2977	long loff;
2978	int error, eofflag;
2979
2980	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2981		return (error);
2982	if ((fp->f_flag & FREAD) == 0) {
2983		fdrop(fp, td);
2984		return (EBADF);
2985	}
2986	vp = (struct vnode *)fp->f_data;
2987unionread:
2988	if (vp->v_type != VDIR) {
2989		fdrop(fp, td);
2990		return (EINVAL);
2991	}
2992	aiov.iov_base = SCARG(uap, buf);
2993	aiov.iov_len = SCARG(uap, count);
2994	auio.uio_iov = &aiov;
2995	auio.uio_iovcnt = 1;
2996	auio.uio_rw = UIO_READ;
2997	auio.uio_segflg = UIO_USERSPACE;
2998	auio.uio_td = td;
2999	auio.uio_resid = SCARG(uap, count);
3000	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3001	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3002	loff = auio.uio_offset = fp->f_offset;
3003	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3004	fp->f_offset = auio.uio_offset;
3005	VOP_UNLOCK(vp, 0, td);
3006	if (error) {
3007		fdrop(fp, td);
3008		return (error);
3009	}
3010	if (SCARG(uap, count) == auio.uio_resid) {
3011		if (union_dircheckp) {
3012			error = union_dircheckp(td, &vp, fp);
3013			if (error == -1)
3014				goto unionread;
3015			if (error) {
3016				fdrop(fp, td);
3017				return (error);
3018			}
3019		}
3020		if ((vp->v_flag & VROOT) &&
3021		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3022			struct vnode *tvp = vp;
3023			vp = vp->v_mount->mnt_vnodecovered;
3024			VREF(vp);
3025			fp->f_data = vp;
3026			fp->f_offset = 0;
3027			vrele(tvp);
3028			goto unionread;
3029		}
3030	}
3031	if (SCARG(uap, basep) != NULL) {
3032		error = copyout(&loff, SCARG(uap, basep), sizeof(long));
3033	}
3034	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
3035	fdrop(fp, td);
3036	return (error);
3037}
3038#ifndef _SYS_SYSPROTO_H_
3039struct getdents_args {
3040	int fd;
3041	char *buf;
3042	size_t count;
3043};
3044#endif
3045int
3046getdents(td, uap)
3047	struct thread *td;
3048	register struct getdents_args /* {
3049		syscallarg(int) fd;
3050		syscallarg(char *) buf;
3051		syscallarg(u_int) count;
3052	} */ *uap;
3053{
3054	struct getdirentries_args ap;
3055	ap.fd = uap->fd;
3056	ap.buf = uap->buf;
3057	ap.count = uap->count;
3058	ap.basep = NULL;
3059	return getdirentries(td, &ap);
3060}
3061
3062/*
3063 * Set the mode mask for creation of filesystem nodes.
3064 *
3065 * MP SAFE
3066 */
3067#ifndef _SYS_SYSPROTO_H_
3068struct umask_args {
3069	int	newmask;
3070};
3071#endif
3072int
3073umask(td, uap)
3074	struct thread *td;
3075	struct umask_args /* {
3076		syscallarg(int) newmask;
3077	} */ *uap;
3078{
3079	register struct filedesc *fdp;
3080
3081	FILEDESC_LOCK(td->td_proc->p_fd);
3082	fdp = td->td_proc->p_fd;
3083	td->td_retval[0] = fdp->fd_cmask;
3084	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
3085	FILEDESC_UNLOCK(td->td_proc->p_fd);
3086	return (0);
3087}
3088
3089/*
3090 * Void all references to file by ripping underlying filesystem
3091 * away from vnode.
3092 */
3093#ifndef _SYS_SYSPROTO_H_
3094struct revoke_args {
3095	char	*path;
3096};
3097#endif
3098/* ARGSUSED */
3099int
3100revoke(td, uap)
3101	struct thread *td;
3102	register struct revoke_args /* {
3103		syscallarg(char *) path;
3104	} */ *uap;
3105{
3106	struct mount *mp;
3107	struct vnode *vp;
3108	struct vattr vattr;
3109	int error;
3110	struct nameidata nd;
3111
3112	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
3113	    td);
3114	if ((error = namei(&nd)) != 0)
3115		return (error);
3116	vp = nd.ni_vp;
3117	NDFREE(&nd, NDF_ONLY_PNBUF);
3118	if (vp->v_type != VCHR) {
3119		vput(vp);
3120		return (EINVAL);
3121	}
3122#ifdef MAC
3123	error = mac_check_vnode_revoke(td->td_ucred, vp);
3124	if (error) {
3125		vput(vp);
3126		return (error);
3127	}
3128#endif
3129	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3130	if (error) {
3131		vput(vp);
3132		return (error);
3133	}
3134	VOP_UNLOCK(vp, 0, td);
3135	if (td->td_ucred->cr_uid != vattr.va_uid) {
3136		error = suser_cred(td->td_ucred, PRISON_ROOT);
3137		if (error)
3138			goto out;
3139	}
3140	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3141		goto out;
3142	if (vcount(vp) > 1)
3143		VOP_REVOKE(vp, REVOKEALL);
3144	vn_finished_write(mp);
3145out:
3146	vrele(vp);
3147	return (error);
3148}
3149
3150/*
3151 * Convert a user file descriptor to a kernel file entry.
3152 * The file entry is locked upon returning.
3153 */
3154int
3155getvnode(fdp, fd, fpp)
3156	struct filedesc *fdp;
3157	int fd;
3158	struct file **fpp;
3159{
3160	int error;
3161	struct file *fp;
3162
3163	fp = NULL;
3164	if (fdp == NULL)
3165		error = EBADF;
3166	else {
3167		FILEDESC_LOCK(fdp);
3168		if ((u_int)fd >= fdp->fd_nfiles ||
3169		    (fp = fdp->fd_ofiles[fd]) == NULL)
3170			error = EBADF;
3171		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3172			fp = NULL;
3173			error = EINVAL;
3174		} else {
3175			fhold(fp);
3176			error = 0;
3177		}
3178		FILEDESC_UNLOCK(fdp);
3179	}
3180	*fpp = fp;
3181	return (error);
3182}
3183/*
3184 * Get (NFS) file handle
3185 */
3186#ifndef _SYS_SYSPROTO_H_
3187struct getfh_args {
3188	char	*fname;
3189	fhandle_t *fhp;
3190};
3191#endif
3192int
3193getfh(td, uap)
3194	struct thread *td;
3195	register struct getfh_args *uap;
3196{
3197	struct nameidata nd;
3198	fhandle_t fh;
3199	register struct vnode *vp;
3200	int error;
3201
3202	/*
3203	 * Must be super user
3204	 */
3205	error = suser(td);
3206	if (error)
3207		return (error);
3208	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3209	error = namei(&nd);
3210	if (error)
3211		return (error);
3212	NDFREE(&nd, NDF_ONLY_PNBUF);
3213	vp = nd.ni_vp;
3214	bzero(&fh, sizeof(fh));
3215	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3216	error = VFS_VPTOFH(vp, &fh.fh_fid);
3217	vput(vp);
3218	if (error)
3219		return (error);
3220	error = copyout(&fh, uap->fhp, sizeof (fh));
3221	return (error);
3222}
3223
3224/*
3225 * syscall for the rpc.lockd to use to translate a NFS file handle into
3226 * an open descriptor.
3227 *
3228 * warning: do not remove the suser() call or this becomes one giant
3229 * security hole.
3230 */
3231#ifndef _SYS_SYSPROTO_H_
3232struct fhopen_args {
3233	const struct fhandle *u_fhp;
3234	int flags;
3235};
3236#endif
3237int
3238fhopen(td, uap)
3239	struct thread *td;
3240	struct fhopen_args /* {
3241		syscallarg(const struct fhandle *) u_fhp;
3242		syscallarg(int) flags;
3243	} */ *uap;
3244{
3245	struct proc *p = td->td_proc;
3246	struct mount *mp;
3247	struct vnode *vp;
3248	struct fhandle fhp;
3249	struct vattr vat;
3250	struct vattr *vap = &vat;
3251	struct flock lf;
3252	struct file *fp;
3253	register struct filedesc *fdp = p->p_fd;
3254	int fmode, mode, error, type;
3255	struct file *nfp;
3256	int indx;
3257
3258	/*
3259	 * Must be super user
3260	 */
3261	error = suser(td);
3262	if (error)
3263		return (error);
3264
3265	fmode = FFLAGS(SCARG(uap, flags));
3266	/* why not allow a non-read/write open for our lockd? */
3267	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3268		return (EINVAL);
3269	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
3270	if (error)
3271		return(error);
3272	/* find the mount point */
3273	mp = vfs_getvfs(&fhp.fh_fsid);
3274	if (mp == NULL)
3275		return (ESTALE);
3276	/* now give me my vnode, it gets returned to me locked */
3277	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3278	if (error)
3279		return (error);
3280 	/*
3281	 * from now on we have to make sure not
3282	 * to forget about the vnode
3283	 * any error that causes an abort must vput(vp)
3284	 * just set error = err and 'goto bad;'.
3285	 */
3286
3287	/*
3288	 * from vn_open
3289	 */
3290	if (vp->v_type == VLNK) {
3291		error = EMLINK;
3292		goto bad;
3293	}
3294	if (vp->v_type == VSOCK) {
3295		error = EOPNOTSUPP;
3296		goto bad;
3297	}
3298	mode = 0;
3299	if (fmode & (FWRITE | O_TRUNC)) {
3300		if (vp->v_type == VDIR) {
3301			error = EISDIR;
3302			goto bad;
3303		}
3304		error = vn_writechk(vp);
3305		if (error)
3306			goto bad;
3307		mode |= VWRITE;
3308	}
3309	if (fmode & FREAD)
3310		mode |= VREAD;
3311	if (fmode & O_APPEND)
3312		mode |= VAPPEND;
3313#ifdef MAC
3314	error = mac_check_vnode_open(td->td_ucred, vp, mode);
3315	if (error)
3316		goto bad;
3317#endif
3318	if (mode) {
3319		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
3320		if (error)
3321			goto bad;
3322	}
3323	if (fmode & O_TRUNC) {
3324		VOP_UNLOCK(vp, 0, td);				/* XXX */
3325		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
3326			vrele(vp);
3327			return (error);
3328		}
3329		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3330		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
3331#ifdef MAC
3332		error = mac_check_vnode_op(td->td_ucred, vp,
3333		    MAC_OP_VNODE_WRITE);
3334		if (error == 0) {
3335#endif
3336			VATTR_NULL(vap);
3337			vap->va_size = 0;
3338			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
3339#ifdef MAC
3340		}
3341#endif
3342		vn_finished_write(mp);
3343		if (error)
3344			goto bad;
3345	}
3346	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
3347	if (error)
3348		goto bad;
3349	/*
3350	 * Make sure that a VM object is created for VMIO support.
3351	 */
3352	if (vn_canvmio(vp) == TRUE) {
3353		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
3354			goto bad;
3355	}
3356	if (fmode & FWRITE)
3357		vp->v_writecount++;
3358
3359	/*
3360	 * end of vn_open code
3361	 */
3362
3363	if ((error = falloc(td, &nfp, &indx)) != 0) {
3364		if (fmode & FWRITE)
3365			vp->v_writecount--;
3366		goto bad;
3367	}
3368	fp = nfp;
3369
3370	/*
3371	 * Hold an extra reference to avoid having fp ripped out
3372	 * from under us while we block in the lock op
3373	 */
3374	fhold(fp);
3375	nfp->f_data = vp;
3376	nfp->f_flag = fmode & FMASK;
3377	nfp->f_ops = &vnops;
3378	nfp->f_type = DTYPE_VNODE;
3379	if (fmode & (O_EXLOCK | O_SHLOCK)) {
3380		lf.l_whence = SEEK_SET;
3381		lf.l_start = 0;
3382		lf.l_len = 0;
3383		if (fmode & O_EXLOCK)
3384			lf.l_type = F_WRLCK;
3385		else
3386			lf.l_type = F_RDLCK;
3387		type = F_FLOCK;
3388		if ((fmode & FNONBLOCK) == 0)
3389			type |= F_WAIT;
3390		VOP_UNLOCK(vp, 0, td);
3391		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3392			    type)) != 0) {
3393			/*
3394			 * The lock request failed.  Normally close the
3395			 * descriptor but handle the case where someone might
3396			 * have dup()d or close()d it when we weren't looking.
3397			 */
3398			FILEDESC_LOCK(fdp);
3399			if (fdp->fd_ofiles[indx] == fp) {
3400				fdp->fd_ofiles[indx] = NULL;
3401				FILEDESC_UNLOCK(fdp);
3402				fdrop(fp, td);
3403			} else
3404				FILEDESC_UNLOCK(fdp);
3405			/*
3406			 * release our private reference
3407			 */
3408			fdrop(fp, td);
3409			return(error);
3410		}
3411		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3412		fp->f_flag |= FHASLOCK;
3413	}
3414	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
3415		vfs_object_create(vp, td, td->td_ucred);
3416
3417	VOP_UNLOCK(vp, 0, td);
3418	fdrop(fp, td);
3419	td->td_retval[0] = indx;
3420	return (0);
3421
3422bad:
3423	vput(vp);
3424	return (error);
3425}
3426
3427/*
3428 * Stat an (NFS) file handle.
3429 */
3430#ifndef _SYS_SYSPROTO_H_
3431struct fhstat_args {
3432	struct fhandle *u_fhp;
3433	struct stat *sb;
3434};
3435#endif
3436int
3437fhstat(td, uap)
3438	struct thread *td;
3439	register struct fhstat_args /* {
3440		syscallarg(struct fhandle *) u_fhp;
3441		syscallarg(struct stat *) sb;
3442	} */ *uap;
3443{
3444	struct stat sb;
3445	fhandle_t fh;
3446	struct mount *mp;
3447	struct vnode *vp;
3448	int error;
3449
3450	/*
3451	 * Must be super user
3452	 */
3453	error = suser(td);
3454	if (error)
3455		return (error);
3456
3457	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
3458	if (error)
3459		return (error);
3460
3461	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3462		return (ESTALE);
3463	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3464		return (error);
3465	error = vn_stat(vp, &sb, td);
3466	vput(vp);
3467	if (error)
3468		return (error);
3469	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
3470	return (error);
3471}
3472
3473/*
3474 * Implement fstatfs() for (NFS) file handles.
3475 */
3476#ifndef _SYS_SYSPROTO_H_
3477struct fhstatfs_args {
3478	struct fhandle *u_fhp;
3479	struct statfs *buf;
3480};
3481#endif
3482int
3483fhstatfs(td, uap)
3484	struct thread *td;
3485	struct fhstatfs_args /* {
3486		syscallarg(struct fhandle) *u_fhp;
3487		syscallarg(struct statfs) *buf;
3488	} */ *uap;
3489{
3490	struct statfs *sp;
3491	struct mount *mp;
3492	struct vnode *vp;
3493	struct statfs sb;
3494	fhandle_t fh;
3495	int error;
3496
3497	/*
3498	 * Must be super user
3499	 */
3500	error = suser(td);
3501	if (error)
3502		return (error);
3503
3504	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
3505		return (error);
3506
3507	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
3508		return (ESTALE);
3509	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
3510		return (error);
3511	mp = vp->v_mount;
3512	sp = &mp->mnt_stat;
3513	vput(vp);
3514#ifdef MAC
3515	error = mac_check_mount_stat(td->td_ucred, mp);
3516	if (error)
3517		return (error);
3518#endif
3519	if ((error = VFS_STATFS(mp, sp, td)) != 0)
3520		return (error);
3521	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3522	if (suser(td)) {
3523		bcopy(sp, &sb, sizeof(sb));
3524		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
3525		sp = &sb;
3526	}
3527	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
3528}
3529
3530/*
3531 * Syscall to push extended attribute configuration information into the
3532 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
3533 * a command (int cmd), and attribute name and misc data.  For now, the
3534 * attribute name is left in userspace for consumption by the VFS_op.
3535 * It will probably be changed to be copied into sysspace by the
3536 * syscall in the future, once issues with various consumers of the
3537 * attribute code have raised their hands.
3538 *
3539 * Currently this is used only by UFS Extended Attributes.
3540 */
3541int
3542extattrctl(td, uap)
3543	struct thread *td;
3544	struct extattrctl_args /* {
3545		syscallarg(const char *) path;
3546		syscallarg(int) cmd;
3547		syscallarg(const char *) filename;
3548		syscallarg(int) attrnamespace;
3549		syscallarg(const char *) attrname;
3550	} */ *uap;
3551{
3552	struct vnode *filename_vp;
3553	struct nameidata nd;
3554	struct mount *mp, *mp_writable;
3555	char attrname[EXTATTR_MAXNAMELEN];
3556	int error;
3557
3558	/*
3559	 * uap->attrname is not always defined.  We check again later when we
3560	 * invoke the VFS call so as to pass in NULL there if needed.
3561	 */
3562	if (uap->attrname != NULL) {
3563		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
3564		    NULL);
3565		if (error)
3566			return (error);
3567	}
3568
3569	/*
3570	 * uap->filename is not always defined.  If it is, grab a vnode lock,
3571	 * which VFS_EXTATTRCTL() will later release.
3572	 */
3573	filename_vp = NULL;
3574	if (uap->filename != NULL) {
3575		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
3576		    uap->filename, td);
3577		if ((error = namei(&nd)) != 0)
3578			return (error);
3579		filename_vp = nd.ni_vp;
3580		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
3581	}
3582
3583	/* uap->path is always defined. */
3584	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3585	if ((error = namei(&nd)) != 0) {
3586		if (filename_vp != NULL)
3587			vput(filename_vp);
3588		return (error);
3589	}
3590	mp = nd.ni_vp->v_mount;
3591	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
3592	NDFREE(&nd, 0);
3593	if (error) {
3594		if (filename_vp != NULL)
3595			vput(filename_vp);
3596		return (error);
3597	}
3598
3599	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
3600	    uap->attrname != NULL ? attrname : NULL, td);
3601
3602	vn_finished_write(mp_writable);
3603	/*
3604	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
3605	 * filename_vp, so vrele it if it is defined.
3606	 */
3607	if (filename_vp != NULL)
3608		vrele(filename_vp);
3609
3610	return (error);
3611}
3612
3613/*-
3614 * Set a named extended attribute on a file or directory
3615 *
3616 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3617 *            kernelspace string pointer "attrname", userspace buffer
3618 *            pointer "data", buffer length "nbytes", thread "td".
3619 * Returns: 0 on success, an error number otherwise
3620 * Locks: none
3621 * References: vp must be a valid reference for the duration of the call
3622 */
3623static int
3624extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3625    void *data, size_t nbytes, struct thread *td)
3626{
3627	struct mount *mp;
3628	struct uio auio;
3629	struct iovec aiov;
3630	ssize_t cnt;
3631	int error;
3632
3633	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3634		return (error);
3635	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3636	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3637
3638	aiov.iov_base = data;
3639	aiov.iov_len = nbytes;
3640	auio.uio_iov = &aiov;
3641	auio.uio_iovcnt = 1;
3642	auio.uio_offset = 0;
3643	if (nbytes > INT_MAX) {
3644		error = EINVAL;
3645		goto done;
3646	}
3647	auio.uio_resid = nbytes;
3648	auio.uio_rw = UIO_WRITE;
3649	auio.uio_segflg = UIO_USERSPACE;
3650	auio.uio_td = td;
3651	cnt = nbytes;
3652
3653#ifdef MAC
3654	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3655	    attrname, &auio);
3656	if (error)
3657		goto done;
3658#endif
3659
3660	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
3661	    td->td_ucred, td);
3662	cnt -= auio.uio_resid;
3663	td->td_retval[0] = cnt;
3664
3665done:
3666	VOP_UNLOCK(vp, 0, td);
3667	vn_finished_write(mp);
3668	return (error);
3669}
3670
3671int
3672extattr_set_file(td, uap)
3673	struct thread *td;
3674	struct extattr_set_file_args /* {
3675		syscallarg(const char *) path;
3676		syscallarg(int) attrnamespace;
3677		syscallarg(const char *) attrname;
3678		syscallarg(void *) data;
3679		syscallarg(size_t) nbytes;
3680	} */ *uap;
3681{
3682	struct nameidata nd;
3683	char attrname[EXTATTR_MAXNAMELEN];
3684	int error;
3685
3686	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3687	if (error)
3688		return (error);
3689
3690	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3691	if ((error = namei(&nd)) != 0)
3692		return (error);
3693	NDFREE(&nd, NDF_ONLY_PNBUF);
3694
3695	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
3696	    uap->data, uap->nbytes, td);
3697
3698	vrele(nd.ni_vp);
3699	return (error);
3700}
3701
3702int
3703extattr_set_fd(td, uap)
3704	struct thread *td;
3705	struct extattr_set_fd_args /* {
3706		syscallarg(int) fd;
3707		syscallarg(int) attrnamespace;
3708		syscallarg(const char *) attrname;
3709		syscallarg(void *) data;
3710		syscallarg(size_t) nbytes;
3711	} */ *uap;
3712{
3713	struct file *fp;
3714	char attrname[EXTATTR_MAXNAMELEN];
3715	int error;
3716
3717	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3718	if (error)
3719		return (error);
3720
3721	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3722		return (error);
3723
3724	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
3725	    attrname, uap->data, uap->nbytes, td);
3726	fdrop(fp, td);
3727
3728	return (error);
3729}
3730
3731/*-
3732 * Get a named extended attribute on a file or directory
3733 *
3734 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3735 *            kernelspace string pointer "attrname", userspace buffer
3736 *            pointer "data", buffer length "nbytes", thread "td".
3737 * Returns: 0 on success, an error number otherwise
3738 * Locks: none
3739 * References: vp must be a valid reference for the duration of the call
3740 */
3741static int
3742extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3743    void *data, size_t nbytes, struct thread *td)
3744{
3745	struct uio auio, *auiop;
3746	struct iovec aiov;
3747	ssize_t cnt;
3748	size_t size, *sizep;
3749	int error;
3750
3751	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
3752	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3753
3754	/*
3755	 * Slightly unusual semantics: if the user provides a NULL data
3756	 * pointer, they don't want to receive the data, just the
3757	 * maximum read length.
3758	 */
3759	auiop = NULL;
3760	sizep = NULL;
3761	cnt = 0;
3762	if (data != NULL) {
3763		aiov.iov_base = data;
3764		aiov.iov_len = nbytes;
3765		auio.uio_iov = &aiov;
3766		auio.uio_offset = 0;
3767		if (nbytes > INT_MAX) {
3768			error = EINVAL;
3769			goto done;
3770		}
3771		auio.uio_resid = nbytes;
3772		auio.uio_rw = UIO_READ;
3773		auio.uio_segflg = UIO_USERSPACE;
3774		auio.uio_td = td;
3775		auiop = &auio;
3776		cnt = nbytes;
3777	} else
3778		sizep = &size;
3779
3780#ifdef MAC
3781	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
3782	    attrname, &auio);
3783	if (error)
3784		goto done;
3785#endif
3786
3787	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
3788	    td->td_ucred, td);
3789
3790	if (auiop != NULL) {
3791		cnt -= auio.uio_resid;
3792		td->td_retval[0] = cnt;
3793	} else
3794		td->td_retval[0] = size;
3795
3796done:
3797	VOP_UNLOCK(vp, 0, td);
3798	return (error);
3799}
3800
3801int
3802extattr_get_file(td, uap)
3803	struct thread *td;
3804	struct extattr_get_file_args /* {
3805		syscallarg(const char *) path;
3806		syscallarg(int) attrnamespace;
3807		syscallarg(const char *) attrname;
3808		syscallarg(void *) data;
3809		syscallarg(size_t) nbytes;
3810	} */ *uap;
3811{
3812	struct nameidata nd;
3813	char attrname[EXTATTR_MAXNAMELEN];
3814	int error;
3815
3816	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3817	if (error)
3818		return (error);
3819
3820	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3821	if ((error = namei(&nd)) != 0)
3822		return (error);
3823	NDFREE(&nd, NDF_ONLY_PNBUF);
3824
3825	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
3826	    uap->data, uap->nbytes, td);
3827
3828	vrele(nd.ni_vp);
3829	return (error);
3830}
3831
3832int
3833extattr_get_fd(td, uap)
3834	struct thread *td;
3835	struct extattr_get_fd_args /* {
3836		syscallarg(int) fd;
3837		syscallarg(int) attrnamespace;
3838		syscallarg(const char *) attrname;
3839		syscallarg(void *) data;
3840		syscallarg(size_t) nbytes;
3841	} */ *uap;
3842{
3843	struct file *fp;
3844	char attrname[EXTATTR_MAXNAMELEN];
3845	int error;
3846
3847	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3848	if (error)
3849		return (error);
3850
3851	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3852		return (error);
3853
3854	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
3855	    attrname, uap->data, uap->nbytes, td);
3856
3857	fdrop(fp, td);
3858	return (error);
3859}
3860
3861/*
3862 * extattr_delete_vp(): Delete a named extended attribute on a file or
3863 *                      directory
3864 *
3865 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
3866 *            kernelspace string pointer "attrname", proc "p"
3867 * Returns: 0 on success, an error number otherwise
3868 * Locks: none
3869 * References: vp must be a valid reference for the duration of the call
3870 */
3871static int
3872extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
3873    struct thread *td)
3874{
3875	struct mount *mp;
3876	int error;
3877
3878	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3879		return (error);
3880	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3881	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3882
3883#ifdef MAC
3884	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
3885	    attrname, NULL);
3886#endif
3887
3888	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
3889	    td);
3890
3891	VOP_UNLOCK(vp, 0, td);
3892	vn_finished_write(mp);
3893	return (error);
3894}
3895
3896int
3897extattr_delete_file(td, uap)
3898	struct thread *td;
3899	struct extattr_delete_file_args /* {
3900		syscallarg(const char *) path;
3901		syscallarg(int) attrnamespace;
3902		syscallarg(const char *) attrname;
3903	} */ *uap;
3904{
3905	struct nameidata nd;
3906	char attrname[EXTATTR_MAXNAMELEN];
3907	int error;
3908
3909	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3910	if (error)
3911		return(error);
3912
3913	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
3914	if ((error = namei(&nd)) != 0)
3915		return(error);
3916	NDFREE(&nd, NDF_ONLY_PNBUF);
3917
3918	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
3919
3920	vrele(nd.ni_vp);
3921	return(error);
3922}
3923
3924int
3925extattr_delete_fd(td, uap)
3926	struct thread *td;
3927	struct extattr_delete_fd_args /* {
3928		syscallarg(int) fd;
3929		syscallarg(int) attrnamespace;
3930		syscallarg(const char *) attrname;
3931	} */ *uap;
3932{
3933	struct file *fp;
3934	struct vnode *vp;
3935	char attrname[EXTATTR_MAXNAMELEN];
3936	int error;
3937
3938	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
3939	if (error)
3940		return (error);
3941
3942	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3943		return (error);
3944	vp = (struct vnode *)fp->f_data;
3945
3946	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
3947
3948	fdrop(fp, td);
3949	return (error);
3950}
3951