vfs_syscalls.c revision 128552
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_syscalls.c 128552 2004-04-22 15:40:27Z pjd $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/mac.h>
49#include <sys/malloc.h>
50#include <sys/mount.h>
51#include <sys/mutex.h>
52#include <sys/sysproto.h>
53#include <sys/namei.h>
54#include <sys/filedesc.h>
55#include <sys/kernel.h>
56#include <sys/fcntl.h>
57#include <sys/file.h>
58#include <sys/limits.h>
59#include <sys/linker.h>
60#include <sys/stat.h>
61#include <sys/sx.h>
62#include <sys/unistd.h>
63#include <sys/vnode.h>
64#include <sys/proc.h>
65#include <sys/dirent.h>
66#include <sys/extattr.h>
67#include <sys/jail.h>
68#include <sys/syscallsubr.h>
69#include <sys/sysctl.h>
70
71#include <machine/stdarg.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/uma.h>
77
78static int chroot_refuse_vdir_fds(struct filedesc *fdp);
79static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
80static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
81static int setfmode(struct thread *td, struct vnode *, int);
82static int setfflags(struct thread *td, struct vnode *, int);
83static int setutimes(struct thread *td, struct vnode *,
84    const struct timespec *, int, int);
85static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
86    struct thread *td);
87
88static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
89    size_t nbytes, struct thread *td);
90
91int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92int (*softdep_fsync_hook)(struct vnode *);
93
94/*
95 * The module initialization routine for POSIX asynchronous I/O will
96 * set this to the version of AIO that it implements.  (Zero means
97 * that it is not implemented.)  This value is used here by pathconf()
98 * and in kern_descrip.c by fpathconf().
99 */
100int async_io_version;
101
102/*
103 * Sync each mounted filesystem.
104 */
105#ifndef _SYS_SYSPROTO_H_
106struct sync_args {
107	int     dummy;
108};
109#endif
110
111#ifdef DEBUG
112static int syncprt = 0;
113SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
114#endif
115
116/* ARGSUSED */
117int
118sync(td, uap)
119	struct thread *td;
120	struct sync_args *uap;
121{
122	struct mount *mp, *nmp;
123	int asyncflag;
124
125	mtx_lock(&mountlist_mtx);
126	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128			nmp = TAILQ_NEXT(mp, mnt_list);
129			continue;
130		}
131		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133			asyncflag = mp->mnt_flag & MNT_ASYNC;
134			mp->mnt_flag &= ~MNT_ASYNC;
135			vfs_msync(mp, MNT_NOWAIT);
136			VFS_SYNC(mp, MNT_NOWAIT,
137			    ((td != NULL) ? td->td_ucred : NOCRED), td);
138			mp->mnt_flag |= asyncflag;
139			vn_finished_write(mp);
140		}
141		mtx_lock(&mountlist_mtx);
142		nmp = TAILQ_NEXT(mp, mnt_list);
143		vfs_unbusy(mp, td);
144	}
145	mtx_unlock(&mountlist_mtx);
146#if 0
147/*
148 * XXX don't call vfs_bufstats() yet because that routine
149 * was not imported in the Lite2 merge.
150 */
151#ifdef DIAGNOSTIC
152	if (syncprt)
153		vfs_bufstats();
154#endif /* DIAGNOSTIC */
155#endif
156	return (0);
157}
158
159/* XXX PRISON: could be per prison flag */
160static int prison_quotas;
161#if 0
162SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163#endif
164
165/*
166 * Change filesystem quotas.
167 */
168#ifndef _SYS_SYSPROTO_H_
169struct quotactl_args {
170	char *path;
171	int cmd;
172	int uid;
173	caddr_t arg;
174};
175#endif
176int
177quotactl(td, uap)
178	struct thread *td;
179	register struct quotactl_args /* {
180		char *path;
181		int cmd;
182		int uid;
183		caddr_t arg;
184	} */ *uap;
185{
186	struct mount *mp;
187	int error;
188	struct nameidata nd;
189
190	if (jailed(td->td_ucred) && !prison_quotas)
191		return (EPERM);
192	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
193	if ((error = namei(&nd)) != 0)
194		return (error);
195	NDFREE(&nd, NDF_ONLY_PNBUF);
196	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
197	vrele(nd.ni_vp);
198	if (error)
199		return (error);
200	if (mp == NULL)
201		return (EOPNOTSUPP);
202	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203	vn_finished_write(mp);
204	return (error);
205}
206
207/*
208 * Get filesystem statistics.
209 */
210#ifndef _SYS_SYSPROTO_H_
211struct statfs_args {
212	char *path;
213	struct statfs *buf;
214};
215#endif
216int
217statfs(td, uap)
218	struct thread *td;
219	register struct statfs_args /* {
220		char *path;
221		struct statfs *buf;
222	} */ *uap;
223{
224	struct mount *mp;
225	struct statfs *sp, sb;
226	int error;
227	struct nameidata nd;
228
229	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
230	if ((error = namei(&nd)) != 0)
231		return (error);
232	mp = nd.ni_vp->v_mount;
233	sp = &mp->mnt_stat;
234	NDFREE(&nd, NDF_ONLY_PNBUF);
235	vrele(nd.ni_vp);
236#ifdef MAC
237	error = mac_check_mount_stat(td->td_ucred, mp);
238	if (error)
239		return (error);
240#endif
241	/*
242	 * Set these in case the underlying filesystem fails to do so.
243	 */
244	sp->f_version = STATFS_VERSION;
245	sp->f_namemax = NAME_MAX;
246	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
247	error = VFS_STATFS(mp, sp, td);
248	if (error)
249		return (error);
250	if (suser(td)) {
251		bcopy(sp, &sb, sizeof(sb));
252		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
253		sp = &sb;
254	}
255	return (copyout(sp, uap->buf, sizeof(*sp)));
256}
257
/*
 * Get filesystem statistics for the filesystem an open file descriptor
 * refers to.
 */
261#ifndef _SYS_SYSPROTO_H_
262struct fstatfs_args {
263	int fd;
264	struct statfs *buf;
265};
266#endif
267int
268fstatfs(td, uap)
269	struct thread *td;
270	register struct fstatfs_args /* {
271		int fd;
272		struct statfs *buf;
273	} */ *uap;
274{
275	struct file *fp;
276	struct mount *mp;
277	struct statfs *sp, sb;
278	int error;
279
280	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
281		return (error);
282	mp = fp->f_vnode->v_mount;
283	fdrop(fp, td);
284	if (mp == NULL)
285		return (EBADF);
286#ifdef MAC
287	error = mac_check_mount_stat(td->td_ucred, mp);
288	if (error)
289		return (error);
290#endif
291	sp = &mp->mnt_stat;
292	/*
293	 * Set these in case the underlying filesystem fails to do so.
294	 */
295	sp->f_version = STATFS_VERSION;
296	sp->f_namemax = NAME_MAX;
297	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
298	error = VFS_STATFS(mp, sp, td);
299	if (error)
300		return (error);
301	if (suser(td)) {
302		bcopy(sp, &sb, sizeof(sb));
303		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
304		sp = &sb;
305	}
306	return (copyout(sp, uap->buf, sizeof(*sp)));
307}
308
309/*
310 * Get statistics on all filesystems.
311 */
312#ifndef _SYS_SYSPROTO_H_
313struct getfsstat_args {
314	struct statfs *buf;
315	long bufsize;
316	int flags;
317};
318#endif
319int
320getfsstat(td, uap)
321	struct thread *td;
322	register struct getfsstat_args /* {
323		struct statfs *buf;
324		long bufsize;
325		int flags;
326	} */ *uap;
327{
328	struct mount *mp, *nmp;
329	struct statfs *sp, sb;
330	caddr_t sfsp;
331	long count, maxcount, error;
332
333	maxcount = uap->bufsize / sizeof(struct statfs);
334	sfsp = (caddr_t)uap->buf;
335	count = 0;
336	mtx_lock(&mountlist_mtx);
337	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
338		if (!prison_check_mount(td->td_ucred, mp)) {
339			nmp = TAILQ_NEXT(mp, mnt_list);
340			continue;
341		}
342#ifdef MAC
343		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
344			nmp = TAILQ_NEXT(mp, mnt_list);
345			continue;
346		}
347#endif
348		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
349			nmp = TAILQ_NEXT(mp, mnt_list);
350			continue;
351		}
352		if (sfsp && count < maxcount) {
353			sp = &mp->mnt_stat;
354			/*
355			 * Set these in case the underlying filesystem
356			 * fails to do so.
357			 */
358			sp->f_version = STATFS_VERSION;
359			sp->f_namemax = NAME_MAX;
360			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
361			/*
362			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
363			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
364			 * overrides MNT_WAIT.
365			 */
366			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
367			    (uap->flags & MNT_WAIT)) &&
368			    (error = VFS_STATFS(mp, sp, td))) {
369				mtx_lock(&mountlist_mtx);
370				nmp = TAILQ_NEXT(mp, mnt_list);
371				vfs_unbusy(mp, td);
372				continue;
373			}
374			if (suser(td)) {
375				bcopy(sp, &sb, sizeof(sb));
376				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
377				sp = &sb;
378			}
379			error = copyout(sp, sfsp, sizeof(*sp));
380			if (error) {
381				vfs_unbusy(mp, td);
382				return (error);
383			}
384			sfsp += sizeof(*sp);
385		}
386		count++;
387		mtx_lock(&mountlist_mtx);
388		nmp = TAILQ_NEXT(mp, mnt_list);
389		vfs_unbusy(mp, td);
390	}
391	mtx_unlock(&mountlist_mtx);
392	if (sfsp && count > maxcount)
393		td->td_retval[0] = maxcount;
394	else
395		td->td_retval[0] = count;
396	return (0);
397}
398
399#ifdef COMPAT_FREEBSD4
400/*
401 * Get old format filesystem statistics.
402 */
403static void cvtstatfs(struct thread *, struct statfs *, struct ostatfs *);
404
405#ifndef _SYS_SYSPROTO_H_
406struct freebsd4_statfs_args {
407	char *path;
408	struct ostatfs *buf;
409};
410#endif
411int
412freebsd4_statfs(td, uap)
413	struct thread *td;
414	struct freebsd4_statfs_args /* {
415		char *path;
416		struct ostatfs *buf;
417	} */ *uap;
418{
419	struct mount *mp;
420	struct statfs *sp;
421	struct ostatfs osb;
422	int error;
423	struct nameidata nd;
424
425	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
426	if ((error = namei(&nd)) != 0)
427		return (error);
428	mp = nd.ni_vp->v_mount;
429	sp = &mp->mnt_stat;
430	NDFREE(&nd, NDF_ONLY_PNBUF);
431	vrele(nd.ni_vp);
432#ifdef MAC
433	error = mac_check_mount_stat(td->td_ucred, mp);
434	if (error)
435		return (error);
436#endif
437	error = VFS_STATFS(mp, sp, td);
438	if (error)
439		return (error);
440	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
441	cvtstatfs(td, sp, &osb);
442	return (copyout(&osb, uap->buf, sizeof(osb)));
443}
444
/*
 * Get old format filesystem statistics for the filesystem an open file
 * descriptor refers to.
 */
448#ifndef _SYS_SYSPROTO_H_
449struct freebsd4_fstatfs_args {
450	int fd;
451	struct ostatfs *buf;
452};
453#endif
454int
455freebsd4_fstatfs(td, uap)
456	struct thread *td;
457	struct freebsd4_fstatfs_args /* {
458		int fd;
459		struct ostatfs *buf;
460	} */ *uap;
461{
462	struct file *fp;
463	struct mount *mp;
464	struct statfs *sp;
465	struct ostatfs osb;
466	int error;
467
468	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
469		return (error);
470	mp = fp->f_vnode->v_mount;
471	fdrop(fp, td);
472	if (mp == NULL)
473		return (EBADF);
474#ifdef MAC
475	error = mac_check_mount_stat(td->td_ucred, mp);
476	if (error)
477		return (error);
478#endif
479	sp = &mp->mnt_stat;
480	error = VFS_STATFS(mp, sp, td);
481	if (error)
482		return (error);
483	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
484	cvtstatfs(td, sp, &osb);
485	return (copyout(&osb, uap->buf, sizeof(osb)));
486}
487
488/*
489 * Get statistics on all filesystems.
490 */
491#ifndef _SYS_SYSPROTO_H_
492struct freebsd4_getfsstat_args {
493	struct ostatfs *buf;
494	long bufsize;
495	int flags;
496};
497#endif
498int
499freebsd4_getfsstat(td, uap)
500	struct thread *td;
501	register struct freebsd4_getfsstat_args /* {
502		struct ostatfs *buf;
503		long bufsize;
504		int flags;
505	} */ *uap;
506{
507	struct mount *mp, *nmp;
508	struct statfs *sp;
509	struct ostatfs osb;
510	caddr_t sfsp;
511	long count, maxcount, error;
512
513	maxcount = uap->bufsize / sizeof(struct ostatfs);
514	sfsp = (caddr_t)uap->buf;
515	count = 0;
516	mtx_lock(&mountlist_mtx);
517	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
518		if (!prison_check_mount(td->td_ucred, mp)) {
519			nmp = TAILQ_NEXT(mp, mnt_list);
520			continue;
521		}
522#ifdef MAC
523		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
524			nmp = TAILQ_NEXT(mp, mnt_list);
525			continue;
526		}
527#endif
528		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
529			nmp = TAILQ_NEXT(mp, mnt_list);
530			continue;
531		}
532		if (sfsp && count < maxcount) {
533			sp = &mp->mnt_stat;
534			/*
535			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
536			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
537			 * overrides MNT_WAIT.
538			 */
539			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
540			    (uap->flags & MNT_WAIT)) &&
541			    (error = VFS_STATFS(mp, sp, td))) {
542				mtx_lock(&mountlist_mtx);
543				nmp = TAILQ_NEXT(mp, mnt_list);
544				vfs_unbusy(mp, td);
545				continue;
546			}
547			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
548			cvtstatfs(td, sp, &osb);
549			error = copyout(&osb, sfsp, sizeof(osb));
550			if (error) {
551				vfs_unbusy(mp, td);
552				return (error);
553			}
554			sfsp += sizeof(osb);
555		}
556		count++;
557		mtx_lock(&mountlist_mtx);
558		nmp = TAILQ_NEXT(mp, mnt_list);
559		vfs_unbusy(mp, td);
560	}
561	mtx_unlock(&mountlist_mtx);
562	if (sfsp && count > maxcount)
563		td->td_retval[0] = maxcount;
564	else
565		td->td_retval[0] = count;
566	return (0);
567}
568
569/*
570 * Implement fstatfs() for (NFS) file handles.
571 */
572#ifndef _SYS_SYSPROTO_H_
573struct freebsd4_fhstatfs_args {
574	struct fhandle *u_fhp;
575	struct ostatfs *buf;
576};
577#endif
578int
579freebsd4_fhstatfs(td, uap)
580	struct thread *td;
581	struct freebsd4_fhstatfs_args /* {
582		struct fhandle *u_fhp;
583		struct ostatfs *buf;
584	} */ *uap;
585{
586	struct statfs *sp;
587	struct mount *mp;
588	struct vnode *vp;
589	struct ostatfs osb;
590	fhandle_t fh;
591	int error;
592
593	error = suser(td);
594	if (error)
595		return (error);
596	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
597		return (error);
598	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
599		return (ESTALE);
600	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
601		return (error);
602	mp = vp->v_mount;
603	sp = &mp->mnt_stat;
604	vput(vp);
605#ifdef MAC
606	error = mac_check_mount_stat(td->td_ucred, mp);
607	if (error)
608		return (error);
609#endif
610	if ((error = VFS_STATFS(mp, sp, td)) != 0)
611		return (error);
612	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
613	cvtstatfs(td, sp, &osb);
614	return (copyout(&osb, uap->buf, sizeof(osb)));
615}
616
617/*
618 * Convert a new format statfs structure to an old format statfs structure.
619 */
620static void
621cvtstatfs(td, nsp, osp)
622	struct thread *td;
623	struct statfs *nsp;
624	struct ostatfs *osp;
625{
626
627	bzero(osp, sizeof(*osp));
628	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
629	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
630	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
631	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
632	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
633	osp->f_files = MIN(nsp->f_files, LONG_MAX);
634	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
635	osp->f_owner = nsp->f_owner;
636	osp->f_type = nsp->f_type;
637	osp->f_flags = nsp->f_flags;
638	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
639	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
640	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
641	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
642	bcopy(nsp->f_fstypename, osp->f_fstypename,
643	    MIN(MFSNAMELEN, OMNAMELEN));
644	bcopy(nsp->f_mntonname, osp->f_mntonname,
645	    MIN(MFSNAMELEN, OMNAMELEN));
646	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
647	    MIN(MFSNAMELEN, OMNAMELEN));
648	if (suser(td)) {
649		osp->f_fsid.val[0] = osp->f_fsid.val[1] = 0;
650	} else {
651		osp->f_fsid = nsp->f_fsid;
652	}
653}
654#endif /* COMPAT_FREEBSD4 */
655
656/*
657 * Change current working directory to a given file descriptor.
658 */
659#ifndef _SYS_SYSPROTO_H_
660struct fchdir_args {
661	int	fd;
662};
663#endif
664int
665fchdir(td, uap)
666	struct thread *td;
667	struct fchdir_args /* {
668		int fd;
669	} */ *uap;
670{
671	register struct filedesc *fdp = td->td_proc->p_fd;
672	struct vnode *vp, *tdp, *vpold;
673	struct mount *mp;
674	struct file *fp;
675	int error;
676
677	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
678		return (error);
679	vp = fp->f_vnode;
680	VREF(vp);
681	fdrop(fp, td);
682	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
683	if (vp->v_type != VDIR)
684		error = ENOTDIR;
685#ifdef MAC
686	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
687	}
688#endif
689	else
690		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
691	while (!error && (mp = vp->v_mountedhere) != NULL) {
692		if (vfs_busy(mp, 0, 0, td))
693			continue;
694		error = VFS_ROOT(mp, &tdp);
695		vfs_unbusy(mp, td);
696		if (error)
697			break;
698		vput(vp);
699		vp = tdp;
700	}
701	if (error) {
702		vput(vp);
703		return (error);
704	}
705	VOP_UNLOCK(vp, 0, td);
706	FILEDESC_LOCK(fdp);
707	vpold = fdp->fd_cdir;
708	fdp->fd_cdir = vp;
709	FILEDESC_UNLOCK(fdp);
710	vrele(vpold);
711	return (0);
712}
713
714/*
715 * Change current working directory (``.'').
716 */
717#ifndef _SYS_SYSPROTO_H_
718struct chdir_args {
719	char	*path;
720};
721#endif
722int
723chdir(td, uap)
724	struct thread *td;
725	struct chdir_args /* {
726		char *path;
727	} */ *uap;
728{
729
730	return (kern_chdir(td, uap->path, UIO_USERSPACE));
731}
732
733int
734kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
735{
736	register struct filedesc *fdp = td->td_proc->p_fd;
737	int error;
738	struct nameidata nd;
739	struct vnode *vp;
740
741	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td);
742	if ((error = namei(&nd)) != 0)
743		return (error);
744	if ((error = change_dir(nd.ni_vp, td)) != 0) {
745		vput(nd.ni_vp);
746		NDFREE(&nd, NDF_ONLY_PNBUF);
747		return (error);
748	}
749	VOP_UNLOCK(nd.ni_vp, 0, td);
750	NDFREE(&nd, NDF_ONLY_PNBUF);
751	FILEDESC_LOCK(fdp);
752	vp = fdp->fd_cdir;
753	fdp->fd_cdir = nd.ni_vp;
754	FILEDESC_UNLOCK(fdp);
755	vrele(vp);
756	return (0);
757}
758
759/*
760 * Helper function for raised chroot(2) security function:  Refuse if
761 * any filedescriptors are open directories.
762 */
763static int
764chroot_refuse_vdir_fds(fdp)
765	struct filedesc *fdp;
766{
767	struct vnode *vp;
768	struct file *fp;
769	int fd;
770
771	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
772	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
773		fp = fget_locked(fdp, fd);
774		if (fp == NULL)
775			continue;
776		if (fp->f_type == DTYPE_VNODE) {
777			vp = fp->f_vnode;
778			if (vp->v_type == VDIR)
779				return (EPERM);
780		}
781	}
782	return (0);
783}
784
785/*
786 * This sysctl determines if we will allow a process to chroot(2) if it
787 * has a directory open:
788 *	0: disallowed for all processes.
789 *	1: allowed for processes that were not already chroot(2)'ed.
790 *	2: allowed for all processes.
791 */
792
793static int chroot_allow_open_directories = 1;
794
795SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
796     &chroot_allow_open_directories, 0, "");
797
798/*
799 * Change notion of root (``/'') directory.
800 */
801#ifndef _SYS_SYSPROTO_H_
802struct chroot_args {
803	char	*path;
804};
805#endif
806int
807chroot(td, uap)
808	struct thread *td;
809	struct chroot_args /* {
810		char *path;
811	} */ *uap;
812{
813	int error;
814	struct nameidata nd;
815
816	error = suser_cred(td->td_ucred, PRISON_ROOT);
817	if (error)
818		return (error);
819	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
820	mtx_lock(&Giant);
821	error = namei(&nd);
822	if (error)
823		goto error;
824	if ((error = change_dir(nd.ni_vp, td)) != 0)
825		goto e_vunlock;
826#ifdef MAC
827	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
828		goto e_vunlock;
829#endif
830	VOP_UNLOCK(nd.ni_vp, 0, td);
831	error = change_root(nd.ni_vp, td);
832	vrele(nd.ni_vp);
833	NDFREE(&nd, NDF_ONLY_PNBUF);
834	mtx_unlock(&Giant);
835	return (error);
836e_vunlock:
837	vput(nd.ni_vp);
838error:
839	mtx_unlock(&Giant);
840	NDFREE(&nd, NDF_ONLY_PNBUF);
841	return (error);
842}
843
844/*
845 * Common routine for chroot and chdir.  Callers must provide a locked vnode
846 * instance.
847 */
848int
849change_dir(vp, td)
850	struct vnode *vp;
851	struct thread *td;
852{
853	int error;
854
855	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
856	if (vp->v_type != VDIR)
857		return (ENOTDIR);
858#ifdef MAC
859	error = mac_check_vnode_chdir(td->td_ucred, vp);
860	if (error)
861		return (error);
862#endif
863	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
864	return (error);
865}
866
867/*
868 * Common routine for kern_chroot() and jail_attach().  The caller is
869 * responsible for invoking suser() and mac_check_chroot() to authorize this
870 * operation.
871 */
872int
873change_root(vp, td)
874	struct vnode *vp;
875	struct thread *td;
876{
877	struct filedesc *fdp;
878	struct vnode *oldvp;
879	int error;
880
881	mtx_assert(&Giant, MA_OWNED);
882	fdp = td->td_proc->p_fd;
883	FILEDESC_LOCK(fdp);
884	if (chroot_allow_open_directories == 0 ||
885	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
886		error = chroot_refuse_vdir_fds(fdp);
887		if (error) {
888			FILEDESC_UNLOCK(fdp);
889			return (error);
890		}
891	}
892	oldvp = fdp->fd_rdir;
893	fdp->fd_rdir = vp;
894	VREF(fdp->fd_rdir);
895	if (!fdp->fd_jdir) {
896		fdp->fd_jdir = vp;
897		VREF(fdp->fd_jdir);
898	}
899	FILEDESC_UNLOCK(fdp);
900	vrele(oldvp);
901	return (0);
902}
903
904/*
905 * Check permissions, allocate an open file structure,
906 * and call the device open routine if any.
907 *
908 * MP SAFE
909 */
910#ifndef _SYS_SYSPROTO_H_
911struct open_args {
912	char	*path;
913	int	flags;
914	int	mode;
915};
916#endif
917int
918open(td, uap)
919	struct thread *td;
920	register struct open_args /* {
921		char *path;
922		int flags;
923		int mode;
924	} */ *uap;
925{
926
927	return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode));
928}
929
/*
 * Open (and possibly create or truncate) the file named by `path' and
 * install it in a new descriptor of td's process.
 *
 * td      - calling thread; the new descriptor number is stored in
 *           td->td_retval[0].
 * path    - pathname, located in the address space named by `pathseg'.
 * flags   - open(2) flags; a value with all O_ACCMODE bits set is
 *           rejected with EINVAL.
 * mode    - creation mode; filtered through fd_cmask and ALLPERMS, with
 *           S_ISTXT cleared, before being handed to vn_open().
 *
 * Returns 0 on success or an errno value.  ERESTART from the open path
 * is reported as EINTR.
 */
int
kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
    int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct vattr vat;
	struct mount *mp;
	int cmode;
	struct file *nfp;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;

	if ((flags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	/* NOTE(review): FFLAGS() presumably maps O_* to F* flags -- confirm. */
	flags = FFLAGS(flags);
	error = falloc(td, &nfp, &indx);
	if (error)
		return (error);
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	mtx_lock(&Giant);
	error = vn_open(&nd, &flags, cmode, indx);
	if (error) {
		mtx_unlock(&Giant);

		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			fdrop(fp, td);
			td->td_retval[0] = indx;
			return (0);
		}

		/*
		 * release our own reference
		 */
		fdrop(fp, td);

		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 */
		if ((error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
		    (error =
			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
			td->td_retval[0] = indx;
			return (0);
		}
		/*
		 * Clean up the descriptor, but only if another thread hadn't
		 * replaced or closed it.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[indx] == fp) {
			fdp->fd_ofiles[indx] = NULL;
			fdunused(fdp, indx);
			FILEDESC_UNLOCK(fdp);
			/* Drop the reference the descriptor table held. */
			fdrop(fp, td);
		} else {
			FILEDESC_UNLOCK(fdp);
		}

		if (error == ERESTART)
			error = EINTR;
		return (error);
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * There should be 2 references on the file, one from the descriptor
	 * table, and one for us.
	 *
	 * Handle the case where someone closed the file (via its file
	 * descriptor) while we were blocked.  The end result should look
	 * like opening the file succeeded but it was immediately closed.
	 * We call vn_close() manually because we haven't yet hooked up
	 * the various 'struct file' fields.
	 */
	FILEDESC_LOCK(fdp);
	FILE_LOCK(fp);
	if (fp->f_count == 1) {
		KASSERT(fdp->fd_ofiles[indx] != fp,
		    ("Open file descriptor lost all refs"));
		FILEDESC_UNLOCK(fdp);
		FILE_UNLOCK(fp);
		VOP_UNLOCK(vp, 0, td);
		vn_close(vp, flags & FMASK, fp->f_cred, td);
		mtx_unlock(&Giant);
		fdrop(fp, td);
		td->td_retval[0] = indx;
		return (0);
	}
	/* Hook the vnode up to the file; FIFOs get their own file type. */
	fp->f_vnode = vp;
	fp->f_data = vp;
	fp->f_flag = flags & FMASK;
	fp->f_ops = &vnops;
	fp->f_seqcount = 1;
	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
	FILEDESC_UNLOCK(fdp);
	FILE_UNLOCK(fp);

	/* assert that vn_open created a backing object if one is needed */
	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
		("open: vmio vnode has no backing object after vn_open"));

	VOP_UNLOCK(vp, 0, td);
	/*
	 * O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock, write
	 * lock for O_EXLOCK and read lock otherwise.  F_WAIT is requested
	 * unless FNONBLOCK is set.
	 */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0)
			goto bad;
		fp->f_flag |= FHASLOCK;
	}
	/* O_TRUNC: set the file size to zero with the vnode locked. */
	if (flags & O_TRUNC) {
		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
			goto bad;
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		VATTR_NULL(&vat);
		vat.va_size = 0;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
#ifdef MAC
		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
		if (error == 0)
#endif
			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
		VOP_UNLOCK(vp, 0, td);
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	mtx_unlock(&Giant);
	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/*
	 * Failure after the file was set up: take the descriptor back out
	 * of the table (unless someone else already replaced or closed
	 * it), dropping the table's reference, then drop our own.
	 */
	mtx_unlock(&Giant);
	FILEDESC_LOCK(fdp);
	if (fdp->fd_ofiles[indx] == fp) {
		fdp->fd_ofiles[indx] = NULL;
		fdunused(fdp, indx);
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
	} else {
		FILEDESC_UNLOCK(fdp);
	}
	fdrop(fp, td);
	return (error);
}
1105
1106#ifdef COMPAT_43
1107/*
1108 * Create a file.
1109 *
1110 * MP SAFE
1111 */
1112#ifndef _SYS_SYSPROTO_H_
1113struct ocreat_args {
1114	char	*path;
1115	int	mode;
1116};
1117#endif
1118int
1119ocreat(td, uap)
1120	struct thread *td;
1121	register struct ocreat_args /* {
1122		char *path;
1123		int mode;
1124	} */ *uap;
1125{
1126
1127	return (kern_open(td, uap->path, UIO_USERSPACE,
1128	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1129}
1130#endif /* COMPAT_43 */
1131
1132/*
1133 * Create a special file.
1134 */
1135#ifndef _SYS_SYSPROTO_H_
1136struct mknod_args {
1137	char	*path;
1138	int	mode;
1139	int	dev;
1140};
1141#endif
1142int
1143mknod(td, uap)
1144	struct thread *td;
1145	register struct mknod_args /* {
1146		char *path;
1147		int mode;
1148		int dev;
1149	} */ *uap;
1150{
1151
1152	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1153}
1154
1155int
1156kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1157    int dev)
1158{
1159	struct vnode *vp;
1160	struct mount *mp;
1161	struct vattr vattr;
1162	int error;
1163	int whiteout = 0;
1164	struct nameidata nd;
1165
1166	switch (mode & S_IFMT) {
1167	case S_IFCHR:
1168	case S_IFBLK:
1169		error = suser(td);
1170		break;
1171	default:
1172		error = suser_cred(td->td_ucred, PRISON_ROOT);
1173		break;
1174	}
1175	if (error)
1176		return (error);
1177restart:
1178	bwillwrite();
1179	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1180	if ((error = namei(&nd)) != 0)
1181		return (error);
1182	vp = nd.ni_vp;
1183	if (vp != NULL) {
1184		NDFREE(&nd, NDF_ONLY_PNBUF);
1185		vrele(vp);
1186		if (vp == nd.ni_dvp)
1187			vrele(nd.ni_dvp);
1188		else
1189			vput(nd.ni_dvp);
1190		return (EEXIST);
1191	} else {
1192		VATTR_NULL(&vattr);
1193		FILEDESC_LOCK(td->td_proc->p_fd);
1194		vattr.va_mode = (mode & ALLPERMS) &
1195		    ~td->td_proc->p_fd->fd_cmask;
1196		FILEDESC_UNLOCK(td->td_proc->p_fd);
1197		vattr.va_rdev = dev;
1198		whiteout = 0;
1199
1200		switch (mode & S_IFMT) {
1201		case S_IFMT:	/* used by badsect to flag bad sectors */
1202			vattr.va_type = VBAD;
1203			break;
1204		case S_IFCHR:
1205			vattr.va_type = VCHR;
1206			break;
1207		case S_IFBLK:
1208			vattr.va_type = VBLK;
1209			break;
1210		case S_IFWHT:
1211			whiteout = 1;
1212			break;
1213		default:
1214			error = EINVAL;
1215			break;
1216		}
1217	}
1218	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1219		NDFREE(&nd, NDF_ONLY_PNBUF);
1220		vput(nd.ni_dvp);
1221		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1222			return (error);
1223		goto restart;
1224	}
1225#ifdef MAC
1226	if (error == 0 && !whiteout)
1227		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1228		    &nd.ni_cnd, &vattr);
1229#endif
1230	if (!error) {
1231		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1232		if (whiteout)
1233			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1234		else {
1235			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1236						&nd.ni_cnd, &vattr);
1237			if (error == 0)
1238				vput(nd.ni_vp);
1239		}
1240	}
1241	NDFREE(&nd, NDF_ONLY_PNBUF);
1242	vput(nd.ni_dvp);
1243	vn_finished_write(mp);
1244	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1245	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1246	return (error);
1247}
1248
1249/*
1250 * Create a named pipe.
1251 */
1252#ifndef _SYS_SYSPROTO_H_
1253struct mkfifo_args {
1254	char	*path;
1255	int	mode;
1256};
1257#endif
1258int
1259mkfifo(td, uap)
1260	struct thread *td;
1261	register struct mkfifo_args /* {
1262		char *path;
1263		int mode;
1264	} */ *uap;
1265{
1266
1267	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1268}
1269
1270int
1271kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1272{
1273	struct mount *mp;
1274	struct vattr vattr;
1275	int error;
1276	struct nameidata nd;
1277
1278restart:
1279	bwillwrite();
1280	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td);
1281	if ((error = namei(&nd)) != 0)
1282		return (error);
1283	if (nd.ni_vp != NULL) {
1284		NDFREE(&nd, NDF_ONLY_PNBUF);
1285		vrele(nd.ni_vp);
1286		if (nd.ni_vp == nd.ni_dvp)
1287			vrele(nd.ni_dvp);
1288		else
1289			vput(nd.ni_dvp);
1290		return (EEXIST);
1291	}
1292	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1293		NDFREE(&nd, NDF_ONLY_PNBUF);
1294		vput(nd.ni_dvp);
1295		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1296			return (error);
1297		goto restart;
1298	}
1299	VATTR_NULL(&vattr);
1300	vattr.va_type = VFIFO;
1301	FILEDESC_LOCK(td->td_proc->p_fd);
1302	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1303	FILEDESC_UNLOCK(td->td_proc->p_fd);
1304#ifdef MAC
1305	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1306	    &vattr);
1307	if (error)
1308		goto out;
1309#endif
1310	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1311	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1312	if (error == 0)
1313		vput(nd.ni_vp);
1314#ifdef MAC
1315out:
1316#endif
1317	NDFREE(&nd, NDF_ONLY_PNBUF);
1318	vput(nd.ni_dvp);
1319	vn_finished_write(mp);
1320	return (error);
1321}
1322
1323/*
1324 * Make a hard file link.
1325 */
1326#ifndef _SYS_SYSPROTO_H_
1327struct link_args {
1328	char	*path;
1329	char	*link;
1330};
1331#endif
1332int
1333link(td, uap)
1334	struct thread *td;
1335	register struct link_args /* {
1336		char *path;
1337		char *link;
1338	} */ *uap;
1339{
1340
1341	return (kern_link(td, uap->path, uap->link, UIO_USERSPACE));
1342}
1343
1344SYSCTL_DECL(_security_bsd);
1345
1346static int hardlink_check_uid = 0;
1347SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1348    &hardlink_check_uid, 0,
1349    "Unprivileged processes cannot create hard links to files owned by other "
1350    "users");
1351static int hardlink_check_gid = 0;
1352SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1353    &hardlink_check_gid, 0,
1354    "Unprivileged processes cannot create hard links to files owned by other "
1355    "groups");
1356
1357static int
1358can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1359{
1360	struct vattr va;
1361	int error;
1362
1363	if (suser_cred(cred, PRISON_ROOT) == 0)
1364		return (0);
1365
1366	if (!hardlink_check_uid && !hardlink_check_gid)
1367		return (0);
1368
1369	error = VOP_GETATTR(vp, &va, cred, td);
1370	if (error != 0)
1371		return (error);
1372
1373	if (hardlink_check_uid) {
1374		if (cred->cr_uid != va.va_uid)
1375			return (EPERM);
1376	}
1377
1378	if (hardlink_check_gid) {
1379		if (!groupmember(va.va_gid, cred))
1380			return (EPERM);
1381	}
1382
1383	return (0);
1384}
1385
1386int
1387kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1388{
1389	struct vnode *vp;
1390	struct mount *mp;
1391	struct nameidata nd;
1392	int error;
1393
1394	bwillwrite();
1395	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td);
1396	if ((error = namei(&nd)) != 0)
1397		return (error);
1398	NDFREE(&nd, NDF_ONLY_PNBUF);
1399	vp = nd.ni_vp;
1400	if (vp->v_type == VDIR) {
1401		vrele(vp);
1402		return (EPERM);		/* POSIX */
1403	}
1404	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1405		vrele(vp);
1406		return (error);
1407	}
1408	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1409	if ((error = namei(&nd)) == 0) {
1410		if (nd.ni_vp != NULL) {
1411			vrele(nd.ni_vp);
1412			if (nd.ni_dvp == nd.ni_vp)
1413				vrele(nd.ni_dvp);
1414			else
1415				vput(nd.ni_dvp);
1416			error = EEXIST;
1417		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1418		    == 0) {
1419			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1420			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1421			error = can_hardlink(vp, td, td->td_ucred);
1422			if (error == 0)
1423#ifdef MAC
1424				error = mac_check_vnode_link(td->td_ucred,
1425				    nd.ni_dvp, vp, &nd.ni_cnd);
1426			if (error == 0)
1427#endif
1428				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1429			VOP_UNLOCK(vp, 0, td);
1430			vput(nd.ni_dvp);
1431		}
1432		NDFREE(&nd, NDF_ONLY_PNBUF);
1433	}
1434	vrele(vp);
1435	vn_finished_write(mp);
1436	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1437	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1438	return (error);
1439}
1440
1441/*
1442 * Make a symbolic link.
1443 */
1444#ifndef _SYS_SYSPROTO_H_
1445struct symlink_args {
1446	char	*path;
1447	char	*link;
1448};
1449#endif
1450int
1451symlink(td, uap)
1452	struct thread *td;
1453	register struct symlink_args /* {
1454		char *path;
1455		char *link;
1456	} */ *uap;
1457{
1458
1459	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1460}
1461
1462int
1463kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1464{
1465	struct mount *mp;
1466	struct vattr vattr;
1467	char *syspath;
1468	int error;
1469	struct nameidata nd;
1470
1471	if (segflg == UIO_SYSSPACE) {
1472		syspath = path;
1473	} else {
1474		syspath = uma_zalloc(namei_zone, M_WAITOK);
1475		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1476			goto out;
1477	}
1478restart:
1479	bwillwrite();
1480	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1481	if ((error = namei(&nd)) != 0)
1482		goto out;
1483	if (nd.ni_vp) {
1484		NDFREE(&nd, NDF_ONLY_PNBUF);
1485		vrele(nd.ni_vp);
1486		if (nd.ni_vp == nd.ni_dvp)
1487			vrele(nd.ni_dvp);
1488		else
1489			vput(nd.ni_dvp);
1490		error = EEXIST;
1491		goto out;
1492	}
1493	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1494		NDFREE(&nd, NDF_ONLY_PNBUF);
1495		vput(nd.ni_dvp);
1496		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1497			return (error);
1498		goto restart;
1499	}
1500	VATTR_NULL(&vattr);
1501	FILEDESC_LOCK(td->td_proc->p_fd);
1502	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1503	FILEDESC_UNLOCK(td->td_proc->p_fd);
1504#ifdef MAC
1505	vattr.va_type = VLNK;
1506	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1507	    &vattr);
1508	if (error)
1509		goto out2;
1510#endif
1511	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1512	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1513	if (error == 0)
1514		vput(nd.ni_vp);
1515#ifdef MAC
1516out2:
1517#endif
1518	NDFREE(&nd, NDF_ONLY_PNBUF);
1519	vput(nd.ni_dvp);
1520	vn_finished_write(mp);
1521	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1522	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1523out:
1524	if (segflg != UIO_SYSSPACE)
1525		uma_zfree(namei_zone, syspath);
1526	return (error);
1527}
1528
1529/*
1530 * Delete a whiteout from the filesystem.
1531 */
1532int
1533undelete(td, uap)
1534	struct thread *td;
1535	register struct undelete_args /* {
1536		char *path;
1537	} */ *uap;
1538{
1539	int error;
1540	struct mount *mp;
1541	struct nameidata nd;
1542
1543restart:
1544	bwillwrite();
1545	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1546	    uap->path, td);
1547	error = namei(&nd);
1548	if (error)
1549		return (error);
1550
1551	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1552		NDFREE(&nd, NDF_ONLY_PNBUF);
1553		if (nd.ni_vp)
1554			vrele(nd.ni_vp);
1555		if (nd.ni_vp == nd.ni_dvp)
1556			vrele(nd.ni_dvp);
1557		else
1558			vput(nd.ni_dvp);
1559		return (EEXIST);
1560	}
1561	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1562		NDFREE(&nd, NDF_ONLY_PNBUF);
1563		vput(nd.ni_dvp);
1564		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1565			return (error);
1566		goto restart;
1567	}
1568	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1569	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1570	NDFREE(&nd, NDF_ONLY_PNBUF);
1571	vput(nd.ni_dvp);
1572	vn_finished_write(mp);
1573	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1574	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1575	return (error);
1576}
1577
1578/*
1579 * Delete a name from the filesystem.
1580 */
1581#ifndef _SYS_SYSPROTO_H_
1582struct unlink_args {
1583	char	*path;
1584};
1585#endif
1586int
1587unlink(td, uap)
1588	struct thread *td;
1589	struct unlink_args /* {
1590		char *path;
1591	} */ *uap;
1592{
1593
1594	return (kern_unlink(td, uap->path, UIO_USERSPACE));
1595}
1596
1597int
1598kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1599{
1600	struct mount *mp;
1601	struct vnode *vp;
1602	int error;
1603	struct nameidata nd;
1604
1605restart:
1606	bwillwrite();
1607	NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td);
1608	if ((error = namei(&nd)) != 0)
1609		return (error);
1610	vp = nd.ni_vp;
1611	if (vp->v_type == VDIR)
1612		error = EPERM;		/* POSIX */
1613	else {
1614		/*
1615		 * The root of a mounted filesystem cannot be deleted.
1616		 *
1617		 * XXX: can this only be a VDIR case?
1618		 */
1619		if (vp->v_vflag & VV_ROOT)
1620			error = EBUSY;
1621	}
1622	if (error == 0) {
1623		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1624			NDFREE(&nd, NDF_ONLY_PNBUF);
1625			if (vp == nd.ni_dvp)
1626				vrele(vp);
1627			else
1628				vput(vp);
1629			vput(nd.ni_dvp);
1630			if ((error = vn_start_write(NULL, &mp,
1631			    V_XSLEEP | PCATCH)) != 0)
1632				return (error);
1633			goto restart;
1634		}
1635#ifdef MAC
1636		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1637		    &nd.ni_cnd);
1638		if (error)
1639			goto out;
1640#endif
1641		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1642		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1643#ifdef MAC
1644out:
1645#endif
1646		vn_finished_write(mp);
1647	}
1648	NDFREE(&nd, NDF_ONLY_PNBUF);
1649	if (vp == nd.ni_dvp)
1650		vrele(vp);
1651	else
1652		vput(vp);
1653	vput(nd.ni_dvp);
1654	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1655	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1656	return (error);
1657}
1658
1659/*
1660 * Reposition read/write file offset.
1661 */
1662#ifndef _SYS_SYSPROTO_H_
1663struct lseek_args {
1664	int	fd;
1665	int	pad;
1666	off_t	offset;
1667	int	whence;
1668};
1669#endif
1670int
1671lseek(td, uap)
1672	struct thread *td;
1673	register struct lseek_args /* {
1674		int fd;
1675		int pad;
1676		off_t offset;
1677		int whence;
1678	} */ *uap;
1679{
1680	struct ucred *cred = td->td_ucred;
1681	struct file *fp;
1682	struct vnode *vp;
1683	struct vattr vattr;
1684	off_t offset;
1685	int error, noneg;
1686
1687	if ((error = fget(td, uap->fd, &fp)) != 0)
1688		return (error);
1689	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1690		fdrop(fp, td);
1691		return (ESPIPE);
1692	}
1693	vp = fp->f_vnode;
1694	noneg = (vp->v_type != VCHR);
1695	offset = uap->offset;
1696	switch (uap->whence) {
1697	case L_INCR:
1698		if (noneg &&
1699		    (fp->f_offset < 0 ||
1700		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1701			error = EOVERFLOW;
1702			break;
1703		}
1704		offset += fp->f_offset;
1705		break;
1706	case L_XTND:
1707		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1708		error = VOP_GETATTR(vp, &vattr, cred, td);
1709		VOP_UNLOCK(vp, 0, td);
1710		if (error)
1711			break;
1712		if (noneg &&
1713		    (vattr.va_size > OFF_MAX ||
1714		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1715			error = EOVERFLOW;
1716			break;
1717		}
1718		offset += vattr.va_size;
1719		break;
1720	case L_SET:
1721		break;
1722	default:
1723		error = EINVAL;
1724	}
1725	if (error == 0 && noneg && offset < 0)
1726		error = EINVAL;
1727	if (error != 0) {
1728		fdrop(fp, td);
1729		return (error);
1730	}
1731	fp->f_offset = offset;
1732	*(off_t *)(td->td_retval) = fp->f_offset;
1733	fdrop(fp, td);
1734	return (0);
1735}
1736
1737#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1738/*
1739 * Reposition read/write file offset.
1740 */
1741#ifndef _SYS_SYSPROTO_H_
1742struct olseek_args {
1743	int	fd;
1744	long	offset;
1745	int	whence;
1746};
1747#endif
1748int
1749olseek(td, uap)
1750	struct thread *td;
1751	register struct olseek_args /* {
1752		int fd;
1753		long offset;
1754		int whence;
1755	} */ *uap;
1756{
1757	struct lseek_args /* {
1758		int fd;
1759		int pad;
1760		off_t offset;
1761		int whence;
1762	} */ nuap;
1763	int error;
1764
1765	nuap.fd = uap->fd;
1766	nuap.offset = uap->offset;
1767	nuap.whence = uap->whence;
1768	error = lseek(td, &nuap);
1769	return (error);
1770}
1771#endif /* COMPAT_43 */
1772
1773/*
1774 * Check access permissions using passed credentials.
1775 */
1776static int
1777vn_access(vp, user_flags, cred, td)
1778	struct vnode	*vp;
1779	int		user_flags;
1780	struct ucred	*cred;
1781	struct thread	*td;
1782{
1783	int error, flags;
1784
1785	/* Flags == 0 means only check for existence. */
1786	error = 0;
1787	if (user_flags) {
1788		flags = 0;
1789		if (user_flags & R_OK)
1790			flags |= VREAD;
1791		if (user_flags & W_OK)
1792			flags |= VWRITE;
1793		if (user_flags & X_OK)
1794			flags |= VEXEC;
1795#ifdef MAC
1796		error = mac_check_vnode_access(cred, vp, flags);
1797		if (error)
1798			return (error);
1799#endif
1800		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1801			error = VOP_ACCESS(vp, flags, cred, td);
1802	}
1803	return (error);
1804}
1805
1806/*
1807 * Check access permissions using "real" credentials.
1808 */
1809#ifndef _SYS_SYSPROTO_H_
1810struct access_args {
1811	char	*path;
1812	int	flags;
1813};
1814#endif
1815int
1816access(td, uap)
1817	struct thread *td;
1818	register struct access_args /* {
1819		char *path;
1820		int flags;
1821	} */ *uap;
1822{
1823
1824	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1825}
1826
1827int
1828kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1829{
1830	struct ucred *cred, *tmpcred;
1831	register struct vnode *vp;
1832	int error;
1833	struct nameidata nd;
1834
1835	/*
1836	 * Create and modify a temporary credential instead of one that
1837	 * is potentially shared.  This could also mess up socket
1838	 * buffer accounting which can run in an interrupt context.
1839	 *
1840	 * XXX - Depending on how "threads" are finally implemented, it
1841	 * may be better to explicitly pass the credential to namei()
1842	 * rather than to modify the potentially shared process structure.
1843	 */
1844	cred = td->td_ucred;
1845	tmpcred = crdup(cred);
1846	tmpcred->cr_uid = cred->cr_ruid;
1847	tmpcred->cr_groups[0] = cred->cr_rgid;
1848	td->td_ucred = tmpcred;
1849	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
1850	if ((error = namei(&nd)) != 0)
1851		goto out1;
1852	vp = nd.ni_vp;
1853
1854	error = vn_access(vp, flags, tmpcred, td);
1855	NDFREE(&nd, NDF_ONLY_PNBUF);
1856	vput(vp);
1857out1:
1858	td->td_ucred = cred;
1859	crfree(tmpcred);
1860	return (error);
1861}
1862
1863/*
1864 * Check access permissions using "effective" credentials.
1865 */
1866#ifndef _SYS_SYSPROTO_H_
1867struct eaccess_args {
1868	char	*path;
1869	int	flags;
1870};
1871#endif
1872int
1873eaccess(td, uap)
1874	struct thread *td;
1875	register struct eaccess_args /* {
1876		char *path;
1877		int flags;
1878	} */ *uap;
1879{
1880	struct nameidata nd;
1881	struct vnode *vp;
1882	int error;
1883
1884	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1885	    uap->path, td);
1886	if ((error = namei(&nd)) != 0)
1887		return (error);
1888	vp = nd.ni_vp;
1889
1890	error = vn_access(vp, uap->flags, td->td_ucred, td);
1891	NDFREE(&nd, NDF_ONLY_PNBUF);
1892	vput(vp);
1893	return (error);
1894}
1895
1896#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
1897/*
1898 * Get file status; this version follows links.
1899 */
1900#ifndef _SYS_SYSPROTO_H_
1901struct ostat_args {
1902	char	*path;
1903	struct ostat *ub;
1904};
1905#endif
1906int
1907ostat(td, uap)
1908	struct thread *td;
1909	register struct ostat_args /* {
1910		char *path;
1911		struct ostat *ub;
1912	} */ *uap;
1913{
1914	struct stat sb;
1915	struct ostat osb;
1916	int error;
1917	struct nameidata nd;
1918
1919	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1920	    uap->path, td);
1921	if ((error = namei(&nd)) != 0)
1922		return (error);
1923	NDFREE(&nd, NDF_ONLY_PNBUF);
1924	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1925	vput(nd.ni_vp);
1926	if (error)
1927		return (error);
1928	cvtstat(&sb, &osb);
1929	error = copyout(&osb, uap->ub, sizeof (osb));
1930	return (error);
1931}
1932
1933/*
1934 * Get file status; this version does not follow links.
1935 */
1936#ifndef _SYS_SYSPROTO_H_
1937struct olstat_args {
1938	char	*path;
1939	struct ostat *ub;
1940};
1941#endif
1942int
1943olstat(td, uap)
1944	struct thread *td;
1945	register struct olstat_args /* {
1946		char *path;
1947		struct ostat *ub;
1948	} */ *uap;
1949{
1950	struct vnode *vp;
1951	struct stat sb;
1952	struct ostat osb;
1953	int error;
1954	struct nameidata nd;
1955
1956	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1957	    uap->path, td);
1958	if ((error = namei(&nd)) != 0)
1959		return (error);
1960	vp = nd.ni_vp;
1961	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
1962	NDFREE(&nd, NDF_ONLY_PNBUF);
1963	vput(vp);
1964	if (error)
1965		return (error);
1966	cvtstat(&sb, &osb);
1967	error = copyout(&osb, uap->ub, sizeof (osb));
1968	return (error);
1969}
1970
1971/*
1972 * Convert from an old to a new stat structure.
1973 */
1974void
1975cvtstat(st, ost)
1976	struct stat *st;
1977	struct ostat *ost;
1978{
1979
1980	ost->st_dev = st->st_dev;
1981	ost->st_ino = st->st_ino;
1982	ost->st_mode = st->st_mode;
1983	ost->st_nlink = st->st_nlink;
1984	ost->st_uid = st->st_uid;
1985	ost->st_gid = st->st_gid;
1986	ost->st_rdev = st->st_rdev;
1987	if (st->st_size < (quad_t)1 << 32)
1988		ost->st_size = st->st_size;
1989	else
1990		ost->st_size = -2;
1991	ost->st_atime = st->st_atime;
1992	ost->st_mtime = st->st_mtime;
1993	ost->st_ctime = st->st_ctime;
1994	ost->st_blksize = st->st_blksize;
1995	ost->st_blocks = st->st_blocks;
1996	ost->st_flags = st->st_flags;
1997	ost->st_gen = st->st_gen;
1998}
1999#endif /* COMPAT_43 || COMPAT_SUNOS */
2000
2001/*
2002 * Get file status; this version follows links.
2003 */
2004#ifndef _SYS_SYSPROTO_H_
2005struct stat_args {
2006	char	*path;
2007	struct stat *ub;
2008};
2009#endif
2010int
2011stat(td, uap)
2012	struct thread *td;
2013	register struct stat_args /* {
2014		char *path;
2015		struct stat *ub;
2016	} */ *uap;
2017{
2018	struct stat sb;
2019	int error;
2020	struct nameidata nd;
2021
2022#ifdef LOOKUP_SHARED
2023	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2024	    UIO_USERSPACE, uap->path, td);
2025#else
2026	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2027	    uap->path, td);
2028#endif
2029	if ((error = namei(&nd)) != 0)
2030		return (error);
2031	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2032	NDFREE(&nd, NDF_ONLY_PNBUF);
2033	vput(nd.ni_vp);
2034	if (error)
2035		return (error);
2036	error = copyout(&sb, uap->ub, sizeof (sb));
2037	return (error);
2038}
2039
2040/*
2041 * Get file status; this version does not follow links.
2042 */
2043#ifndef _SYS_SYSPROTO_H_
2044struct lstat_args {
2045	char	*path;
2046	struct stat *ub;
2047};
2048#endif
2049int
2050lstat(td, uap)
2051	struct thread *td;
2052	register struct lstat_args /* {
2053		char *path;
2054		struct stat *ub;
2055	} */ *uap;
2056{
2057	int error;
2058	struct vnode *vp;
2059	struct stat sb;
2060	struct nameidata nd;
2061
2062	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2063	    uap->path, td);
2064	if ((error = namei(&nd)) != 0)
2065		return (error);
2066	vp = nd.ni_vp;
2067	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2068	NDFREE(&nd, NDF_ONLY_PNBUF);
2069	vput(vp);
2070	if (error)
2071		return (error);
2072	error = copyout(&sb, uap->ub, sizeof (sb));
2073	return (error);
2074}
2075
2076/*
2077 * Implementation of the NetBSD stat() function.
2078 * XXX This should probably be collapsed with the FreeBSD version,
2079 * as the differences are only due to vn_stat() clearing spares at
2080 * the end of the structures.  vn_stat could be split to avoid this,
2081 * and thus collapse the following to close to zero code.
2082 */
2083void
2084cvtnstat(sb, nsb)
2085	struct stat *sb;
2086	struct nstat *nsb;
2087{
2088	bzero(nsb, sizeof *nsb);
2089	nsb->st_dev = sb->st_dev;
2090	nsb->st_ino = sb->st_ino;
2091	nsb->st_mode = sb->st_mode;
2092	nsb->st_nlink = sb->st_nlink;
2093	nsb->st_uid = sb->st_uid;
2094	nsb->st_gid = sb->st_gid;
2095	nsb->st_rdev = sb->st_rdev;
2096	nsb->st_atimespec = sb->st_atimespec;
2097	nsb->st_mtimespec = sb->st_mtimespec;
2098	nsb->st_ctimespec = sb->st_ctimespec;
2099	nsb->st_size = sb->st_size;
2100	nsb->st_blocks = sb->st_blocks;
2101	nsb->st_blksize = sb->st_blksize;
2102	nsb->st_flags = sb->st_flags;
2103	nsb->st_gen = sb->st_gen;
2104	nsb->st_birthtimespec = sb->st_birthtimespec;
2105}
2106
2107#ifndef _SYS_SYSPROTO_H_
2108struct nstat_args {
2109	char	*path;
2110	struct nstat *ub;
2111};
2112#endif
2113int
2114nstat(td, uap)
2115	struct thread *td;
2116	register struct nstat_args /* {
2117		char *path;
2118		struct nstat *ub;
2119	} */ *uap;
2120{
2121	struct stat sb;
2122	struct nstat nsb;
2123	int error;
2124	struct nameidata nd;
2125
2126	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2127	    uap->path, td);
2128	if ((error = namei(&nd)) != 0)
2129		return (error);
2130	NDFREE(&nd, NDF_ONLY_PNBUF);
2131	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2132	vput(nd.ni_vp);
2133	if (error)
2134		return (error);
2135	cvtnstat(&sb, &nsb);
2136	error = copyout(&nsb, uap->ub, sizeof (nsb));
2137	return (error);
2138}
2139
2140/*
2141 * NetBSD lstat.  Get file status; this version does not follow links.
2142 */
2143#ifndef _SYS_SYSPROTO_H_
2144struct lstat_args {
2145	char	*path;
2146	struct stat *ub;
2147};
2148#endif
2149int
2150nlstat(td, uap)
2151	struct thread *td;
2152	register struct nlstat_args /* {
2153		char *path;
2154		struct nstat *ub;
2155	} */ *uap;
2156{
2157	int error;
2158	struct vnode *vp;
2159	struct stat sb;
2160	struct nstat nsb;
2161	struct nameidata nd;
2162
2163	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2164	    uap->path, td);
2165	if ((error = namei(&nd)) != 0)
2166		return (error);
2167	vp = nd.ni_vp;
2168	NDFREE(&nd, NDF_ONLY_PNBUF);
2169	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2170	vput(vp);
2171	if (error)
2172		return (error);
2173	cvtnstat(&sb, &nsb);
2174	error = copyout(&nsb, uap->ub, sizeof (nsb));
2175	return (error);
2176}
2177
2178/*
2179 * Get configurable pathname variables.
2180 */
2181#ifndef _SYS_SYSPROTO_H_
2182struct pathconf_args {
2183	char	*path;
2184	int	name;
2185};
2186#endif
2187int
2188pathconf(td, uap)
2189	struct thread *td;
2190	register struct pathconf_args /* {
2191		char *path;
2192		int name;
2193	} */ *uap;
2194{
2195	int error;
2196	struct nameidata nd;
2197
2198	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2199	    uap->path, td);
2200	if ((error = namei(&nd)) != 0)
2201		return (error);
2202	NDFREE(&nd, NDF_ONLY_PNBUF);
2203
2204	/* If asynchronous I/O is available, it works for all files. */
2205	if (uap->name == _PC_ASYNC_IO)
2206		td->td_retval[0] = async_io_version;
2207	else
2208		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
2209	vput(nd.ni_vp);
2210	return (error);
2211}
2212
2213/*
2214 * Return target name of a symbolic link.
2215 */
2216#ifndef _SYS_SYSPROTO_H_
2217struct readlink_args {
2218	char	*path;
2219	char	*buf;
2220	int	count;
2221};
2222#endif
2223int
2224readlink(td, uap)
2225	struct thread *td;
2226	register struct readlink_args /* {
2227		char *path;
2228		char *buf;
2229		int count;
2230	} */ *uap;
2231{
2232
2233	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2234	    UIO_USERSPACE, uap->count));
2235}
2236
2237int
2238kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2239    enum uio_seg bufseg, int count)
2240{
2241	register struct vnode *vp;
2242	struct iovec aiov;
2243	struct uio auio;
2244	int error;
2245	struct nameidata nd;
2246
2247	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td);
2248	if ((error = namei(&nd)) != 0)
2249		return (error);
2250	NDFREE(&nd, NDF_ONLY_PNBUF);
2251	vp = nd.ni_vp;
2252#ifdef MAC
2253	error = mac_check_vnode_readlink(td->td_ucred, vp);
2254	if (error) {
2255		vput(vp);
2256		return (error);
2257	}
2258#endif
2259	if (vp->v_type != VLNK)
2260		error = EINVAL;
2261	else {
2262		aiov.iov_base = buf;
2263		aiov.iov_len = count;
2264		auio.uio_iov = &aiov;
2265		auio.uio_iovcnt = 1;
2266		auio.uio_offset = 0;
2267		auio.uio_rw = UIO_READ;
2268		auio.uio_segflg = bufseg;
2269		auio.uio_td = td;
2270		auio.uio_resid = count;
2271		error = VOP_READLINK(vp, &auio, td->td_ucred);
2272	}
2273	vput(vp);
2274	td->td_retval[0] = count - auio.uio_resid;
2275	return (error);
2276}
2277
2278/*
2279 * Common implementation code for chflags() and fchflags().
2280 */
2281static int
2282setfflags(td, vp, flags)
2283	struct thread *td;
2284	struct vnode *vp;
2285	int flags;
2286{
2287	int error;
2288	struct mount *mp;
2289	struct vattr vattr;
2290
2291	/*
2292	 * Prevent non-root users from setting flags on devices.  When
2293	 * a device is reused, users can retain ownership of the device
2294	 * if they are allowed to set flags and programs assume that
2295	 * chown can't fail when done as root.
2296	 */
2297	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2298		error = suser_cred(td->td_ucred, PRISON_ROOT);
2299		if (error)
2300			return (error);
2301	}
2302
2303	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2304		return (error);
2305	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2306	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2307	VATTR_NULL(&vattr);
2308	vattr.va_flags = flags;
2309#ifdef MAC
2310	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2311	if (error == 0)
2312#endif
2313		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2314	VOP_UNLOCK(vp, 0, td);
2315	vn_finished_write(mp);
2316	return (error);
2317}
2318
2319/*
2320 * Change flags of a file given a path name.
2321 */
2322#ifndef _SYS_SYSPROTO_H_
2323struct chflags_args {
2324	char	*path;
2325	int	flags;
2326};
2327#endif
2328int
2329chflags(td, uap)
2330	struct thread *td;
2331	register struct chflags_args /* {
2332		char *path;
2333		int flags;
2334	} */ *uap;
2335{
2336	int error;
2337	struct nameidata nd;
2338
2339	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
2340	if ((error = namei(&nd)) != 0)
2341		return (error);
2342	NDFREE(&nd, NDF_ONLY_PNBUF);
2343	error = setfflags(td, nd.ni_vp, uap->flags);
2344	vrele(nd.ni_vp);
2345	return (error);
2346}
2347
2348/*
2349 * Same as chflags() but doesn't follow symlinks.
2350 */
2351int
2352lchflags(td, uap)
2353	struct thread *td;
2354	register struct lchflags_args /* {
2355		char *path;
2356		int flags;
2357	} */ *uap;
2358{
2359	int error;
2360	struct nameidata nd;
2361
2362	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2363	if ((error = namei(&nd)) != 0)
2364		return (error);
2365	NDFREE(&nd, NDF_ONLY_PNBUF);
2366	error = setfflags(td, nd.ni_vp, uap->flags);
2367	vrele(nd.ni_vp);
2368	return (error);
2369}
2370
2371/*
2372 * Change flags of a file given a file descriptor.
2373 */
2374#ifndef _SYS_SYSPROTO_H_
2375struct fchflags_args {
2376	int	fd;
2377	int	flags;
2378};
2379#endif
2380int
2381fchflags(td, uap)
2382	struct thread *td;
2383	register struct fchflags_args /* {
2384		int fd;
2385		int flags;
2386	} */ *uap;
2387{
2388	struct file *fp;
2389	int error;
2390
2391	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2392		return (error);
2393	error = setfflags(td, fp->f_vnode, uap->flags);
2394	fdrop(fp, td);
2395	return (error);
2396}
2397
2398/*
2399 * Common implementation code for chmod(), lchmod() and fchmod().
2400 */
2401static int
2402setfmode(td, vp, mode)
2403	struct thread *td;
2404	struct vnode *vp;
2405	int mode;
2406{
2407	int error;
2408	struct mount *mp;
2409	struct vattr vattr;
2410
2411	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2412		return (error);
2413	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2414	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2415	VATTR_NULL(&vattr);
2416	vattr.va_mode = mode & ALLPERMS;
2417#ifdef MAC
2418	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2419	if (error == 0)
2420#endif
2421		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2422	VOP_UNLOCK(vp, 0, td);
2423	vn_finished_write(mp);
2424	return (error);
2425}
2426
2427/*
2428 * Change mode of a file given path name.
2429 */
2430#ifndef _SYS_SYSPROTO_H_
2431struct chmod_args {
2432	char	*path;
2433	int	mode;
2434};
2435#endif
2436int
2437chmod(td, uap)
2438	struct thread *td;
2439	register struct chmod_args /* {
2440		char *path;
2441		int mode;
2442	} */ *uap;
2443{
2444
2445	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2446}
2447
2448int
2449kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2450{
2451	int error;
2452	struct nameidata nd;
2453
2454	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2455	if ((error = namei(&nd)) != 0)
2456		return (error);
2457	NDFREE(&nd, NDF_ONLY_PNBUF);
2458	error = setfmode(td, nd.ni_vp, mode);
2459	vrele(nd.ni_vp);
2460	return (error);
2461}
2462
2463/*
2464 * Change mode of a file given path name (don't follow links.)
2465 */
2466#ifndef _SYS_SYSPROTO_H_
2467struct lchmod_args {
2468	char	*path;
2469	int	mode;
2470};
2471#endif
2472int
2473lchmod(td, uap)
2474	struct thread *td;
2475	register struct lchmod_args /* {
2476		char *path;
2477		int mode;
2478	} */ *uap;
2479{
2480	int error;
2481	struct nameidata nd;
2482
2483	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
2484	if ((error = namei(&nd)) != 0)
2485		return (error);
2486	NDFREE(&nd, NDF_ONLY_PNBUF);
2487	error = setfmode(td, nd.ni_vp, uap->mode);
2488	vrele(nd.ni_vp);
2489	return (error);
2490}
2491
2492/*
2493 * Change mode of a file given a file descriptor.
2494 */
2495#ifndef _SYS_SYSPROTO_H_
2496struct fchmod_args {
2497	int	fd;
2498	int	mode;
2499};
2500#endif
2501int
2502fchmod(td, uap)
2503	struct thread *td;
2504	register struct fchmod_args /* {
2505		int fd;
2506		int mode;
2507	} */ *uap;
2508{
2509	struct file *fp;
2510	int error;
2511
2512	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2513		return (error);
2514	error = setfmode(td, fp->f_vnode, uap->mode);
2515	fdrop(fp, td);
2516	return (error);
2517}
2518
2519/*
2520 * Common implementation for chown(), lchown(), and fchown()
2521 */
2522static int
2523setfown(td, vp, uid, gid)
2524	struct thread *td;
2525	struct vnode *vp;
2526	uid_t uid;
2527	gid_t gid;
2528{
2529	int error;
2530	struct mount *mp;
2531	struct vattr vattr;
2532
2533	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2534		return (error);
2535	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2536	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2537	VATTR_NULL(&vattr);
2538	vattr.va_uid = uid;
2539	vattr.va_gid = gid;
2540#ifdef MAC
2541	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2542	    vattr.va_gid);
2543	if (error == 0)
2544#endif
2545		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2546	VOP_UNLOCK(vp, 0, td);
2547	vn_finished_write(mp);
2548	return (error);
2549}
2550
2551/*
2552 * Set ownership given a path name.
2553 */
2554#ifndef _SYS_SYSPROTO_H_
2555struct chown_args {
2556	char	*path;
2557	int	uid;
2558	int	gid;
2559};
2560#endif
2561int
2562chown(td, uap)
2563	struct thread *td;
2564	register struct chown_args /* {
2565		char *path;
2566		int uid;
2567		int gid;
2568	} */ *uap;
2569{
2570
2571	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2572}
2573
2574int
2575kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2576    int gid)
2577{
2578	int error;
2579	struct nameidata nd;
2580
2581	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2582	if ((error = namei(&nd)) != 0)
2583		return (error);
2584	NDFREE(&nd, NDF_ONLY_PNBUF);
2585	error = setfown(td, nd.ni_vp, uid, gid);
2586	vrele(nd.ni_vp);
2587	return (error);
2588}
2589
2590/*
2591 * Set ownership given a path name, do not cross symlinks.
2592 */
2593#ifndef _SYS_SYSPROTO_H_
2594struct lchown_args {
2595	char	*path;
2596	int	uid;
2597	int	gid;
2598};
2599#endif
2600int
2601lchown(td, uap)
2602	struct thread *td;
2603	register struct lchown_args /* {
2604		char *path;
2605		int uid;
2606		int gid;
2607	} */ *uap;
2608{
2609
2610	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2611}
2612
2613int
2614kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2615    int gid)
2616{
2617	int error;
2618	struct nameidata nd;
2619
2620	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2621	if ((error = namei(&nd)) != 0)
2622		return (error);
2623	NDFREE(&nd, NDF_ONLY_PNBUF);
2624	error = setfown(td, nd.ni_vp, uid, gid);
2625	vrele(nd.ni_vp);
2626	return (error);
2627}
2628
2629/*
2630 * Set ownership given a file descriptor.
2631 */
2632#ifndef _SYS_SYSPROTO_H_
2633struct fchown_args {
2634	int	fd;
2635	int	uid;
2636	int	gid;
2637};
2638#endif
2639int
2640fchown(td, uap)
2641	struct thread *td;
2642	register struct fchown_args /* {
2643		int fd;
2644		int uid;
2645		int gid;
2646	} */ *uap;
2647{
2648	struct file *fp;
2649	int error;
2650
2651	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2652		return (error);
2653	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2654	fdrop(fp, td);
2655	return (error);
2656}
2657
2658/*
2659 * Common implementation code for utimes(), lutimes(), and futimes().
2660 */
2661static int
2662getutimes(usrtvp, tvpseg, tsp)
2663	const struct timeval *usrtvp;
2664	enum uio_seg tvpseg;
2665	struct timespec *tsp;
2666{
2667	struct timeval tv[2];
2668	const struct timeval *tvp;
2669	int error;
2670
2671	if (usrtvp == NULL) {
2672		microtime(&tv[0]);
2673		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2674		tsp[1] = tsp[0];
2675	} else {
2676		if (tvpseg == UIO_SYSSPACE) {
2677			tvp = usrtvp;
2678		} else {
2679			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2680				return (error);
2681			tvp = tv;
2682		}
2683
2684		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2685		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2686	}
2687	return (0);
2688}
2689
2690/*
2691 * Common implementation code for utimes(), lutimes(), and futimes().
2692 */
2693static int
2694setutimes(td, vp, ts, numtimes, nullflag)
2695	struct thread *td;
2696	struct vnode *vp;
2697	const struct timespec *ts;
2698	int numtimes;
2699	int nullflag;
2700{
2701	int error, setbirthtime;
2702	struct mount *mp;
2703	struct vattr vattr;
2704
2705	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2706		return (error);
2707	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2708	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2709	setbirthtime = 0;
2710	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2711	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2712		setbirthtime = 1;
2713	VATTR_NULL(&vattr);
2714	vattr.va_atime = ts[0];
2715	vattr.va_mtime = ts[1];
2716	if (setbirthtime)
2717		vattr.va_birthtime = ts[1];
2718	if (numtimes > 2)
2719		vattr.va_birthtime = ts[2];
2720	if (nullflag)
2721		vattr.va_vaflags |= VA_UTIMES_NULL;
2722#ifdef MAC
2723	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2724	    vattr.va_mtime);
2725#endif
2726	if (error == 0)
2727		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2728	VOP_UNLOCK(vp, 0, td);
2729	vn_finished_write(mp);
2730	return (error);
2731}
2732
2733/*
2734 * Set the access and modification times of a file.
2735 */
2736#ifndef _SYS_SYSPROTO_H_
2737struct utimes_args {
2738	char	*path;
2739	struct	timeval *tptr;
2740};
2741#endif
2742int
2743utimes(td, uap)
2744	struct thread *td;
2745	register struct utimes_args /* {
2746		char *path;
2747		struct timeval *tptr;
2748	} */ *uap;
2749{
2750
2751	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2752	    UIO_USERSPACE));
2753}
2754
2755int
2756kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2757    struct timeval *tptr, enum uio_seg tptrseg)
2758{
2759	struct timespec ts[2];
2760	int error;
2761	struct nameidata nd;
2762
2763	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2764		return (error);
2765	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2766	if ((error = namei(&nd)) != 0)
2767		return (error);
2768	NDFREE(&nd, NDF_ONLY_PNBUF);
2769	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2770	vrele(nd.ni_vp);
2771	return (error);
2772}
2773
2774/*
2775 * Set the access and modification times of a file.
2776 */
2777#ifndef _SYS_SYSPROTO_H_
2778struct lutimes_args {
2779	char	*path;
2780	struct	timeval *tptr;
2781};
2782#endif
2783int
2784lutimes(td, uap)
2785	struct thread *td;
2786	register struct lutimes_args /* {
2787		char *path;
2788		struct timeval *tptr;
2789	} */ *uap;
2790{
2791
2792	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2793	    UIO_USERSPACE));
2794}
2795
2796int
2797kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2798    struct timeval *tptr, enum uio_seg tptrseg)
2799{
2800	struct timespec ts[2];
2801	int error;
2802	struct nameidata nd;
2803
2804	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2805		return (error);
2806	NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td);
2807	if ((error = namei(&nd)) != 0)
2808		return (error);
2809	NDFREE(&nd, NDF_ONLY_PNBUF);
2810	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2811	vrele(nd.ni_vp);
2812	return (error);
2813}
2814
2815/*
2816 * Set the access and modification times of a file.
2817 */
2818#ifndef _SYS_SYSPROTO_H_
2819struct futimes_args {
2820	int	fd;
2821	struct	timeval *tptr;
2822};
2823#endif
2824int
2825futimes(td, uap)
2826	struct thread *td;
2827	register struct futimes_args /* {
2828		int  fd;
2829		struct timeval *tptr;
2830	} */ *uap;
2831{
2832
2833	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2834}
2835
2836int
2837kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2838    enum uio_seg tptrseg)
2839{
2840	struct timespec ts[2];
2841	struct file *fp;
2842	int error;
2843
2844	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2845		return (error);
2846	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2847		return (error);
2848	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2849	fdrop(fp, td);
2850	return (error);
2851}
2852
2853/*
2854 * Truncate a file given its path name.
2855 */
2856#ifndef _SYS_SYSPROTO_H_
2857struct truncate_args {
2858	char	*path;
2859	int	pad;
2860	off_t	length;
2861};
2862#endif
2863int
2864truncate(td, uap)
2865	struct thread *td;
2866	register struct truncate_args /* {
2867		char *path;
2868		int pad;
2869		off_t length;
2870	} */ *uap;
2871{
2872
2873	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2874}
2875
2876int
2877kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2878{
2879	struct mount *mp;
2880	struct vnode *vp;
2881	struct vattr vattr;
2882	int error;
2883	struct nameidata nd;
2884
2885	if (length < 0)
2886		return(EINVAL);
2887	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
2888	if ((error = namei(&nd)) != 0)
2889		return (error);
2890	vp = nd.ni_vp;
2891	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2892		vrele(vp);
2893		return (error);
2894	}
2895	NDFREE(&nd, NDF_ONLY_PNBUF);
2896	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2897	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2898	if (vp->v_type == VDIR)
2899		error = EISDIR;
2900#ifdef MAC
2901	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2902	}
2903#endif
2904	else if ((error = vn_writechk(vp)) == 0 &&
2905	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2906		VATTR_NULL(&vattr);
2907		vattr.va_size = length;
2908		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2909	}
2910	vput(vp);
2911	vn_finished_write(mp);
2912	return (error);
2913}
2914
2915/*
2916 * Truncate a file given a file descriptor.
2917 */
2918#ifndef _SYS_SYSPROTO_H_
2919struct ftruncate_args {
2920	int	fd;
2921	int	pad;
2922	off_t	length;
2923};
2924#endif
2925int
2926ftruncate(td, uap)
2927	struct thread *td;
2928	register struct ftruncate_args /* {
2929		int fd;
2930		int pad;
2931		off_t length;
2932	} */ *uap;
2933{
2934	struct mount *mp;
2935	struct vattr vattr;
2936	struct vnode *vp;
2937	struct file *fp;
2938	int error;
2939
2940	if (uap->length < 0)
2941		return(EINVAL);
2942	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2943		return (error);
2944	if ((fp->f_flag & FWRITE) == 0) {
2945		fdrop(fp, td);
2946		return (EINVAL);
2947	}
2948	vp = fp->f_vnode;
2949	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2950		fdrop(fp, td);
2951		return (error);
2952	}
2953	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2954	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2955	if (vp->v_type == VDIR)
2956		error = EISDIR;
2957#ifdef MAC
2958	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
2959	    vp))) {
2960	}
2961#endif
2962	else if ((error = vn_writechk(vp)) == 0) {
2963		VATTR_NULL(&vattr);
2964		vattr.va_size = uap->length;
2965		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
2966	}
2967	VOP_UNLOCK(vp, 0, td);
2968	vn_finished_write(mp);
2969	fdrop(fp, td);
2970	return (error);
2971}
2972
2973#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2974/*
2975 * Truncate a file given its path name.
2976 */
2977#ifndef _SYS_SYSPROTO_H_
2978struct otruncate_args {
2979	char	*path;
2980	long	length;
2981};
2982#endif
2983int
2984otruncate(td, uap)
2985	struct thread *td;
2986	register struct otruncate_args /* {
2987		char *path;
2988		long length;
2989	} */ *uap;
2990{
2991	struct truncate_args /* {
2992		char *path;
2993		int pad;
2994		off_t length;
2995	} */ nuap;
2996
2997	nuap.path = uap->path;
2998	nuap.length = uap->length;
2999	return (truncate(td, &nuap));
3000}
3001
3002/*
3003 * Truncate a file given a file descriptor.
3004 */
3005#ifndef _SYS_SYSPROTO_H_
3006struct oftruncate_args {
3007	int	fd;
3008	long	length;
3009};
3010#endif
3011int
3012oftruncate(td, uap)
3013	struct thread *td;
3014	register struct oftruncate_args /* {
3015		int fd;
3016		long length;
3017	} */ *uap;
3018{
3019	struct ftruncate_args /* {
3020		int fd;
3021		int pad;
3022		off_t length;
3023	} */ nuap;
3024
3025	nuap.fd = uap->fd;
3026	nuap.length = uap->length;
3027	return (ftruncate(td, &nuap));
3028}
3029#endif /* COMPAT_43 || COMPAT_SUNOS */
3030
3031/*
3032 * Sync an open file.
3033 */
3034#ifndef _SYS_SYSPROTO_H_
3035struct fsync_args {
3036	int	fd;
3037};
3038#endif
3039int
3040fsync(td, uap)
3041	struct thread *td;
3042	struct fsync_args /* {
3043		int fd;
3044	} */ *uap;
3045{
3046	struct vnode *vp;
3047	struct mount *mp;
3048	struct file *fp;
3049	vm_object_t obj;
3050	int error;
3051
3052	GIANT_REQUIRED;
3053
3054	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3055		return (error);
3056	vp = fp->f_vnode;
3057	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3058		fdrop(fp, td);
3059		return (error);
3060	}
3061	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3062	if (VOP_GETVOBJECT(vp, &obj) == 0) {
3063		VM_OBJECT_LOCK(obj);
3064		vm_object_page_clean(obj, 0, 0, 0);
3065		VM_OBJECT_UNLOCK(obj);
3066	}
3067	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
3068	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
3069	    && softdep_fsync_hook != NULL)
3070		error = (*softdep_fsync_hook)(vp);
3071
3072	VOP_UNLOCK(vp, 0, td);
3073	vn_finished_write(mp);
3074	fdrop(fp, td);
3075	return (error);
3076}
3077
3078/*
3079 * Rename files.  Source and destination must either both be directories,
3080 * or both not be directories.  If target is a directory, it must be empty.
3081 */
3082#ifndef _SYS_SYSPROTO_H_
3083struct rename_args {
3084	char	*from;
3085	char	*to;
3086};
3087#endif
3088int
3089rename(td, uap)
3090	struct thread *td;
3091	register struct rename_args /* {
3092		char *from;
3093		char *to;
3094	} */ *uap;
3095{
3096
3097	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3098}
3099
3100int
3101kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3102{
3103	struct mount *mp = NULL;
3104	struct vnode *tvp, *fvp, *tdvp;
3105	struct nameidata fromnd, tond;
3106	int error;
3107
3108	bwillwrite();
3109#ifdef MAC
3110	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg,
3111	    from, td);
3112#else
3113	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td);
3114#endif
3115	if ((error = namei(&fromnd)) != 0)
3116		return (error);
3117#ifdef MAC
3118	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3119	    fromnd.ni_vp, &fromnd.ni_cnd);
3120	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3121	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3122#endif
3123	fvp = fromnd.ni_vp;
3124	if (error == 0)
3125		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3126	if (error != 0) {
3127		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3128		vrele(fromnd.ni_dvp);
3129		vrele(fvp);
3130		goto out1;
3131	}
3132	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3133	    NOOBJ, pathseg, to, td);
3134	if (fromnd.ni_vp->v_type == VDIR)
3135		tond.ni_cnd.cn_flags |= WILLBEDIR;
3136	if ((error = namei(&tond)) != 0) {
3137		/* Translate error code for rename("dir1", "dir2/."). */
3138		if (error == EISDIR && fvp->v_type == VDIR)
3139			error = EINVAL;
3140		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3141		vrele(fromnd.ni_dvp);
3142		vrele(fvp);
3143		goto out1;
3144	}
3145	tdvp = tond.ni_dvp;
3146	tvp = tond.ni_vp;
3147	if (tvp != NULL) {
3148		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3149			error = ENOTDIR;
3150			goto out;
3151		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3152			error = EISDIR;
3153			goto out;
3154		}
3155	}
3156	if (fvp == tdvp)
3157		error = EINVAL;
3158	/*
3159	 * If the source is the same as the destination (that is, if they
3160	 * are links to the same vnode), then there is nothing to do.
3161	 */
3162	if (fvp == tvp)
3163		error = -1;
3164#ifdef MAC
3165	else
3166		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3167		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3168#endif
3169out:
3170	if (!error) {
3171		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3172		if (fromnd.ni_dvp != tdvp) {
3173			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3174		}
3175		if (tvp) {
3176			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3177		}
3178		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3179				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3180		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3181		NDFREE(&tond, NDF_ONLY_PNBUF);
3182	} else {
3183		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3184		NDFREE(&tond, NDF_ONLY_PNBUF);
3185		if (tdvp == tvp)
3186			vrele(tdvp);
3187		else
3188			vput(tdvp);
3189		if (tvp)
3190			vput(tvp);
3191		vrele(fromnd.ni_dvp);
3192		vrele(fvp);
3193	}
3194	vrele(tond.ni_startdir);
3195	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3196	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3197	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3198	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3199out1:
3200	vn_finished_write(mp);
3201	if (fromnd.ni_startdir)
3202		vrele(fromnd.ni_startdir);
3203	if (error == -1)
3204		return (0);
3205	return (error);
3206}
3207
3208/*
3209 * Make a directory file.
3210 */
3211#ifndef _SYS_SYSPROTO_H_
3212struct mkdir_args {
3213	char	*path;
3214	int	mode;
3215};
3216#endif
3217int
3218mkdir(td, uap)
3219	struct thread *td;
3220	register struct mkdir_args /* {
3221		char *path;
3222		int mode;
3223	} */ *uap;
3224{
3225
3226	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3227}
3228
3229int
3230kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3231{
3232	struct mount *mp;
3233	struct vnode *vp;
3234	struct vattr vattr;
3235	int error;
3236	struct nameidata nd;
3237
3238restart:
3239	bwillwrite();
3240	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td);
3241	nd.ni_cnd.cn_flags |= WILLBEDIR;
3242	if ((error = namei(&nd)) != 0)
3243		return (error);
3244	vp = nd.ni_vp;
3245	if (vp != NULL) {
3246		NDFREE(&nd, NDF_ONLY_PNBUF);
3247		vrele(vp);
3248		/*
3249		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3250		 * the strange behaviour of leaving the vnode unlocked
3251		 * if the target is the same vnode as the parent.
3252		 */
3253		if (vp == nd.ni_dvp)
3254			vrele(nd.ni_dvp);
3255		else
3256			vput(nd.ni_dvp);
3257		return (EEXIST);
3258	}
3259	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3260		NDFREE(&nd, NDF_ONLY_PNBUF);
3261		vput(nd.ni_dvp);
3262		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3263			return (error);
3264		goto restart;
3265	}
3266	VATTR_NULL(&vattr);
3267	vattr.va_type = VDIR;
3268	FILEDESC_LOCK(td->td_proc->p_fd);
3269	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3270	FILEDESC_UNLOCK(td->td_proc->p_fd);
3271#ifdef MAC
3272	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3273	    &vattr);
3274	if (error)
3275		goto out;
3276#endif
3277	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3278	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3279#ifdef MAC
3280out:
3281#endif
3282	NDFREE(&nd, NDF_ONLY_PNBUF);
3283	vput(nd.ni_dvp);
3284	if (!error)
3285		vput(nd.ni_vp);
3286	vn_finished_write(mp);
3287	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3288	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3289	return (error);
3290}
3291
3292/*
3293 * Remove a directory file.
3294 */
3295#ifndef _SYS_SYSPROTO_H_
3296struct rmdir_args {
3297	char	*path;
3298};
3299#endif
3300int
3301rmdir(td, uap)
3302	struct thread *td;
3303	struct rmdir_args /* {
3304		char *path;
3305	} */ *uap;
3306{
3307
3308	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3309}
3310
3311int
3312kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3313{
3314	struct mount *mp;
3315	struct vnode *vp;
3316	int error;
3317	struct nameidata nd;
3318
3319restart:
3320	bwillwrite();
3321	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td);
3322	if ((error = namei(&nd)) != 0)
3323		return (error);
3324	vp = nd.ni_vp;
3325	if (vp->v_type != VDIR) {
3326		error = ENOTDIR;
3327		goto out;
3328	}
3329	/*
3330	 * No rmdir "." please.
3331	 */
3332	if (nd.ni_dvp == vp) {
3333		error = EINVAL;
3334		goto out;
3335	}
3336	/*
3337	 * The root of a mounted filesystem cannot be deleted.
3338	 */
3339	if (vp->v_vflag & VV_ROOT) {
3340		error = EBUSY;
3341		goto out;
3342	}
3343#ifdef MAC
3344	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3345	    &nd.ni_cnd);
3346	if (error)
3347		goto out;
3348#endif
3349	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3350		NDFREE(&nd, NDF_ONLY_PNBUF);
3351		if (nd.ni_dvp == vp)
3352			vrele(nd.ni_dvp);
3353		else
3354			vput(nd.ni_dvp);
3355		vput(vp);
3356		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3357			return (error);
3358		goto restart;
3359	}
3360	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3361	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3362	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3363	vn_finished_write(mp);
3364out:
3365	NDFREE(&nd, NDF_ONLY_PNBUF);
3366	if (nd.ni_dvp == vp)
3367		vrele(nd.ni_dvp);
3368	else
3369		vput(nd.ni_dvp);
3370	vput(vp);
3371	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3372	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3373	return (error);
3374}
3375
3376#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * COMPAT_43 variant of getdirentries().  Unless the direct path below
 * is taken, the entries are read into a temporary kernel buffer, the
 * d_type/d_namlen bytes of every entry are rewritten, and the result is
 * then moved to the user buffer.  The offset at which the read started
 * is copied out to uap->basep and the number of bytes transferred is
 * returned in td->td_retval[0].
 *
 * Fails with EINVAL if count exceeds 64KB or the descriptor is not a
 * directory, with EBADF if it was not opened for reading, and with EIO
 * if an entry with a zero d_reclen is encountered.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		int fd;
		char *buf;
		u_int count;
		long *basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* Re-entered from below whenever the read is retried on another vp. */
unionread:
	if (vp->v_type != VDIR) {
		fdrop(fp, td);
		return (EINVAL);
	}
	/* auio describes the caller's buffer in user space. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* Remember the starting offset; it is what basep receives. */
	loff = auio.uio_offset = fp->f_offset;
#ifdef MAC
	error = mac_check_vnode_readdir(td->td_ucred, vp);
	if (error) {
		VOP_UNLOCK(vp, 0, td);
		fdrop(fp, td);
		return (error);
	}
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/* Direct path: read straight into the user buffer. */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			fp->f_offset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Bounce path: kuio is a copy of auio redirected at a
		 * kernel buffer of the same size, so the entries can be
		 * edited before being handed to the user.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			/* Bytes actually produced by VOP_READDIR(). */
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Only copy out if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		FREE(dirbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error) {
		fdrop(fp, td);
		return (error);
	}
	/* Nothing was transferred: see whether another vnode should be read. */
	if (uap->count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			/* -1 asks for the read to be retried on the new vp. */
			if (error == -1)
				goto unionread;
			if (error) {
				fdrop(fp, td);
				return (error);
			}
		}
		/*
		 * XXX We could delay dropping the lock above but
		 * union_dircheckp complicates things.
		 */
		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		if ((vp->v_vflag & VV_ROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a MNT_UNION mount: switch the file over
			 * to the covered vnode and read that from offset 0.
			 */
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_vnode = vp;
			fp->f_data = vp;
			fp->f_offset = 0;
			vput(tvp);
			goto unionread;
		}
		VOP_UNLOCK(vp, 0, td);
	}
	error = copyout(&loff, uap->basep, sizeof(long));
	fdrop(fp, td);
	/* The byte count is stored even if the copyout above failed. */
	td->td_retval[0] = uap->count - auio.uio_resid;
	return (error);
}
3529#endif /* COMPAT_43 */
3530
3531/*
3532 * Read a block of directory entries in a filesystem independent format.
3533 */
3534#ifndef _SYS_SYSPROTO_H_
3535struct getdirentries_args {
3536	int	fd;
3537	char	*buf;
3538	u_int	count;
3539	long	*basep;
3540};
3541#endif
3542int
3543getdirentries(td, uap)
3544	struct thread *td;
3545	register struct getdirentries_args /* {
3546		int fd;
3547		char *buf;
3548		u_int count;
3549		long *basep;
3550	} */ *uap;
3551{
3552	struct vnode *vp;
3553	struct file *fp;
3554	struct uio auio;
3555	struct iovec aiov;
3556	long loff;
3557	int error, eofflag;
3558
3559	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3560		return (error);
3561	if ((fp->f_flag & FREAD) == 0) {
3562		fdrop(fp, td);
3563		return (EBADF);
3564	}
3565	vp = fp->f_vnode;
3566unionread:
3567	if (vp->v_type != VDIR) {
3568		fdrop(fp, td);
3569		return (EINVAL);
3570	}
3571	aiov.iov_base = uap->buf;
3572	aiov.iov_len = uap->count;
3573	auio.uio_iov = &aiov;
3574	auio.uio_iovcnt = 1;
3575	auio.uio_rw = UIO_READ;
3576	auio.uio_segflg = UIO_USERSPACE;
3577	auio.uio_td = td;
3578	auio.uio_resid = uap->count;
3579	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3580	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3581	loff = auio.uio_offset = fp->f_offset;
3582#ifdef MAC
3583	error = mac_check_vnode_readdir(td->td_ucred, vp);
3584	if (error == 0)
3585#endif
3586		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3587		    NULL);
3588	fp->f_offset = auio.uio_offset;
3589	VOP_UNLOCK(vp, 0, td);
3590	if (error) {
3591		fdrop(fp, td);
3592		return (error);
3593	}
3594	if (uap->count == auio.uio_resid) {
3595		if (union_dircheckp) {
3596			error = union_dircheckp(td, &vp, fp);
3597			if (error == -1)
3598				goto unionread;
3599			if (error) {
3600				fdrop(fp, td);
3601				return (error);
3602			}
3603		}
3604		/*
3605		 * XXX We could delay dropping the lock above but
3606		 * union_dircheckp complicates things.
3607		 */
3608		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3609		if ((vp->v_vflag & VV_ROOT) &&
3610		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3611			struct vnode *tvp = vp;
3612			vp = vp->v_mount->mnt_vnodecovered;
3613			VREF(vp);
3614			fp->f_vnode = vp;
3615			fp->f_data = vp;
3616			fp->f_offset = 0;
3617			vput(tvp);
3618			goto unionread;
3619		}
3620		VOP_UNLOCK(vp, 0, td);
3621	}
3622	if (uap->basep != NULL) {
3623		error = copyout(&loff, uap->basep, sizeof(long));
3624	}
3625	td->td_retval[0] = uap->count - auio.uio_resid;
3626	fdrop(fp, td);
3627	return (error);
3628}
3629#ifndef _SYS_SYSPROTO_H_
3630struct getdents_args {
3631	int fd;
3632	char *buf;
3633	size_t count;
3634};
3635#endif
3636int
3637getdents(td, uap)
3638	struct thread *td;
3639	register struct getdents_args /* {
3640		int fd;
3641		char *buf;
3642		u_int count;
3643	} */ *uap;
3644{
3645	struct getdirentries_args ap;
3646	ap.fd = uap->fd;
3647	ap.buf = uap->buf;
3648	ap.count = uap->count;
3649	ap.basep = NULL;
3650	return (getdirentries(td, &ap));
3651}
3652
3653/*
3654 * Set the mode mask for creation of filesystem nodes.
3655 *
3656 * MP SAFE
3657 */
3658#ifndef _SYS_SYSPROTO_H_
3659struct umask_args {
3660	int	newmask;
3661};
3662#endif
3663int
3664umask(td, uap)
3665	struct thread *td;
3666	struct umask_args /* {
3667		int newmask;
3668	} */ *uap;
3669{
3670	register struct filedesc *fdp;
3671
3672	FILEDESC_LOCK(td->td_proc->p_fd);
3673	fdp = td->td_proc->p_fd;
3674	td->td_retval[0] = fdp->fd_cmask;
3675	fdp->fd_cmask = uap->newmask & ALLPERMS;
3676	FILEDESC_UNLOCK(td->td_proc->p_fd);
3677	return (0);
3678}
3679
3680/*
3681 * Void all references to file by ripping underlying filesystem
3682 * away from vnode.
3683 */
3684#ifndef _SYS_SYSPROTO_H_
3685struct revoke_args {
3686	char	*path;
3687};
3688#endif
3689int
3690revoke(td, uap)
3691	struct thread *td;
3692	register struct revoke_args /* {
3693		char *path;
3694	} */ *uap;
3695{
3696	struct mount *mp;
3697	struct vnode *vp;
3698	struct vattr vattr;
3699	int error;
3700	struct nameidata nd;
3701
3702	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td);
3703	if ((error = namei(&nd)) != 0)
3704		return (error);
3705	vp = nd.ni_vp;
3706	NDFREE(&nd, NDF_ONLY_PNBUF);
3707	if (vp->v_type != VCHR) {
3708		vput(vp);
3709		return (EINVAL);
3710	}
3711#ifdef MAC
3712	error = mac_check_vnode_revoke(td->td_ucred, vp);
3713	if (error) {
3714		vput(vp);
3715		return (error);
3716	}
3717#endif
3718	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3719	if (error) {
3720		vput(vp);
3721		return (error);
3722	}
3723	VOP_UNLOCK(vp, 0, td);
3724	if (td->td_ucred->cr_uid != vattr.va_uid) {
3725		error = suser_cred(td->td_ucred, PRISON_ROOT);
3726		if (error)
3727			goto out;
3728	}
3729	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3730		goto out;
3731	if (vcount(vp) > 1)
3732		VOP_REVOKE(vp, REVOKEALL);
3733	vn_finished_write(mp);
3734out:
3735	vrele(vp);
3736	return (error);
3737}
3738
3739/*
3740 * Convert a user file descriptor to a kernel file entry.
3741 * A reference on the file entry is held upon returning.
3742 */
3743int
3744getvnode(fdp, fd, fpp)
3745	struct filedesc *fdp;
3746	int fd;
3747	struct file **fpp;
3748{
3749	int error;
3750	struct file *fp;
3751
3752	fp = NULL;
3753	if (fdp == NULL)
3754		error = EBADF;
3755	else {
3756		FILEDESC_LOCK(fdp);
3757		if ((u_int)fd >= fdp->fd_nfiles ||
3758		    (fp = fdp->fd_ofiles[fd]) == NULL)
3759			error = EBADF;
3760		else if (fp->f_vnode == NULL) {
3761			fp = NULL;
3762			error = EINVAL;
3763		} else {
3764			fhold(fp);
3765			error = 0;
3766		}
3767		FILEDESC_UNLOCK(fdp);
3768	}
3769	*fpp = fp;
3770	return (error);
3771}
3772
3773/*
3774 * Get (NFS) file handle
3775 */
3776#ifndef _SYS_SYSPROTO_H_
3777struct lgetfh_args {
3778	char	*fname;
3779	fhandle_t *fhp;
3780};
3781#endif
3782int
3783lgetfh(td, uap)
3784	struct thread *td;
3785	register struct lgetfh_args *uap;
3786{
3787	struct nameidata nd;
3788	fhandle_t fh;
3789	register struct vnode *vp;
3790	int error;
3791
3792	error = suser(td);
3793	if (error)
3794		return (error);
3795	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3796	error = namei(&nd);
3797	if (error)
3798		return (error);
3799	NDFREE(&nd, NDF_ONLY_PNBUF);
3800	vp = nd.ni_vp;
3801	bzero(&fh, sizeof(fh));
3802	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3803	error = VFS_VPTOFH(vp, &fh.fh_fid);
3804	vput(vp);
3805	if (error)
3806		return (error);
3807	error = copyout(&fh, uap->fhp, sizeof (fh));
3808	return (error);
3809}
3810
3811#ifndef _SYS_SYSPROTO_H_
3812struct getfh_args {
3813	char	*fname;
3814	fhandle_t *fhp;
3815};
3816#endif
3817int
3818getfh(td, uap)
3819	struct thread *td;
3820	register struct getfh_args *uap;
3821{
3822	struct nameidata nd;
3823	fhandle_t fh;
3824	register struct vnode *vp;
3825	int error;
3826
3827	error = suser(td);
3828	if (error)
3829		return (error);
3830	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
3831	error = namei(&nd);
3832	if (error)
3833		return (error);
3834	NDFREE(&nd, NDF_ONLY_PNBUF);
3835	vp = nd.ni_vp;
3836	bzero(&fh, sizeof(fh));
3837	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3838	error = VFS_VPTOFH(vp, &fh.fh_fid);
3839	vput(vp);
3840	if (error)
3841		return (error);
3842	error = copyout(&fh, uap->fhp, sizeof (fh));
3843	return (error);
3844}
3845
/*
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the suser() call or this becomes one giant
 * security hole.
 *
 * Arguments (uap): u_fhp is a userspace pointer to the file handle,
 * flags are open(2)-style flags (converted with FFLAGS()).
 * Returns: 0 on success with the new descriptor index stored in
 * td->td_retval[0]; an error number otherwise.  EINVAL is returned when
 * neither FREAD nor FWRITE is requested or when O_CREAT is set, ESTALE
 * when the handle's fsid does not match a mount.
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
fhopen(td, uap)
	struct thread *td;
	struct fhopen_args /* {
		const struct fhandle *u_fhp;
		int flags;
	} */ *uap;
{
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct file *fp;
	register struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	int indx;

	/* Privilege check; see the warning in the header comment. */
	error = suser(td);
	if (error)
		return (error);
	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	/* Fetch the caller's file handle into the kernel copy 'fhp'. */
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	/* Symbolic links and sockets cannot be opened through a handle. */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* Translate the open flags into the access mode to be checked. */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (fmode & O_APPEND)
		mode |= VAPPEND;
#ifdef MAC
	error = mac_check_vnode_open(td->td_ucred, vp, mode);
	if (error)
		goto bad;
#endif
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/*
		 * The vnode is unlocked around vn_start_write() and relocked
		 * afterwards.  On failure the vnode is unlocked, hence the
		 * vrele() and direct return instead of 'goto bad' (which
		 * does a vput()).
		 */
		VOP_UNLOCK(vp, 0, td);				/* XXX */
		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
			vrele(vp);
			return (error);
		}
		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
#ifdef MAC
		/*
		 * We don't yet have fp->f_cred, so use td->td_ucred, which
		 * should be right.
		 */
		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
		if (error == 0) {
#endif
			/* Truncate by setting only the size attribute to 0. */
			VATTR_NULL(vap);
			vap->va_size = 0;
			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
#ifdef MAC
		}
#endif
		vn_finished_write(mp);
		if (error)
			goto bad;
	}
	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
	if (error)
		goto bad;
	/*
	 * Make sure that a VM object is created for VMIO support.
	 *
	 * NOTE(review): this and the falloc() failure below reach 'bad'
	 * after VOP_OPEN() succeeded, and 'bad' only does vput(); no
	 * matching close is issued here -- confirm whether that is intended.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
			goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * end of vn_open code
	 */

	if ((error = falloc(td, &nfp, &indx)) != 0) {
		/* Undo the write count taken just above. */
		if (fmode & FWRITE)
			vp->v_writecount--;
		goto bad;
	}
	/* An extra reference on `nfp' has been held for us by falloc(). */
	fp = nfp;

	/* Attach the vnode to the new file structure. */
	nfp->f_vnode = vp;
	nfp->f_data = vp;
	nfp->f_flag = fmode & FMASK;
	nfp->f_ops = &vnops;
	nfp->f_type = DTYPE_VNODE;
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/*
		 * The caller asked for a flock-style lock at open time:
		 * request a whole-file lock (start 0, len 0), write lock
		 * for O_EXLOCK and read lock otherwise, waiting unless
		 * FNONBLOCK is set.
		 */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode lock is dropped around VOP_ADVLOCK(). */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
			    type)) != 0) {
			/*
			 * The lock request failed.  Normally close the
			 * descriptor but handle the case where someone might
			 * have dup()d or close()d it when we weren't looking.
			 */
			FILEDESC_LOCK(fdp);
			if (fdp->fd_ofiles[indx] == fp) {
				fdp->fd_ofiles[indx] = NULL;
				fdunused(fdp, indx);
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
			} else {
				FILEDESC_UNLOCK(fdp);
			}
			/*
			 * release our private reference
			 */
			fdrop(fp, td);
			/*
			 * The vnode is not touched directly here; presumably
			 * the final fdrop() closes the file and releases it
			 * -- verify against the file ops.
			 */
			return(error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td, td->td_ucred);

	/* Success: unlock the vnode, drop falloc()'s extra reference. */
	VOP_UNLOCK(vp, 0, td);
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);

bad:
	/* Common error exit for paths that still hold the vnode locked. */
	vput(vp);
	return (error);
}
4046
4047/*
4048 * Stat an (NFS) file handle.
4049 */
4050#ifndef _SYS_SYSPROTO_H_
4051struct fhstat_args {
4052	struct fhandle *u_fhp;
4053	struct stat *sb;
4054};
4055#endif
4056int
4057fhstat(td, uap)
4058	struct thread *td;
4059	register struct fhstat_args /* {
4060		struct fhandle *u_fhp;
4061		struct stat *sb;
4062	} */ *uap;
4063{
4064	struct stat sb;
4065	fhandle_t fh;
4066	struct mount *mp;
4067	struct vnode *vp;
4068	int error;
4069
4070	error = suser(td);
4071	if (error)
4072		return (error);
4073	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4074	if (error)
4075		return (error);
4076	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4077		return (ESTALE);
4078	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4079		return (error);
4080	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4081	vput(vp);
4082	if (error)
4083		return (error);
4084	error = copyout(&sb, uap->sb, sizeof(sb));
4085	return (error);
4086}
4087
4088/*
4089 * Implement fstatfs() for (NFS) file handles.
4090 */
4091#ifndef _SYS_SYSPROTO_H_
4092struct fhstatfs_args {
4093	struct fhandle *u_fhp;
4094	struct statfs *buf;
4095};
4096#endif
4097int
4098fhstatfs(td, uap)
4099	struct thread *td;
4100	struct fhstatfs_args /* {
4101		struct fhandle *u_fhp;
4102		struct statfs *buf;
4103	} */ *uap;
4104{
4105	struct statfs *sp, sb;
4106	struct mount *mp;
4107	struct vnode *vp;
4108	fhandle_t fh;
4109	int error;
4110
4111	error = suser(td);
4112	if (error)
4113		return (error);
4114	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
4115		return (error);
4116	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4117		return (ESTALE);
4118	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4119		return (error);
4120	mp = vp->v_mount;
4121	sp = &mp->mnt_stat;
4122	vput(vp);
4123#ifdef MAC
4124	error = mac_check_mount_stat(td->td_ucred, mp);
4125	if (error)
4126		return (error);
4127#endif
4128	/*
4129	 * Set these in case the underlying filesystem fails to do so.
4130	 */
4131	sp->f_version = STATFS_VERSION;
4132	sp->f_namemax = NAME_MAX;
4133	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4134	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4135		return (error);
4136	if (suser(td)) {
4137		bcopy(sp, &sb, sizeof(sb));
4138		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4139		sp = &sb;
4140	}
4141	return (copyout(sp, uap->buf, sizeof(*sp)));
4142}
4143
4144/*
4145 * Syscall to push extended attribute configuration information into the
4146 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4147 * a command (int cmd), and attribute name and misc data.  For now, the
4148 * attribute name is left in userspace for consumption by the VFS_op.
4149 * It will probably be changed to be copied into sysspace by the
4150 * syscall in the future, once issues with various consumers of the
4151 * attribute code have raised their hands.
4152 *
4153 * Currently this is used only by UFS Extended Attributes.
4154 */
4155int
4156extattrctl(td, uap)
4157	struct thread *td;
4158	struct extattrctl_args /* {
4159		const char *path;
4160		int cmd;
4161		const char *filename;
4162		int attrnamespace;
4163		const char *attrname;
4164	} */ *uap;
4165{
4166	struct vnode *filename_vp;
4167	struct nameidata nd;
4168	struct mount *mp, *mp_writable;
4169	char attrname[EXTATTR_MAXNAMELEN];
4170	int error;
4171
4172	/*
4173	 * uap->attrname is not always defined.  We check again later when we
4174	 * invoke the VFS call so as to pass in NULL there if needed.
4175	 */
4176	if (uap->attrname != NULL) {
4177		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4178		    NULL);
4179		if (error)
4180			return (error);
4181	}
4182
4183	/*
4184	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4185	 * which VFS_EXTATTRCTL() will later release.
4186	 */
4187	filename_vp = NULL;
4188	if (uap->filename != NULL) {
4189		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4190		    uap->filename, td);
4191		error = namei(&nd);
4192		if (error)
4193			return (error);
4194		filename_vp = nd.ni_vp;
4195		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4196	}
4197
4198	/* uap->path is always defined. */
4199	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4200	error = namei(&nd);
4201	if (error) {
4202		if (filename_vp != NULL)
4203			vput(filename_vp);
4204		return (error);
4205	}
4206	mp = nd.ni_vp->v_mount;
4207	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4208	NDFREE(&nd, 0);
4209	if (error) {
4210		if (filename_vp != NULL)
4211			vput(filename_vp);
4212		return (error);
4213	}
4214
4215	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4216	    uap->attrname != NULL ? attrname : NULL, td);
4217
4218	vn_finished_write(mp_writable);
4219	/*
4220	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4221	 * filename_vp, so vrele it if it is defined.
4222	 */
4223	if (filename_vp != NULL)
4224		vrele(filename_vp);
4225	return (error);
4226}
4227
4228/*-
4229 * Set a named extended attribute on a file or directory
4230 *
4231 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4232 *            kernelspace string pointer "attrname", userspace buffer
4233 *            pointer "data", buffer length "nbytes", thread "td".
4234 * Returns: 0 on success, an error number otherwise
4235 * Locks: none
4236 * References: vp must be a valid reference for the duration of the call
4237 */
4238static int
4239extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4240    void *data, size_t nbytes, struct thread *td)
4241{
4242	struct mount *mp;
4243	struct uio auio;
4244	struct iovec aiov;
4245	ssize_t cnt;
4246	int error;
4247
4248	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4249	if (error)
4250		return (error);
4251	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4252	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4253
4254	aiov.iov_base = data;
4255	aiov.iov_len = nbytes;
4256	auio.uio_iov = &aiov;
4257	auio.uio_iovcnt = 1;
4258	auio.uio_offset = 0;
4259	if (nbytes > INT_MAX) {
4260		error = EINVAL;
4261		goto done;
4262	}
4263	auio.uio_resid = nbytes;
4264	auio.uio_rw = UIO_WRITE;
4265	auio.uio_segflg = UIO_USERSPACE;
4266	auio.uio_td = td;
4267	cnt = nbytes;
4268
4269#ifdef MAC
4270	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4271	    attrname, &auio);
4272	if (error)
4273		goto done;
4274#endif
4275
4276	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4277	    td->td_ucred, td);
4278	cnt -= auio.uio_resid;
4279	td->td_retval[0] = cnt;
4280
4281done:
4282	VOP_UNLOCK(vp, 0, td);
4283	vn_finished_write(mp);
4284	return (error);
4285}
4286
4287int
4288extattr_set_fd(td, uap)
4289	struct thread *td;
4290	struct extattr_set_fd_args /* {
4291		int fd;
4292		int attrnamespace;
4293		const char *attrname;
4294		void *data;
4295		size_t nbytes;
4296	} */ *uap;
4297{
4298	struct file *fp;
4299	char attrname[EXTATTR_MAXNAMELEN];
4300	int error;
4301
4302	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4303	if (error)
4304		return (error);
4305
4306	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4307	if (error)
4308		return (error);
4309
4310	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4311	    attrname, uap->data, uap->nbytes, td);
4312	fdrop(fp, td);
4313
4314	return (error);
4315}
4316
4317int
4318extattr_set_file(td, uap)
4319	struct thread *td;
4320	struct extattr_set_file_args /* {
4321		const char *path;
4322		int attrnamespace;
4323		const char *attrname;
4324		void *data;
4325		size_t nbytes;
4326	} */ *uap;
4327{
4328	struct nameidata nd;
4329	char attrname[EXTATTR_MAXNAMELEN];
4330	int error;
4331
4332	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4333	if (error)
4334		return (error);
4335
4336	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4337	error = namei(&nd);
4338	if (error)
4339		return (error);
4340	NDFREE(&nd, NDF_ONLY_PNBUF);
4341
4342	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4343	    uap->data, uap->nbytes, td);
4344
4345	vrele(nd.ni_vp);
4346	return (error);
4347}
4348
4349int
4350extattr_set_link(td, uap)
4351	struct thread *td;
4352	struct extattr_set_link_args /* {
4353		const char *path;
4354		int attrnamespace;
4355		const char *attrname;
4356		void *data;
4357		size_t nbytes;
4358	} */ *uap;
4359{
4360	struct nameidata nd;
4361	char attrname[EXTATTR_MAXNAMELEN];
4362	int error;
4363
4364	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4365	if (error)
4366		return (error);
4367
4368	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4369	error = namei(&nd);
4370	if (error)
4371		return (error);
4372	NDFREE(&nd, NDF_ONLY_PNBUF);
4373
4374	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4375	    uap->data, uap->nbytes, td);
4376
4377	vrele(nd.ni_vp);
4378	return (error);
4379}
4380
4381/*-
4382 * Get a named extended attribute on a file or directory
4383 *
4384 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4385 *            kernelspace string pointer "attrname", userspace buffer
4386 *            pointer "data", buffer length "nbytes", thread "td".
4387 * Returns: 0 on success, an error number otherwise
4388 * Locks: none
4389 * References: vp must be a valid reference for the duration of the call
4390 */
4391static int
4392extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4393    void *data, size_t nbytes, struct thread *td)
4394{
4395	struct uio auio, *auiop;
4396	struct iovec aiov;
4397	ssize_t cnt;
4398	size_t size, *sizep;
4399	int error;
4400
4401	/*
4402	 * XXX: Temporary API compatibility for applications that know
4403	 * about this hack ("" means list), but haven't been updated
4404	 * for the extattr_list_*() system calls yet.  This will go
4405	 * away for FreeBSD 5.3.
4406	 */
4407	if (strlen(attrname) == 0)
4408		return (extattr_list_vp(vp, attrnamespace, data, nbytes, td));
4409
4410	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4411	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4412
4413	/*
4414	 * Slightly unusual semantics: if the user provides a NULL data
4415	 * pointer, they don't want to receive the data, just the
4416	 * maximum read length.
4417	 */
4418	auiop = NULL;
4419	sizep = NULL;
4420	cnt = 0;
4421	if (data != NULL) {
4422		aiov.iov_base = data;
4423		aiov.iov_len = nbytes;
4424		auio.uio_iov = &aiov;
4425		auio.uio_offset = 0;
4426		if (nbytes > INT_MAX) {
4427			error = EINVAL;
4428			goto done;
4429		}
4430		auio.uio_resid = nbytes;
4431		auio.uio_rw = UIO_READ;
4432		auio.uio_segflg = UIO_USERSPACE;
4433		auio.uio_td = td;
4434		auiop = &auio;
4435		cnt = nbytes;
4436	} else {
4437		sizep = &size;
4438	}
4439
4440#ifdef MAC
4441	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4442	    attrname, &auio);
4443	if (error)
4444		goto done;
4445#endif
4446
4447	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4448	    td->td_ucred, td);
4449
4450	if (auiop != NULL) {
4451		cnt -= auio.uio_resid;
4452		td->td_retval[0] = cnt;
4453	} else {
4454		td->td_retval[0] = size;
4455	}
4456
4457done:
4458	VOP_UNLOCK(vp, 0, td);
4459	return (error);
4460}
4461
4462int
4463extattr_get_fd(td, uap)
4464	struct thread *td;
4465	struct extattr_get_fd_args /* {
4466		int fd;
4467		int attrnamespace;
4468		const char *attrname;
4469		void *data;
4470		size_t nbytes;
4471	} */ *uap;
4472{
4473	struct file *fp;
4474	char attrname[EXTATTR_MAXNAMELEN];
4475	int error;
4476
4477	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4478	if (error)
4479		return (error);
4480
4481	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4482	if (error)
4483		return (error);
4484
4485	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4486	    attrname, uap->data, uap->nbytes, td);
4487
4488	fdrop(fp, td);
4489	return (error);
4490}
4491
4492int
4493extattr_get_file(td, uap)
4494	struct thread *td;
4495	struct extattr_get_file_args /* {
4496		const char *path;
4497		int attrnamespace;
4498		const char *attrname;
4499		void *data;
4500		size_t nbytes;
4501	} */ *uap;
4502{
4503	struct nameidata nd;
4504	char attrname[EXTATTR_MAXNAMELEN];
4505	int error;
4506
4507	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4508	if (error)
4509		return (error);
4510
4511	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4512	error = namei(&nd);
4513	if (error)
4514		return (error);
4515	NDFREE(&nd, NDF_ONLY_PNBUF);
4516
4517	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4518	    uap->data, uap->nbytes, td);
4519
4520	vrele(nd.ni_vp);
4521	return (error);
4522}
4523
4524int
4525extattr_get_link(td, uap)
4526	struct thread *td;
4527	struct extattr_get_link_args /* {
4528		const char *path;
4529		int attrnamespace;
4530		const char *attrname;
4531		void *data;
4532		size_t nbytes;
4533	} */ *uap;
4534{
4535	struct nameidata nd;
4536	char attrname[EXTATTR_MAXNAMELEN];
4537	int error;
4538
4539	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4540	if (error)
4541		return (error);
4542
4543	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4544	error = namei(&nd);
4545	if (error)
4546		return (error);
4547	NDFREE(&nd, NDF_ONLY_PNBUF);
4548
4549	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4550	    uap->data, uap->nbytes, td);
4551
4552	vrele(nd.ni_vp);
4553	return (error);
4554}
4555
4556/*
4557 * extattr_delete_vp(): Delete a named extended attribute on a file or
4558 *                      directory
4559 *
4560 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4561 *            kernelspace string pointer "attrname", proc "p"
4562 * Returns: 0 on success, an error number otherwise
4563 * Locks: none
4564 * References: vp must be a valid reference for the duration of the call
4565 */
4566static int
4567extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4568    struct thread *td)
4569{
4570	struct mount *mp;
4571	int error;
4572
4573	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4574	if (error)
4575		return (error);
4576	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4577	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4578
4579#ifdef MAC
4580	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4581	    attrname);
4582	if (error)
4583		goto done;
4584#endif
4585
4586	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4587	    td);
4588	if (error == EOPNOTSUPP)
4589		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4590		    td->td_ucred, td);
4591#ifdef MAC
4592done:
4593#endif
4594	VOP_UNLOCK(vp, 0, td);
4595	vn_finished_write(mp);
4596	return (error);
4597}
4598
4599int
4600extattr_delete_fd(td, uap)
4601	struct thread *td;
4602	struct extattr_delete_fd_args /* {
4603		int fd;
4604		int attrnamespace;
4605		const char *attrname;
4606	} */ *uap;
4607{
4608	struct file *fp;
4609	struct vnode *vp;
4610	char attrname[EXTATTR_MAXNAMELEN];
4611	int error;
4612
4613	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4614	if (error)
4615		return (error);
4616
4617	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4618	if (error)
4619		return (error);
4620	vp = fp->f_vnode;
4621
4622	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4623	fdrop(fp, td);
4624	return (error);
4625}
4626
4627int
4628extattr_delete_file(td, uap)
4629	struct thread *td;
4630	struct extattr_delete_file_args /* {
4631		const char *path;
4632		int attrnamespace;
4633		const char *attrname;
4634	} */ *uap;
4635{
4636	struct nameidata nd;
4637	char attrname[EXTATTR_MAXNAMELEN];
4638	int error;
4639
4640	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4641	if (error)
4642		return(error);
4643
4644	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4645	error = namei(&nd);
4646	if (error)
4647		return(error);
4648	NDFREE(&nd, NDF_ONLY_PNBUF);
4649
4650	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4651	vrele(nd.ni_vp);
4652	return(error);
4653}
4654
4655int
4656extattr_delete_link(td, uap)
4657	struct thread *td;
4658	struct extattr_delete_link_args /* {
4659		const char *path;
4660		int attrnamespace;
4661		const char *attrname;
4662	} */ *uap;
4663{
4664	struct nameidata nd;
4665	char attrname[EXTATTR_MAXNAMELEN];
4666	int error;
4667
4668	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4669	if (error)
4670		return(error);
4671
4672	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4673	error = namei(&nd);
4674	if (error)
4675		return(error);
4676	NDFREE(&nd, NDF_ONLY_PNBUF);
4677
4678	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4679	vrele(nd.ni_vp);
4680	return(error);
4681}
4682
4683/*-
4684 * Retrieve a list of extended attributes on a file or directory.
4685 *
4686 * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4687 *            userspace buffer pointer "data", buffer length "nbytes",
4688 *            thread "td".
4689 * Returns: 0 on success, an error number otherwise
4690 * Locks: none
4691 * References: vp must be a valid reference for the duration of the call
4692 */
4693static int
4694extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4695    size_t nbytes, struct thread *td)
4696{
4697	struct uio auio, *auiop;
4698	size_t size, *sizep;
4699	struct iovec aiov;
4700	ssize_t cnt;
4701	int error;
4702
4703	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4704	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4705
4706	auiop = NULL;
4707	sizep = NULL;
4708	cnt = 0;
4709	if (data != NULL) {
4710		aiov.iov_base = data;
4711		aiov.iov_len = nbytes;
4712		auio.uio_iov = &aiov;
4713		auio.uio_offset = 0;
4714		if (nbytes > INT_MAX) {
4715			error = EINVAL;
4716			goto done;
4717		}
4718		auio.uio_resid = nbytes;
4719		auio.uio_rw = UIO_READ;
4720		auio.uio_segflg = UIO_USERSPACE;
4721		auio.uio_td = td;
4722		auiop = &auio;
4723		cnt = nbytes;
4724	} else {
4725		sizep = &size;
4726	}
4727
4728#ifdef MAC
4729	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4730	if (error)
4731		goto done;
4732#endif
4733
4734	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4735	    td->td_ucred, td);
4736
4737	if (auiop != NULL) {
4738		cnt -= auio.uio_resid;
4739		td->td_retval[0] = cnt;
4740	} else {
4741		td->td_retval[0] = size;
4742	}
4743
4744done:
4745	VOP_UNLOCK(vp, 0, td);
4746	return (error);
4747}
4748
4749
4750int
4751extattr_list_fd(td, uap)
4752	struct thread *td;
4753	struct extattr_list_fd_args /* {
4754		int fd;
4755		int attrnamespace;
4756		void *data;
4757		size_t nbytes;
4758	} */ *uap;
4759{
4760	struct file *fp;
4761	int error;
4762
4763	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4764	if (error)
4765		return (error);
4766
4767	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4768	    uap->nbytes, td);
4769
4770	fdrop(fp, td);
4771	return (error);
4772}
4773
4774int
4775extattr_list_file(td, uap)
4776	struct thread*td;
4777	struct extattr_list_file_args /* {
4778		const char *path;
4779		int attrnamespace;
4780		void *data;
4781		size_t nbytes;
4782	} */ *uap;
4783{
4784	struct nameidata nd;
4785	int error;
4786
4787	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4788	error = namei(&nd);
4789	if (error)
4790		return (error);
4791	NDFREE(&nd, NDF_ONLY_PNBUF);
4792
4793	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4794	    uap->nbytes, td);
4795
4796	vrele(nd.ni_vp);
4797	return (error);
4798}
4799
4800int
4801extattr_list_link(td, uap)
4802	struct thread*td;
4803	struct extattr_list_link_args /* {
4804		const char *path;
4805		int attrnamespace;
4806		void *data;
4807		size_t nbytes;
4808	} */ *uap;
4809{
4810	struct nameidata nd;
4811	int error;
4812
4813	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4814	error = namei(&nd);
4815	if (error)
4816		return (error);
4817	NDFREE(&nd, NDF_ONLY_PNBUF);
4818
4819	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4820	    uap->nbytes, td);
4821
4822	vrele(nd.ni_vp);
4823	return (error);
4824}
4825