vfs_extattr.c revision 146506
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_extattr.c 146506 2005-05-22 23:05:27Z pjd $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/mac.h>
49#include <sys/malloc.h>
50#include <sys/mount.h>
51#include <sys/mutex.h>
52#include <sys/sysproto.h>
53#include <sys/namei.h>
54#include <sys/filedesc.h>
55#include <sys/kernel.h>
56#include <sys/fcntl.h>
57#include <sys/file.h>
58#include <sys/limits.h>
59#include <sys/linker.h>
60#include <sys/stat.h>
61#include <sys/sx.h>
62#include <sys/unistd.h>
63#include <sys/vnode.h>
64#include <sys/proc.h>
65#include <sys/dirent.h>
66#include <sys/extattr.h>
67#include <sys/jail.h>
68#include <sys/syscallsubr.h>
69#include <sys/sysctl.h>
70
71#include <machine/stdarg.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/uma.h>
77
78static int chroot_refuse_vdir_fds(struct filedesc *fdp);
79static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
80static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
81static int setfmode(struct thread *td, struct vnode *, int);
82static int setfflags(struct thread *td, struct vnode *, int);
83static int setutimes(struct thread *td, struct vnode *,
84    const struct timespec *, int, int);
85static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
86    struct thread *td);
87
88static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
89    size_t nbytes, struct thread *td);
90
91int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92
93/*
94 * The module initialization routine for POSIX asynchronous I/O will
95 * set this to the version of AIO that it implements.  (Zero means
96 * that it is not implemented.)  This value is used here by pathconf()
97 * and in kern_descrip.c by fpathconf().
98 */
99int async_io_version;
100
101/*
102 * Sync each mounted filesystem.
103 */
104#ifndef _SYS_SYSPROTO_H_
105struct sync_args {
106	int     dummy;
107};
108#endif
109
110#ifdef DEBUG
111static int syncprt = 0;
112SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
113#endif
114
115/* ARGSUSED */
116int
117sync(td, uap)
118	struct thread *td;
119	struct sync_args *uap;
120{
121	struct mount *mp, *nmp;
122	int asyncflag;
123
124	mtx_lock(&Giant);
125	mtx_lock(&mountlist_mtx);
126	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
127		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
128			nmp = TAILQ_NEXT(mp, mnt_list);
129			continue;
130		}
131		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
132		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
133			asyncflag = mp->mnt_flag & MNT_ASYNC;
134			mp->mnt_flag &= ~MNT_ASYNC;
135			vfs_msync(mp, MNT_NOWAIT);
136			VFS_SYNC(mp, MNT_NOWAIT, td);
137			mp->mnt_flag |= asyncflag;
138			vn_finished_write(mp);
139		}
140		mtx_lock(&mountlist_mtx);
141		nmp = TAILQ_NEXT(mp, mnt_list);
142		vfs_unbusy(mp, td);
143	}
144	mtx_unlock(&mountlist_mtx);
145#if 0
146/*
147 * XXX don't call vfs_bufstats() yet because that routine
148 * was not imported in the Lite2 merge.
149 */
150#ifdef DIAGNOSTIC
151	if (syncprt)
152		vfs_bufstats();
153#endif /* DIAGNOSTIC */
154#endif
155	mtx_unlock(&Giant);
156	return (0);
157}
158
159/* XXX PRISON: could be per prison flag */
160static int prison_quotas;
161#if 0
162SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
163#endif
164
165/*
166 * Change filesystem quotas.
167 */
168#ifndef _SYS_SYSPROTO_H_
169struct quotactl_args {
170	char *path;
171	int cmd;
172	int uid;
173	caddr_t arg;
174};
175#endif
176int
177quotactl(td, uap)
178	struct thread *td;
179	register struct quotactl_args /* {
180		char *path;
181		int cmd;
182		int uid;
183		caddr_t arg;
184	} */ *uap;
185{
186	struct mount *mp, *vmp;
187	int error;
188	struct nameidata nd;
189
190	if (jailed(td->td_ucred) && !prison_quotas)
191		return (EPERM);
192	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
193	if ((error = namei(&nd)) != 0)
194		return (error);
195	NDFREE(&nd, NDF_ONLY_PNBUF);
196	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
197	mp = nd.ni_vp->v_mount;
198	vrele(nd.ni_vp);
199	if (error)
200		return (error);
201	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
202	vn_finished_write(vmp);
203	return (error);
204}
205
206/*
207 * Get filesystem statistics.
208 */
209#ifndef _SYS_SYSPROTO_H_
210struct statfs_args {
211	char *path;
212	struct statfs *buf;
213};
214#endif
215int
216statfs(td, uap)
217	struct thread *td;
218	register struct statfs_args /* {
219		char *path;
220		struct statfs *buf;
221	} */ *uap;
222{
223	struct statfs sf;
224	int error;
225
226	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
227	if (error == 0)
228		error = copyout(&sf, uap->buf, sizeof(sf));
229	return (error);
230}
231
232int
233kern_statfs(struct thread *td, char *path, enum uio_seg pathseg,
234    struct statfs *buf)
235{
236	struct mount *mp;
237	struct statfs *sp, sb;
238	int error;
239	struct nameidata nd;
240
241	mtx_lock(&Giant);
242	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
243	if ((error = namei(&nd)) != 0) {
244		mtx_unlock(&Giant);
245		return (error);
246	}
247	mp = nd.ni_vp->v_mount;
248	sp = &mp->mnt_stat;
249	NDFREE(&nd, NDF_ONLY_PNBUF);
250	vrele(nd.ni_vp);
251#ifdef MAC
252	error = mac_check_mount_stat(td->td_ucred, mp);
253	if (error) {
254		mtx_unlock(&Giant);
255		return (error);
256	}
257#endif
258	/*
259	 * Set these in case the underlying filesystem fails to do so.
260	 */
261	sp->f_version = STATFS_VERSION;
262	sp->f_namemax = NAME_MAX;
263	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
264	error = VFS_STATFS(mp, sp, td);
265	mtx_unlock(&Giant);
266	if (error)
267		return (error);
268	if (suser(td)) {
269		bcopy(sp, &sb, sizeof(sb));
270		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
271		sp = &sb;
272	}
273	*buf = *sp;
274	return (0);
275}
276
277/*
278 * Get filesystem statistics.
279 */
280#ifndef _SYS_SYSPROTO_H_
281struct fstatfs_args {
282	int fd;
283	struct statfs *buf;
284};
285#endif
286int
287fstatfs(td, uap)
288	struct thread *td;
289	register struct fstatfs_args /* {
290		int fd;
291		struct statfs *buf;
292	} */ *uap;
293{
294	struct statfs sf;
295	int error;
296
297	error = kern_fstatfs(td, uap->fd, &sf);
298	if (error == 0)
299		error = copyout(&sf, uap->buf, sizeof(sf));
300	return (error);
301}
302
303int
304kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
305{
306	struct file *fp;
307	struct mount *mp;
308	struct statfs *sp, sb;
309	int error;
310
311	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
312		return (error);
313	mtx_lock(&Giant);
314	mp = fp->f_vnode->v_mount;
315	fdrop(fp, td);
316	if (mp == NULL) {
317		mtx_unlock(&Giant);
318		return (EBADF);
319	}
320#ifdef MAC
321	error = mac_check_mount_stat(td->td_ucred, mp);
322	if (error) {
323		mtx_unlock(&Giant);
324		return (error);
325	}
326#endif
327	sp = &mp->mnt_stat;
328	/*
329	 * Set these in case the underlying filesystem fails to do so.
330	 */
331	sp->f_version = STATFS_VERSION;
332	sp->f_namemax = NAME_MAX;
333	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
334	error = VFS_STATFS(mp, sp, td);
335	mtx_unlock(&Giant);
336	if (error)
337		return (error);
338	if (suser(td)) {
339		bcopy(sp, &sb, sizeof(sb));
340		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
341		sp = &sb;
342	}
343	*buf = *sp;
344	return (0);
345}
346
347/*
348 * Get statistics on all filesystems.
349 */
350#ifndef _SYS_SYSPROTO_H_
351struct getfsstat_args {
352	struct statfs *buf;
353	long bufsize;
354	int flags;
355};
356#endif
357int
358getfsstat(td, uap)
359	struct thread *td;
360	register struct getfsstat_args /* {
361		struct statfs *buf;
362		long bufsize;
363		int flags;
364	} */ *uap;
365{
366	struct mount *mp, *nmp;
367	struct statfs *sp, sb;
368	caddr_t sfsp;
369	long count, maxcount, error;
370
371	maxcount = uap->bufsize / sizeof(struct statfs);
372	sfsp = (caddr_t)uap->buf;
373	count = 0;
374	mtx_lock(&Giant);
375	mtx_lock(&mountlist_mtx);
376	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
377		if (!prison_check_mount(td->td_ucred, mp)) {
378			nmp = TAILQ_NEXT(mp, mnt_list);
379			continue;
380		}
381#ifdef MAC
382		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
383			nmp = TAILQ_NEXT(mp, mnt_list);
384			continue;
385		}
386#endif
387		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
388			nmp = TAILQ_NEXT(mp, mnt_list);
389			continue;
390		}
391		if (sfsp && count < maxcount) {
392			sp = &mp->mnt_stat;
393			/*
394			 * Set these in case the underlying filesystem
395			 * fails to do so.
396			 */
397			sp->f_version = STATFS_VERSION;
398			sp->f_namemax = NAME_MAX;
399			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
400			/*
401			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
402			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
403			 * overrides MNT_WAIT.
404			 */
405			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
406			    (uap->flags & MNT_WAIT)) &&
407			    (error = VFS_STATFS(mp, sp, td))) {
408				mtx_lock(&mountlist_mtx);
409				nmp = TAILQ_NEXT(mp, mnt_list);
410				vfs_unbusy(mp, td);
411				continue;
412			}
413			if (suser(td)) {
414				bcopy(sp, &sb, sizeof(sb));
415				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
416				sp = &sb;
417			}
418			error = copyout(sp, sfsp, sizeof(*sp));
419			if (error) {
420				vfs_unbusy(mp, td);
421				mtx_unlock(&Giant);
422				return (error);
423			}
424			sfsp += sizeof(*sp);
425		}
426		count++;
427		mtx_lock(&mountlist_mtx);
428		nmp = TAILQ_NEXT(mp, mnt_list);
429		vfs_unbusy(mp, td);
430	}
431	mtx_unlock(&mountlist_mtx);
432	mtx_unlock(&Giant);
433	if (sfsp && count > maxcount)
434		td->td_retval[0] = maxcount;
435	else
436		td->td_retval[0] = count;
437	return (0);
438}
439
440#ifdef COMPAT_FREEBSD4
441/*
442 * Get old format filesystem statistics.
443 */
444static void cvtstatfs(struct thread *, struct statfs *, struct ostatfs *);
445
446#ifndef _SYS_SYSPROTO_H_
447struct freebsd4_statfs_args {
448	char *path;
449	struct ostatfs *buf;
450};
451#endif
452int
453freebsd4_statfs(td, uap)
454	struct thread *td;
455	struct freebsd4_statfs_args /* {
456		char *path;
457		struct ostatfs *buf;
458	} */ *uap;
459{
460	struct ostatfs osb;
461	struct statfs sf;
462	int error;
463
464	error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf);
465	if (error)
466		return (error);
467	cvtstatfs(td, &sf, &osb);
468	return (copyout(&osb, uap->buf, sizeof(osb)));
469}
470
471/*
472 * Get filesystem statistics.
473 */
474#ifndef _SYS_SYSPROTO_H_
475struct freebsd4_fstatfs_args {
476	int fd;
477	struct ostatfs *buf;
478};
479#endif
480int
481freebsd4_fstatfs(td, uap)
482	struct thread *td;
483	struct freebsd4_fstatfs_args /* {
484		int fd;
485		struct ostatfs *buf;
486	} */ *uap;
487{
488	struct ostatfs osb;
489	struct statfs sf;
490	int error;
491
492	error = kern_fstatfs(td, uap->fd, &sf);
493	if (error)
494		return (error);
495	cvtstatfs(td, &sf, &osb);
496	return (copyout(&osb, uap->buf, sizeof(osb)));
497}
498
499/*
500 * Get statistics on all filesystems.
501 */
502#ifndef _SYS_SYSPROTO_H_
503struct freebsd4_getfsstat_args {
504	struct ostatfs *buf;
505	long bufsize;
506	int flags;
507};
508#endif
509int
510freebsd4_getfsstat(td, uap)
511	struct thread *td;
512	register struct freebsd4_getfsstat_args /* {
513		struct ostatfs *buf;
514		long bufsize;
515		int flags;
516	} */ *uap;
517{
518	struct mount *mp, *nmp;
519	struct statfs *sp, sb;
520	struct ostatfs osb;
521	caddr_t sfsp;
522	long count, maxcount, error;
523
524	maxcount = uap->bufsize / sizeof(struct ostatfs);
525	sfsp = (caddr_t)uap->buf;
526	count = 0;
527	mtx_lock(&mountlist_mtx);
528	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
529		if (!prison_check_mount(td->td_ucred, mp)) {
530			nmp = TAILQ_NEXT(mp, mnt_list);
531			continue;
532		}
533#ifdef MAC
534		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
535			nmp = TAILQ_NEXT(mp, mnt_list);
536			continue;
537		}
538#endif
539		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
540			nmp = TAILQ_NEXT(mp, mnt_list);
541			continue;
542		}
543		if (sfsp && count < maxcount) {
544			sp = &mp->mnt_stat;
545			/*
546			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
547			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
548			 * overrides MNT_WAIT.
549			 */
550			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
551			    (uap->flags & MNT_WAIT)) &&
552			    (error = VFS_STATFS(mp, sp, td))) {
553				mtx_lock(&mountlist_mtx);
554				nmp = TAILQ_NEXT(mp, mnt_list);
555				vfs_unbusy(mp, td);
556				continue;
557			}
558			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
559			if (suser(td)) {
560				bcopy(sp, &sb, sizeof(sb));
561				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
562				sp = &sb;
563			}
564			cvtstatfs(td, sp, &osb);
565			error = copyout(&osb, sfsp, sizeof(osb));
566			if (error) {
567				vfs_unbusy(mp, td);
568				return (error);
569			}
570			sfsp += sizeof(osb);
571		}
572		count++;
573		mtx_lock(&mountlist_mtx);
574		nmp = TAILQ_NEXT(mp, mnt_list);
575		vfs_unbusy(mp, td);
576	}
577	mtx_unlock(&mountlist_mtx);
578	if (sfsp && count > maxcount)
579		td->td_retval[0] = maxcount;
580	else
581		td->td_retval[0] = count;
582	return (0);
583}
584
585/*
586 * Implement fstatfs() for (NFS) file handles.
587 */
588#ifndef _SYS_SYSPROTO_H_
589struct freebsd4_fhstatfs_args {
590	struct fhandle *u_fhp;
591	struct ostatfs *buf;
592};
593#endif
594int
595freebsd4_fhstatfs(td, uap)
596	struct thread *td;
597	struct freebsd4_fhstatfs_args /* {
598		struct fhandle *u_fhp;
599		struct ostatfs *buf;
600	} */ *uap;
601{
602	struct ostatfs osb;
603	struct statfs sf;
604	fhandle_t fh;
605	int error;
606
607	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
608		return (error);
609	error = kern_fhstatfs(td, fh, &sf);
610	if (error)
611		return (error);
612	cvtstatfs(td, &sf, &osb);
613	return (copyout(&osb, uap->buf, sizeof(osb)));
614}
615
616/*
617 * Convert a new format statfs structure to an old format statfs structure.
618 */
619static void
620cvtstatfs(td, nsp, osp)
621	struct thread *td;
622	struct statfs *nsp;
623	struct ostatfs *osp;
624{
625
626	bzero(osp, sizeof(*osp));
627	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
628	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
629	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
630	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
631	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
632	osp->f_files = MIN(nsp->f_files, LONG_MAX);
633	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
634	osp->f_owner = nsp->f_owner;
635	osp->f_type = nsp->f_type;
636	osp->f_flags = nsp->f_flags;
637	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
638	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
639	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
640	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
641	bcopy(nsp->f_fstypename, osp->f_fstypename,
642	    MIN(MFSNAMELEN, OMNAMELEN));
643	bcopy(nsp->f_mntonname, osp->f_mntonname,
644	    MIN(MFSNAMELEN, OMNAMELEN));
645	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
646	    MIN(MFSNAMELEN, OMNAMELEN));
647	osp->f_fsid = nsp->f_fsid;
648}
649#endif /* COMPAT_FREEBSD4 */
650
651/*
652 * Change current working directory to a given file descriptor.
653 */
654#ifndef _SYS_SYSPROTO_H_
655struct fchdir_args {
656	int	fd;
657};
658#endif
659int
660fchdir(td, uap)
661	struct thread *td;
662	struct fchdir_args /* {
663		int fd;
664	} */ *uap;
665{
666	register struct filedesc *fdp = td->td_proc->p_fd;
667	struct vnode *vp, *tdp, *vpold;
668	struct mount *mp;
669	struct file *fp;
670	int vfslocked;
671	int error;
672
673	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
674		return (error);
675	vp = fp->f_vnode;
676	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
677	VREF(vp);
678	fdrop(fp, td);
679	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
680	if (vp->v_type != VDIR)
681		error = ENOTDIR;
682#ifdef MAC
683	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
684	}
685#endif
686	else
687		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
688	while (!error && (mp = vp->v_mountedhere) != NULL) {
689		int tvfslocked;
690		if (vfs_busy(mp, 0, 0, td))
691			continue;
692		tvfslocked = VFS_LOCK_GIANT(mp);
693		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);
694		vfs_unbusy(mp, td);
695		if (error) {
696			VFS_UNLOCK_GIANT(tvfslocked);
697			break;
698		}
699		vput(vp);
700		VFS_UNLOCK_GIANT(vfslocked);
701		vp = tdp;
702		vfslocked = tvfslocked;
703	}
704	if (error) {
705		vput(vp);
706		VFS_UNLOCK_GIANT(vfslocked);
707		return (error);
708	}
709	VOP_UNLOCK(vp, 0, td);
710	FILEDESC_LOCK_FAST(fdp);
711	vpold = fdp->fd_cdir;
712	fdp->fd_cdir = vp;
713	FILEDESC_UNLOCK_FAST(fdp);
714	vrele(vpold);
715	VFS_UNLOCK_GIANT(vfslocked);
716	return (0);
717}
718
719/*
720 * Change current working directory (``.'').
721 */
722#ifndef _SYS_SYSPROTO_H_
723struct chdir_args {
724	char	*path;
725};
726#endif
727int
728chdir(td, uap)
729	struct thread *td;
730	struct chdir_args /* {
731		char *path;
732	} */ *uap;
733{
734
735	return (kern_chdir(td, uap->path, UIO_USERSPACE));
736}
737
738int
739kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
740{
741	register struct filedesc *fdp = td->td_proc->p_fd;
742	int error;
743	struct nameidata nd;
744	struct vnode *vp;
745	int vfslocked;
746
747	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
748	if ((error = namei(&nd)) != 0)
749		return (error);
750	vfslocked = NDHASGIANT(&nd);
751	if ((error = change_dir(nd.ni_vp, td)) != 0) {
752		vput(nd.ni_vp);
753		VFS_UNLOCK_GIANT(vfslocked);
754		NDFREE(&nd, NDF_ONLY_PNBUF);
755		return (error);
756	}
757	VOP_UNLOCK(nd.ni_vp, 0, td);
758	NDFREE(&nd, NDF_ONLY_PNBUF);
759	FILEDESC_LOCK_FAST(fdp);
760	vp = fdp->fd_cdir;
761	fdp->fd_cdir = nd.ni_vp;
762	FILEDESC_UNLOCK_FAST(fdp);
763	vrele(vp);
764	VFS_UNLOCK_GIANT(vfslocked);
765	return (0);
766}
767
768/*
769 * Helper function for raised chroot(2) security function:  Refuse if
770 * any filedescriptors are open directories.
771 */
772static int
773chroot_refuse_vdir_fds(fdp)
774	struct filedesc *fdp;
775{
776	struct vnode *vp;
777	struct file *fp;
778	int fd;
779
780	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
781	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
782		fp = fget_locked(fdp, fd);
783		if (fp == NULL)
784			continue;
785		if (fp->f_type == DTYPE_VNODE) {
786			vp = fp->f_vnode;
787			if (vp->v_type == VDIR)
788				return (EPERM);
789		}
790	}
791	return (0);
792}
793
794/*
795 * This sysctl determines if we will allow a process to chroot(2) if it
796 * has a directory open:
797 *	0: disallowed for all processes.
798 *	1: allowed for processes that were not already chroot(2)'ed.
799 *	2: allowed for all processes.
800 */
801
802static int chroot_allow_open_directories = 1;
803
804SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
805     &chroot_allow_open_directories, 0, "");
806
807/*
808 * Change notion of root (``/'') directory.
809 */
810#ifndef _SYS_SYSPROTO_H_
811struct chroot_args {
812	char	*path;
813};
814#endif
815int
816chroot(td, uap)
817	struct thread *td;
818	struct chroot_args /* {
819		char *path;
820	} */ *uap;
821{
822	int error;
823	struct nameidata nd;
824	int vfslocked;
825
826	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
827	if (error)
828		return (error);
829	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
830	    UIO_USERSPACE, uap->path, td);
831	error = namei(&nd);
832	if (error)
833		goto error;
834	vfslocked = NDHASGIANT(&nd);
835	if ((error = change_dir(nd.ni_vp, td)) != 0)
836		goto e_vunlock;
837#ifdef MAC
838	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
839		goto e_vunlock;
840#endif
841	VOP_UNLOCK(nd.ni_vp, 0, td);
842	error = change_root(nd.ni_vp, td);
843	vrele(nd.ni_vp);
844	VFS_UNLOCK_GIANT(vfslocked);
845	NDFREE(&nd, NDF_ONLY_PNBUF);
846	return (error);
847e_vunlock:
848	vput(nd.ni_vp);
849	VFS_UNLOCK_GIANT(vfslocked);
850error:
851	NDFREE(&nd, NDF_ONLY_PNBUF);
852	return (error);
853}
854
855/*
856 * Common routine for chroot and chdir.  Callers must provide a locked vnode
857 * instance.
858 */
859int
860change_dir(vp, td)
861	struct vnode *vp;
862	struct thread *td;
863{
864	int error;
865
866	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
867	if (vp->v_type != VDIR)
868		return (ENOTDIR);
869#ifdef MAC
870	error = mac_check_vnode_chdir(td->td_ucred, vp);
871	if (error)
872		return (error);
873#endif
874	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
875	return (error);
876}
877
/*
 * Common routine for chroot() and jail_attach().  The caller is
 * responsible for invoking suser() and mac_check_vnode_chroot() to
 * authorize this operation.
 */
883int
884change_root(vp, td)
885	struct vnode *vp;
886	struct thread *td;
887{
888	struct filedesc *fdp;
889	struct vnode *oldvp;
890	int error;
891
892	VFS_ASSERT_GIANT(vp->v_mount);
893	fdp = td->td_proc->p_fd;
894	FILEDESC_LOCK(fdp);
895	if (chroot_allow_open_directories == 0 ||
896	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
897		error = chroot_refuse_vdir_fds(fdp);
898		if (error) {
899			FILEDESC_UNLOCK(fdp);
900			return (error);
901		}
902	}
903	oldvp = fdp->fd_rdir;
904	fdp->fd_rdir = vp;
905	VREF(fdp->fd_rdir);
906	if (!fdp->fd_jdir) {
907		fdp->fd_jdir = vp;
908		VREF(fdp->fd_jdir);
909	}
910	FILEDESC_UNLOCK(fdp);
911	vrele(oldvp);
912	return (0);
913}
914
915/*
916 * Check permissions, allocate an open file structure,
917 * and call the device open routine if any.
918 *
919 * MP SAFE
920 */
921#ifndef _SYS_SYSPROTO_H_
922struct open_args {
923	char	*path;
924	int	flags;
925	int	mode;
926};
927#endif
928int
929open(td, uap)
930	struct thread *td;
931	register struct open_args /* {
932		char *path;
933		int flags;
934		int mode;
935	} */ *uap;
936{
937	int error;
938
939	error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
940	if (mtx_owned(&Giant))
941		printf("open: %s: %d\n", uap->path, error);
942	return (error);
943}
944
945int
946kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
947    int mode)
948{
949	struct proc *p = td->td_proc;
950	struct filedesc *fdp = p->p_fd;
951	struct file *fp;
952	struct vnode *vp;
953	struct vattr vat;
954	struct mount *mp;
955	int cmode;
956	struct file *nfp;
957	int type, indx, error;
958	struct flock lf;
959	struct nameidata nd;
960	int vfslocked;
961
962	if ((flags & O_ACCMODE) == O_ACCMODE)
963		return (EINVAL);
964	flags = FFLAGS(flags);
965	error = falloc(td, &nfp, &indx);
966	if (error)
967		return (error);
968	/* An extra reference on `nfp' has been held for us by falloc(). */
969	fp = nfp;
970	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
971	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
972	td->td_dupfd = -1;		/* XXX check for fdopen */
973	error = vn_open(&nd, &flags, cmode, indx);
974	if (error) {
975		/*
976		 * If the vn_open replaced the method vector, something
977		 * wonderous happened deep below and we just pass it up
978		 * pretending we know what we do.
979		 */
980		if (error == ENXIO && fp->f_ops != &badfileops) {
981			fdrop(fp, td);
982			td->td_retval[0] = indx;
983			return (0);
984		}
985
986		/*
987		 * release our own reference
988		 */
989		fdrop(fp, td);
990
991		/*
992		 * handle special fdopen() case.  bleh.  dupfdopen() is
993		 * responsible for dropping the old contents of ofiles[indx]
994		 * if it succeeds.
995		 */
996		if ((error == ENODEV || error == ENXIO) &&
997		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
998		    (error =
999			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1000			td->td_retval[0] = indx;
1001			return (0);
1002		}
1003		/*
1004		 * Clean up the descriptor, but only if another thread hadn't
1005		 * replaced or closed it.
1006		 */
1007		fdclose(fdp, fp, indx, td);
1008
1009		if (error == ERESTART)
1010			error = EINTR;
1011		return (error);
1012	}
1013	td->td_dupfd = 0;
1014	vfslocked = NDHASGIANT(&nd);
1015	NDFREE(&nd, NDF_ONLY_PNBUF);
1016	vp = nd.ni_vp;
1017
1018	/*
1019	 * There should be 2 references on the file, one from the descriptor
1020	 * table, and one for us.
1021	 *
1022	 * Handle the case where someone closed the file (via its file
1023	 * descriptor) while we were blocked.  The end result should look
1024	 * like opening the file succeeded but it was immediately closed.
1025	 * We call vn_close() manually because we haven't yet hooked up
1026	 * the various 'struct file' fields.
1027	 */
1028	FILEDESC_LOCK(fdp);
1029	FILE_LOCK(fp);
1030	if (fp->f_count == 1) {
1031		mp = vp->v_mount;
1032		KASSERT(fdp->fd_ofiles[indx] != fp,
1033		    ("Open file descriptor lost all refs"));
1034		FILE_UNLOCK(fp);
1035		FILEDESC_UNLOCK(fdp);
1036		VOP_UNLOCK(vp, 0, td);
1037		vn_close(vp, flags & FMASK, fp->f_cred, td);
1038		VFS_UNLOCK_GIANT(vfslocked);
1039		fdrop(fp, td);
1040		td->td_retval[0] = indx;
1041		return (0);
1042	}
1043	fp->f_vnode = vp;
1044	if (fp->f_data == NULL)
1045		fp->f_data = vp;
1046	fp->f_flag = flags & FMASK;
1047	if (fp->f_ops == &badfileops)
1048		fp->f_ops = &vnops;
1049	fp->f_seqcount = 1;
1050	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1051	FILE_UNLOCK(fp);
1052	FILEDESC_UNLOCK(fdp);
1053
1054	VOP_UNLOCK(vp, 0, td);
1055	if (flags & (O_EXLOCK | O_SHLOCK)) {
1056		lf.l_whence = SEEK_SET;
1057		lf.l_start = 0;
1058		lf.l_len = 0;
1059		if (flags & O_EXLOCK)
1060			lf.l_type = F_WRLCK;
1061		else
1062			lf.l_type = F_RDLCK;
1063		type = F_FLOCK;
1064		if ((flags & FNONBLOCK) == 0)
1065			type |= F_WAIT;
1066		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1067			    type)) != 0)
1068			goto bad;
1069		fp->f_flag |= FHASLOCK;
1070	}
1071	if (flags & O_TRUNC) {
1072		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1073			goto bad;
1074		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1075		VATTR_NULL(&vat);
1076		vat.va_size = 0;
1077		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1078#ifdef MAC
1079		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1080		if (error == 0)
1081#endif
1082			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1083		VOP_UNLOCK(vp, 0, td);
1084		vn_finished_write(mp);
1085		if (error)
1086			goto bad;
1087	}
1088	VFS_UNLOCK_GIANT(vfslocked);
1089	/*
1090	 * Release our private reference, leaving the one associated with
1091	 * the descriptor table intact.
1092	 */
1093	fdrop(fp, td);
1094	td->td_retval[0] = indx;
1095	return (0);
1096bad:
1097	VFS_UNLOCK_GIANT(vfslocked);
1098	fdclose(fdp, fp, indx, td);
1099	fdrop(fp, td);
1100	return (error);
1101}
1102
1103#ifdef COMPAT_43
1104/*
1105 * Create a file.
1106 *
1107 * MP SAFE
1108 */
1109#ifndef _SYS_SYSPROTO_H_
1110struct ocreat_args {
1111	char	*path;
1112	int	mode;
1113};
1114#endif
1115int
1116ocreat(td, uap)
1117	struct thread *td;
1118	register struct ocreat_args /* {
1119		char *path;
1120		int mode;
1121	} */ *uap;
1122{
1123
1124	return (kern_open(td, uap->path, UIO_USERSPACE,
1125	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
1126}
1127#endif /* COMPAT_43 */
1128
1129/*
1130 * Create a special file.
1131 */
1132#ifndef _SYS_SYSPROTO_H_
1133struct mknod_args {
1134	char	*path;
1135	int	mode;
1136	int	dev;
1137};
1138#endif
1139int
1140mknod(td, uap)
1141	struct thread *td;
1142	register struct mknod_args /* {
1143		char *path;
1144		int mode;
1145		int dev;
1146	} */ *uap;
1147{
1148
1149	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1150}
1151
1152int
1153kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1154    int dev)
1155{
1156	struct vnode *vp;
1157	struct mount *mp;
1158	struct vattr vattr;
1159	int error;
1160	int whiteout = 0;
1161	struct nameidata nd;
1162	int vfslocked;
1163
1164	switch (mode & S_IFMT) {
1165	case S_IFCHR:
1166	case S_IFBLK:
1167		error = suser(td);
1168		break;
1169	default:
1170		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1171		break;
1172	}
1173	if (error)
1174		return (error);
1175restart:
1176	bwillwrite();
1177	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1178	if ((error = namei(&nd)) != 0)
1179		return (error);
1180	vfslocked = NDHASGIANT(&nd);
1181	vp = nd.ni_vp;
1182	if (vp != NULL) {
1183		NDFREE(&nd, NDF_ONLY_PNBUF);
1184		vrele(vp);
1185		if (vp == nd.ni_dvp)
1186			vrele(nd.ni_dvp);
1187		else
1188			vput(nd.ni_dvp);
1189		VFS_UNLOCK_GIANT(vfslocked);
1190		return (EEXIST);
1191	} else {
1192		VATTR_NULL(&vattr);
1193		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1194		vattr.va_mode = (mode & ALLPERMS) &
1195		    ~td->td_proc->p_fd->fd_cmask;
1196		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1197		vattr.va_rdev = dev;
1198		whiteout = 0;
1199
1200		switch (mode & S_IFMT) {
1201		case S_IFMT:	/* used by badsect to flag bad sectors */
1202			vattr.va_type = VBAD;
1203			break;
1204		case S_IFCHR:
1205			vattr.va_type = VCHR;
1206			break;
1207		case S_IFBLK:
1208			vattr.va_type = VBLK;
1209			break;
1210		case S_IFWHT:
1211			whiteout = 1;
1212			break;
1213		default:
1214			error = EINVAL;
1215			break;
1216		}
1217	}
1218	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1219		NDFREE(&nd, NDF_ONLY_PNBUF);
1220		vput(nd.ni_dvp);
1221		VFS_UNLOCK_GIANT(vfslocked);
1222		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1223			return (error);
1224		goto restart;
1225	}
1226#ifdef MAC
1227	if (error == 0 && !whiteout)
1228		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1229		    &nd.ni_cnd, &vattr);
1230#endif
1231	if (!error) {
1232		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1233		if (whiteout)
1234			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1235		else {
1236			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1237						&nd.ni_cnd, &vattr);
1238			if (error == 0)
1239				vput(nd.ni_vp);
1240		}
1241	}
1242	NDFREE(&nd, NDF_ONLY_PNBUF);
1243	vput(nd.ni_dvp);
1244	vn_finished_write(mp);
1245	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1246	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1247	VFS_UNLOCK_GIANT(vfslocked);
1248	return (error);
1249}
1250
1251/*
1252 * Create a named pipe.
1253 */
1254#ifndef _SYS_SYSPROTO_H_
1255struct mkfifo_args {
1256	char	*path;
1257	int	mode;
1258};
1259#endif
1260int
1261mkfifo(td, uap)
1262	struct thread *td;
1263	register struct mkfifo_args /* {
1264		char *path;
1265		int mode;
1266	} */ *uap;
1267{
1268
1269	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1270}
1271
1272int
1273kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1274{
1275	struct mount *mp;
1276	struct vattr vattr;
1277	int error;
1278	struct nameidata nd;
1279	int vfslocked;
1280
1281restart:
1282	bwillwrite();
1283	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1284	if ((error = namei(&nd)) != 0)
1285		return (error);
1286	vfslocked = NDHASGIANT(&nd);
1287	if (nd.ni_vp != NULL) {
1288		NDFREE(&nd, NDF_ONLY_PNBUF);
1289		vrele(nd.ni_vp);
1290		if (nd.ni_vp == nd.ni_dvp)
1291			vrele(nd.ni_dvp);
1292		else
1293			vput(nd.ni_dvp);
1294		VFS_UNLOCK_GIANT(vfslocked);
1295		return (EEXIST);
1296	}
1297	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1298		NDFREE(&nd, NDF_ONLY_PNBUF);
1299		vput(nd.ni_dvp);
1300		VFS_UNLOCK_GIANT(vfslocked);
1301		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1302			return (error);
1303		goto restart;
1304	}
1305	VATTR_NULL(&vattr);
1306	vattr.va_type = VFIFO;
1307	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1308	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1309	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1310#ifdef MAC
1311	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1312	    &vattr);
1313	if (error)
1314		goto out;
1315#endif
1316	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1317	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1318	if (error == 0)
1319		vput(nd.ni_vp);
1320#ifdef MAC
1321out:
1322#endif
1323	vput(nd.ni_dvp);
1324	vn_finished_write(mp);
1325	VFS_UNLOCK_GIANT(vfslocked);
1326	NDFREE(&nd, NDF_ONLY_PNBUF);
1327	return (error);
1328}
1329
1330/*
1331 * Make a hard file link.
1332 */
1333#ifndef _SYS_SYSPROTO_H_
1334struct link_args {
1335	char	*path;
1336	char	*link;
1337};
1338#endif
1339int
1340link(td, uap)
1341	struct thread *td;
1342	register struct link_args /* {
1343		char *path;
1344		char *link;
1345	} */ *uap;
1346{
1347	int error;
1348
1349	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1350	return (error);
1351}
1352
1353SYSCTL_DECL(_security_bsd);
1354
1355static int hardlink_check_uid = 0;
1356SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1357    &hardlink_check_uid, 0,
1358    "Unprivileged processes cannot create hard links to files owned by other "
1359    "users");
1360static int hardlink_check_gid = 0;
1361SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1362    &hardlink_check_gid, 0,
1363    "Unprivileged processes cannot create hard links to files owned by other "
1364    "groups");
1365
1366static int
1367can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1368{
1369	struct vattr va;
1370	int error;
1371
1372	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1373		return (0);
1374
1375	if (!hardlink_check_uid && !hardlink_check_gid)
1376		return (0);
1377
1378	error = VOP_GETATTR(vp, &va, cred, td);
1379	if (error != 0)
1380		return (error);
1381
1382	if (hardlink_check_uid) {
1383		if (cred->cr_uid != va.va_uid)
1384			return (EPERM);
1385	}
1386
1387	if (hardlink_check_gid) {
1388		if (!groupmember(va.va_gid, cred))
1389			return (EPERM);
1390	}
1391
1392	return (0);
1393}
1394
1395int
1396kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1397{
1398	struct vnode *vp;
1399	struct mount *mp;
1400	struct nameidata nd;
1401	int vfslocked;
1402	int lvfslocked;
1403	int error;
1404
1405	bwillwrite();
1406	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, segflg, path, td);
1407	if ((error = namei(&nd)) != 0)
1408		return (error);
1409	vfslocked = NDHASGIANT(&nd);
1410	NDFREE(&nd, NDF_ONLY_PNBUF);
1411	vp = nd.ni_vp;
1412	if (vp->v_type == VDIR) {
1413		vrele(vp);
1414		VFS_UNLOCK_GIANT(vfslocked);
1415		return (EPERM);		/* POSIX */
1416	}
1417	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1418		vrele(vp);
1419		VFS_UNLOCK_GIANT(vfslocked);
1420		return (error);
1421	}
1422	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, link, td);
1423	if ((error = namei(&nd)) == 0) {
1424		lvfslocked = NDHASGIANT(&nd);
1425		if (nd.ni_vp != NULL) {
1426			vrele(nd.ni_vp);
1427			if (nd.ni_dvp == nd.ni_vp)
1428				vrele(nd.ni_dvp);
1429			else
1430				vput(nd.ni_dvp);
1431			error = EEXIST;
1432		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1433		    == 0) {
1434			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1435			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1436			error = can_hardlink(vp, td, td->td_ucred);
1437			if (error == 0)
1438#ifdef MAC
1439				error = mac_check_vnode_link(td->td_ucred,
1440				    nd.ni_dvp, vp, &nd.ni_cnd);
1441			if (error == 0)
1442#endif
1443				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1444			VOP_UNLOCK(vp, 0, td);
1445			vput(nd.ni_dvp);
1446		}
1447		NDFREE(&nd, NDF_ONLY_PNBUF);
1448		VFS_UNLOCK_GIANT(lvfslocked);
1449	}
1450	vrele(vp);
1451	vn_finished_write(mp);
1452	VFS_UNLOCK_GIANT(vfslocked);
1453	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1454	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1455	return (error);
1456}
1457
1458/*
1459 * Make a symbolic link.
1460 */
1461#ifndef _SYS_SYSPROTO_H_
1462struct symlink_args {
1463	char	*path;
1464	char	*link;
1465};
1466#endif
1467int
1468symlink(td, uap)
1469	struct thread *td;
1470	register struct symlink_args /* {
1471		char *path;
1472		char *link;
1473	} */ *uap;
1474{
1475
1476	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1477}
1478
1479int
1480kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1481{
1482	struct mount *mp;
1483	struct vattr vattr;
1484	char *syspath;
1485	int error;
1486	struct nameidata nd;
1487	int vfslocked;
1488
1489	if (segflg == UIO_SYSSPACE) {
1490		syspath = path;
1491	} else {
1492		syspath = uma_zalloc(namei_zone, M_WAITOK);
1493		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1494			goto out;
1495	}
1496restart:
1497	bwillwrite();
1498	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE,
1499	    segflg, link, td);
1500	if ((error = namei(&nd)) != 0)
1501		goto out;
1502	vfslocked = NDHASGIANT(&nd);
1503	if (nd.ni_vp) {
1504		NDFREE(&nd, NDF_ONLY_PNBUF);
1505		vrele(nd.ni_vp);
1506		if (nd.ni_vp == nd.ni_dvp)
1507			vrele(nd.ni_dvp);
1508		else
1509			vput(nd.ni_dvp);
1510		VFS_UNLOCK_GIANT(vfslocked);
1511		error = EEXIST;
1512		goto out;
1513	}
1514	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1515		NDFREE(&nd, NDF_ONLY_PNBUF);
1516		vput(nd.ni_dvp);
1517		VFS_UNLOCK_GIANT(vfslocked);
1518		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1519			goto out;
1520		goto restart;
1521	}
1522	VATTR_NULL(&vattr);
1523	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1524	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1525	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1526#ifdef MAC
1527	vattr.va_type = VLNK;
1528	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1529	    &vattr);
1530	if (error)
1531		goto out2;
1532#endif
1533	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1534	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1535	if (error == 0)
1536		vput(nd.ni_vp);
1537#ifdef MAC
1538out2:
1539#endif
1540	NDFREE(&nd, NDF_ONLY_PNBUF);
1541	vput(nd.ni_dvp);
1542	vn_finished_write(mp);
1543	VFS_UNLOCK_GIANT(vfslocked);
1544	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1545	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1546out:
1547	if (segflg != UIO_SYSSPACE)
1548		uma_zfree(namei_zone, syspath);
1549	return (error);
1550}
1551
1552/*
1553 * Delete a whiteout from the filesystem.
1554 */
1555int
1556undelete(td, uap)
1557	struct thread *td;
1558	register struct undelete_args /* {
1559		char *path;
1560	} */ *uap;
1561{
1562	int error;
1563	struct mount *mp;
1564	struct nameidata nd;
1565	int vfslocked;
1566
1567restart:
1568	bwillwrite();
1569	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE,
1570	    uap->path, td);
1571	error = namei(&nd);
1572	if (error)
1573		return (error);
1574	vfslocked = NDHASGIANT(&nd);
1575
1576	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1577		NDFREE(&nd, NDF_ONLY_PNBUF);
1578		if (nd.ni_vp)
1579			vrele(nd.ni_vp);
1580		if (nd.ni_vp == nd.ni_dvp)
1581			vrele(nd.ni_dvp);
1582		else
1583			vput(nd.ni_dvp);
1584		VFS_UNLOCK_GIANT(vfslocked);
1585		return (EEXIST);
1586	}
1587	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1588		NDFREE(&nd, NDF_ONLY_PNBUF);
1589		vput(nd.ni_dvp);
1590		VFS_UNLOCK_GIANT(vfslocked);
1591		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1592			return (error);
1593		goto restart;
1594	}
1595	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1596	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1597	NDFREE(&nd, NDF_ONLY_PNBUF);
1598	vput(nd.ni_dvp);
1599	vn_finished_write(mp);
1600	VFS_UNLOCK_GIANT(vfslocked);
1601	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1602	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1603	return (error);
1604}
1605
1606/*
1607 * Delete a name from the filesystem.
1608 */
1609#ifndef _SYS_SYSPROTO_H_
1610struct unlink_args {
1611	char	*path;
1612};
1613#endif
1614int
1615unlink(td, uap)
1616	struct thread *td;
1617	struct unlink_args /* {
1618		char *path;
1619	} */ *uap;
1620{
1621	int error;
1622
1623	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1624	return (error);
1625}
1626
1627int
1628kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1629{
1630	struct mount *mp;
1631	struct vnode *vp;
1632	int error;
1633	struct nameidata nd;
1634	int vfslocked;
1635
1636restart:
1637	bwillwrite();
1638	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
1639	if ((error = namei(&nd)) != 0)
1640		return (error);
1641	vfslocked = NDHASGIANT(&nd);
1642	vp = nd.ni_vp;
1643	if (vp->v_type == VDIR)
1644		error = EPERM;		/* POSIX */
1645	else {
1646		/*
1647		 * The root of a mounted filesystem cannot be deleted.
1648		 *
1649		 * XXX: can this only be a VDIR case?
1650		 */
1651		if (vp->v_vflag & VV_ROOT)
1652			error = EBUSY;
1653	}
1654	if (error == 0) {
1655		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1656			NDFREE(&nd, NDF_ONLY_PNBUF);
1657			if (vp == nd.ni_dvp)
1658				vrele(vp);
1659			else
1660				vput(vp);
1661			vput(nd.ni_dvp);
1662			VFS_UNLOCK_GIANT(vfslocked);
1663			if ((error = vn_start_write(NULL, &mp,
1664			    V_XSLEEP | PCATCH)) != 0)
1665				return (error);
1666			goto restart;
1667		}
1668#ifdef MAC
1669		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1670		    &nd.ni_cnd);
1671		if (error)
1672			goto out;
1673#endif
1674		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1675		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1676#ifdef MAC
1677out:
1678#endif
1679		vn_finished_write(mp);
1680	}
1681	NDFREE(&nd, NDF_ONLY_PNBUF);
1682	if (vp == nd.ni_dvp)
1683		vrele(vp);
1684	else
1685		vput(vp);
1686	vput(nd.ni_dvp);
1687	VFS_UNLOCK_GIANT(vfslocked);
1688	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1689	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1690	return (error);
1691}
1692
1693/*
1694 * Reposition read/write file offset.
1695 */
1696#ifndef _SYS_SYSPROTO_H_
1697struct lseek_args {
1698	int	fd;
1699	int	pad;
1700	off_t	offset;
1701	int	whence;
1702};
1703#endif
1704int
1705lseek(td, uap)
1706	struct thread *td;
1707	register struct lseek_args /* {
1708		int fd;
1709		int pad;
1710		off_t offset;
1711		int whence;
1712	} */ *uap;
1713{
1714	struct ucred *cred = td->td_ucred;
1715	struct file *fp;
1716	struct vnode *vp;
1717	struct vattr vattr;
1718	off_t offset;
1719	int error, noneg;
1720	int vfslocked;
1721
1722	if ((error = fget(td, uap->fd, &fp)) != 0)
1723		return (error);
1724	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1725		fdrop(fp, td);
1726		return (ESPIPE);
1727	}
1728	vp = fp->f_vnode;
1729	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1730	noneg = (vp->v_type != VCHR);
1731	offset = uap->offset;
1732	switch (uap->whence) {
1733	case L_INCR:
1734		if (noneg &&
1735		    (fp->f_offset < 0 ||
1736		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1737			error = EOVERFLOW;
1738			break;
1739		}
1740		offset += fp->f_offset;
1741		break;
1742	case L_XTND:
1743		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1744		error = VOP_GETATTR(vp, &vattr, cred, td);
1745		VOP_UNLOCK(vp, 0, td);
1746		if (error)
1747			break;
1748		if (noneg &&
1749		    (vattr.va_size > OFF_MAX ||
1750		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1751			error = EOVERFLOW;
1752			break;
1753		}
1754		offset += vattr.va_size;
1755		break;
1756	case L_SET:
1757		break;
1758	default:
1759		error = EINVAL;
1760	}
1761	if (error == 0 && noneg && offset < 0)
1762		error = EINVAL;
1763	if (error != 0)
1764		goto drop;
1765	fp->f_offset = offset;
1766	*(off_t *)(td->td_retval) = fp->f_offset;
1767drop:
1768	fdrop(fp, td);
1769	VFS_UNLOCK_GIANT(vfslocked);
1770	return (error);
1771}
1772
1773#if defined(COMPAT_43)
1774/*
1775 * Reposition read/write file offset.
1776 */
1777#ifndef _SYS_SYSPROTO_H_
1778struct olseek_args {
1779	int	fd;
1780	long	offset;
1781	int	whence;
1782};
1783#endif
1784int
1785olseek(td, uap)
1786	struct thread *td;
1787	register struct olseek_args /* {
1788		int fd;
1789		long offset;
1790		int whence;
1791	} */ *uap;
1792{
1793	struct lseek_args /* {
1794		int fd;
1795		int pad;
1796		off_t offset;
1797		int whence;
1798	} */ nuap;
1799	int error;
1800
1801	nuap.fd = uap->fd;
1802	nuap.offset = uap->offset;
1803	nuap.whence = uap->whence;
1804	error = lseek(td, &nuap);
1805	return (error);
1806}
1807#endif /* COMPAT_43 */
1808
1809/*
1810 * Check access permissions using passed credentials.
1811 */
1812static int
1813vn_access(vp, user_flags, cred, td)
1814	struct vnode	*vp;
1815	int		user_flags;
1816	struct ucred	*cred;
1817	struct thread	*td;
1818{
1819	int error, flags;
1820
1821	/* Flags == 0 means only check for existence. */
1822	error = 0;
1823	if (user_flags) {
1824		flags = 0;
1825		if (user_flags & R_OK)
1826			flags |= VREAD;
1827		if (user_flags & W_OK)
1828			flags |= VWRITE;
1829		if (user_flags & X_OK)
1830			flags |= VEXEC;
1831#ifdef MAC
1832		error = mac_check_vnode_access(cred, vp, flags);
1833		if (error)
1834			return (error);
1835#endif
1836		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1837			error = VOP_ACCESS(vp, flags, cred, td);
1838	}
1839	return (error);
1840}
1841
1842/*
1843 * Check access permissions using "real" credentials.
1844 */
1845#ifndef _SYS_SYSPROTO_H_
1846struct access_args {
1847	char	*path;
1848	int	flags;
1849};
1850#endif
1851int
1852access(td, uap)
1853	struct thread *td;
1854	register struct access_args /* {
1855		char *path;
1856		int flags;
1857	} */ *uap;
1858{
1859
1860	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1861}
1862
1863int
1864kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1865{
1866	struct ucred *cred, *tmpcred;
1867	register struct vnode *vp;
1868	struct nameidata nd;
1869	int vfslocked;
1870	int error;
1871
1872	/*
1873	 * Create and modify a temporary credential instead of one that
1874	 * is potentially shared.  This could also mess up socket
1875	 * buffer accounting which can run in an interrupt context.
1876	 */
1877	cred = td->td_ucred;
1878	tmpcred = crdup(cred);
1879	tmpcred->cr_uid = cred->cr_ruid;
1880	tmpcred->cr_groups[0] = cred->cr_rgid;
1881	td->td_ucred = tmpcred;
1882	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
1883	if ((error = namei(&nd)) != 0)
1884		goto out1;
1885	vfslocked = NDHASGIANT(&nd);
1886	vp = nd.ni_vp;
1887
1888	error = vn_access(vp, flags, tmpcred, td);
1889	NDFREE(&nd, NDF_ONLY_PNBUF);
1890	vput(vp);
1891	VFS_UNLOCK_GIANT(vfslocked);
1892out1:
1893	td->td_ucred = cred;
1894	crfree(tmpcred);
1895	return (error);
1896}
1897
1898/*
1899 * Check access permissions using "effective" credentials.
1900 */
1901#ifndef _SYS_SYSPROTO_H_
1902struct eaccess_args {
1903	char	*path;
1904	int	flags;
1905};
1906#endif
1907int
1908eaccess(td, uap)
1909	struct thread *td;
1910	register struct eaccess_args /* {
1911		char *path;
1912		int flags;
1913	} */ *uap;
1914{
1915	struct nameidata nd;
1916	struct vnode *vp;
1917	int vfslocked;
1918	int error;
1919
1920	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
1921	    uap->path, td);
1922	if ((error = namei(&nd)) != 0)
1923		return (error);
1924	vp = nd.ni_vp;
1925	vfslocked = NDHASGIANT(&nd);
1926	error = vn_access(vp, uap->flags, td->td_ucred, td);
1927	NDFREE(&nd, NDF_ONLY_PNBUF);
1928	vput(vp);
1929	VFS_UNLOCK_GIANT(vfslocked);
1930	return (error);
1931}
1932
1933#if defined(COMPAT_43)
1934/*
1935 * Get file status; this version follows links.
1936 */
1937#ifndef _SYS_SYSPROTO_H_
1938struct ostat_args {
1939	char	*path;
1940	struct ostat *ub;
1941};
1942#endif
1943int
1944ostat(td, uap)
1945	struct thread *td;
1946	register struct ostat_args /* {
1947		char *path;
1948		struct ostat *ub;
1949	} */ *uap;
1950{
1951	struct stat sb;
1952	struct ostat osb;
1953	int error;
1954
1955	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
1956	if (error)
1957		return (error);
1958	cvtstat(&sb, &osb);
1959	error = copyout(&osb, uap->ub, sizeof (osb));
1960	return (error);
1961}
1962
1963/*
1964 * Get file status; this version does not follow links.
1965 */
1966#ifndef _SYS_SYSPROTO_H_
1967struct olstat_args {
1968	char	*path;
1969	struct ostat *ub;
1970};
1971#endif
1972int
1973olstat(td, uap)
1974	struct thread *td;
1975	register struct olstat_args /* {
1976		char *path;
1977		struct ostat *ub;
1978	} */ *uap;
1979{
1980	struct stat sb;
1981	struct ostat osb;
1982	int error;
1983
1984	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
1985	if (error)
1986		return (error);
1987	cvtstat(&sb, &osb);
1988	error = copyout(&osb, uap->ub, sizeof (osb));
1989	return (error);
1990}
1991
1992/*
1993 * Convert from an old to a new stat structure.
1994 */
1995void
1996cvtstat(st, ost)
1997	struct stat *st;
1998	struct ostat *ost;
1999{
2000
2001	ost->st_dev = st->st_dev;
2002	ost->st_ino = st->st_ino;
2003	ost->st_mode = st->st_mode;
2004	ost->st_nlink = st->st_nlink;
2005	ost->st_uid = st->st_uid;
2006	ost->st_gid = st->st_gid;
2007	ost->st_rdev = st->st_rdev;
2008	if (st->st_size < (quad_t)1 << 32)
2009		ost->st_size = st->st_size;
2010	else
2011		ost->st_size = -2;
2012	ost->st_atime = st->st_atime;
2013	ost->st_mtime = st->st_mtime;
2014	ost->st_ctime = st->st_ctime;
2015	ost->st_blksize = st->st_blksize;
2016	ost->st_blocks = st->st_blocks;
2017	ost->st_flags = st->st_flags;
2018	ost->st_gen = st->st_gen;
2019}
2020#endif /* COMPAT_43 */
2021
2022/*
2023 * Get file status; this version follows links.
2024 */
2025#ifndef _SYS_SYSPROTO_H_
2026struct stat_args {
2027	char	*path;
2028	struct stat *ub;
2029};
2030#endif
2031int
2032stat(td, uap)
2033	struct thread *td;
2034	register struct stat_args /* {
2035		char *path;
2036		struct stat *ub;
2037	} */ *uap;
2038{
2039	struct stat sb;
2040	int error;
2041
2042	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2043	if (error == 0)
2044		error = copyout(&sb, uap->ub, sizeof (sb));
2045	return (error);
2046}
2047
2048int
2049kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2050{
2051	struct nameidata nd;
2052	struct stat sb;
2053	int error, vfslocked;
2054
2055	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE,
2056	    pathseg, path, td);
2057	if ((error = namei(&nd)) != 0)
2058		return (error);
2059	vfslocked = NDHASGIANT(&nd);
2060	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2061	NDFREE(&nd, NDF_ONLY_PNBUF);
2062	vput(nd.ni_vp);
2063	VFS_UNLOCK_GIANT(vfslocked);
2064	if (error)
2065		return (error);
2066	*sbp = sb;
2067	return (0);
2068}
2069
2070/*
2071 * Get file status; this version does not follow links.
2072 */
2073#ifndef _SYS_SYSPROTO_H_
2074struct lstat_args {
2075	char	*path;
2076	struct stat *ub;
2077};
2078#endif
2079int
2080lstat(td, uap)
2081	struct thread *td;
2082	register struct lstat_args /* {
2083		char *path;
2084		struct stat *ub;
2085	} */ *uap;
2086{
2087	struct stat sb;
2088	int error;
2089
2090	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2091	if (error == 0)
2092		error = copyout(&sb, uap->ub, sizeof (sb));
2093	return (error);
2094}
2095
2096int
2097kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp)
2098{
2099	struct vnode *vp;
2100	struct stat sb;
2101	struct nameidata nd;
2102	int error, vfslocked;
2103
2104	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE,
2105	    pathseg, path, td);
2106	if ((error = namei(&nd)) != 0)
2107		return (error);
2108	vfslocked = NDHASGIANT(&nd);
2109	vp = nd.ni_vp;
2110	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2111	NDFREE(&nd, NDF_ONLY_PNBUF);
2112	vput(vp);
2113	VFS_UNLOCK_GIANT(vfslocked);
2114	if (error)
2115		return (error);
2116	*sbp = sb;
2117	return (0);
2118}
2119
2120/*
2121 * Implementation of the NetBSD [l]stat() functions.
2122 */
2123void
2124cvtnstat(sb, nsb)
2125	struct stat *sb;
2126	struct nstat *nsb;
2127{
2128	bzero(nsb, sizeof *nsb);
2129	nsb->st_dev = sb->st_dev;
2130	nsb->st_ino = sb->st_ino;
2131	nsb->st_mode = sb->st_mode;
2132	nsb->st_nlink = sb->st_nlink;
2133	nsb->st_uid = sb->st_uid;
2134	nsb->st_gid = sb->st_gid;
2135	nsb->st_rdev = sb->st_rdev;
2136	nsb->st_atimespec = sb->st_atimespec;
2137	nsb->st_mtimespec = sb->st_mtimespec;
2138	nsb->st_ctimespec = sb->st_ctimespec;
2139	nsb->st_size = sb->st_size;
2140	nsb->st_blocks = sb->st_blocks;
2141	nsb->st_blksize = sb->st_blksize;
2142	nsb->st_flags = sb->st_flags;
2143	nsb->st_gen = sb->st_gen;
2144	nsb->st_birthtimespec = sb->st_birthtimespec;
2145}
2146
2147#ifndef _SYS_SYSPROTO_H_
2148struct nstat_args {
2149	char	*path;
2150	struct nstat *ub;
2151};
2152#endif
2153int
2154nstat(td, uap)
2155	struct thread *td;
2156	register struct nstat_args /* {
2157		char *path;
2158		struct nstat *ub;
2159	} */ *uap;
2160{
2161	struct stat sb;
2162	struct nstat nsb;
2163	int error;
2164
2165	error = kern_stat(td, uap->path, UIO_USERSPACE, &sb);
2166	if (error)
2167		return (error);
2168	cvtnstat(&sb, &nsb);
2169	error = copyout(&nsb, uap->ub, sizeof (nsb));
2170	return (error);
2171}
2172
2173/*
2174 * NetBSD lstat.  Get file status; this version does not follow links.
2175 */
2176#ifndef _SYS_SYSPROTO_H_
2177struct lstat_args {
2178	char	*path;
2179	struct stat *ub;
2180};
2181#endif
2182int
2183nlstat(td, uap)
2184	struct thread *td;
2185	register struct nlstat_args /* {
2186		char *path;
2187		struct nstat *ub;
2188	} */ *uap;
2189{
2190	struct stat sb;
2191	struct nstat nsb;
2192	int error;
2193
2194	error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb);
2195	if (error)
2196		return (error);
2197	cvtnstat(&sb, &nsb);
2198	error = copyout(&nsb, uap->ub, sizeof (nsb));
2199	return (error);
2200}
2201
2202/*
2203 * Get configurable pathname variables.
2204 */
2205#ifndef _SYS_SYSPROTO_H_
2206struct pathconf_args {
2207	char	*path;
2208	int	name;
2209};
2210#endif
2211int
2212pathconf(td, uap)
2213	struct thread *td;
2214	register struct pathconf_args /* {
2215		char *path;
2216		int name;
2217	} */ *uap;
2218{
2219
2220	return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name));
2221}
2222
2223int
2224kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name)
2225{
2226	struct nameidata nd;
2227	int error, vfslocked;
2228
2229	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2230	if ((error = namei(&nd)) != 0)
2231		return (error);
2232	vfslocked = NDHASGIANT(&nd);
2233	NDFREE(&nd, NDF_ONLY_PNBUF);
2234
2235	/* If asynchronous I/O is available, it works for all files. */
2236	if (name == _PC_ASYNC_IO)
2237		td->td_retval[0] = async_io_version;
2238	else
2239		error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval);
2240	vput(nd.ni_vp);
2241	VFS_UNLOCK_GIANT(vfslocked);
2242	return (error);
2243}
2244
2245/*
2246 * Return target name of a symbolic link.
2247 */
2248#ifndef _SYS_SYSPROTO_H_
2249struct readlink_args {
2250	char	*path;
2251	char	*buf;
2252	int	count;
2253};
2254#endif
2255int
2256readlink(td, uap)
2257	struct thread *td;
2258	register struct readlink_args /* {
2259		char *path;
2260		char *buf;
2261		int count;
2262	} */ *uap;
2263{
2264
2265	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2266	    UIO_USERSPACE, uap->count));
2267}
2268
2269int
2270kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2271    enum uio_seg bufseg, int count)
2272{
2273	register struct vnode *vp;
2274	struct iovec aiov;
2275	struct uio auio;
2276	int error;
2277	struct nameidata nd;
2278	int vfslocked;
2279
2280	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
2281	if ((error = namei(&nd)) != 0)
2282		return (error);
2283	NDFREE(&nd, NDF_ONLY_PNBUF);
2284	vfslocked = NDHASGIANT(&nd);
2285	vp = nd.ni_vp;
2286#ifdef MAC
2287	error = mac_check_vnode_readlink(td->td_ucred, vp);
2288	if (error) {
2289		vput(vp);
2290		VFS_UNLOCK_GIANT(vfslocked);
2291		return (error);
2292	}
2293#endif
2294	if (vp->v_type != VLNK)
2295		error = EINVAL;
2296	else {
2297		aiov.iov_base = buf;
2298		aiov.iov_len = count;
2299		auio.uio_iov = &aiov;
2300		auio.uio_iovcnt = 1;
2301		auio.uio_offset = 0;
2302		auio.uio_rw = UIO_READ;
2303		auio.uio_segflg = bufseg;
2304		auio.uio_td = td;
2305		auio.uio_resid = count;
2306		error = VOP_READLINK(vp, &auio, td->td_ucred);
2307	}
2308	vput(vp);
2309	VFS_UNLOCK_GIANT(vfslocked);
2310	td->td_retval[0] = count - auio.uio_resid;
2311	return (error);
2312}
2313
2314/*
2315 * Common implementation code for chflags() and fchflags().
2316 */
2317static int
2318setfflags(td, vp, flags)
2319	struct thread *td;
2320	struct vnode *vp;
2321	int flags;
2322{
2323	int error;
2324	struct mount *mp;
2325	struct vattr vattr;
2326
2327	/*
2328	 * Prevent non-root users from setting flags on devices.  When
2329	 * a device is reused, users can retain ownership of the device
2330	 * if they are allowed to set flags and programs assume that
2331	 * chown can't fail when done as root.
2332	 */
2333	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2334		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2335		if (error)
2336			return (error);
2337	}
2338
2339	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2340		return (error);
2341	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2342	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2343	VATTR_NULL(&vattr);
2344	vattr.va_flags = flags;
2345#ifdef MAC
2346	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2347	if (error == 0)
2348#endif
2349		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2350	VOP_UNLOCK(vp, 0, td);
2351	vn_finished_write(mp);
2352	return (error);
2353}
2354
2355/*
2356 * Change flags of a file given a path name.
2357 */
2358#ifndef _SYS_SYSPROTO_H_
2359struct chflags_args {
2360	char	*path;
2361	int	flags;
2362};
2363#endif
2364int
2365chflags(td, uap)
2366	struct thread *td;
2367	register struct chflags_args /* {
2368		char *path;
2369		int flags;
2370	} */ *uap;
2371{
2372	int error;
2373	struct nameidata nd;
2374	int vfslocked;
2375
2376	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2377	if ((error = namei(&nd)) != 0)
2378		return (error);
2379	NDFREE(&nd, NDF_ONLY_PNBUF);
2380	vfslocked = NDHASGIANT(&nd);
2381	error = setfflags(td, nd.ni_vp, uap->flags);
2382	vrele(nd.ni_vp);
2383	VFS_UNLOCK_GIANT(vfslocked);
2384	return (error);
2385}
2386
2387/*
2388 * Same as chflags() but doesn't follow symlinks.
2389 */
2390int
2391lchflags(td, uap)
2392	struct thread *td;
2393	register struct lchflags_args /* {
2394		char *path;
2395		int flags;
2396	} */ *uap;
2397{
2398	int error;
2399	struct nameidata nd;
2400	int vfslocked;
2401
2402	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2403	if ((error = namei(&nd)) != 0)
2404		return (error);
2405	vfslocked = NDHASGIANT(&nd);
2406	NDFREE(&nd, NDF_ONLY_PNBUF);
2407	error = setfflags(td, nd.ni_vp, uap->flags);
2408	vrele(nd.ni_vp);
2409	VFS_UNLOCK_GIANT(vfslocked);
2410	return (error);
2411}
2412
2413/*
2414 * Change flags of a file given a file descriptor.
2415 */
2416#ifndef _SYS_SYSPROTO_H_
2417struct fchflags_args {
2418	int	fd;
2419	int	flags;
2420};
2421#endif
2422int
2423fchflags(td, uap)
2424	struct thread *td;
2425	register struct fchflags_args /* {
2426		int fd;
2427		int flags;
2428	} */ *uap;
2429{
2430	struct file *fp;
2431	int vfslocked;
2432	int error;
2433
2434	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2435		return (error);
2436	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2437	error = setfflags(td, fp->f_vnode, uap->flags);
2438	fdrop(fp, td);
2439	VFS_UNLOCK_GIANT(vfslocked);
2440	return (error);
2441}
2442
2443/*
2444 * Common implementation code for chmod(), lchmod() and fchmod().
2445 */
2446static int
2447setfmode(td, vp, mode)
2448	struct thread *td;
2449	struct vnode *vp;
2450	int mode;
2451{
2452	int error;
2453	struct mount *mp;
2454	struct vattr vattr;
2455
2456	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2457		return (error);
2458	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2459	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2460	VATTR_NULL(&vattr);
2461	vattr.va_mode = mode & ALLPERMS;
2462#ifdef MAC
2463	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2464	if (error == 0)
2465#endif
2466		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2467	VOP_UNLOCK(vp, 0, td);
2468	vn_finished_write(mp);
2469	return (error);
2470}
2471
2472/*
2473 * Change mode of a file given path name.
2474 */
2475#ifndef _SYS_SYSPROTO_H_
2476struct chmod_args {
2477	char	*path;
2478	int	mode;
2479};
2480#endif
2481int
2482chmod(td, uap)
2483	struct thread *td;
2484	register struct chmod_args /* {
2485		char *path;
2486		int mode;
2487	} */ *uap;
2488{
2489
2490	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2491}
2492
2493int
2494kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2495{
2496	int error;
2497	struct nameidata nd;
2498	int vfslocked;
2499
2500	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2501	if ((error = namei(&nd)) != 0)
2502		return (error);
2503	vfslocked = NDHASGIANT(&nd);
2504	NDFREE(&nd, NDF_ONLY_PNBUF);
2505	error = setfmode(td, nd.ni_vp, mode);
2506	vrele(nd.ni_vp);
2507	VFS_UNLOCK_GIANT(vfslocked);
2508	return (error);
2509}
2510
2511/*
2512 * Change mode of a file given path name (don't follow links.)
2513 */
2514#ifndef _SYS_SYSPROTO_H_
2515struct lchmod_args {
2516	char	*path;
2517	int	mode;
2518};
2519#endif
2520int
2521lchmod(td, uap)
2522	struct thread *td;
2523	register struct lchmod_args /* {
2524		char *path;
2525		int mode;
2526	} */ *uap;
2527{
2528	int error;
2529	struct nameidata nd;
2530	int vfslocked;
2531
2532	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2533	if ((error = namei(&nd)) != 0)
2534		return (error);
2535	vfslocked = NDHASGIANT(&nd);
2536	NDFREE(&nd, NDF_ONLY_PNBUF);
2537	error = setfmode(td, nd.ni_vp, uap->mode);
2538	vrele(nd.ni_vp);
2539	VFS_UNLOCK_GIANT(vfslocked);
2540	return (error);
2541}
2542
2543/*
2544 * Change mode of a file given a file descriptor.
2545 */
2546#ifndef _SYS_SYSPROTO_H_
2547struct fchmod_args {
2548	int	fd;
2549	int	mode;
2550};
2551#endif
2552int
2553fchmod(td, uap)
2554	struct thread *td;
2555	register struct fchmod_args /* {
2556		int fd;
2557		int mode;
2558	} */ *uap;
2559{
2560	struct file *fp;
2561	int vfslocked;
2562	int error;
2563
2564	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2565		return (error);
2566	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2567	error = setfmode(td, fp->f_vnode, uap->mode);
2568	fdrop(fp, td);
2569	VFS_UNLOCK_GIANT(vfslocked);
2570	return (error);
2571}
2572
2573/*
2574 * Common implementation for chown(), lchown(), and fchown()
2575 */
2576static int
2577setfown(td, vp, uid, gid)
2578	struct thread *td;
2579	struct vnode *vp;
2580	uid_t uid;
2581	gid_t gid;
2582{
2583	int error;
2584	struct mount *mp;
2585	struct vattr vattr;
2586
2587	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2588		return (error);
2589	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2590	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2591	VATTR_NULL(&vattr);
2592	vattr.va_uid = uid;
2593	vattr.va_gid = gid;
2594#ifdef MAC
2595	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2596	    vattr.va_gid);
2597	if (error == 0)
2598#endif
2599		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2600	VOP_UNLOCK(vp, 0, td);
2601	vn_finished_write(mp);
2602	return (error);
2603}
2604
2605/*
2606 * Set ownership given a path name.
2607 */
2608#ifndef _SYS_SYSPROTO_H_
2609struct chown_args {
2610	char	*path;
2611	int	uid;
2612	int	gid;
2613};
2614#endif
2615int
2616chown(td, uap)
2617	struct thread *td;
2618	register struct chown_args /* {
2619		char *path;
2620		int uid;
2621		int gid;
2622	} */ *uap;
2623{
2624
2625	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2626}
2627
2628int
2629kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2630    int gid)
2631{
2632	int error;
2633	struct nameidata nd;
2634	int vfslocked;
2635
2636	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2637	if ((error = namei(&nd)) != 0)
2638		return (error);
2639	vfslocked = NDHASGIANT(&nd);
2640	NDFREE(&nd, NDF_ONLY_PNBUF);
2641	error = setfown(td, nd.ni_vp, uid, gid);
2642	vrele(nd.ni_vp);
2643	VFS_UNLOCK_GIANT(vfslocked);
2644	return (error);
2645}
2646
2647/*
2648 * Set ownership given a path name, do not cross symlinks.
2649 */
2650#ifndef _SYS_SYSPROTO_H_
2651struct lchown_args {
2652	char	*path;
2653	int	uid;
2654	int	gid;
2655};
2656#endif
2657int
2658lchown(td, uap)
2659	struct thread *td;
2660	register struct lchown_args /* {
2661		char *path;
2662		int uid;
2663		int gid;
2664	} */ *uap;
2665{
2666
2667	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2668}
2669
2670int
2671kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2672    int gid)
2673{
2674	int error;
2675	struct nameidata nd;
2676	int vfslocked;
2677
2678	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2679	if ((error = namei(&nd)) != 0)
2680		return (error);
2681	vfslocked = NDHASGIANT(&nd);
2682	NDFREE(&nd, NDF_ONLY_PNBUF);
2683	error = setfown(td, nd.ni_vp, uid, gid);
2684	vrele(nd.ni_vp);
2685	VFS_UNLOCK_GIANT(vfslocked);
2686	return (error);
2687}
2688
2689/*
2690 * Set ownership given a file descriptor.
2691 */
2692#ifndef _SYS_SYSPROTO_H_
2693struct fchown_args {
2694	int	fd;
2695	int	uid;
2696	int	gid;
2697};
2698#endif
2699int
2700fchown(td, uap)
2701	struct thread *td;
2702	register struct fchown_args /* {
2703		int fd;
2704		int uid;
2705		int gid;
2706	} */ *uap;
2707{
2708	struct file *fp;
2709	int vfslocked;
2710	int error;
2711
2712	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2713		return (error);
2714	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2715	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2716	fdrop(fp, td);
2717	VFS_UNLOCK_GIANT(vfslocked);
2718	return (error);
2719}
2720
2721/*
2722 * Common implementation code for utimes(), lutimes(), and futimes().
2723 */
2724static int
2725getutimes(usrtvp, tvpseg, tsp)
2726	const struct timeval *usrtvp;
2727	enum uio_seg tvpseg;
2728	struct timespec *tsp;
2729{
2730	struct timeval tv[2];
2731	const struct timeval *tvp;
2732	int error;
2733
2734	if (usrtvp == NULL) {
2735		microtime(&tv[0]);
2736		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2737		tsp[1] = tsp[0];
2738	} else {
2739		if (tvpseg == UIO_SYSSPACE) {
2740			tvp = usrtvp;
2741		} else {
2742			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2743				return (error);
2744			tvp = tv;
2745		}
2746
2747		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2748		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2749	}
2750	return (0);
2751}
2752
2753/*
2754 * Common implementation code for utimes(), lutimes(), and futimes().
2755 */
2756static int
2757setutimes(td, vp, ts, numtimes, nullflag)
2758	struct thread *td;
2759	struct vnode *vp;
2760	const struct timespec *ts;
2761	int numtimes;
2762	int nullflag;
2763{
2764	int error, setbirthtime;
2765	struct mount *mp;
2766	struct vattr vattr;
2767
2768	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2769		return (error);
2770	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2771	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2772	setbirthtime = 0;
2773	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2774	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2775		setbirthtime = 1;
2776	VATTR_NULL(&vattr);
2777	vattr.va_atime = ts[0];
2778	vattr.va_mtime = ts[1];
2779	if (setbirthtime)
2780		vattr.va_birthtime = ts[1];
2781	if (numtimes > 2)
2782		vattr.va_birthtime = ts[2];
2783	if (nullflag)
2784		vattr.va_vaflags |= VA_UTIMES_NULL;
2785#ifdef MAC
2786	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2787	    vattr.va_mtime);
2788#endif
2789	if (error == 0)
2790		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2791	VOP_UNLOCK(vp, 0, td);
2792	vn_finished_write(mp);
2793	return (error);
2794}
2795
2796/*
2797 * Set the access and modification times of a file.
2798 */
2799#ifndef _SYS_SYSPROTO_H_
2800struct utimes_args {
2801	char	*path;
2802	struct	timeval *tptr;
2803};
2804#endif
2805int
2806utimes(td, uap)
2807	struct thread *td;
2808	register struct utimes_args /* {
2809		char *path;
2810		struct timeval *tptr;
2811	} */ *uap;
2812{
2813
2814	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2815	    UIO_USERSPACE));
2816}
2817
2818int
2819kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2820    struct timeval *tptr, enum uio_seg tptrseg)
2821{
2822	struct timespec ts[2];
2823	int error;
2824	struct nameidata nd;
2825	int vfslocked;
2826
2827	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2828		return (error);
2829	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2830	if ((error = namei(&nd)) != 0)
2831		return (error);
2832	vfslocked = NDHASGIANT(&nd);
2833	NDFREE(&nd, NDF_ONLY_PNBUF);
2834	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2835	vrele(nd.ni_vp);
2836	VFS_UNLOCK_GIANT(vfslocked);
2837	return (error);
2838}
2839
2840/*
2841 * Set the access and modification times of a file.
2842 */
2843#ifndef _SYS_SYSPROTO_H_
2844struct lutimes_args {
2845	char	*path;
2846	struct	timeval *tptr;
2847};
2848#endif
2849int
2850lutimes(td, uap)
2851	struct thread *td;
2852	register struct lutimes_args /* {
2853		char *path;
2854		struct timeval *tptr;
2855	} */ *uap;
2856{
2857
2858	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2859	    UIO_USERSPACE));
2860}
2861
2862int
2863kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2864    struct timeval *tptr, enum uio_seg tptrseg)
2865{
2866	struct timespec ts[2];
2867	int error;
2868	struct nameidata nd;
2869	int vfslocked;
2870
2871	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2872		return (error);
2873	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2874	if ((error = namei(&nd)) != 0)
2875		return (error);
2876	vfslocked = NDHASGIANT(&nd);
2877	NDFREE(&nd, NDF_ONLY_PNBUF);
2878	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2879	vrele(nd.ni_vp);
2880	VFS_UNLOCK_GIANT(vfslocked);
2881	return (error);
2882}
2883
2884/*
2885 * Set the access and modification times of a file.
2886 */
2887#ifndef _SYS_SYSPROTO_H_
2888struct futimes_args {
2889	int	fd;
2890	struct	timeval *tptr;
2891};
2892#endif
2893int
2894futimes(td, uap)
2895	struct thread *td;
2896	register struct futimes_args /* {
2897		int  fd;
2898		struct timeval *tptr;
2899	} */ *uap;
2900{
2901
2902	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2903}
2904
2905int
2906kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2907    enum uio_seg tptrseg)
2908{
2909	struct timespec ts[2];
2910	struct file *fp;
2911	int vfslocked;
2912	int error;
2913
2914	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2915		return (error);
2916	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2917		return (error);
2918	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2919	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2920	fdrop(fp, td);
2921	VFS_UNLOCK_GIANT(vfslocked);
2922	return (error);
2923}
2924
2925/*
2926 * Truncate a file given its path name.
2927 */
2928#ifndef _SYS_SYSPROTO_H_
2929struct truncate_args {
2930	char	*path;
2931	int	pad;
2932	off_t	length;
2933};
2934#endif
2935int
2936truncate(td, uap)
2937	struct thread *td;
2938	register struct truncate_args /* {
2939		char *path;
2940		int pad;
2941		off_t length;
2942	} */ *uap;
2943{
2944
2945	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2946}
2947
2948int
2949kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2950{
2951	struct mount *mp;
2952	struct vnode *vp;
2953	struct vattr vattr;
2954	int error;
2955	struct nameidata nd;
2956	int vfslocked;
2957
2958	if (length < 0)
2959		return(EINVAL);
2960	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2961	if ((error = namei(&nd)) != 0)
2962		return (error);
2963	vfslocked = NDHASGIANT(&nd);
2964	vp = nd.ni_vp;
2965	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2966		vrele(vp);
2967		VFS_UNLOCK_GIANT(vfslocked);
2968		return (error);
2969	}
2970	NDFREE(&nd, NDF_ONLY_PNBUF);
2971	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2972	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2973	if (vp->v_type == VDIR)
2974		error = EISDIR;
2975#ifdef MAC
2976	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
2977	}
2978#endif
2979	else if ((error = vn_writechk(vp)) == 0 &&
2980	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
2981		VATTR_NULL(&vattr);
2982		vattr.va_size = length;
2983		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2984	}
2985	vput(vp);
2986	vn_finished_write(mp);
2987	VFS_UNLOCK_GIANT(vfslocked);
2988	return (error);
2989}
2990
2991/*
2992 * Truncate a file given a file descriptor.
2993 */
2994#ifndef _SYS_SYSPROTO_H_
2995struct ftruncate_args {
2996	int	fd;
2997	int	pad;
2998	off_t	length;
2999};
3000#endif
3001int
3002ftruncate(td, uap)
3003	struct thread *td;
3004	register struct ftruncate_args /* {
3005		int fd;
3006		int pad;
3007		off_t length;
3008	} */ *uap;
3009{
3010	struct mount *mp;
3011	struct vattr vattr;
3012	struct vnode *vp;
3013	struct file *fp;
3014	int vfslocked;
3015	int error;
3016
3017	if (uap->length < 0)
3018		return(EINVAL);
3019	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3020		return (error);
3021	if ((fp->f_flag & FWRITE) == 0) {
3022		fdrop(fp, td);
3023		return (EINVAL);
3024	}
3025	vp = fp->f_vnode;
3026	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3027	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3028		goto drop;
3029	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3030	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3031	if (vp->v_type == VDIR)
3032		error = EISDIR;
3033#ifdef MAC
3034	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3035	    vp))) {
3036	}
3037#endif
3038	else if ((error = vn_writechk(vp)) == 0) {
3039		VATTR_NULL(&vattr);
3040		vattr.va_size = uap->length;
3041		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3042	}
3043	VOP_UNLOCK(vp, 0, td);
3044	vn_finished_write(mp);
3045drop:
3046	VFS_UNLOCK_GIANT(vfslocked);
3047	fdrop(fp, td);
3048	return (error);
3049}
3050
3051#if defined(COMPAT_43)
3052/*
3053 * Truncate a file given its path name.
3054 */
3055#ifndef _SYS_SYSPROTO_H_
3056struct otruncate_args {
3057	char	*path;
3058	long	length;
3059};
3060#endif
3061int
3062otruncate(td, uap)
3063	struct thread *td;
3064	register struct otruncate_args /* {
3065		char *path;
3066		long length;
3067	} */ *uap;
3068{
3069	struct truncate_args /* {
3070		char *path;
3071		int pad;
3072		off_t length;
3073	} */ nuap;
3074
3075	nuap.path = uap->path;
3076	nuap.length = uap->length;
3077	return (truncate(td, &nuap));
3078}
3079
3080/*
3081 * Truncate a file given a file descriptor.
3082 */
3083#ifndef _SYS_SYSPROTO_H_
3084struct oftruncate_args {
3085	int	fd;
3086	long	length;
3087};
3088#endif
3089int
3090oftruncate(td, uap)
3091	struct thread *td;
3092	register struct oftruncate_args /* {
3093		int fd;
3094		long length;
3095	} */ *uap;
3096{
3097	struct ftruncate_args /* {
3098		int fd;
3099		int pad;
3100		off_t length;
3101	} */ nuap;
3102
3103	nuap.fd = uap->fd;
3104	nuap.length = uap->length;
3105	return (ftruncate(td, &nuap));
3106}
3107#endif /* COMPAT_43 */
3108
3109/*
3110 * Sync an open file.
3111 */
3112#ifndef _SYS_SYSPROTO_H_
3113struct fsync_args {
3114	int	fd;
3115};
3116#endif
3117int
3118fsync(td, uap)
3119	struct thread *td;
3120	struct fsync_args /* {
3121		int fd;
3122	} */ *uap;
3123{
3124	struct vnode *vp;
3125	struct mount *mp;
3126	struct file *fp;
3127	int vfslocked;
3128	int error;
3129
3130	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3131		return (error);
3132	vp = fp->f_vnode;
3133	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3134	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3135		goto drop;
3136	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3137	if (vp->v_object != NULL) {
3138		VM_OBJECT_LOCK(vp->v_object);
3139		vm_object_page_clean(vp->v_object, 0, 0, 0);
3140		VM_OBJECT_UNLOCK(vp->v_object);
3141	}
3142	error = VOP_FSYNC(vp, MNT_WAIT, td);
3143
3144	VOP_UNLOCK(vp, 0, td);
3145	vn_finished_write(mp);
3146drop:
3147	VFS_UNLOCK_GIANT(vfslocked);
3148	fdrop(fp, td);
3149	return (error);
3150}
3151
3152/*
3153 * Rename files.  Source and destination must either both be directories,
3154 * or both not be directories.  If target is a directory, it must be empty.
3155 */
3156#ifndef _SYS_SYSPROTO_H_
3157struct rename_args {
3158	char	*from;
3159	char	*to;
3160};
3161#endif
3162int
3163rename(td, uap)
3164	struct thread *td;
3165	register struct rename_args /* {
3166		char *from;
3167		char *to;
3168	} */ *uap;
3169{
3170
3171	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3172}
3173
3174int
3175kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3176{
3177	struct mount *mp = NULL;
3178	struct vnode *tvp, *fvp, *tdvp;
3179	struct nameidata fromnd, tond;
3180	int tvfslocked;
3181	int fvfslocked;
3182	int error;
3183
3184	bwillwrite();
3185#ifdef MAC
3186	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE,
3187	    pathseg, from, td);
3188#else
3189	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE,
3190	    pathseg, from, td);
3191#endif
3192	if ((error = namei(&fromnd)) != 0)
3193		return (error);
3194	fvfslocked = NDHASGIANT(&fromnd);
3195	tvfslocked = 0;
3196#ifdef MAC
3197	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3198	    fromnd.ni_vp, &fromnd.ni_cnd);
3199	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3200	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3201#endif
3202	fvp = fromnd.ni_vp;
3203	if (error == 0)
3204		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3205	if (error != 0) {
3206		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3207		vrele(fromnd.ni_dvp);
3208		vrele(fvp);
3209		goto out1;
3210	}
3211	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3212	    MPSAFE, pathseg, to, td);
3213	if (fromnd.ni_vp->v_type == VDIR)
3214		tond.ni_cnd.cn_flags |= WILLBEDIR;
3215	if ((error = namei(&tond)) != 0) {
3216		/* Translate error code for rename("dir1", "dir2/."). */
3217		if (error == EISDIR && fvp->v_type == VDIR)
3218			error = EINVAL;
3219		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3220		vrele(fromnd.ni_dvp);
3221		vrele(fvp);
3222		goto out1;
3223	}
3224	tvfslocked = NDHASGIANT(&tond);
3225	tdvp = tond.ni_dvp;
3226	tvp = tond.ni_vp;
3227	if (tvp != NULL) {
3228		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3229			error = ENOTDIR;
3230			goto out;
3231		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3232			error = EISDIR;
3233			goto out;
3234		}
3235	}
3236	if (fvp == tdvp)
3237		error = EINVAL;
3238	/*
3239	 * If the source is the same as the destination (that is, if they
3240	 * are links to the same vnode), then there is nothing to do.
3241	 */
3242	if (fvp == tvp)
3243		error = -1;
3244#ifdef MAC
3245	else
3246		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3247		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3248#endif
3249out:
3250	if (!error) {
3251		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3252		if (fromnd.ni_dvp != tdvp) {
3253			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3254		}
3255		if (tvp) {
3256			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3257		}
3258		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3259				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3260		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3261		NDFREE(&tond, NDF_ONLY_PNBUF);
3262	} else {
3263		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3264		NDFREE(&tond, NDF_ONLY_PNBUF);
3265		if (tvp)
3266			vput(tvp);
3267		if (tdvp == tvp)
3268			vrele(tdvp);
3269		else
3270			vput(tdvp);
3271		vrele(fromnd.ni_dvp);
3272		vrele(fvp);
3273	}
3274	vrele(tond.ni_startdir);
3275	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3276	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3277	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3278	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3279out1:
3280	vn_finished_write(mp);
3281	if (fromnd.ni_startdir)
3282		vrele(fromnd.ni_startdir);
3283	VFS_UNLOCK_GIANT(fvfslocked);
3284	VFS_UNLOCK_GIANT(tvfslocked);
3285	if (error == -1)
3286		return (0);
3287	return (error);
3288}
3289
3290/*
3291 * Make a directory file.
3292 */
3293#ifndef _SYS_SYSPROTO_H_
3294struct mkdir_args {
3295	char	*path;
3296	int	mode;
3297};
3298#endif
3299int
3300mkdir(td, uap)
3301	struct thread *td;
3302	register struct mkdir_args /* {
3303		char *path;
3304		int mode;
3305	} */ *uap;
3306{
3307
3308	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3309}
3310
3311int
3312kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3313{
3314	struct mount *mp;
3315	struct vnode *vp;
3316	struct vattr vattr;
3317	int error;
3318	struct nameidata nd;
3319	int vfslocked;
3320
3321restart:
3322	bwillwrite();
3323	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td);
3324	nd.ni_cnd.cn_flags |= WILLBEDIR;
3325	if ((error = namei(&nd)) != 0)
3326		return (error);
3327	vfslocked = NDHASGIANT(&nd);
3328	vp = nd.ni_vp;
3329	if (vp != NULL) {
3330		NDFREE(&nd, NDF_ONLY_PNBUF);
3331		vrele(vp);
3332		/*
3333		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3334		 * the strange behaviour of leaving the vnode unlocked
3335		 * if the target is the same vnode as the parent.
3336		 */
3337		if (vp == nd.ni_dvp)
3338			vrele(nd.ni_dvp);
3339		else
3340			vput(nd.ni_dvp);
3341		VFS_UNLOCK_GIANT(vfslocked);
3342		return (EEXIST);
3343	}
3344	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3345		NDFREE(&nd, NDF_ONLY_PNBUF);
3346		vput(nd.ni_dvp);
3347		VFS_UNLOCK_GIANT(vfslocked);
3348		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3349			return (error);
3350		goto restart;
3351	}
3352	VATTR_NULL(&vattr);
3353	vattr.va_type = VDIR;
3354	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3355	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3356	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3357#ifdef MAC
3358	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3359	    &vattr);
3360	if (error)
3361		goto out;
3362#endif
3363	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3364	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3365#ifdef MAC
3366out:
3367#endif
3368	NDFREE(&nd, NDF_ONLY_PNBUF);
3369	vput(nd.ni_dvp);
3370	if (!error)
3371		vput(nd.ni_vp);
3372	vn_finished_write(mp);
3373	VFS_UNLOCK_GIANT(vfslocked);
3374	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3375	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3376	return (error);
3377}
3378
3379/*
3380 * Remove a directory file.
3381 */
3382#ifndef _SYS_SYSPROTO_H_
3383struct rmdir_args {
3384	char	*path;
3385};
3386#endif
3387int
3388rmdir(td, uap)
3389	struct thread *td;
3390	struct rmdir_args /* {
3391		char *path;
3392	} */ *uap;
3393{
3394
3395	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3396}
3397
3398int
3399kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3400{
3401	struct mount *mp;
3402	struct vnode *vp;
3403	int error;
3404	struct nameidata nd;
3405	int vfslocked;
3406
3407restart:
3408	bwillwrite();
3409	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
3410	if ((error = namei(&nd)) != 0)
3411		return (error);
3412	vfslocked = NDHASGIANT(&nd);
3413	vp = nd.ni_vp;
3414	if (vp->v_type != VDIR) {
3415		error = ENOTDIR;
3416		goto out;
3417	}
3418	/*
3419	 * No rmdir "." please.
3420	 */
3421	if (nd.ni_dvp == vp) {
3422		error = EINVAL;
3423		goto out;
3424	}
3425	/*
3426	 * The root of a mounted filesystem cannot be deleted.
3427	 */
3428	if (vp->v_vflag & VV_ROOT) {
3429		error = EBUSY;
3430		goto out;
3431	}
3432#ifdef MAC
3433	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3434	    &nd.ni_cnd);
3435	if (error)
3436		goto out;
3437#endif
3438	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3439		NDFREE(&nd, NDF_ONLY_PNBUF);
3440		if (nd.ni_dvp == vp)
3441			vrele(nd.ni_dvp);
3442		else
3443			vput(nd.ni_dvp);
3444		vput(vp);
3445		VFS_UNLOCK_GIANT(vfslocked);
3446		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3447			return (error);
3448		goto restart;
3449	}
3450	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3451	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3452	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3453	vn_finished_write(mp);
3454out:
3455	NDFREE(&nd, NDF_ONLY_PNBUF);
3456	if (nd.ni_dvp == vp)
3457		vrele(nd.ni_dvp);
3458	else
3459		vput(nd.ni_dvp);
3460	vput(vp);
3461	VFS_UNLOCK_GIANT(vfslocked);
3462	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3463	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3464	return (error);
3465}
3466
3467#ifdef COMPAT_43
3468/*
3469 * Read a block of directory entries in a filesystem independent format.
 *
 * Compiled only under COMPAT_43.  Reads up to uap->count bytes of entries
 * from the directory open on uap->fd into the user buffer uap->buf, copies
 * the file offset at which the read started out through uap->basep, and
 * reports the number of bytes transferred in td->td_retval[0].
 *
 * Unless the direct path below applies, entries are first read into a
 * temporary kernel buffer, have their d_type/d_namlen fields rewritten,
 * and are then copied to the user buffer with uiomove().
 *
 * Errors produced here: EINVAL (count above 64KB, or the vnode is not a
 * directory), EBADF (descriptor not open for reading), EIO (an entry with
 * a zero d_reclen).  Failures from getvnode(), the MAC check (when
 * configured), VOP_READDIR(), uiomove(), union_dircheckp() and copyout()
 * are passed through.
3470 */
3471#ifndef _SYS_SYSPROTO_H_
3472struct ogetdirentries_args {
3473	int	fd;
3474	char	*buf;
3475	u_int	count;
3476	long	*basep;
3477};
3478#endif
3479int
3480ogetdirentries(td, uap)
3481	struct thread *td;
3482	register struct ogetdirentries_args /* {
3483		int fd;
3484		char *buf;
3485		u_int count;
3486		long *basep;
3487	} */ *uap;
3488{
3489	struct vnode *vp;
3490	struct file *fp;
3491	struct uio auio, kuio;
3492	struct iovec aiov, kiov;
3493	struct dirent *dp, *edp;
3494	caddr_t dirbuf;
3495	int error, eofflag, readcnt;
3496	long loff;
3497
3498	/* XXX arbitrary sanity limit on `count'. */
3499	if (uap->count > 64 * 1024)
3500		return (EINVAL);
3501	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3502		return (error);
3503	if ((fp->f_flag & FREAD) == 0) {
3504		fdrop(fp, td);
3505		return (EBADF);
3506	}
3507	vp = fp->f_vnode;
	/* Re-entered with a different vp when a union mount redirects the read. */
3508unionread:
3509	if (vp->v_type != VDIR) {
3510		fdrop(fp, td);
3511		return (EINVAL);
3512	}
	/* auio describes the caller's buffer in user space. */
3513	aiov.iov_base = uap->buf;
3514	aiov.iov_len = uap->count;
3515	auio.uio_iov = &aiov;
3516	auio.uio_iovcnt = 1;
3517	auio.uio_rw = UIO_READ;
3518	auio.uio_segflg = UIO_USERSPACE;
3519	auio.uio_td = td;
3520	auio.uio_resid = uap->count;
3521	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/* loff remembers where this read started; it is copied out at the end. */
3522	loff = auio.uio_offset = fp->f_offset;
3523#ifdef MAC
3524	error = mac_check_vnode_readdir(td->td_ucred, vp);
3525	if (error) {
3526		VOP_UNLOCK(vp, 0, td);
3527		fdrop(fp, td);
3528		return (error);
3529	}
3530#endif
	/*
	 * Direct path (only built when BYTE_ORDER != LITTLE_ENDIAN): when the
	 * mount's mnt_maxsymlinklen is <= 0, read straight into the user
	 * buffer.  Otherwise, and always on little-endian builds, the braced
	 * block below runs.
	 */
3531#	if (BYTE_ORDER != LITTLE_ENDIAN)
3532		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3533			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3534			    NULL, NULL);
3535			fp->f_offset = auio.uio_offset;
3536		} else
3537#	endif
3538	{
		/* kuio is a copy of auio retargeted at a kernel bounce buffer. */
3539		kuio = auio;
3540		kuio.uio_iov = &kiov;
3541		kuio.uio_segflg = UIO_SYSSPACE;
3542		kiov.iov_len = uap->count;
3543		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3544		kiov.iov_base = dirbuf;
3545		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3546			    NULL, NULL);
3547		fp->f_offset = kuio.uio_offset;
3548		if (error == 0) {
			/* Walk the readcnt bytes actually read, entry by entry. */
3549			readcnt = uap->count - kuio.uio_resid;
3550			edp = (struct dirent *)&dirbuf[readcnt];
3551			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3552#				if (BYTE_ORDER == LITTLE_ENDIAN)
3553					/*
3554					 * The expected low byte of
3555					 * dp->d_namlen is our dp->d_type.
3556					 * The high MBZ byte of dp->d_namlen
3557					 * is our dp->d_namlen.
3558					 */
3559					dp->d_type = dp->d_namlen;
3560					dp->d_namlen = 0;
3561#				else
3562					/*
3563					 * The dp->d_type is the high byte
3564					 * of the expected dp->d_namlen,
3565					 * so must be zero'ed.
3566					 */
3567					dp->d_type = 0;
3568#				endif
				/* A zero d_reclen would never advance dp; fail with EIO. */
3569				if (dp->d_reclen > 0) {
3570					dp = (struct dirent *)
3571					    ((char *)dp + dp->d_reclen);
3572				} else {
3573					error = EIO;
3574					break;
3575				}
3576			}
			/* Copy out only if the walk reached the end of the data. */
3577			if (dp >= edp)
3578				error = uiomove(dirbuf, readcnt, &auio);
3579		}
3580		FREE(dirbuf, M_TEMP);
3581	}
3582	VOP_UNLOCK(vp, 0, td);
3583	if (error) {
3584		fdrop(fp, td);
3585		return (error);
3586	}
	/* Nothing was transferred: see whether a union layer should be read. */
3587	if (uap->count == auio.uio_resid) {
3588		if (union_dircheckp) {
			/* A return of -1 means: retry the read with the updated vp. */
3589			error = union_dircheckp(td, &vp, fp);
3590			if (error == -1)
3591				goto unionread;
3592			if (error) {
3593				fdrop(fp, td);
3594				return (error);
3595			}
3596		}
3597		/*
3598		 * XXX We could delay dropping the lock above but
3599		 * union_dircheckp complicates things.
3600		 */
3601		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
		/*
		 * vp is the root of a mount flagged MNT_UNION: point the file
		 * at the covered vnode, rewind the offset, release the old
		 * vnode and read again.
		 */
3602		if ((vp->v_vflag & VV_ROOT) &&
3603		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3604			struct vnode *tvp = vp;
3605			vp = vp->v_mount->mnt_vnodecovered;
3606			VREF(vp);
3607			fp->f_vnode = vp;
3608			fp->f_data = vp;
3609			fp->f_offset = 0;
3610			vput(tvp);
3611			goto unionread;
3612		}
3613		VOP_UNLOCK(vp, 0, td);
3614	}
	/* basep is written unconditionally here; there is no NULL check. */
3615	error = copyout(&loff, uap->basep, sizeof(long));
3616	fdrop(fp, td);
3617	td->td_retval[0] = uap->count - auio.uio_resid;
3618	return (error);
3619}
3620#endif /* COMPAT_43 */
3621
3622/*
3623 * Read a block of directory entries in a filesystem independent format.
3624 */
3625#ifndef _SYS_SYSPROTO_H_
3626struct getdirentries_args {
3627	int	fd;
3628	char	*buf;
3629	u_int	count;
3630	long	*basep;
3631};
3632#endif
3633int
3634getdirentries(td, uap)
3635	struct thread *td;
3636	register struct getdirentries_args /* {
3637		int fd;
3638		char *buf;
3639		u_int count;
3640		long *basep;
3641	} */ *uap;
3642{
3643	struct vnode *vp;
3644	struct file *fp;
3645	struct uio auio;
3646	struct iovec aiov;
3647	int vfslocked;
3648	long loff;
3649	int error, eofflag;
3650
3651	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3652		return (error);
3653	if ((fp->f_flag & FREAD) == 0) {
3654		fdrop(fp, td);
3655		return (EBADF);
3656	}
3657	vp = fp->f_vnode;
3658unionread:
3659	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3660	if (vp->v_type != VDIR) {
3661		error = EINVAL;
3662		goto fail;
3663	}
3664	aiov.iov_base = uap->buf;
3665	aiov.iov_len = uap->count;
3666	auio.uio_iov = &aiov;
3667	auio.uio_iovcnt = 1;
3668	auio.uio_rw = UIO_READ;
3669	auio.uio_segflg = UIO_USERSPACE;
3670	auio.uio_td = td;
3671	auio.uio_resid = uap->count;
3672	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3673	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3674	loff = auio.uio_offset = fp->f_offset;
3675#ifdef MAC
3676	error = mac_check_vnode_readdir(td->td_ucred, vp);
3677	if (error == 0)
3678#endif
3679		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3680		    NULL);
3681	fp->f_offset = auio.uio_offset;
3682	VOP_UNLOCK(vp, 0, td);
3683	if (error)
3684		goto fail;
3685	if (uap->count == auio.uio_resid) {
3686		if (union_dircheckp) {
3687			error = union_dircheckp(td, &vp, fp);
3688			if (error == -1) {
3689				VFS_UNLOCK_GIANT(vfslocked);
3690				goto unionread;
3691			}
3692			if (error)
3693				goto fail;
3694		}
3695		/*
3696		 * XXX We could delay dropping the lock above but
3697		 * union_dircheckp complicates things.
3698		 */
3699		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3700		if ((vp->v_vflag & VV_ROOT) &&
3701		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3702			struct vnode *tvp = vp;
3703			vp = vp->v_mount->mnt_vnodecovered;
3704			VREF(vp);
3705			fp->f_vnode = vp;
3706			fp->f_data = vp;
3707			fp->f_offset = 0;
3708			vput(tvp);
3709			VFS_UNLOCK_GIANT(vfslocked);
3710			goto unionread;
3711		}
3712		VOP_UNLOCK(vp, 0, td);
3713	}
3714	if (uap->basep != NULL) {
3715		error = copyout(&loff, uap->basep, sizeof(long));
3716	}
3717	td->td_retval[0] = uap->count - auio.uio_resid;
3718fail:
3719	VFS_UNLOCK_GIANT(vfslocked);
3720	fdrop(fp, td);
3721	return (error);
3722}
3723#ifndef _SYS_SYSPROTO_H_
3724struct getdents_args {
3725	int fd;
3726	char *buf;
3727	size_t count;
3728};
3729#endif
3730int
3731getdents(td, uap)
3732	struct thread *td;
3733	register struct getdents_args /* {
3734		int fd;
3735		char *buf;
3736		u_int count;
3737	} */ *uap;
3738{
3739	struct getdirentries_args ap;
3740	ap.fd = uap->fd;
3741	ap.buf = uap->buf;
3742	ap.count = uap->count;
3743	ap.basep = NULL;
3744	return (getdirentries(td, &ap));
3745}
3746
3747/*
3748 * Set the mode mask for creation of filesystem nodes.
3749 *
3750 * MP SAFE
3751 */
3752#ifndef _SYS_SYSPROTO_H_
3753struct umask_args {
3754	int	newmask;
3755};
3756#endif
3757int
3758umask(td, uap)
3759	struct thread *td;
3760	struct umask_args /* {
3761		int newmask;
3762	} */ *uap;
3763{
3764	register struct filedesc *fdp;
3765
3766	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3767	fdp = td->td_proc->p_fd;
3768	td->td_retval[0] = fdp->fd_cmask;
3769	fdp->fd_cmask = uap->newmask & ALLPERMS;
3770	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3771	return (0);
3772}
3773
3774/*
3775 * Void all references to file by ripping underlying filesystem
3776 * away from vnode.
3777 */
3778#ifndef _SYS_SYSPROTO_H_
3779struct revoke_args {
3780	char	*path;
3781};
3782#endif
3783int
3784revoke(td, uap)
3785	struct thread *td;
3786	register struct revoke_args /* {
3787		char *path;
3788	} */ *uap;
3789{
3790	struct vnode *vp;
3791	struct vattr vattr;
3792	int error;
3793	struct nameidata nd;
3794	int vfslocked;
3795
3796	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
3797	    uap->path, td);
3798	if ((error = namei(&nd)) != 0)
3799		return (error);
3800	vfslocked = NDHASGIANT(&nd);
3801	vp = nd.ni_vp;
3802	NDFREE(&nd, NDF_ONLY_PNBUF);
3803	if (vp->v_type != VCHR) {
3804		error = EINVAL;
3805		goto out;
3806	}
3807#ifdef MAC
3808	error = mac_check_vnode_revoke(td->td_ucred, vp);
3809	if (error)
3810		goto out;
3811#endif
3812	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3813	if (error)
3814		goto out;
3815	if (td->td_ucred->cr_uid != vattr.va_uid) {
3816		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3817		if (error)
3818			goto out;
3819	}
3820	if (vcount(vp) > 1)
3821		VOP_REVOKE(vp, REVOKEALL);
3822out:
3823	vput(vp);
3824	VFS_UNLOCK_GIANT(vfslocked);
3825	return (error);
3826}
3827
3828/*
3829 * Convert a user file descriptor to a kernel file entry.
3830 * A reference on the file entry is held upon returning.
3831 */
3832int
3833getvnode(fdp, fd, fpp)
3834	struct filedesc *fdp;
3835	int fd;
3836	struct file **fpp;
3837{
3838	int error;
3839	struct file *fp;
3840
3841	fp = NULL;
3842	if (fdp == NULL)
3843		error = EBADF;
3844	else {
3845		FILEDESC_LOCK(fdp);
3846		if ((u_int)fd >= fdp->fd_nfiles ||
3847		    (fp = fdp->fd_ofiles[fd]) == NULL)
3848			error = EBADF;
3849		else if (fp->f_vnode == NULL) {
3850			fp = NULL;
3851			error = EINVAL;
3852		} else {
3853			fhold(fp);
3854			error = 0;
3855		}
3856		FILEDESC_UNLOCK(fdp);
3857	}
3858	*fpp = fp;
3859	return (error);
3860}
3861
3862/*
3863 * Get (NFS) file handle
3864 */
3865#ifndef _SYS_SYSPROTO_H_
3866struct lgetfh_args {
3867	char	*fname;
3868	fhandle_t *fhp;
3869};
3870#endif
3871int
3872lgetfh(td, uap)
3873	struct thread *td;
3874	register struct lgetfh_args *uap;
3875{
3876	struct nameidata nd;
3877	fhandle_t fh;
3878	register struct vnode *vp;
3879	int vfslocked;
3880	int error;
3881
3882	error = suser(td);
3883	if (error)
3884		return (error);
3885	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE,
3886	    UIO_USERSPACE, uap->fname, td);
3887	error = namei(&nd);
3888	if (error)
3889		return (error);
3890	vfslocked = NDHASGIANT(&nd);
3891	NDFREE(&nd, NDF_ONLY_PNBUF);
3892	vp = nd.ni_vp;
3893	bzero(&fh, sizeof(fh));
3894	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3895	error = VFS_VPTOFH(vp, &fh.fh_fid);
3896	vput(vp);
3897	VFS_UNLOCK_GIANT(vfslocked);
3898	if (error)
3899		return (error);
3900	error = copyout(&fh, uap->fhp, sizeof (fh));
3901	return (error);
3902}
3903
3904#ifndef _SYS_SYSPROTO_H_
3905struct getfh_args {
3906	char	*fname;
3907	fhandle_t *fhp;
3908};
3909#endif
3910int
3911getfh(td, uap)
3912	struct thread *td;
3913	register struct getfh_args *uap;
3914{
3915	struct nameidata nd;
3916	fhandle_t fh;
3917	register struct vnode *vp;
3918	int vfslocked;
3919	int error;
3920
3921	error = suser(td);
3922	if (error)
3923		return (error);
3924	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
3925	    UIO_USERSPACE, uap->fname, td);
3926	error = namei(&nd);
3927	if (error)
3928		return (error);
3929	vfslocked = NDHASGIANT(&nd);
3930	NDFREE(&nd, NDF_ONLY_PNBUF);
3931	vp = nd.ni_vp;
3932	bzero(&fh, sizeof(fh));
3933	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3934	error = VFS_VPTOFH(vp, &fh.fh_fid);
3935	vput(vp);
3936	VFS_UNLOCK_GIANT(vfslocked);
3937	if (error)
3938		return (error);
3939	error = copyout(&fh, uap->fhp, sizeof (fh));
3940	return (error);
3941}
3942
3943/*
3944 * syscall for the rpc.lockd to use to translate a NFS file handle into
3945 * an open descriptor.
3946 *
3947 * warning: do not remove the suser() call or this becomes one giant
3948 * security hole.
3949 */
3950#ifndef _SYS_SYSPROTO_H_
3951struct fhopen_args {
3952	const struct fhandle *u_fhp;
3953	int flags;
3954};
3955#endif
3956int
3957fhopen(td, uap)
3958	struct thread *td;
3959	struct fhopen_args /* {
3960		const struct fhandle *u_fhp;
3961		int flags;
3962	} */ *uap;
3963{
3964	struct proc *p = td->td_proc;
3965	struct mount *mp;
3966	struct vnode *vp;
3967	struct fhandle fhp;
3968	struct vattr vat;
3969	struct vattr *vap = &vat;
3970	struct flock lf;
3971	struct file *fp;
3972	register struct filedesc *fdp = p->p_fd;
3973	int fmode, mode, error, type;
3974	struct file *nfp;
3975	int indx;
3976
3977	error = suser(td);
3978	if (error)
3979		return (error);
3980	fmode = FFLAGS(uap->flags);
3981	/* why not allow a non-read/write open for our lockd? */
3982	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
3983		return (EINVAL);
3984	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
3985	if (error)
3986		return(error);
3987	/* find the mount point */
3988	mp = vfs_getvfs(&fhp.fh_fsid);
3989	if (mp == NULL)
3990		return (ESTALE);
3991	/* now give me my vnode, it gets returned to me locked */
3992	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
3993	if (error)
3994		return (error);
3995	/*
3996	 * from now on we have to make sure not
3997	 * to forget about the vnode
3998	 * any error that causes an abort must vput(vp)
3999	 * just set error = err and 'goto bad;'.
4000	 */
4001
4002	/*
4003	 * from vn_open
4004	 */
4005	if (vp->v_type == VLNK) {
4006		error = EMLINK;
4007		goto bad;
4008	}
4009	if (vp->v_type == VSOCK) {
4010		error = EOPNOTSUPP;
4011		goto bad;
4012	}
4013	mode = 0;
4014	if (fmode & (FWRITE | O_TRUNC)) {
4015		if (vp->v_type == VDIR) {
4016			error = EISDIR;
4017			goto bad;
4018		}
4019		error = vn_writechk(vp);
4020		if (error)
4021			goto bad;
4022		mode |= VWRITE;
4023	}
4024	if (fmode & FREAD)
4025		mode |= VREAD;
4026	if (fmode & O_APPEND)
4027		mode |= VAPPEND;
4028#ifdef MAC
4029	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4030	if (error)
4031		goto bad;
4032#endif
4033	if (mode) {
4034		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4035		if (error)
4036			goto bad;
4037	}
4038	if (fmode & O_TRUNC) {
4039		VOP_UNLOCK(vp, 0, td);				/* XXX */
4040		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4041			vrele(vp);
4042			return (error);
4043		}
4044		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4045		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4046#ifdef MAC
4047		/*
4048		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4049		 * should be right.
4050		 */
4051		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4052		if (error == 0) {
4053#endif
4054			VATTR_NULL(vap);
4055			vap->va_size = 0;
4056			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4057#ifdef MAC
4058		}
4059#endif
4060		vn_finished_write(mp);
4061		if (error)
4062			goto bad;
4063	}
4064	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4065	if (error)
4066		goto bad;
4067
4068	if (fmode & FWRITE)
4069		vp->v_writecount++;
4070
4071	/*
4072	 * end of vn_open code
4073	 */
4074
4075	if ((error = falloc(td, &nfp, &indx)) != 0) {
4076		if (fmode & FWRITE)
4077			vp->v_writecount--;
4078		goto bad;
4079	}
4080	/* An extra reference on `nfp' has been held for us by falloc(). */
4081	fp = nfp;
4082
4083	nfp->f_vnode = vp;
4084	nfp->f_data = vp;
4085	nfp->f_flag = fmode & FMASK;
4086	nfp->f_ops = &vnops;
4087	nfp->f_type = DTYPE_VNODE;
4088	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4089		lf.l_whence = SEEK_SET;
4090		lf.l_start = 0;
4091		lf.l_len = 0;
4092		if (fmode & O_EXLOCK)
4093			lf.l_type = F_WRLCK;
4094		else
4095			lf.l_type = F_RDLCK;
4096		type = F_FLOCK;
4097		if ((fmode & FNONBLOCK) == 0)
4098			type |= F_WAIT;
4099		VOP_UNLOCK(vp, 0, td);
4100		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4101			    type)) != 0) {
4102			/*
4103			 * The lock request failed.  Normally close the
4104			 * descriptor but handle the case where someone might
4105			 * have dup()d or close()d it when we weren't looking.
4106			 */
4107			fdclose(fdp, fp, indx, td);
4108
4109			/*
4110			 * release our private reference
4111			 */
4112			fdrop(fp, td);
4113			return(error);
4114		}
4115		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4116		fp->f_flag |= FHASLOCK;
4117	}
4118
4119	VOP_UNLOCK(vp, 0, td);
4120	fdrop(fp, td);
4121	td->td_retval[0] = indx;
4122	return (0);
4123
4124bad:
4125	vput(vp);
4126	return (error);
4127}
4128
4129/*
4130 * Stat an (NFS) file handle.
4131 */
4132#ifndef _SYS_SYSPROTO_H_
4133struct fhstat_args {
4134	struct fhandle *u_fhp;
4135	struct stat *sb;
4136};
4137#endif
4138int
4139fhstat(td, uap)
4140	struct thread *td;
4141	register struct fhstat_args /* {
4142		struct fhandle *u_fhp;
4143		struct stat *sb;
4144	} */ *uap;
4145{
4146	struct stat sb;
4147	fhandle_t fh;
4148	struct mount *mp;
4149	struct vnode *vp;
4150	int error;
4151
4152	error = suser(td);
4153	if (error)
4154		return (error);
4155	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4156	if (error)
4157		return (error);
4158	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4159		return (ESTALE);
4160	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4161		return (error);
4162	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4163	vput(vp);
4164	if (error)
4165		return (error);
4166	error = copyout(&sb, uap->sb, sizeof(sb));
4167	return (error);
4168}
4169
4170/*
4171 * Implement fstatfs() for (NFS) file handles.
4172 */
4173#ifndef _SYS_SYSPROTO_H_
4174struct fhstatfs_args {
4175	struct fhandle *u_fhp;
4176	struct statfs *buf;
4177};
4178#endif
4179int
4180fhstatfs(td, uap)
4181	struct thread *td;
4182	struct fhstatfs_args /* {
4183		struct fhandle *u_fhp;
4184		struct statfs *buf;
4185	} */ *uap;
4186{
4187	struct statfs sf;
4188	fhandle_t fh;
4189	int error;
4190
4191	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
4192		return (error);
4193	error = kern_fhstatfs(td, fh, &sf);
4194	if (error == 0)
4195		error = copyout(&sf, uap->buf, sizeof(sf));
4196	return (error);
4197}
4198
4199int
4200kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
4201{
4202	struct statfs *sp;
4203	struct mount *mp;
4204	struct vnode *vp;
4205	int error;
4206
4207	error = suser(td);
4208	if (error)
4209		return (error);
4210	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4211		return (ESTALE);
4212	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4213		return (error);
4214	mp = vp->v_mount;
4215	sp = &mp->mnt_stat;
4216	vput(vp);
4217#ifdef MAC
4218	error = mac_check_mount_stat(td->td_ucred, mp);
4219	if (error)
4220		return (error);
4221#endif
4222	/*
4223	 * Set these in case the underlying filesystem fails to do so.
4224	 */
4225	sp->f_version = STATFS_VERSION;
4226	sp->f_namemax = NAME_MAX;
4227	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4228	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4229		return (error);
4230	*buf = *sp;
4231	return (0);
4232}
4233
4234/*
4235 * Syscall to push extended attribute configuration information into the
4236 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4237 * a command (int cmd), and attribute name and misc data.  For now, the
4238 * attribute name is left in userspace for consumption by the VFS_op.
4239 * It will probably be changed to be copied into sysspace by the
4240 * syscall in the future, once issues with various consumers of the
4241 * attribute code have raised their hands.
4242 *
4243 * Currently this is used only by UFS Extended Attributes.
4244 */
4245int
4246extattrctl(td, uap)
4247	struct thread *td;
4248	struct extattrctl_args /* {
4249		const char *path;
4250		int cmd;
4251		const char *filename;
4252		int attrnamespace;
4253		const char *attrname;
4254	} */ *uap;
4255{
4256	struct vnode *filename_vp;
4257	struct nameidata nd;
4258	struct mount *mp, *mp_writable;
4259	char attrname[EXTATTR_MAXNAMELEN];
4260	int error;
4261
4262	/*
4263	 * uap->attrname is not always defined.  We check again later when we
4264	 * invoke the VFS call so as to pass in NULL there if needed.
4265	 */
4266	if (uap->attrname != NULL) {
4267		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4268		    NULL);
4269		if (error)
4270			return (error);
4271	}
4272
4273	/*
4274	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4275	 * which VFS_EXTATTRCTL() will later release.
4276	 */
4277	filename_vp = NULL;
4278	if (uap->filename != NULL) {
4279		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4280		    uap->filename, td);
4281		error = namei(&nd);
4282		if (error)
4283			return (error);
4284		filename_vp = nd.ni_vp;
4285		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4286	}
4287
4288	/* uap->path is always defined. */
4289	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4290	error = namei(&nd);
4291	if (error) {
4292		if (filename_vp != NULL)
4293			vput(filename_vp);
4294		return (error);
4295	}
4296	mp = nd.ni_vp->v_mount;
4297	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4298	NDFREE(&nd, 0);
4299	if (error) {
4300		if (filename_vp != NULL)
4301			vput(filename_vp);
4302		return (error);
4303	}
4304
4305	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4306	    uap->attrname != NULL ? attrname : NULL, td);
4307
4308	vn_finished_write(mp_writable);
4309	/*
4310	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4311	 * filename_vp, so vrele it if it is defined.
4312	 */
4313	if (filename_vp != NULL)
4314		vrele(filename_vp);
4315	return (error);
4316}
4317
4318/*-
4319 * Set a named extended attribute on a file or directory
4320 *
4321 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4322 *            kernelspace string pointer "attrname", userspace buffer
4323 *            pointer "data", buffer length "nbytes", thread "td".
4324 * Returns: 0 on success, an error number otherwise
4325 * Locks: none
4326 * References: vp must be a valid reference for the duration of the call
4327 */
4328static int
4329extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4330    void *data, size_t nbytes, struct thread *td)
4331{
4332	struct mount *mp;
4333	struct uio auio;
4334	struct iovec aiov;
4335	ssize_t cnt;
4336	int error;
4337
4338	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4339	if (error)
4340		return (error);
4341	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4342	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4343
4344	aiov.iov_base = data;
4345	aiov.iov_len = nbytes;
4346	auio.uio_iov = &aiov;
4347	auio.uio_iovcnt = 1;
4348	auio.uio_offset = 0;
4349	if (nbytes > INT_MAX) {
4350		error = EINVAL;
4351		goto done;
4352	}
4353	auio.uio_resid = nbytes;
4354	auio.uio_rw = UIO_WRITE;
4355	auio.uio_segflg = UIO_USERSPACE;
4356	auio.uio_td = td;
4357	cnt = nbytes;
4358
4359#ifdef MAC
4360	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4361	    attrname, &auio);
4362	if (error)
4363		goto done;
4364#endif
4365
4366	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4367	    td->td_ucred, td);
4368	cnt -= auio.uio_resid;
4369	td->td_retval[0] = cnt;
4370
4371done:
4372	VOP_UNLOCK(vp, 0, td);
4373	vn_finished_write(mp);
4374	return (error);
4375}
4376
4377int
4378extattr_set_fd(td, uap)
4379	struct thread *td;
4380	struct extattr_set_fd_args /* {
4381		int fd;
4382		int attrnamespace;
4383		const char *attrname;
4384		void *data;
4385		size_t nbytes;
4386	} */ *uap;
4387{
4388	struct file *fp;
4389	char attrname[EXTATTR_MAXNAMELEN];
4390	int error;
4391
4392	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4393	if (error)
4394		return (error);
4395
4396	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4397	if (error)
4398		return (error);
4399
4400	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4401	    attrname, uap->data, uap->nbytes, td);
4402	fdrop(fp, td);
4403
4404	return (error);
4405}
4406
4407int
4408extattr_set_file(td, uap)
4409	struct thread *td;
4410	struct extattr_set_file_args /* {
4411		const char *path;
4412		int attrnamespace;
4413		const char *attrname;
4414		void *data;
4415		size_t nbytes;
4416	} */ *uap;
4417{
4418	struct nameidata nd;
4419	char attrname[EXTATTR_MAXNAMELEN];
4420	int error;
4421
4422	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4423	if (error)
4424		return (error);
4425
4426	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4427	error = namei(&nd);
4428	if (error)
4429		return (error);
4430	NDFREE(&nd, NDF_ONLY_PNBUF);
4431
4432	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4433	    uap->data, uap->nbytes, td);
4434
4435	vrele(nd.ni_vp);
4436	return (error);
4437}
4438
4439int
4440extattr_set_link(td, uap)
4441	struct thread *td;
4442	struct extattr_set_link_args /* {
4443		const char *path;
4444		int attrnamespace;
4445		const char *attrname;
4446		void *data;
4447		size_t nbytes;
4448	} */ *uap;
4449{
4450	struct nameidata nd;
4451	char attrname[EXTATTR_MAXNAMELEN];
4452	int error;
4453
4454	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4455	if (error)
4456		return (error);
4457
4458	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4459	error = namei(&nd);
4460	if (error)
4461		return (error);
4462	NDFREE(&nd, NDF_ONLY_PNBUF);
4463
4464	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4465	    uap->data, uap->nbytes, td);
4466
4467	vrele(nd.ni_vp);
4468	return (error);
4469}
4470
4471/*-
4472 * Get a named extended attribute on a file or directory
4473 *
4474 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4475 *            kernelspace string pointer "attrname", userspace buffer
4476 *            pointer "data", buffer length "nbytes", thread "td".
4477 * Returns: 0 on success, an error number otherwise
4478 * Locks: none
4479 * References: vp must be a valid reference for the duration of the call
4480 */
4481static int
4482extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4483    void *data, size_t nbytes, struct thread *td)
4484{
4485	struct uio auio, *auiop;
4486	struct iovec aiov;
4487	ssize_t cnt;
4488	size_t size, *sizep;
4489	int error;
4490
4491	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4492	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4493
4494	/*
4495	 * Slightly unusual semantics: if the user provides a NULL data
4496	 * pointer, they don't want to receive the data, just the
4497	 * maximum read length.
4498	 */
4499	auiop = NULL;
4500	sizep = NULL;
4501	cnt = 0;
4502	if (data != NULL) {
4503		aiov.iov_base = data;
4504		aiov.iov_len = nbytes;
4505		auio.uio_iov = &aiov;
4506		auio.uio_offset = 0;
4507		if (nbytes > INT_MAX) {
4508			error = EINVAL;
4509			goto done;
4510		}
4511		auio.uio_resid = nbytes;
4512		auio.uio_rw = UIO_READ;
4513		auio.uio_segflg = UIO_USERSPACE;
4514		auio.uio_td = td;
4515		auiop = &auio;
4516		cnt = nbytes;
4517	} else
4518		sizep = &size;
4519
4520#ifdef MAC
4521	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4522	    attrname, &auio);
4523	if (error)
4524		goto done;
4525#endif
4526
4527	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4528	    td->td_ucred, td);
4529
4530	if (auiop != NULL) {
4531		cnt -= auio.uio_resid;
4532		td->td_retval[0] = cnt;
4533	} else
4534		td->td_retval[0] = size;
4535
4536done:
4537	VOP_UNLOCK(vp, 0, td);
4538	return (error);
4539}
4540
4541int
4542extattr_get_fd(td, uap)
4543	struct thread *td;
4544	struct extattr_get_fd_args /* {
4545		int fd;
4546		int attrnamespace;
4547		const char *attrname;
4548		void *data;
4549		size_t nbytes;
4550	} */ *uap;
4551{
4552	struct file *fp;
4553	char attrname[EXTATTR_MAXNAMELEN];
4554	int error;
4555
4556	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4557	if (error)
4558		return (error);
4559
4560	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4561	if (error)
4562		return (error);
4563
4564	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4565	    attrname, uap->data, uap->nbytes, td);
4566
4567	fdrop(fp, td);
4568	return (error);
4569}
4570
4571int
4572extattr_get_file(td, uap)
4573	struct thread *td;
4574	struct extattr_get_file_args /* {
4575		const char *path;
4576		int attrnamespace;
4577		const char *attrname;
4578		void *data;
4579		size_t nbytes;
4580	} */ *uap;
4581{
4582	struct nameidata nd;
4583	char attrname[EXTATTR_MAXNAMELEN];
4584	int error;
4585
4586	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4587	if (error)
4588		return (error);
4589
4590	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4591	error = namei(&nd);
4592	if (error)
4593		return (error);
4594	NDFREE(&nd, NDF_ONLY_PNBUF);
4595
4596	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4597	    uap->data, uap->nbytes, td);
4598
4599	vrele(nd.ni_vp);
4600	return (error);
4601}
4602
4603int
4604extattr_get_link(td, uap)
4605	struct thread *td;
4606	struct extattr_get_link_args /* {
4607		const char *path;
4608		int attrnamespace;
4609		const char *attrname;
4610		void *data;
4611		size_t nbytes;
4612	} */ *uap;
4613{
4614	struct nameidata nd;
4615	char attrname[EXTATTR_MAXNAMELEN];
4616	int error;
4617
4618	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4619	if (error)
4620		return (error);
4621
4622	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4623	error = namei(&nd);
4624	if (error)
4625		return (error);
4626	NDFREE(&nd, NDF_ONLY_PNBUF);
4627
4628	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4629	    uap->data, uap->nbytes, td);
4630
4631	vrele(nd.ni_vp);
4632	return (error);
4633}
4634
4635/*
4636 * extattr_delete_vp(): Delete a named extended attribute on a file or
4637 *                      directory
4638 *
4639 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4640 *            kernelspace string pointer "attrname", proc "p"
4641 * Returns: 0 on success, an error number otherwise
4642 * Locks: none
4643 * References: vp must be a valid reference for the duration of the call
4644 */
4645static int
4646extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4647    struct thread *td)
4648{
4649	struct mount *mp;
4650	int error;
4651
4652	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4653	if (error)
4654		return (error);
4655	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4656	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4657
4658#ifdef MAC
4659	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4660	    attrname);
4661	if (error)
4662		goto done;
4663#endif
4664
4665	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4666	    td);
4667	if (error == EOPNOTSUPP)
4668		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4669		    td->td_ucred, td);
4670#ifdef MAC
4671done:
4672#endif
4673	VOP_UNLOCK(vp, 0, td);
4674	vn_finished_write(mp);
4675	return (error);
4676}
4677
4678int
4679extattr_delete_fd(td, uap)
4680	struct thread *td;
4681	struct extattr_delete_fd_args /* {
4682		int fd;
4683		int attrnamespace;
4684		const char *attrname;
4685	} */ *uap;
4686{
4687	struct file *fp;
4688	struct vnode *vp;
4689	char attrname[EXTATTR_MAXNAMELEN];
4690	int error;
4691
4692	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4693	if (error)
4694		return (error);
4695
4696	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4697	if (error)
4698		return (error);
4699	vp = fp->f_vnode;
4700
4701	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4702	fdrop(fp, td);
4703	return (error);
4704}
4705
4706int
4707extattr_delete_file(td, uap)
4708	struct thread *td;
4709	struct extattr_delete_file_args /* {
4710		const char *path;
4711		int attrnamespace;
4712		const char *attrname;
4713	} */ *uap;
4714{
4715	struct nameidata nd;
4716	char attrname[EXTATTR_MAXNAMELEN];
4717	int error;
4718
4719	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4720	if (error)
4721		return(error);
4722
4723	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4724	error = namei(&nd);
4725	if (error)
4726		return(error);
4727	NDFREE(&nd, NDF_ONLY_PNBUF);
4728
4729	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4730	vrele(nd.ni_vp);
4731	return(error);
4732}
4733
4734int
4735extattr_delete_link(td, uap)
4736	struct thread *td;
4737	struct extattr_delete_link_args /* {
4738		const char *path;
4739		int attrnamespace;
4740		const char *attrname;
4741	} */ *uap;
4742{
4743	struct nameidata nd;
4744	char attrname[EXTATTR_MAXNAMELEN];
4745	int error;
4746
4747	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4748	if (error)
4749		return(error);
4750
4751	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4752	error = namei(&nd);
4753	if (error)
4754		return(error);
4755	NDFREE(&nd, NDF_ONLY_PNBUF);
4756
4757	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4758	vrele(nd.ni_vp);
4759	return(error);
4760}
4761
4762/*-
4763 * Retrieve a list of extended attributes on a file or directory.
4764 *
4765 * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4766 *            userspace buffer pointer "data", buffer length "nbytes",
4767 *            thread "td".
4768 * Returns: 0 on success, an error number otherwise
4769 * Locks: none
4770 * References: vp must be a valid reference for the duration of the call
4771 */
4772static int
4773extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4774    size_t nbytes, struct thread *td)
4775{
4776	struct uio auio, *auiop;
4777	size_t size, *sizep;
4778	struct iovec aiov;
4779	ssize_t cnt;
4780	int error;
4781
4782	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4783	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4784
4785	auiop = NULL;
4786	sizep = NULL;
4787	cnt = 0;
4788	if (data != NULL) {
4789		aiov.iov_base = data;
4790		aiov.iov_len = nbytes;
4791		auio.uio_iov = &aiov;
4792		auio.uio_offset = 0;
4793		if (nbytes > INT_MAX) {
4794			error = EINVAL;
4795			goto done;
4796		}
4797		auio.uio_resid = nbytes;
4798		auio.uio_rw = UIO_READ;
4799		auio.uio_segflg = UIO_USERSPACE;
4800		auio.uio_td = td;
4801		auiop = &auio;
4802		cnt = nbytes;
4803	} else
4804		sizep = &size;
4805
4806#ifdef MAC
4807	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4808	if (error)
4809		goto done;
4810#endif
4811
4812	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4813	    td->td_ucred, td);
4814
4815	if (auiop != NULL) {
4816		cnt -= auio.uio_resid;
4817		td->td_retval[0] = cnt;
4818	} else
4819		td->td_retval[0] = size;
4820
4821done:
4822	VOP_UNLOCK(vp, 0, td);
4823	return (error);
4824}
4825
4826
4827int
4828extattr_list_fd(td, uap)
4829	struct thread *td;
4830	struct extattr_list_fd_args /* {
4831		int fd;
4832		int attrnamespace;
4833		void *data;
4834		size_t nbytes;
4835	} */ *uap;
4836{
4837	struct file *fp;
4838	int error;
4839
4840	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4841	if (error)
4842		return (error);
4843
4844	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4845	    uap->nbytes, td);
4846
4847	fdrop(fp, td);
4848	return (error);
4849}
4850
4851int
4852extattr_list_file(td, uap)
4853	struct thread*td;
4854	struct extattr_list_file_args /* {
4855		const char *path;
4856		int attrnamespace;
4857		void *data;
4858		size_t nbytes;
4859	} */ *uap;
4860{
4861	struct nameidata nd;
4862	int error;
4863
4864	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4865	error = namei(&nd);
4866	if (error)
4867		return (error);
4868	NDFREE(&nd, NDF_ONLY_PNBUF);
4869
4870	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4871	    uap->nbytes, td);
4872
4873	vrele(nd.ni_vp);
4874	return (error);
4875}
4876
4877int
4878extattr_list_link(td, uap)
4879	struct thread*td;
4880	struct extattr_list_link_args /* {
4881		const char *path;
4882		int attrnamespace;
4883		void *data;
4884		size_t nbytes;
4885	} */ *uap;
4886{
4887	struct nameidata nd;
4888	int error;
4889
4890	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4891	error = namei(&nd);
4892	if (error)
4893		return (error);
4894	NDFREE(&nd, NDF_ONLY_PNBUF);
4895
4896	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4897	    uap->nbytes, td);
4898
4899	vrele(nd.ni_vp);
4900	return (error);
4901}
4902