vfs_extattr.c revision 141129
1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD: head/sys/kern/vfs_extattr.c 141129 2005-02-01 23:43:46Z jeff $");
39
40#include "opt_compat.h"
41#include "opt_mac.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bio.h>
46#include <sys/buf.h>
47#include <sys/sysent.h>
48#include <sys/mac.h>
49#include <sys/malloc.h>
50#include <sys/mount.h>
51#include <sys/mutex.h>
52#include <sys/sysproto.h>
53#include <sys/namei.h>
54#include <sys/filedesc.h>
55#include <sys/kernel.h>
56#include <sys/fcntl.h>
57#include <sys/file.h>
58#include <sys/limits.h>
59#include <sys/linker.h>
60#include <sys/stat.h>
61#include <sys/sx.h>
62#include <sys/unistd.h>
63#include <sys/vnode.h>
64#include <sys/proc.h>
65#include <sys/dirent.h>
66#include <sys/extattr.h>
67#include <sys/jail.h>
68#include <sys/syscallsubr.h>
69#include <sys/sysctl.h>
70
71#include <machine/stdarg.h>
72
73#include <vm/vm.h>
74#include <vm/vm_object.h>
75#include <vm/vm_page.h>
76#include <vm/uma.h>
77
78static int chroot_refuse_vdir_fds(struct filedesc *fdp);
79static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
80static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
81static int setfmode(struct thread *td, struct vnode *, int);
82static int setfflags(struct thread *td, struct vnode *, int);
83static int setutimes(struct thread *td, struct vnode *,
84    const struct timespec *, int, int);
85static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
86    struct thread *td);
87
88static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
89    size_t nbytes, struct thread *td);
90
91int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);
92int (*softdep_fsync_hook)(struct vnode *);
93
94/*
95 * The module initialization routine for POSIX asynchronous I/O will
96 * set this to the version of AIO that it implements.  (Zero means
97 * that it is not implemented.)  This value is used here by pathconf()
98 * and in kern_descrip.c by fpathconf().
99 */
100int async_io_version;
101
102/*
103 * Sync each mounted filesystem.
104 */
105#ifndef _SYS_SYSPROTO_H_
106struct sync_args {
107	int     dummy;
108};
109#endif
110
111#ifdef DEBUG
112static int syncprt = 0;
113SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
114#endif
115
116/* ARGSUSED */
117int
118sync(td, uap)
119	struct thread *td;
120	struct sync_args *uap;
121{
122	struct mount *mp, *nmp;
123	int asyncflag;
124
125	mtx_lock(&Giant);
126	mtx_lock(&mountlist_mtx);
127	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
128		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
129			nmp = TAILQ_NEXT(mp, mnt_list);
130			continue;
131		}
132		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
133		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
134			asyncflag = mp->mnt_flag & MNT_ASYNC;
135			mp->mnt_flag &= ~MNT_ASYNC;
136			vfs_msync(mp, MNT_NOWAIT);
137			VFS_SYNC(mp, MNT_NOWAIT, td);
138			mp->mnt_flag |= asyncflag;
139			vn_finished_write(mp);
140		}
141		mtx_lock(&mountlist_mtx);
142		nmp = TAILQ_NEXT(mp, mnt_list);
143		vfs_unbusy(mp, td);
144	}
145	mtx_unlock(&mountlist_mtx);
146#if 0
147/*
148 * XXX don't call vfs_bufstats() yet because that routine
149 * was not imported in the Lite2 merge.
150 */
151#ifdef DIAGNOSTIC
152	if (syncprt)
153		vfs_bufstats();
154#endif /* DIAGNOSTIC */
155#endif
156	mtx_unlock(&Giant);
157	return (0);
158}
159
160/* XXX PRISON: could be per prison flag */
161static int prison_quotas;
162#if 0
163SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
164#endif
165
166/*
167 * Change filesystem quotas.
168 */
169#ifndef _SYS_SYSPROTO_H_
170struct quotactl_args {
171	char *path;
172	int cmd;
173	int uid;
174	caddr_t arg;
175};
176#endif
177int
178quotactl(td, uap)
179	struct thread *td;
180	register struct quotactl_args /* {
181		char *path;
182		int cmd;
183		int uid;
184		caddr_t arg;
185	} */ *uap;
186{
187	struct mount *mp, *vmp;
188	int error;
189	struct nameidata nd;
190
191	if (jailed(td->td_ucred) && !prison_quotas)
192		return (EPERM);
193	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
194	if ((error = namei(&nd)) != 0)
195		return (error);
196	NDFREE(&nd, NDF_ONLY_PNBUF);
197	error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH);
198	mp = nd.ni_vp->v_mount;
199	vrele(nd.ni_vp);
200	if (error)
201		return (error);
202	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td);
203	vn_finished_write(vmp);
204	return (error);
205}
206
207/*
208 * Get filesystem statistics.
209 */
210#ifndef _SYS_SYSPROTO_H_
211struct statfs_args {
212	char *path;
213	struct statfs *buf;
214};
215#endif
216int
217statfs(td, uap)
218	struct thread *td;
219	register struct statfs_args /* {
220		char *path;
221		struct statfs *buf;
222	} */ *uap;
223{
224	struct mount *mp;
225	struct statfs *sp, sb;
226	int error;
227	struct nameidata nd;
228
229	mtx_lock(&Giant);
230	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
231	if ((error = namei(&nd)) != 0) {
232		mtx_unlock(&Giant);
233		return (error);
234	}
235	mp = nd.ni_vp->v_mount;
236	sp = &mp->mnt_stat;
237	NDFREE(&nd, NDF_ONLY_PNBUF);
238	vrele(nd.ni_vp);
239#ifdef MAC
240	error = mac_check_mount_stat(td->td_ucred, mp);
241	if (error) {
242		mtx_unlock(&Giant);
243		return (error);
244	}
245#endif
246	/*
247	 * Set these in case the underlying filesystem fails to do so.
248	 */
249	sp->f_version = STATFS_VERSION;
250	sp->f_namemax = NAME_MAX;
251	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
252	error = VFS_STATFS(mp, sp, td);
253	mtx_unlock(&Giant);
254	if (error)
255		return (error);
256	if (suser(td)) {
257		bcopy(sp, &sb, sizeof(sb));
258		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
259		sp = &sb;
260	}
261	return (copyout(sp, uap->buf, sizeof(*sp)));
262}
263
264/*
265 * Get filesystem statistics.
266 */
267#ifndef _SYS_SYSPROTO_H_
268struct fstatfs_args {
269	int fd;
270	struct statfs *buf;
271};
272#endif
273int
274fstatfs(td, uap)
275	struct thread *td;
276	register struct fstatfs_args /* {
277		int fd;
278		struct statfs *buf;
279	} */ *uap;
280{
281	struct file *fp;
282	struct mount *mp;
283	struct statfs *sp, sb;
284	int error;
285
286	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
287		return (error);
288	mtx_lock(&Giant);
289	mp = fp->f_vnode->v_mount;
290	fdrop(fp, td);
291	if (mp == NULL) {
292		mtx_unlock(&Giant);
293		return (EBADF);
294	}
295#ifdef MAC
296	error = mac_check_mount_stat(td->td_ucred, mp);
297	if (error) {
298		mtx_unlock(&Giant);
299		return (error);
300	}
301#endif
302	sp = &mp->mnt_stat;
303	/*
304	 * Set these in case the underlying filesystem fails to do so.
305	 */
306	sp->f_version = STATFS_VERSION;
307	sp->f_namemax = NAME_MAX;
308	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
309	error = VFS_STATFS(mp, sp, td);
310	mtx_unlock(&Giant);
311	if (error)
312		return (error);
313	if (suser(td)) {
314		bcopy(sp, &sb, sizeof(sb));
315		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
316		sp = &sb;
317	}
318	return (copyout(sp, uap->buf, sizeof(*sp)));
319}
320
321/*
322 * Get statistics on all filesystems.
323 */
324#ifndef _SYS_SYSPROTO_H_
325struct getfsstat_args {
326	struct statfs *buf;
327	long bufsize;
328	int flags;
329};
330#endif
331int
332getfsstat(td, uap)
333	struct thread *td;
334	register struct getfsstat_args /* {
335		struct statfs *buf;
336		long bufsize;
337		int flags;
338	} */ *uap;
339{
340	struct mount *mp, *nmp;
341	struct statfs *sp, sb;
342	caddr_t sfsp;
343	long count, maxcount, error;
344
345	maxcount = uap->bufsize / sizeof(struct statfs);
346	sfsp = (caddr_t)uap->buf;
347	count = 0;
348	mtx_lock(&Giant);
349	mtx_lock(&mountlist_mtx);
350	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
351		if (!prison_check_mount(td->td_ucred, mp)) {
352			nmp = TAILQ_NEXT(mp, mnt_list);
353			continue;
354		}
355#ifdef MAC
356		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
357			nmp = TAILQ_NEXT(mp, mnt_list);
358			continue;
359		}
360#endif
361		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
362			nmp = TAILQ_NEXT(mp, mnt_list);
363			continue;
364		}
365		if (sfsp && count < maxcount) {
366			sp = &mp->mnt_stat;
367			/*
368			 * Set these in case the underlying filesystem
369			 * fails to do so.
370			 */
371			sp->f_version = STATFS_VERSION;
372			sp->f_namemax = NAME_MAX;
373			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
374			/*
375			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
376			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
377			 * overrides MNT_WAIT.
378			 */
379			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
380			    (uap->flags & MNT_WAIT)) &&
381			    (error = VFS_STATFS(mp, sp, td))) {
382				mtx_lock(&mountlist_mtx);
383				nmp = TAILQ_NEXT(mp, mnt_list);
384				vfs_unbusy(mp, td);
385				continue;
386			}
387			if (suser(td)) {
388				bcopy(sp, &sb, sizeof(sb));
389				sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
390				sp = &sb;
391			}
392			error = copyout(sp, sfsp, sizeof(*sp));
393			if (error) {
394				vfs_unbusy(mp, td);
395				mtx_unlock(&Giant);
396				return (error);
397			}
398			sfsp += sizeof(*sp);
399		}
400		count++;
401		mtx_lock(&mountlist_mtx);
402		nmp = TAILQ_NEXT(mp, mnt_list);
403		vfs_unbusy(mp, td);
404	}
405	mtx_unlock(&mountlist_mtx);
406	mtx_unlock(&Giant);
407	if (sfsp && count > maxcount)
408		td->td_retval[0] = maxcount;
409	else
410		td->td_retval[0] = count;
411	return (0);
412}
413
414#ifdef COMPAT_FREEBSD4
415/*
416 * Get old format filesystem statistics.
417 */
418static void cvtstatfs(struct thread *, struct statfs *, struct ostatfs *);
419
420#ifndef _SYS_SYSPROTO_H_
421struct freebsd4_statfs_args {
422	char *path;
423	struct ostatfs *buf;
424};
425#endif
426int
427freebsd4_statfs(td, uap)
428	struct thread *td;
429	struct freebsd4_statfs_args /* {
430		char *path;
431		struct ostatfs *buf;
432	} */ *uap;
433{
434	struct mount *mp;
435	struct statfs *sp;
436	struct ostatfs osb;
437	int error;
438	struct nameidata nd;
439
440	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
441	if ((error = namei(&nd)) != 0)
442		return (error);
443	mp = nd.ni_vp->v_mount;
444	sp = &mp->mnt_stat;
445	NDFREE(&nd, NDF_ONLY_PNBUF);
446	vrele(nd.ni_vp);
447#ifdef MAC
448	error = mac_check_mount_stat(td->td_ucred, mp);
449	if (error)
450		return (error);
451#endif
452	error = VFS_STATFS(mp, sp, td);
453	if (error)
454		return (error);
455	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
456	cvtstatfs(td, sp, &osb);
457	return (copyout(&osb, uap->buf, sizeof(osb)));
458}
459
460/*
461 * Get filesystem statistics.
462 */
463#ifndef _SYS_SYSPROTO_H_
464struct freebsd4_fstatfs_args {
465	int fd;
466	struct ostatfs *buf;
467};
468#endif
469int
470freebsd4_fstatfs(td, uap)
471	struct thread *td;
472	struct freebsd4_fstatfs_args /* {
473		int fd;
474		struct ostatfs *buf;
475	} */ *uap;
476{
477	struct file *fp;
478	struct mount *mp;
479	struct statfs *sp;
480	struct ostatfs osb;
481	int error;
482
483	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
484		return (error);
485	mp = fp->f_vnode->v_mount;
486	fdrop(fp, td);
487	if (mp == NULL)
488		return (EBADF);
489#ifdef MAC
490	error = mac_check_mount_stat(td->td_ucred, mp);
491	if (error)
492		return (error);
493#endif
494	sp = &mp->mnt_stat;
495	error = VFS_STATFS(mp, sp, td);
496	if (error)
497		return (error);
498	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
499	cvtstatfs(td, sp, &osb);
500	return (copyout(&osb, uap->buf, sizeof(osb)));
501}
502
503/*
504 * Get statistics on all filesystems.
505 */
506#ifndef _SYS_SYSPROTO_H_
507struct freebsd4_getfsstat_args {
508	struct ostatfs *buf;
509	long bufsize;
510	int flags;
511};
512#endif
513int
514freebsd4_getfsstat(td, uap)
515	struct thread *td;
516	register struct freebsd4_getfsstat_args /* {
517		struct ostatfs *buf;
518		long bufsize;
519		int flags;
520	} */ *uap;
521{
522	struct mount *mp, *nmp;
523	struct statfs *sp;
524	struct ostatfs osb;
525	caddr_t sfsp;
526	long count, maxcount, error;
527
528	maxcount = uap->bufsize / sizeof(struct ostatfs);
529	sfsp = (caddr_t)uap->buf;
530	count = 0;
531	mtx_lock(&mountlist_mtx);
532	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
533		if (!prison_check_mount(td->td_ucred, mp)) {
534			nmp = TAILQ_NEXT(mp, mnt_list);
535			continue;
536		}
537#ifdef MAC
538		if (mac_check_mount_stat(td->td_ucred, mp) != 0) {
539			nmp = TAILQ_NEXT(mp, mnt_list);
540			continue;
541		}
542#endif
543		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
544			nmp = TAILQ_NEXT(mp, mnt_list);
545			continue;
546		}
547		if (sfsp && count < maxcount) {
548			sp = &mp->mnt_stat;
549			/*
550			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
551			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
552			 * overrides MNT_WAIT.
553			 */
554			if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
555			    (uap->flags & MNT_WAIT)) &&
556			    (error = VFS_STATFS(mp, sp, td))) {
557				mtx_lock(&mountlist_mtx);
558				nmp = TAILQ_NEXT(mp, mnt_list);
559				vfs_unbusy(mp, td);
560				continue;
561			}
562			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
563			cvtstatfs(td, sp, &osb);
564			error = copyout(&osb, sfsp, sizeof(osb));
565			if (error) {
566				vfs_unbusy(mp, td);
567				return (error);
568			}
569			sfsp += sizeof(osb);
570		}
571		count++;
572		mtx_lock(&mountlist_mtx);
573		nmp = TAILQ_NEXT(mp, mnt_list);
574		vfs_unbusy(mp, td);
575	}
576	mtx_unlock(&mountlist_mtx);
577	if (sfsp && count > maxcount)
578		td->td_retval[0] = maxcount;
579	else
580		td->td_retval[0] = count;
581	return (0);
582}
583
584/*
585 * Implement fstatfs() for (NFS) file handles.
586 */
587#ifndef _SYS_SYSPROTO_H_
588struct freebsd4_fhstatfs_args {
589	struct fhandle *u_fhp;
590	struct ostatfs *buf;
591};
592#endif
593int
594freebsd4_fhstatfs(td, uap)
595	struct thread *td;
596	struct freebsd4_fhstatfs_args /* {
597		struct fhandle *u_fhp;
598		struct ostatfs *buf;
599	} */ *uap;
600{
601	struct statfs *sp;
602	struct mount *mp;
603	struct vnode *vp;
604	struct ostatfs osb;
605	fhandle_t fh;
606	int error;
607
608	error = suser(td);
609	if (error)
610		return (error);
611	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
612		return (error);
613	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
614		return (ESTALE);
615	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
616		return (error);
617	mp = vp->v_mount;
618	sp = &mp->mnt_stat;
619	vput(vp);
620#ifdef MAC
621	error = mac_check_mount_stat(td->td_ucred, mp);
622	if (error)
623		return (error);
624#endif
625	if ((error = VFS_STATFS(mp, sp, td)) != 0)
626		return (error);
627	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
628	cvtstatfs(td, sp, &osb);
629	return (copyout(&osb, uap->buf, sizeof(osb)));
630}
631
632/*
633 * Convert a new format statfs structure to an old format statfs structure.
634 */
635static void
636cvtstatfs(td, nsp, osp)
637	struct thread *td;
638	struct statfs *nsp;
639	struct ostatfs *osp;
640{
641
642	bzero(osp, sizeof(*osp));
643	osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX);
644	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
645	osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX);
646	osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX);
647	osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX);
648	osp->f_files = MIN(nsp->f_files, LONG_MAX);
649	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
650	osp->f_owner = nsp->f_owner;
651	osp->f_type = nsp->f_type;
652	osp->f_flags = nsp->f_flags;
653	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
654	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
655	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
656	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
657	bcopy(nsp->f_fstypename, osp->f_fstypename,
658	    MIN(MFSNAMELEN, OMNAMELEN));
659	bcopy(nsp->f_mntonname, osp->f_mntonname,
660	    MIN(MFSNAMELEN, OMNAMELEN));
661	bcopy(nsp->f_mntfromname, osp->f_mntfromname,
662	    MIN(MFSNAMELEN, OMNAMELEN));
663	if (suser(td)) {
664		osp->f_fsid.val[0] = osp->f_fsid.val[1] = 0;
665	} else {
666		osp->f_fsid = nsp->f_fsid;
667	}
668}
669#endif /* COMPAT_FREEBSD4 */
670
671/*
672 * Change current working directory to a given file descriptor.
673 */
674#ifndef _SYS_SYSPROTO_H_
675struct fchdir_args {
676	int	fd;
677};
678#endif
679int
680fchdir(td, uap)
681	struct thread *td;
682	struct fchdir_args /* {
683		int fd;
684	} */ *uap;
685{
686	register struct filedesc *fdp = td->td_proc->p_fd;
687	struct vnode *vp, *tdp, *vpold;
688	struct mount *mp;
689	struct file *fp;
690	int vfslocked;
691	int error;
692
693	if ((error = getvnode(fdp, uap->fd, &fp)) != 0)
694		return (error);
695	vp = fp->f_vnode;
696	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
697	VREF(vp);
698	fdrop(fp, td);
699	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
700	if (vp->v_type != VDIR)
701		error = ENOTDIR;
702#ifdef MAC
703	else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) {
704	}
705#endif
706	else
707		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
708	while (!error && (mp = vp->v_mountedhere) != NULL) {
709		int tvfslocked;
710		if (vfs_busy(mp, 0, 0, td))
711			continue;
712		tvfslocked = VFS_LOCK_GIANT(mp);
713		error = VFS_ROOT(mp, &tdp, td);
714		vfs_unbusy(mp, td);
715		if (error) {
716			VFS_UNLOCK_GIANT(tvfslocked);
717			break;
718		}
719		vput(vp);
720		VFS_UNLOCK_GIANT(vfslocked);
721		vp = tdp;
722		vfslocked = tvfslocked;
723	}
724	if (error) {
725		vput(vp);
726		VFS_UNLOCK_GIANT(vfslocked);
727		return (error);
728	}
729	VOP_UNLOCK(vp, 0, td);
730	FILEDESC_LOCK_FAST(fdp);
731	vpold = fdp->fd_cdir;
732	fdp->fd_cdir = vp;
733	FILEDESC_UNLOCK_FAST(fdp);
734	vrele(vpold);
735	VFS_UNLOCK_GIANT(vfslocked);
736	return (0);
737}
738
739/*
740 * Change current working directory (``.'').
741 */
742#ifndef _SYS_SYSPROTO_H_
743struct chdir_args {
744	char	*path;
745};
746#endif
747int
748chdir(td, uap)
749	struct thread *td;
750	struct chdir_args /* {
751		char *path;
752	} */ *uap;
753{
754
755	return (kern_chdir(td, uap->path, UIO_USERSPACE));
756}
757
758int
759kern_chdir(struct thread *td, char *path, enum uio_seg pathseg)
760{
761	register struct filedesc *fdp = td->td_proc->p_fd;
762	int error;
763	struct nameidata nd;
764	struct vnode *vp;
765	int vfslocked;
766
767	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td);
768	if ((error = namei(&nd)) != 0)
769		return (error);
770	vfslocked = NDHASGIANT(&nd);
771	if ((error = change_dir(nd.ni_vp, td)) != 0) {
772		vput(nd.ni_vp);
773		VFS_UNLOCK_GIANT(vfslocked);
774		NDFREE(&nd, NDF_ONLY_PNBUF);
775		return (error);
776	}
777	VOP_UNLOCK(nd.ni_vp, 0, td);
778	NDFREE(&nd, NDF_ONLY_PNBUF);
779	FILEDESC_LOCK_FAST(fdp);
780	vp = fdp->fd_cdir;
781	fdp->fd_cdir = nd.ni_vp;
782	FILEDESC_UNLOCK_FAST(fdp);
783	vrele(vp);
784	VFS_UNLOCK_GIANT(vfslocked);
785	return (0);
786}
787
788/*
789 * Helper function for raised chroot(2) security function:  Refuse if
790 * any filedescriptors are open directories.
791 */
792static int
793chroot_refuse_vdir_fds(fdp)
794	struct filedesc *fdp;
795{
796	struct vnode *vp;
797	struct file *fp;
798	int fd;
799
800	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
801	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
802		fp = fget_locked(fdp, fd);
803		if (fp == NULL)
804			continue;
805		if (fp->f_type == DTYPE_VNODE) {
806			vp = fp->f_vnode;
807			if (vp->v_type == VDIR)
808				return (EPERM);
809		}
810	}
811	return (0);
812}
813
814/*
815 * This sysctl determines if we will allow a process to chroot(2) if it
816 * has a directory open:
817 *	0: disallowed for all processes.
818 *	1: allowed for processes that were not already chroot(2)'ed.
819 *	2: allowed for all processes.
820 */
821
822static int chroot_allow_open_directories = 1;
823
824SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
825     &chroot_allow_open_directories, 0, "");
826
827/*
828 * Change notion of root (``/'') directory.
829 */
830#ifndef _SYS_SYSPROTO_H_
831struct chroot_args {
832	char	*path;
833};
834#endif
835int
836chroot(td, uap)
837	struct thread *td;
838	struct chroot_args /* {
839		char *path;
840	} */ *uap;
841{
842	int error;
843	struct nameidata nd;
844	int vfslocked;
845
846	error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
847	if (error)
848		return (error);
849	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
850	    UIO_USERSPACE, uap->path, td);
851	error = namei(&nd);
852	if (error)
853		goto error;
854	vfslocked = NDHASGIANT(&nd);
855	if ((error = change_dir(nd.ni_vp, td)) != 0)
856		goto e_vunlock;
857#ifdef MAC
858	if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp)))
859		goto e_vunlock;
860#endif
861	VOP_UNLOCK(nd.ni_vp, 0, td);
862	error = change_root(nd.ni_vp, td);
863	vrele(nd.ni_vp);
864	VFS_UNLOCK_GIANT(vfslocked);
865	NDFREE(&nd, NDF_ONLY_PNBUF);
866	return (error);
867e_vunlock:
868	vput(nd.ni_vp);
869	VFS_UNLOCK_GIANT(vfslocked);
870error:
871	NDFREE(&nd, NDF_ONLY_PNBUF);
872	return (error);
873}
874
875/*
876 * Common routine for chroot and chdir.  Callers must provide a locked vnode
877 * instance.
878 */
879int
880change_dir(vp, td)
881	struct vnode *vp;
882	struct thread *td;
883{
884	int error;
885
886	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
887	if (vp->v_type != VDIR)
888		return (ENOTDIR);
889#ifdef MAC
890	error = mac_check_vnode_chdir(td->td_ucred, vp);
891	if (error)
892		return (error);
893#endif
894	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
895	return (error);
896}
897
898/*
899 * Common routine for kern_chroot() and jail_attach().  The caller is
900 * responsible for invoking suser() and mac_check_chroot() to authorize this
901 * operation.
902 */
903int
904change_root(vp, td)
905	struct vnode *vp;
906	struct thread *td;
907{
908	struct filedesc *fdp;
909	struct vnode *oldvp;
910	int error;
911
912	VFS_ASSERT_GIANT(vp->v_mount);
913	fdp = td->td_proc->p_fd;
914	FILEDESC_LOCK(fdp);
915	if (chroot_allow_open_directories == 0 ||
916	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
917		error = chroot_refuse_vdir_fds(fdp);
918		if (error) {
919			FILEDESC_UNLOCK(fdp);
920			return (error);
921		}
922	}
923	oldvp = fdp->fd_rdir;
924	fdp->fd_rdir = vp;
925	VREF(fdp->fd_rdir);
926	if (!fdp->fd_jdir) {
927		fdp->fd_jdir = vp;
928		VREF(fdp->fd_jdir);
929	}
930	FILEDESC_UNLOCK(fdp);
931	vrele(oldvp);
932	return (0);
933}
934
935/*
936 * Check permissions, allocate an open file structure,
937 * and call the device open routine if any.
938 *
939 * MP SAFE
940 */
941#ifndef _SYS_SYSPROTO_H_
942struct open_args {
943	char	*path;
944	int	flags;
945	int	mode;
946};
947#endif
948int
949open(td, uap)
950	struct thread *td;
951	register struct open_args /* {
952		char *path;
953		int flags;
954		int mode;
955	} */ *uap;
956{
957	int error;
958
959	error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode);
960	if (mtx_owned(&Giant))
961		printf("open: %s: %d\n", uap->path, error);
962	return (error);
963}
964
965int
966kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags,
967    int mode)
968{
969	struct proc *p = td->td_proc;
970	struct filedesc *fdp = p->p_fd;
971	struct file *fp;
972	struct vnode *vp;
973	struct vattr vat;
974	struct mount *mp;
975	int cmode;
976	struct file *nfp;
977	int type, indx, error;
978	struct flock lf;
979	struct nameidata nd;
980	int vfslocked;
981
982	if ((flags & O_ACCMODE) == O_ACCMODE)
983		return (EINVAL);
984	flags = FFLAGS(flags);
985	error = falloc(td, &nfp, &indx);
986	if (error)
987		return (error);
988	/* An extra reference on `nfp' has been held for us by falloc(). */
989	fp = nfp;
990	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
991	NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td);
992	td->td_dupfd = -1;		/* XXX check for fdopen */
993	error = vn_open(&nd, &flags, cmode, indx);
994	if (error) {
995		/*
996		 * If the vn_open replaced the method vector, something
997		 * wonderous happened deep below and we just pass it up
998		 * pretending we know what we do.
999		 */
1000		if (error == ENXIO && fp->f_ops != &badfileops) {
1001			fdrop(fp, td);
1002			td->td_retval[0] = indx;
1003			return (0);
1004		}
1005
1006		/*
1007		 * release our own reference
1008		 */
1009		fdrop(fp, td);
1010
1011		/*
1012		 * handle special fdopen() case.  bleh.  dupfdopen() is
1013		 * responsible for dropping the old contents of ofiles[indx]
1014		 * if it succeeds.
1015		 */
1016		if ((error == ENODEV || error == ENXIO) &&
1017		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1018		    (error =
1019			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1020			td->td_retval[0] = indx;
1021			return (0);
1022		}
1023		/*
1024		 * Clean up the descriptor, but only if another thread hadn't
1025		 * replaced or closed it.
1026		 */
1027		fdclose(fdp, fp, indx, td);
1028
1029		if (error == ERESTART)
1030			error = EINTR;
1031		return (error);
1032	}
1033	td->td_dupfd = 0;
1034	vfslocked = NDHASGIANT(&nd);
1035	NDFREE(&nd, NDF_ONLY_PNBUF);
1036	vp = nd.ni_vp;
1037
1038	/*
1039	 * There should be 2 references on the file, one from the descriptor
1040	 * table, and one for us.
1041	 *
1042	 * Handle the case where someone closed the file (via its file
1043	 * descriptor) while we were blocked.  The end result should look
1044	 * like opening the file succeeded but it was immediately closed.
1045	 * We call vn_close() manually because we haven't yet hooked up
1046	 * the various 'struct file' fields.
1047	 */
1048	FILEDESC_LOCK(fdp);
1049	FILE_LOCK(fp);
1050	if (fp->f_count == 1) {
1051		mp = vp->v_mount;
1052		KASSERT(fdp->fd_ofiles[indx] != fp,
1053		    ("Open file descriptor lost all refs"));
1054		FILE_UNLOCK(fp);
1055		FILEDESC_UNLOCK(fdp);
1056		VOP_UNLOCK(vp, 0, td);
1057		vn_close(vp, flags & FMASK, fp->f_cred, td);
1058		VFS_UNLOCK_GIANT(vfslocked);
1059		fdrop(fp, td);
1060		td->td_retval[0] = indx;
1061		return (0);
1062	}
1063	fp->f_vnode = vp;
1064	if (fp->f_data == NULL)
1065		fp->f_data = vp;
1066	fp->f_flag = flags & FMASK;
1067	if (fp->f_ops == &badfileops)
1068		fp->f_ops = &vnops;
1069	fp->f_seqcount = 1;
1070	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1071	FILE_UNLOCK(fp);
1072	FILEDESC_UNLOCK(fdp);
1073
1074	VOP_UNLOCK(vp, 0, td);
1075	if (flags & (O_EXLOCK | O_SHLOCK)) {
1076		lf.l_whence = SEEK_SET;
1077		lf.l_start = 0;
1078		lf.l_len = 0;
1079		if (flags & O_EXLOCK)
1080			lf.l_type = F_WRLCK;
1081		else
1082			lf.l_type = F_RDLCK;
1083		type = F_FLOCK;
1084		if ((flags & FNONBLOCK) == 0)
1085			type |= F_WAIT;
1086		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1087			    type)) != 0)
1088			goto bad;
1089		fp->f_flag |= FHASLOCK;
1090	}
1091	if (flags & O_TRUNC) {
1092		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1093			goto bad;
1094		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1095		VATTR_NULL(&vat);
1096		vat.va_size = 0;
1097		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1098#ifdef MAC
1099		error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp);
1100		if (error == 0)
1101#endif
1102			error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1103		VOP_UNLOCK(vp, 0, td);
1104		vn_finished_write(mp);
1105		if (error)
1106			goto bad;
1107	}
1108	VFS_UNLOCK_GIANT(vfslocked);
1109	/*
1110	 * Release our private reference, leaving the one associated with
1111	 * the descriptor table intact.
1112	 */
1113	fdrop(fp, td);
1114	td->td_retval[0] = indx;
1115	return (0);
1116bad:
1117	VFS_UNLOCK_GIANT(vfslocked);
1118	fdclose(fdp, fp, indx, td);
1119	fdrop(fp, td);
1120	return (error);
1121}
1122
#ifdef COMPAT_43
/*
 * Create a file.
 *
 * Compatibility entry point: equivalent to kern_open() on the
 * user-space path with O_WRONLY | O_CREAT | O_TRUNC and the given mode.
 *
 * MP SAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{
	int error;

	error = kern_open(td, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode);
	return (error);
}
#endif /* COMPAT_43 */
1148
1149/*
1150 * Create a special file.
1151 */
1152#ifndef _SYS_SYSPROTO_H_
1153struct mknod_args {
1154	char	*path;
1155	int	mode;
1156	int	dev;
1157};
1158#endif
1159int
1160mknod(td, uap)
1161	struct thread *td;
1162	register struct mknod_args /* {
1163		char *path;
1164		int mode;
1165		int dev;
1166	} */ *uap;
1167{
1168
1169	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
1170}
1171
1172int
1173kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
1174    int dev)
1175{
1176	struct vnode *vp;
1177	struct mount *mp;
1178	struct vattr vattr;
1179	int error;
1180	int whiteout = 0;
1181	struct nameidata nd;
1182	int vfslocked;
1183
1184	switch (mode & S_IFMT) {
1185	case S_IFCHR:
1186	case S_IFBLK:
1187		error = suser(td);
1188		break;
1189	default:
1190		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
1191		break;
1192	}
1193	if (error)
1194		return (error);
1195restart:
1196	bwillwrite();
1197	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1198	if ((error = namei(&nd)) != 0)
1199		return (error);
1200	vfslocked = NDHASGIANT(&nd);
1201	vp = nd.ni_vp;
1202	if (vp != NULL) {
1203		NDFREE(&nd, NDF_ONLY_PNBUF);
1204		vrele(vp);
1205		if (vp == nd.ni_dvp)
1206			vrele(nd.ni_dvp);
1207		else
1208			vput(nd.ni_dvp);
1209		VFS_UNLOCK_GIANT(vfslocked);
1210		return (EEXIST);
1211	} else {
1212		VATTR_NULL(&vattr);
1213		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1214		vattr.va_mode = (mode & ALLPERMS) &
1215		    ~td->td_proc->p_fd->fd_cmask;
1216		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1217		vattr.va_rdev = dev;
1218		whiteout = 0;
1219
1220		switch (mode & S_IFMT) {
1221		case S_IFMT:	/* used by badsect to flag bad sectors */
1222			vattr.va_type = VBAD;
1223			break;
1224		case S_IFCHR:
1225			vattr.va_type = VCHR;
1226			break;
1227		case S_IFBLK:
1228			vattr.va_type = VBLK;
1229			break;
1230		case S_IFWHT:
1231			whiteout = 1;
1232			break;
1233		default:
1234			error = EINVAL;
1235			break;
1236		}
1237	}
1238	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1239		NDFREE(&nd, NDF_ONLY_PNBUF);
1240		vput(nd.ni_dvp);
1241		VFS_UNLOCK_GIANT(vfslocked);
1242		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1243			return (error);
1244		goto restart;
1245	}
1246#ifdef MAC
1247	if (error == 0 && !whiteout)
1248		error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp,
1249		    &nd.ni_cnd, &vattr);
1250#endif
1251	if (!error) {
1252		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1253		if (whiteout)
1254			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1255		else {
1256			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1257						&nd.ni_cnd, &vattr);
1258			if (error == 0)
1259				vput(nd.ni_vp);
1260		}
1261	}
1262	NDFREE(&nd, NDF_ONLY_PNBUF);
1263	vput(nd.ni_dvp);
1264	vn_finished_write(mp);
1265	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1266	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1267	VFS_UNLOCK_GIANT(vfslocked);
1268	return (error);
1269}
1270
1271/*
1272 * Create a named pipe.
1273 */
1274#ifndef _SYS_SYSPROTO_H_
1275struct mkfifo_args {
1276	char	*path;
1277	int	mode;
1278};
1279#endif
1280int
1281mkfifo(td, uap)
1282	struct thread *td;
1283	register struct mkfifo_args /* {
1284		char *path;
1285		int mode;
1286	} */ *uap;
1287{
1288
1289	return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode));
1290}
1291
1292int
1293kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode)
1294{
1295	struct mount *mp;
1296	struct vattr vattr;
1297	int error;
1298	struct nameidata nd;
1299	int vfslocked;
1300
1301restart:
1302	bwillwrite();
1303	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td);
1304	if ((error = namei(&nd)) != 0)
1305		return (error);
1306	vfslocked = NDHASGIANT(&nd);
1307	if (nd.ni_vp != NULL) {
1308		NDFREE(&nd, NDF_ONLY_PNBUF);
1309		vrele(nd.ni_vp);
1310		if (nd.ni_vp == nd.ni_dvp)
1311			vrele(nd.ni_dvp);
1312		else
1313			vput(nd.ni_dvp);
1314		VFS_UNLOCK_GIANT(vfslocked);
1315		return (EEXIST);
1316	}
1317	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1318		NDFREE(&nd, NDF_ONLY_PNBUF);
1319		vput(nd.ni_dvp);
1320		VFS_UNLOCK_GIANT(vfslocked);
1321		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1322			return (error);
1323		goto restart;
1324	}
1325	VATTR_NULL(&vattr);
1326	vattr.va_type = VFIFO;
1327	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1328	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
1329	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1330#ifdef MAC
1331	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1332	    &vattr);
1333	if (error)
1334		goto out;
1335#endif
1336	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1337	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1338	if (error == 0)
1339		vput(nd.ni_vp);
1340#ifdef MAC
1341out:
1342#endif
1343	vput(nd.ni_dvp);
1344	vn_finished_write(mp);
1345	VFS_UNLOCK_GIANT(vfslocked);
1346	NDFREE(&nd, NDF_ONLY_PNBUF);
1347	return (error);
1348}
1349
1350/*
1351 * Make a hard file link.
1352 */
1353#ifndef _SYS_SYSPROTO_H_
1354struct link_args {
1355	char	*path;
1356	char	*link;
1357};
1358#endif
1359int
1360link(td, uap)
1361	struct thread *td;
1362	register struct link_args /* {
1363		char *path;
1364		char *link;
1365	} */ *uap;
1366{
1367	int error;
1368
1369	error = kern_link(td, uap->path, uap->link, UIO_USERSPACE);
1370	return (error);
1371}
1372
1373SYSCTL_DECL(_security_bsd);
1374
1375static int hardlink_check_uid = 0;
1376SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
1377    &hardlink_check_uid, 0,
1378    "Unprivileged processes cannot create hard links to files owned by other "
1379    "users");
1380static int hardlink_check_gid = 0;
1381SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
1382    &hardlink_check_gid, 0,
1383    "Unprivileged processes cannot create hard links to files owned by other "
1384    "groups");
1385
1386static int
1387can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
1388{
1389	struct vattr va;
1390	int error;
1391
1392	if (suser_cred(cred, SUSER_ALLOWJAIL) == 0)
1393		return (0);
1394
1395	if (!hardlink_check_uid && !hardlink_check_gid)
1396		return (0);
1397
1398	error = VOP_GETATTR(vp, &va, cred, td);
1399	if (error != 0)
1400		return (error);
1401
1402	if (hardlink_check_uid) {
1403		if (cred->cr_uid != va.va_uid)
1404			return (EPERM);
1405	}
1406
1407	if (hardlink_check_gid) {
1408		if (!groupmember(va.va_gid, cred))
1409			return (EPERM);
1410	}
1411
1412	return (0);
1413}
1414
1415int
1416kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg)
1417{
1418	struct vnode *vp;
1419	struct mount *mp;
1420	struct nameidata nd;
1421	int vfslocked;
1422	int lvfslocked;
1423	int error;
1424
1425	bwillwrite();
1426	NDINIT(&nd, LOOKUP, FOLLOW | NOOBJ | MPSAFE, segflg, path, td);
1427	if ((error = namei(&nd)) != 0)
1428		return (error);
1429	vfslocked = NDHASGIANT(&nd);
1430	NDFREE(&nd, NDF_ONLY_PNBUF);
1431	vp = nd.ni_vp;
1432	if (vp->v_type == VDIR) {
1433		vrele(vp);
1434		VFS_UNLOCK_GIANT(vfslocked);
1435		return (EPERM);		/* POSIX */
1436	}
1437	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
1438		vrele(vp);
1439		VFS_UNLOCK_GIANT(vfslocked);
1440		return (error);
1441	}
1442	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td);
1443	if ((error = namei(&nd)) == 0) {
1444		lvfslocked = NDHASGIANT(&nd);
1445		if (nd.ni_vp != NULL) {
1446			vrele(nd.ni_vp);
1447			if (nd.ni_dvp == nd.ni_vp)
1448				vrele(nd.ni_dvp);
1449			else
1450				vput(nd.ni_dvp);
1451			error = EEXIST;
1452		} else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td))
1453		    == 0) {
1454			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1455			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1456			error = can_hardlink(vp, td, td->td_ucred);
1457			if (error == 0)
1458#ifdef MAC
1459				error = mac_check_vnode_link(td->td_ucred,
1460				    nd.ni_dvp, vp, &nd.ni_cnd);
1461			if (error == 0)
1462#endif
1463				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1464			VOP_UNLOCK(vp, 0, td);
1465			vput(nd.ni_dvp);
1466		}
1467		NDFREE(&nd, NDF_ONLY_PNBUF);
1468		VFS_UNLOCK_GIANT(lvfslocked);
1469	}
1470	vrele(vp);
1471	vn_finished_write(mp);
1472	VFS_UNLOCK_GIANT(vfslocked);
1473	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
1474	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
1475	return (error);
1476}
1477
1478/*
1479 * Make a symbolic link.
1480 */
1481#ifndef _SYS_SYSPROTO_H_
1482struct symlink_args {
1483	char	*path;
1484	char	*link;
1485};
1486#endif
1487int
1488symlink(td, uap)
1489	struct thread *td;
1490	register struct symlink_args /* {
1491		char *path;
1492		char *link;
1493	} */ *uap;
1494{
1495
1496	return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE));
1497}
1498
1499int
1500kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg)
1501{
1502	struct mount *mp;
1503	struct vattr vattr;
1504	char *syspath;
1505	int error;
1506	struct nameidata nd;
1507	int vfslocked;
1508
1509	if (segflg == UIO_SYSSPACE) {
1510		syspath = path;
1511	} else {
1512		syspath = uma_zalloc(namei_zone, M_WAITOK);
1513		if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0)
1514			goto out;
1515	}
1516restart:
1517	bwillwrite();
1518	NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME | MPSAFE,
1519	    segflg, link, td);
1520	if ((error = namei(&nd)) != 0)
1521		goto out;
1522	vfslocked = NDHASGIANT(&nd);
1523	if (nd.ni_vp) {
1524		NDFREE(&nd, NDF_ONLY_PNBUF);
1525		vrele(nd.ni_vp);
1526		if (nd.ni_vp == nd.ni_dvp)
1527			vrele(nd.ni_dvp);
1528		else
1529			vput(nd.ni_dvp);
1530		VFS_UNLOCK_GIANT(vfslocked);
1531		error = EEXIST;
1532		goto out;
1533	}
1534	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1535		NDFREE(&nd, NDF_ONLY_PNBUF);
1536		vput(nd.ni_dvp);
1537		VFS_UNLOCK_GIANT(vfslocked);
1538		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1539			goto out;
1540		goto restart;
1541	}
1542	VATTR_NULL(&vattr);
1543	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
1544	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
1545	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
1546#ifdef MAC
1547	vattr.va_type = VLNK;
1548	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
1549	    &vattr);
1550	if (error)
1551		goto out2;
1552#endif
1553	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1554	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
1555	if (error == 0)
1556		vput(nd.ni_vp);
1557#ifdef MAC
1558out2:
1559#endif
1560	NDFREE(&nd, NDF_ONLY_PNBUF);
1561	vput(nd.ni_dvp);
1562	vn_finished_write(mp);
1563	VFS_UNLOCK_GIANT(vfslocked);
1564	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
1565	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
1566out:
1567	if (segflg != UIO_SYSSPACE)
1568		uma_zfree(namei_zone, syspath);
1569	return (error);
1570}
1571
1572/*
1573 * Delete a whiteout from the filesystem.
1574 */
1575int
1576undelete(td, uap)
1577	struct thread *td;
1578	register struct undelete_args /* {
1579		char *path;
1580	} */ *uap;
1581{
1582	int error;
1583	struct mount *mp;
1584	struct nameidata nd;
1585	int vfslocked;
1586
1587restart:
1588	bwillwrite();
1589	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE,
1590	    uap->path, td);
1591	error = namei(&nd);
1592	if (error)
1593		return (error);
1594	vfslocked = NDHASGIANT(&nd);
1595
1596	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1597		NDFREE(&nd, NDF_ONLY_PNBUF);
1598		if (nd.ni_vp)
1599			vrele(nd.ni_vp);
1600		if (nd.ni_vp == nd.ni_dvp)
1601			vrele(nd.ni_dvp);
1602		else
1603			vput(nd.ni_dvp);
1604		VFS_UNLOCK_GIANT(vfslocked);
1605		return (EEXIST);
1606	}
1607	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1608		NDFREE(&nd, NDF_ONLY_PNBUF);
1609		vput(nd.ni_dvp);
1610		VFS_UNLOCK_GIANT(vfslocked);
1611		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1612			return (error);
1613		goto restart;
1614	}
1615	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1616	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
1617	NDFREE(&nd, NDF_ONLY_PNBUF);
1618	vput(nd.ni_dvp);
1619	vn_finished_write(mp);
1620	VFS_UNLOCK_GIANT(vfslocked);
1621	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
1622	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
1623	return (error);
1624}
1625
1626/*
1627 * Delete a name from the filesystem.
1628 */
1629#ifndef _SYS_SYSPROTO_H_
1630struct unlink_args {
1631	char	*path;
1632};
1633#endif
1634int
1635unlink(td, uap)
1636	struct thread *td;
1637	struct unlink_args /* {
1638		char *path;
1639	} */ *uap;
1640{
1641	int error;
1642
1643	error = kern_unlink(td, uap->path, UIO_USERSPACE);
1644	return (error);
1645}
1646
1647int
1648kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
1649{
1650	struct mount *mp;
1651	struct vnode *vp;
1652	int error;
1653	struct nameidata nd;
1654	int vfslocked;
1655
1656restart:
1657	bwillwrite();
1658	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
1659	if ((error = namei(&nd)) != 0)
1660		return (error);
1661	vfslocked = NDHASGIANT(&nd);
1662	vp = nd.ni_vp;
1663	if (vp->v_type == VDIR)
1664		error = EPERM;		/* POSIX */
1665	else {
1666		/*
1667		 * The root of a mounted filesystem cannot be deleted.
1668		 *
1669		 * XXX: can this only be a VDIR case?
1670		 */
1671		if (vp->v_vflag & VV_ROOT)
1672			error = EBUSY;
1673	}
1674	if (error == 0) {
1675		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1676			NDFREE(&nd, NDF_ONLY_PNBUF);
1677			if (vp == nd.ni_dvp)
1678				vrele(vp);
1679			else
1680				vput(vp);
1681			vput(nd.ni_dvp);
1682			VFS_UNLOCK_GIANT(vfslocked);
1683			if ((error = vn_start_write(NULL, &mp,
1684			    V_XSLEEP | PCATCH)) != 0)
1685				return (error);
1686			goto restart;
1687		}
1688#ifdef MAC
1689		error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
1690		    &nd.ni_cnd);
1691		if (error)
1692			goto out;
1693#endif
1694		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1695		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
1696#ifdef MAC
1697out:
1698#endif
1699		vn_finished_write(mp);
1700	}
1701	NDFREE(&nd, NDF_ONLY_PNBUF);
1702	if (vp == nd.ni_dvp)
1703		vrele(vp);
1704	else
1705		vput(vp);
1706	vput(nd.ni_dvp);
1707	VFS_UNLOCK_GIANT(vfslocked);
1708	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
1709	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
1710	return (error);
1711}
1712
1713/*
1714 * Reposition read/write file offset.
1715 */
1716#ifndef _SYS_SYSPROTO_H_
1717struct lseek_args {
1718	int	fd;
1719	int	pad;
1720	off_t	offset;
1721	int	whence;
1722};
1723#endif
1724int
1725lseek(td, uap)
1726	struct thread *td;
1727	register struct lseek_args /* {
1728		int fd;
1729		int pad;
1730		off_t offset;
1731		int whence;
1732	} */ *uap;
1733{
1734	struct ucred *cred = td->td_ucred;
1735	struct file *fp;
1736	struct vnode *vp;
1737	struct vattr vattr;
1738	off_t offset;
1739	int error, noneg;
1740	int vfslocked;
1741
1742	if ((error = fget(td, uap->fd, &fp)) != 0)
1743		return (error);
1744	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) {
1745		fdrop(fp, td);
1746		return (ESPIPE);
1747	}
1748	vp = fp->f_vnode;
1749	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1750	noneg = (vp->v_type != VCHR);
1751	offset = uap->offset;
1752	switch (uap->whence) {
1753	case L_INCR:
1754		if (noneg &&
1755		    (fp->f_offset < 0 ||
1756		    (offset > 0 && fp->f_offset > OFF_MAX - offset))) {
1757			error = EOVERFLOW;
1758			break;
1759		}
1760		offset += fp->f_offset;
1761		break;
1762	case L_XTND:
1763		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1764		error = VOP_GETATTR(vp, &vattr, cred, td);
1765		VOP_UNLOCK(vp, 0, td);
1766		if (error)
1767			break;
1768		if (noneg &&
1769		    (vattr.va_size > OFF_MAX ||
1770		    (offset > 0 && vattr.va_size > OFF_MAX - offset))) {
1771			error = EOVERFLOW;
1772			break;
1773		}
1774		offset += vattr.va_size;
1775		break;
1776	case L_SET:
1777		break;
1778	default:
1779		error = EINVAL;
1780	}
1781	if (error == 0 && noneg && offset < 0)
1782		error = EINVAL;
1783	if (error != 0)
1784		goto drop;
1785	fp->f_offset = offset;
1786	*(off_t *)(td->td_retval) = fp->f_offset;
1787drop:
1788	fdrop(fp, td);
1789	VFS_UNLOCK_GIANT(vfslocked);
1790	return (error);
1791}
1792
#if defined(COMPAT_43)
/*
 * Old (COMPAT_43) lseek: reposition read/write file offset.
 *
 * Repackages the arguments (with a `long' offset) into a struct
 * lseek_args and calls lseek().  The `pad' member is not filled in.
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(struct thread *td, struct olseek_args *uap)
{
	struct lseek_args /* {
		int fd;
		int pad;
		off_t offset;
		int whence;
	} */ args;

	args.fd = uap->fd;
	args.offset = uap->offset;
	args.whence = uap->whence;
	return (lseek(td, &args));
}
#endif /* COMPAT_43 */
1828
1829/*
1830 * Check access permissions using passed credentials.
1831 */
1832static int
1833vn_access(vp, user_flags, cred, td)
1834	struct vnode	*vp;
1835	int		user_flags;
1836	struct ucred	*cred;
1837	struct thread	*td;
1838{
1839	int error, flags;
1840
1841	/* Flags == 0 means only check for existence. */
1842	error = 0;
1843	if (user_flags) {
1844		flags = 0;
1845		if (user_flags & R_OK)
1846			flags |= VREAD;
1847		if (user_flags & W_OK)
1848			flags |= VWRITE;
1849		if (user_flags & X_OK)
1850			flags |= VEXEC;
1851#ifdef MAC
1852		error = mac_check_vnode_access(cred, vp, flags);
1853		if (error)
1854			return (error);
1855#endif
1856		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
1857			error = VOP_ACCESS(vp, flags, cred, td);
1858	}
1859	return (error);
1860}
1861
1862/*
1863 * Check access permissions using "real" credentials.
1864 */
1865#ifndef _SYS_SYSPROTO_H_
1866struct access_args {
1867	char	*path;
1868	int	flags;
1869};
1870#endif
1871int
1872access(td, uap)
1873	struct thread *td;
1874	register struct access_args /* {
1875		char *path;
1876		int flags;
1877	} */ *uap;
1878{
1879
1880	return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags));
1881}
1882
1883int
1884kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags)
1885{
1886	struct ucred *cred, *tmpcred;
1887	register struct vnode *vp;
1888	struct nameidata nd;
1889	int vfslocked;
1890	int error;
1891
1892	/*
1893	 * Create and modify a temporary credential instead of one that
1894	 * is potentially shared.  This could also mess up socket
1895	 * buffer accounting which can run in an interrupt context.
1896	 */
1897	cred = td->td_ucred;
1898	tmpcred = crdup(cred);
1899	tmpcred->cr_uid = cred->cr_ruid;
1900	tmpcred->cr_groups[0] = cred->cr_rgid;
1901	td->td_ucred = tmpcred;
1902	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ | MPSAFE, pathseg,
1903	    path, td);
1904	if ((error = namei(&nd)) != 0)
1905		goto out1;
1906	vfslocked = NDHASGIANT(&nd);
1907	vp = nd.ni_vp;
1908
1909	error = vn_access(vp, flags, tmpcred, td);
1910	NDFREE(&nd, NDF_ONLY_PNBUF);
1911	vput(vp);
1912	VFS_UNLOCK_GIANT(vfslocked);
1913out1:
1914	td->td_ucred = cred;
1915	crfree(tmpcred);
1916	return (error);
1917}
1918
1919/*
1920 * Check access permissions using "effective" credentials.
1921 */
1922#ifndef _SYS_SYSPROTO_H_
1923struct eaccess_args {
1924	char	*path;
1925	int	flags;
1926};
1927#endif
1928int
1929eaccess(td, uap)
1930	struct thread *td;
1931	register struct eaccess_args /* {
1932		char *path;
1933		int flags;
1934	} */ *uap;
1935{
1936	struct nameidata nd;
1937	struct vnode *vp;
1938	int vfslocked;
1939	int error;
1940
1941	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ | MPSAFE, UIO_USERSPACE,
1942	    uap->path, td);
1943	if ((error = namei(&nd)) != 0)
1944		return (error);
1945	vp = nd.ni_vp;
1946	vfslocked = NDHASGIANT(&nd);
1947	error = vn_access(vp, uap->flags, td->td_ucred, td);
1948	NDFREE(&nd, NDF_ONLY_PNBUF);
1949	vput(vp);
1950	VFS_UNLOCK_GIANT(vfslocked);
1951	return (error);
1952}
1953
1954#if defined(COMPAT_43)
1955/*
1956 * Get file status; this version follows links.
1957 */
1958#ifndef _SYS_SYSPROTO_H_
1959struct ostat_args {
1960	char	*path;
1961	struct ostat *ub;
1962};
1963#endif
1964int
1965ostat(td, uap)
1966	struct thread *td;
1967	register struct ostat_args /* {
1968		char *path;
1969		struct ostat *ub;
1970	} */ *uap;
1971{
1972	struct stat sb;
1973	struct ostat osb;
1974	int error;
1975	struct nameidata nd;
1976
1977	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
1978	    uap->path, td);
1979	if ((error = namei(&nd)) != 0)
1980		return (error);
1981	NDFREE(&nd, NDF_ONLY_PNBUF);
1982	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
1983	vput(nd.ni_vp);
1984	if (error)
1985		return (error);
1986	cvtstat(&sb, &osb);
1987	error = copyout(&osb, uap->ub, sizeof (osb));
1988	return (error);
1989}
1990
1991/*
1992 * Get file status; this version does not follow links.
1993 */
1994#ifndef _SYS_SYSPROTO_H_
1995struct olstat_args {
1996	char	*path;
1997	struct ostat *ub;
1998};
1999#endif
2000int
2001olstat(td, uap)
2002	struct thread *td;
2003	register struct olstat_args /* {
2004		char *path;
2005		struct ostat *ub;
2006	} */ *uap;
2007{
2008	struct vnode *vp;
2009	struct stat sb;
2010	struct ostat osb;
2011	int error;
2012	struct nameidata nd;
2013
2014	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2015	    uap->path, td);
2016	if ((error = namei(&nd)) != 0)
2017		return (error);
2018	vp = nd.ni_vp;
2019	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2020	NDFREE(&nd, NDF_ONLY_PNBUF);
2021	vput(vp);
2022	if (error)
2023		return (error);
2024	cvtstat(&sb, &osb);
2025	error = copyout(&osb, uap->ub, sizeof (osb));
2026	return (error);
2027}
2028
2029/*
2030 * Convert from an old to a new stat structure.
2031 */
2032void
2033cvtstat(st, ost)
2034	struct stat *st;
2035	struct ostat *ost;
2036{
2037
2038	ost->st_dev = st->st_dev;
2039	ost->st_ino = st->st_ino;
2040	ost->st_mode = st->st_mode;
2041	ost->st_nlink = st->st_nlink;
2042	ost->st_uid = st->st_uid;
2043	ost->st_gid = st->st_gid;
2044	ost->st_rdev = st->st_rdev;
2045	if (st->st_size < (quad_t)1 << 32)
2046		ost->st_size = st->st_size;
2047	else
2048		ost->st_size = -2;
2049	ost->st_atime = st->st_atime;
2050	ost->st_mtime = st->st_mtime;
2051	ost->st_ctime = st->st_ctime;
2052	ost->st_blksize = st->st_blksize;
2053	ost->st_blocks = st->st_blocks;
2054	ost->st_flags = st->st_flags;
2055	ost->st_gen = st->st_gen;
2056}
2057#endif /* COMPAT_43 */
2058
2059/*
2060 * Get file status; this version follows links.
2061 */
2062#ifndef _SYS_SYSPROTO_H_
2063struct stat_args {
2064	char	*path;
2065	struct stat *ub;
2066};
2067#endif
2068int
2069stat(td, uap)
2070	struct thread *td;
2071	register struct stat_args /* {
2072		char *path;
2073		struct stat *ub;
2074	} */ *uap;
2075{
2076	struct stat sb;
2077	int error;
2078	struct nameidata nd;
2079	int vfslocked;
2080
2081#ifdef LOOKUP_SHARED
2082	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ | MPSAFE,
2083	    UIO_USERSPACE, uap->path, td);
2084#else
2085	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ | MPSAFE, UIO_USERSPACE,
2086	    uap->path, td);
2087#endif
2088	if ((error = namei(&nd)) != 0)
2089		return (error);
2090	vfslocked = NDHASGIANT(&nd);
2091	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2092	NDFREE(&nd, NDF_ONLY_PNBUF);
2093	vput(nd.ni_vp);
2094	VFS_UNLOCK_GIANT(vfslocked);
2095	if (error)
2096		return (error);
2097	error = copyout(&sb, uap->ub, sizeof (sb));
2098	return (error);
2099}
2100
2101/*
2102 * Get file status; this version does not follow links.
2103 */
2104#ifndef _SYS_SYSPROTO_H_
2105struct lstat_args {
2106	char	*path;
2107	struct stat *ub;
2108};
2109#endif
2110int
2111lstat(td, uap)
2112	struct thread *td;
2113	register struct lstat_args /* {
2114		char *path;
2115		struct stat *ub;
2116	} */ *uap;
2117{
2118	int error;
2119	struct vnode *vp;
2120	struct stat sb;
2121	struct nameidata nd;
2122	int vfslocked;
2123
2124	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ | MPSAFE,
2125	    UIO_USERSPACE, uap->path, td);
2126	if ((error = namei(&nd)) != 0)
2127		return (error);
2128	vfslocked = NDHASGIANT(&nd);
2129	vp = nd.ni_vp;
2130	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2131	NDFREE(&nd, NDF_ONLY_PNBUF);
2132	vput(vp);
2133	VFS_UNLOCK_GIANT(vfslocked);
2134	if (error)
2135		return (error);
2136	error = copyout(&sb, uap->ub, sizeof (sb));
2137	return (error);
2138}
2139
2140/*
2141 * Implementation of the NetBSD stat() function.
2142 * XXX This should probably be collapsed with the FreeBSD version,
2143 * as the differences are only due to vn_stat() clearing spares at
2144 * the end of the structures.  vn_stat could be split to avoid this,
2145 * and thus collapse the following to close to zero code.
2146 */
2147void
2148cvtnstat(sb, nsb)
2149	struct stat *sb;
2150	struct nstat *nsb;
2151{
2152	bzero(nsb, sizeof *nsb);
2153	nsb->st_dev = sb->st_dev;
2154	nsb->st_ino = sb->st_ino;
2155	nsb->st_mode = sb->st_mode;
2156	nsb->st_nlink = sb->st_nlink;
2157	nsb->st_uid = sb->st_uid;
2158	nsb->st_gid = sb->st_gid;
2159	nsb->st_rdev = sb->st_rdev;
2160	nsb->st_atimespec = sb->st_atimespec;
2161	nsb->st_mtimespec = sb->st_mtimespec;
2162	nsb->st_ctimespec = sb->st_ctimespec;
2163	nsb->st_size = sb->st_size;
2164	nsb->st_blocks = sb->st_blocks;
2165	nsb->st_blksize = sb->st_blksize;
2166	nsb->st_flags = sb->st_flags;
2167	nsb->st_gen = sb->st_gen;
2168	nsb->st_birthtimespec = sb->st_birthtimespec;
2169}
2170
2171#ifndef _SYS_SYSPROTO_H_
2172struct nstat_args {
2173	char	*path;
2174	struct nstat *ub;
2175};
2176#endif
2177int
2178nstat(td, uap)
2179	struct thread *td;
2180	register struct nstat_args /* {
2181		char *path;
2182		struct nstat *ub;
2183	} */ *uap;
2184{
2185	struct stat sb;
2186	struct nstat nsb;
2187	int error;
2188	struct nameidata nd;
2189	int vfslocked;
2190
2191	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ | MPSAFE, UIO_USERSPACE,
2192	    uap->path, td);
2193	if ((error = namei(&nd)) != 0)
2194		return (error);
2195	vfslocked = NDHASGIANT(&nd);
2196	NDFREE(&nd, NDF_ONLY_PNBUF);
2197	error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td);
2198	vput(nd.ni_vp);
2199	VFS_UNLOCK_GIANT(vfslocked);
2200	if (error)
2201		return (error);
2202	cvtnstat(&sb, &nsb);
2203	error = copyout(&nsb, uap->ub, sizeof (nsb));
2204	return (error);
2205}
2206
2207/*
2208 * NetBSD lstat.  Get file status; this version does not follow links.
2209 */
2210#ifndef _SYS_SYSPROTO_H_
2211struct lstat_args {
2212	char	*path;
2213	struct stat *ub;
2214};
2215#endif
2216int
2217nlstat(td, uap)
2218	struct thread *td;
2219	register struct nlstat_args /* {
2220		char *path;
2221		struct nstat *ub;
2222	} */ *uap;
2223{
2224	int error;
2225	struct vnode *vp;
2226	struct stat sb;
2227	struct nstat nsb;
2228	struct nameidata nd;
2229	int vfslocked;
2230
2231	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ | MPSAFE,
2232	    UIO_USERSPACE, uap->path, td);
2233	if ((error = namei(&nd)) != 0)
2234		return (error);
2235	vfslocked = NDHASGIANT(&nd);
2236	vp = nd.ni_vp;
2237	NDFREE(&nd, NDF_ONLY_PNBUF);
2238	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
2239	vput(vp);
2240	VFS_UNLOCK_GIANT(vfslocked);
2241	if (error)
2242		return (error);
2243	cvtnstat(&sb, &nsb);
2244	error = copyout(&nsb, uap->ub, sizeof (nsb));
2245	return (error);
2246}
2247
2248/*
2249 * Get configurable pathname variables.
2250 */
2251#ifndef _SYS_SYSPROTO_H_
2252struct pathconf_args {
2253	char	*path;
2254	int	name;
2255};
2256#endif
2257int
2258pathconf(td, uap)
2259	struct thread *td;
2260	register struct pathconf_args /* {
2261		char *path;
2262		int name;
2263	} */ *uap;
2264{
2265	int error;
2266	struct nameidata nd;
2267	int vfslocked;
2268
2269	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ | MPSAFE, UIO_USERSPACE,
2270	    uap->path, td);
2271	if ((error = namei(&nd)) != 0)
2272		return (error);
2273	vfslocked = NDHASGIANT(&nd);
2274	NDFREE(&nd, NDF_ONLY_PNBUF);
2275
2276	/* If asynchronous I/O is available, it works for all files. */
2277	if (uap->name == _PC_ASYNC_IO)
2278		td->td_retval[0] = async_io_version;
2279	else
2280		error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval);
2281	vput(nd.ni_vp);
2282	VFS_UNLOCK_GIANT(vfslocked);
2283	return (error);
2284}
2285
2286/*
2287 * Return target name of a symbolic link.
2288 */
2289#ifndef _SYS_SYSPROTO_H_
2290struct readlink_args {
2291	char	*path;
2292	char	*buf;
2293	int	count;
2294};
2295#endif
2296int
2297readlink(td, uap)
2298	struct thread *td;
2299	register struct readlink_args /* {
2300		char *path;
2301		char *buf;
2302		int count;
2303	} */ *uap;
2304{
2305
2306	return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf,
2307	    UIO_USERSPACE, uap->count));
2308}
2309
2310int
2311kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf,
2312    enum uio_seg bufseg, int count)
2313{
2314	register struct vnode *vp;
2315	struct iovec aiov;
2316	struct uio auio;
2317	int error;
2318	struct nameidata nd;
2319	int vfslocked;
2320
2321	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ | MPSAFE,
2322	    pathseg, path, td);
2323	if ((error = namei(&nd)) != 0)
2324		return (error);
2325	NDFREE(&nd, NDF_ONLY_PNBUF);
2326	vfslocked = NDHASGIANT(&nd);
2327	vp = nd.ni_vp;
2328#ifdef MAC
2329	error = mac_check_vnode_readlink(td->td_ucred, vp);
2330	if (error) {
2331		vput(vp);
2332		VFS_UNLOCK_GIANT(vfslocked);
2333		return (error);
2334	}
2335#endif
2336	if (vp->v_type != VLNK)
2337		error = EINVAL;
2338	else {
2339		aiov.iov_base = buf;
2340		aiov.iov_len = count;
2341		auio.uio_iov = &aiov;
2342		auio.uio_iovcnt = 1;
2343		auio.uio_offset = 0;
2344		auio.uio_rw = UIO_READ;
2345		auio.uio_segflg = bufseg;
2346		auio.uio_td = td;
2347		auio.uio_resid = count;
2348		error = VOP_READLINK(vp, &auio, td->td_ucred);
2349	}
2350	vput(vp);
2351	VFS_UNLOCK_GIANT(vfslocked);
2352	td->td_retval[0] = count - auio.uio_resid;
2353	return (error);
2354}
2355
2356/*
2357 * Common implementation code for chflags() and fchflags().
2358 */
2359static int
2360setfflags(td, vp, flags)
2361	struct thread *td;
2362	struct vnode *vp;
2363	int flags;
2364{
2365	int error;
2366	struct mount *mp;
2367	struct vattr vattr;
2368
2369	/*
2370	 * Prevent non-root users from setting flags on devices.  When
2371	 * a device is reused, users can retain ownership of the device
2372	 * if they are allowed to set flags and programs assume that
2373	 * chown can't fail when done as root.
2374	 */
2375	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2376		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
2377		if (error)
2378			return (error);
2379	}
2380
2381	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2382		return (error);
2383	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2384	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2385	VATTR_NULL(&vattr);
2386	vattr.va_flags = flags;
2387#ifdef MAC
2388	error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags);
2389	if (error == 0)
2390#endif
2391		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2392	VOP_UNLOCK(vp, 0, td);
2393	vn_finished_write(mp);
2394	return (error);
2395}
2396
2397/*
2398 * Change flags of a file given a path name.
2399 */
2400#ifndef _SYS_SYSPROTO_H_
2401struct chflags_args {
2402	char	*path;
2403	int	flags;
2404};
2405#endif
2406int
2407chflags(td, uap)
2408	struct thread *td;
2409	register struct chflags_args /* {
2410		char *path;
2411		int flags;
2412	} */ *uap;
2413{
2414	int error;
2415	struct nameidata nd;
2416	int vfslocked;
2417
2418	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2419	if ((error = namei(&nd)) != 0)
2420		return (error);
2421	NDFREE(&nd, NDF_ONLY_PNBUF);
2422	vfslocked = NDHASGIANT(&nd);
2423	error = setfflags(td, nd.ni_vp, uap->flags);
2424	vrele(nd.ni_vp);
2425	VFS_UNLOCK_GIANT(vfslocked);
2426	return (error);
2427}
2428
2429/*
2430 * Same as chflags() but doesn't follow symlinks.
2431 */
2432int
2433lchflags(td, uap)
2434	struct thread *td;
2435	register struct lchflags_args /* {
2436		char *path;
2437		int flags;
2438	} */ *uap;
2439{
2440	int error;
2441	struct nameidata nd;
2442	int vfslocked;
2443
2444	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2445	if ((error = namei(&nd)) != 0)
2446		return (error);
2447	vfslocked = NDHASGIANT(&nd);
2448	NDFREE(&nd, NDF_ONLY_PNBUF);
2449	error = setfflags(td, nd.ni_vp, uap->flags);
2450	vrele(nd.ni_vp);
2451	VFS_UNLOCK_GIANT(vfslocked);
2452	return (error);
2453}
2454
2455/*
2456 * Change flags of a file given a file descriptor.
2457 */
2458#ifndef _SYS_SYSPROTO_H_
2459struct fchflags_args {
2460	int	fd;
2461	int	flags;
2462};
2463#endif
2464int
2465fchflags(td, uap)
2466	struct thread *td;
2467	register struct fchflags_args /* {
2468		int fd;
2469		int flags;
2470	} */ *uap;
2471{
2472	struct file *fp;
2473	int vfslocked;
2474	int error;
2475
2476	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2477		return (error);
2478	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2479	error = setfflags(td, fp->f_vnode, uap->flags);
2480	fdrop(fp, td);
2481	VFS_UNLOCK_GIANT(vfslocked);
2482	return (error);
2483}
2484
2485/*
2486 * Common implementation code for chmod(), lchmod() and fchmod().
2487 */
2488static int
2489setfmode(td, vp, mode)
2490	struct thread *td;
2491	struct vnode *vp;
2492	int mode;
2493{
2494	int error;
2495	struct mount *mp;
2496	struct vattr vattr;
2497
2498	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2499		return (error);
2500	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2501	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2502	VATTR_NULL(&vattr);
2503	vattr.va_mode = mode & ALLPERMS;
2504#ifdef MAC
2505	error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode);
2506	if (error == 0)
2507#endif
2508		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2509	VOP_UNLOCK(vp, 0, td);
2510	vn_finished_write(mp);
2511	return (error);
2512}
2513
2514/*
2515 * Change mode of a file given path name.
2516 */
2517#ifndef _SYS_SYSPROTO_H_
2518struct chmod_args {
2519	char	*path;
2520	int	mode;
2521};
2522#endif
2523int
2524chmod(td, uap)
2525	struct thread *td;
2526	register struct chmod_args /* {
2527		char *path;
2528		int mode;
2529	} */ *uap;
2530{
2531
2532	return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode));
2533}
2534
2535int
2536kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode)
2537{
2538	int error;
2539	struct nameidata nd;
2540	int vfslocked;
2541
2542	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2543	if ((error = namei(&nd)) != 0)
2544		return (error);
2545	vfslocked = NDHASGIANT(&nd);
2546	NDFREE(&nd, NDF_ONLY_PNBUF);
2547	error = setfmode(td, nd.ni_vp, mode);
2548	vrele(nd.ni_vp);
2549	VFS_UNLOCK_GIANT(vfslocked);
2550	return (error);
2551}
2552
2553/*
2554 * Change mode of a file given path name (don't follow links.)
2555 */
2556#ifndef _SYS_SYSPROTO_H_
2557struct lchmod_args {
2558	char	*path;
2559	int	mode;
2560};
2561#endif
2562int
2563lchmod(td, uap)
2564	struct thread *td;
2565	register struct lchmod_args /* {
2566		char *path;
2567		int mode;
2568	} */ *uap;
2569{
2570	int error;
2571	struct nameidata nd;
2572	int vfslocked;
2573
2574	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td);
2575	if ((error = namei(&nd)) != 0)
2576		return (error);
2577	vfslocked = NDHASGIANT(&nd);
2578	NDFREE(&nd, NDF_ONLY_PNBUF);
2579	error = setfmode(td, nd.ni_vp, uap->mode);
2580	vrele(nd.ni_vp);
2581	VFS_UNLOCK_GIANT(vfslocked);
2582	return (error);
2583}
2584
2585/*
2586 * Change mode of a file given a file descriptor.
2587 */
2588#ifndef _SYS_SYSPROTO_H_
2589struct fchmod_args {
2590	int	fd;
2591	int	mode;
2592};
2593#endif
2594int
2595fchmod(td, uap)
2596	struct thread *td;
2597	register struct fchmod_args /* {
2598		int fd;
2599		int mode;
2600	} */ *uap;
2601{
2602	struct file *fp;
2603	int vfslocked;
2604	int error;
2605
2606	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2607		return (error);
2608	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2609	error = setfmode(td, fp->f_vnode, uap->mode);
2610	fdrop(fp, td);
2611	VFS_UNLOCK_GIANT(vfslocked);
2612	return (error);
2613}
2614
2615/*
2616 * Common implementation for chown(), lchown(), and fchown()
2617 */
2618static int
2619setfown(td, vp, uid, gid)
2620	struct thread *td;
2621	struct vnode *vp;
2622	uid_t uid;
2623	gid_t gid;
2624{
2625	int error;
2626	struct mount *mp;
2627	struct vattr vattr;
2628
2629	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2630		return (error);
2631	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2632	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2633	VATTR_NULL(&vattr);
2634	vattr.va_uid = uid;
2635	vattr.va_gid = gid;
2636#ifdef MAC
2637	error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid,
2638	    vattr.va_gid);
2639	if (error == 0)
2640#endif
2641		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2642	VOP_UNLOCK(vp, 0, td);
2643	vn_finished_write(mp);
2644	return (error);
2645}
2646
2647/*
2648 * Set ownership given a path name.
2649 */
2650#ifndef _SYS_SYSPROTO_H_
2651struct chown_args {
2652	char	*path;
2653	int	uid;
2654	int	gid;
2655};
2656#endif
2657int
2658chown(td, uap)
2659	struct thread *td;
2660	register struct chown_args /* {
2661		char *path;
2662		int uid;
2663		int gid;
2664	} */ *uap;
2665{
2666
2667	return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2668}
2669
2670int
2671kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2672    int gid)
2673{
2674	int error;
2675	struct nameidata nd;
2676	int vfslocked;
2677
2678	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2679	if ((error = namei(&nd)) != 0)
2680		return (error);
2681	vfslocked = NDHASGIANT(&nd);
2682	NDFREE(&nd, NDF_ONLY_PNBUF);
2683	error = setfown(td, nd.ni_vp, uid, gid);
2684	vrele(nd.ni_vp);
2685	VFS_UNLOCK_GIANT(vfslocked);
2686	return (error);
2687}
2688
2689/*
2690 * Set ownership given a path name, do not cross symlinks.
2691 */
2692#ifndef _SYS_SYSPROTO_H_
2693struct lchown_args {
2694	char	*path;
2695	int	uid;
2696	int	gid;
2697};
2698#endif
2699int
2700lchown(td, uap)
2701	struct thread *td;
2702	register struct lchown_args /* {
2703		char *path;
2704		int uid;
2705		int gid;
2706	} */ *uap;
2707{
2708
2709	return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid));
2710}
2711
2712int
2713kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid,
2714    int gid)
2715{
2716	int error;
2717	struct nameidata nd;
2718	int vfslocked;
2719
2720	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2721	if ((error = namei(&nd)) != 0)
2722		return (error);
2723	vfslocked = NDHASGIANT(&nd);
2724	NDFREE(&nd, NDF_ONLY_PNBUF);
2725	error = setfown(td, nd.ni_vp, uid, gid);
2726	vrele(nd.ni_vp);
2727	VFS_UNLOCK_GIANT(vfslocked);
2728	return (error);
2729}
2730
2731/*
2732 * Set ownership given a file descriptor.
2733 */
2734#ifndef _SYS_SYSPROTO_H_
2735struct fchown_args {
2736	int	fd;
2737	int	uid;
2738	int	gid;
2739};
2740#endif
2741int
2742fchown(td, uap)
2743	struct thread *td;
2744	register struct fchown_args /* {
2745		int fd;
2746		int uid;
2747		int gid;
2748	} */ *uap;
2749{
2750	struct file *fp;
2751	int vfslocked;
2752	int error;
2753
2754	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
2755		return (error);
2756	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2757	error = setfown(td, fp->f_vnode, uap->uid, uap->gid);
2758	fdrop(fp, td);
2759	VFS_UNLOCK_GIANT(vfslocked);
2760	return (error);
2761}
2762
2763/*
2764 * Common implementation code for utimes(), lutimes(), and futimes().
2765 */
2766static int
2767getutimes(usrtvp, tvpseg, tsp)
2768	const struct timeval *usrtvp;
2769	enum uio_seg tvpseg;
2770	struct timespec *tsp;
2771{
2772	struct timeval tv[2];
2773	const struct timeval *tvp;
2774	int error;
2775
2776	if (usrtvp == NULL) {
2777		microtime(&tv[0]);
2778		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
2779		tsp[1] = tsp[0];
2780	} else {
2781		if (tvpseg == UIO_SYSSPACE) {
2782			tvp = usrtvp;
2783		} else {
2784			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
2785				return (error);
2786			tvp = tv;
2787		}
2788
2789		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
2790		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
2791	}
2792	return (0);
2793}
2794
2795/*
2796 * Common implementation code for utimes(), lutimes(), and futimes().
2797 */
2798static int
2799setutimes(td, vp, ts, numtimes, nullflag)
2800	struct thread *td;
2801	struct vnode *vp;
2802	const struct timespec *ts;
2803	int numtimes;
2804	int nullflag;
2805{
2806	int error, setbirthtime;
2807	struct mount *mp;
2808	struct vattr vattr;
2809
2810	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2811		return (error);
2812	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2813	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2814	setbirthtime = 0;
2815	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 &&
2816	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
2817		setbirthtime = 1;
2818	VATTR_NULL(&vattr);
2819	vattr.va_atime = ts[0];
2820	vattr.va_mtime = ts[1];
2821	if (setbirthtime)
2822		vattr.va_birthtime = ts[1];
2823	if (numtimes > 2)
2824		vattr.va_birthtime = ts[2];
2825	if (nullflag)
2826		vattr.va_vaflags |= VA_UTIMES_NULL;
2827#ifdef MAC
2828	error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime,
2829	    vattr.va_mtime);
2830#endif
2831	if (error == 0)
2832		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2833	VOP_UNLOCK(vp, 0, td);
2834	vn_finished_write(mp);
2835	return (error);
2836}
2837
2838/*
2839 * Set the access and modification times of a file.
2840 */
2841#ifndef _SYS_SYSPROTO_H_
2842struct utimes_args {
2843	char	*path;
2844	struct	timeval *tptr;
2845};
2846#endif
2847int
2848utimes(td, uap)
2849	struct thread *td;
2850	register struct utimes_args /* {
2851		char *path;
2852		struct timeval *tptr;
2853	} */ *uap;
2854{
2855
2856	return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2857	    UIO_USERSPACE));
2858}
2859
2860int
2861kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
2862    struct timeval *tptr, enum uio_seg tptrseg)
2863{
2864	struct timespec ts[2];
2865	int error;
2866	struct nameidata nd;
2867	int vfslocked;
2868
2869	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2870		return (error);
2871	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
2872	if ((error = namei(&nd)) != 0)
2873		return (error);
2874	vfslocked = NDHASGIANT(&nd);
2875	NDFREE(&nd, NDF_ONLY_PNBUF);
2876	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2877	vrele(nd.ni_vp);
2878	VFS_UNLOCK_GIANT(vfslocked);
2879	return (error);
2880}
2881
2882/*
2883 * Set the access and modification times of a file.
2884 */
2885#ifndef _SYS_SYSPROTO_H_
2886struct lutimes_args {
2887	char	*path;
2888	struct	timeval *tptr;
2889};
2890#endif
2891int
2892lutimes(td, uap)
2893	struct thread *td;
2894	register struct lutimes_args /* {
2895		char *path;
2896		struct timeval *tptr;
2897	} */ *uap;
2898{
2899
2900	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
2901	    UIO_USERSPACE));
2902}
2903
2904int
2905kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg,
2906    struct timeval *tptr, enum uio_seg tptrseg)
2907{
2908	struct timespec ts[2];
2909	int error;
2910	struct nameidata nd;
2911	int vfslocked;
2912
2913	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2914		return (error);
2915	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td);
2916	if ((error = namei(&nd)) != 0)
2917		return (error);
2918	vfslocked = NDHASGIANT(&nd);
2919	NDFREE(&nd, NDF_ONLY_PNBUF);
2920	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
2921	vrele(nd.ni_vp);
2922	VFS_UNLOCK_GIANT(vfslocked);
2923	return (error);
2924}
2925
2926/*
2927 * Set the access and modification times of a file.
2928 */
2929#ifndef _SYS_SYSPROTO_H_
2930struct futimes_args {
2931	int	fd;
2932	struct	timeval *tptr;
2933};
2934#endif
2935int
2936futimes(td, uap)
2937	struct thread *td;
2938	register struct futimes_args /* {
2939		int  fd;
2940		struct timeval *tptr;
2941	} */ *uap;
2942{
2943
2944	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
2945}
2946
2947int
2948kern_futimes(struct thread *td, int fd, struct timeval *tptr,
2949    enum uio_seg tptrseg)
2950{
2951	struct timespec ts[2];
2952	struct file *fp;
2953	int vfslocked;
2954	int error;
2955
2956	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
2957		return (error);
2958	if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0)
2959		return (error);
2960	vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount);
2961	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
2962	fdrop(fp, td);
2963	VFS_UNLOCK_GIANT(vfslocked);
2964	return (error);
2965}
2966
2967/*
2968 * Truncate a file given its path name.
2969 */
2970#ifndef _SYS_SYSPROTO_H_
2971struct truncate_args {
2972	char	*path;
2973	int	pad;
2974	off_t	length;
2975};
2976#endif
2977int
2978truncate(td, uap)
2979	struct thread *td;
2980	register struct truncate_args /* {
2981		char *path;
2982		int pad;
2983		off_t length;
2984	} */ *uap;
2985{
2986
2987	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
2988}
2989
2990int
2991kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length)
2992{
2993	struct mount *mp;
2994	struct vnode *vp;
2995	struct vattr vattr;
2996	int error;
2997	struct nameidata nd;
2998	int vfslocked;
2999
3000	if (length < 0)
3001		return(EINVAL);
3002	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td);
3003	if ((error = namei(&nd)) != 0)
3004		return (error);
3005	vfslocked = NDHASGIANT(&nd);
3006	vp = nd.ni_vp;
3007	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3008		vrele(vp);
3009		VFS_UNLOCK_GIANT(vfslocked);
3010		return (error);
3011	}
3012	NDFREE(&nd, NDF_ONLY_PNBUF);
3013	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3014	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3015	if (vp->v_type == VDIR)
3016		error = EISDIR;
3017#ifdef MAC
3018	else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) {
3019	}
3020#endif
3021	else if ((error = vn_writechk(vp)) == 0 &&
3022	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3023		VATTR_NULL(&vattr);
3024		vattr.va_size = length;
3025		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3026	}
3027	vput(vp);
3028	vn_finished_write(mp);
3029	VFS_UNLOCK_GIANT(vfslocked);
3030	return (error);
3031}
3032
3033/*
3034 * Truncate a file given a file descriptor.
3035 */
3036#ifndef _SYS_SYSPROTO_H_
3037struct ftruncate_args {
3038	int	fd;
3039	int	pad;
3040	off_t	length;
3041};
3042#endif
3043int
3044ftruncate(td, uap)
3045	struct thread *td;
3046	register struct ftruncate_args /* {
3047		int fd;
3048		int pad;
3049		off_t length;
3050	} */ *uap;
3051{
3052	struct mount *mp;
3053	struct vattr vattr;
3054	struct vnode *vp;
3055	struct file *fp;
3056	int vfslocked;
3057	int error;
3058
3059	if (uap->length < 0)
3060		return(EINVAL);
3061	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3062		return (error);
3063	if ((fp->f_flag & FWRITE) == 0) {
3064		fdrop(fp, td);
3065		return (EINVAL);
3066	}
3067	vp = fp->f_vnode;
3068	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3069	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3070		goto drop;
3071	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3072	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3073	if (vp->v_type == VDIR)
3074		error = EISDIR;
3075#ifdef MAC
3076	else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred,
3077	    vp))) {
3078	}
3079#endif
3080	else if ((error = vn_writechk(vp)) == 0) {
3081		VATTR_NULL(&vattr);
3082		vattr.va_size = uap->length;
3083		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3084	}
3085	VOP_UNLOCK(vp, 0, td);
3086	vn_finished_write(mp);
3087drop:
3088	VFS_UNLOCK_GIANT(vfslocked);
3089	fdrop(fp, td);
3090	return (error);
3091}
3092
3093#if defined(COMPAT_43)
3094/*
3095 * Truncate a file given its path name.
3096 */
3097#ifndef _SYS_SYSPROTO_H_
3098struct otruncate_args {
3099	char	*path;
3100	long	length;
3101};
3102#endif
3103int
3104otruncate(td, uap)
3105	struct thread *td;
3106	register struct otruncate_args /* {
3107		char *path;
3108		long length;
3109	} */ *uap;
3110{
3111	struct truncate_args /* {
3112		char *path;
3113		int pad;
3114		off_t length;
3115	} */ nuap;
3116
3117	nuap.path = uap->path;
3118	nuap.length = uap->length;
3119	return (truncate(td, &nuap));
3120}
3121
3122/*
3123 * Truncate a file given a file descriptor.
3124 */
3125#ifndef _SYS_SYSPROTO_H_
3126struct oftruncate_args {
3127	int	fd;
3128	long	length;
3129};
3130#endif
3131int
3132oftruncate(td, uap)
3133	struct thread *td;
3134	register struct oftruncate_args /* {
3135		int fd;
3136		long length;
3137	} */ *uap;
3138{
3139	struct ftruncate_args /* {
3140		int fd;
3141		int pad;
3142		off_t length;
3143	} */ nuap;
3144
3145	nuap.fd = uap->fd;
3146	nuap.length = uap->length;
3147	return (ftruncate(td, &nuap));
3148}
3149#endif /* COMPAT_43 */
3150
3151/*
3152 * Sync an open file.
3153 */
3154#ifndef _SYS_SYSPROTO_H_
3155struct fsync_args {
3156	int	fd;
3157};
3158#endif
3159int
3160fsync(td, uap)
3161	struct thread *td;
3162	struct fsync_args /* {
3163		int fd;
3164	} */ *uap;
3165{
3166	struct vnode *vp;
3167	struct mount *mp;
3168	struct file *fp;
3169	int vfslocked;
3170	int error;
3171
3172	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3173		return (error);
3174	vp = fp->f_vnode;
3175	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3176	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3177		goto drop;
3178	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3179	if (vp->v_object != NULL) {
3180		VM_OBJECT_LOCK(vp->v_object);
3181		vm_object_page_clean(vp->v_object, 0, 0, 0);
3182		VM_OBJECT_UNLOCK(vp->v_object);
3183	}
3184	error = VOP_FSYNC(vp, MNT_WAIT, td);
3185	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)
3186	    && softdep_fsync_hook != NULL)
3187		error = (*softdep_fsync_hook)(vp);
3188
3189	VOP_UNLOCK(vp, 0, td);
3190	vn_finished_write(mp);
3191drop:
3192	VFS_UNLOCK_GIANT(vfslocked);
3193	fdrop(fp, td);
3194	return (error);
3195}
3196
3197/*
3198 * Rename files.  Source and destination must either both be directories,
3199 * or both not be directories.  If target is a directory, it must be empty.
3200 */
3201#ifndef _SYS_SYSPROTO_H_
3202struct rename_args {
3203	char	*from;
3204	char	*to;
3205};
3206#endif
3207int
3208rename(td, uap)
3209	struct thread *td;
3210	register struct rename_args /* {
3211		char *from;
3212		char *to;
3213	} */ *uap;
3214{
3215
3216	return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE));
3217}
3218
3219int
3220kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg)
3221{
3222	struct mount *mp = NULL;
3223	struct vnode *tvp, *fvp, *tdvp;
3224	struct nameidata fromnd, tond;
3225	int tvfslocked;
3226	int fvfslocked;
3227	int error;
3228
3229	bwillwrite();
3230#ifdef MAC
3231	NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE,
3232	    pathseg, from, td);
3233#else
3234	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE,
3235	    pathseg, from, td);
3236#endif
3237	if ((error = namei(&fromnd)) != 0)
3238		return (error);
3239	fvfslocked = NDHASGIANT(&fromnd);
3240	tvfslocked = 0;
3241#ifdef MAC
3242	error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp,
3243	    fromnd.ni_vp, &fromnd.ni_cnd);
3244	VOP_UNLOCK(fromnd.ni_dvp, 0, td);
3245	VOP_UNLOCK(fromnd.ni_vp, 0, td);
3246#endif
3247	fvp = fromnd.ni_vp;
3248	if (error == 0)
3249		error = vn_start_write(fvp, &mp, V_WAIT | PCATCH);
3250	if (error != 0) {
3251		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3252		vrele(fromnd.ni_dvp);
3253		vrele(fvp);
3254		goto out1;
3255	}
3256	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3257	    NOOBJ | MPSAFE, pathseg, to, td);
3258	if (fromnd.ni_vp->v_type == VDIR)
3259		tond.ni_cnd.cn_flags |= WILLBEDIR;
3260	if ((error = namei(&tond)) != 0) {
3261		/* Translate error code for rename("dir1", "dir2/."). */
3262		if (error == EISDIR && fvp->v_type == VDIR)
3263			error = EINVAL;
3264		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3265		vrele(fromnd.ni_dvp);
3266		vrele(fvp);
3267		goto out1;
3268	}
3269	tvfslocked = NDHASGIANT(&fromnd);
3270	tdvp = tond.ni_dvp;
3271	tvp = tond.ni_vp;
3272	if (tvp != NULL) {
3273		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3274			error = ENOTDIR;
3275			goto out;
3276		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3277			error = EISDIR;
3278			goto out;
3279		}
3280	}
3281	if (fvp == tdvp)
3282		error = EINVAL;
3283	/*
3284	 * If the source is the same as the destination (that is, if they
3285	 * are links to the same vnode), then there is nothing to do.
3286	 */
3287	if (fvp == tvp)
3288		error = -1;
3289#ifdef MAC
3290	else
3291		error = mac_check_vnode_rename_to(td->td_ucred, tdvp,
3292		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
3293#endif
3294out:
3295	if (!error) {
3296		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3297		if (fromnd.ni_dvp != tdvp) {
3298			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3299		}
3300		if (tvp) {
3301			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3302		}
3303		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3304				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3305		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3306		NDFREE(&tond, NDF_ONLY_PNBUF);
3307	} else {
3308		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3309		NDFREE(&tond, NDF_ONLY_PNBUF);
3310		if (tdvp == tvp)
3311			vrele(tdvp);
3312		else
3313			vput(tdvp);
3314		if (tvp)
3315			vput(tvp);
3316		vrele(fromnd.ni_dvp);
3317		vrele(fvp);
3318	}
3319	vrele(tond.ni_startdir);
3320	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3321	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3322	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3323	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3324out1:
3325	vn_finished_write(mp);
3326	if (fromnd.ni_startdir)
3327		vrele(fromnd.ni_startdir);
3328	VFS_UNLOCK_GIANT(fvfslocked);
3329	VFS_UNLOCK_GIANT(tvfslocked);
3330	if (error == -1)
3331		return (0);
3332	return (error);
3333}
3334
3335/*
3336 * Make a directory file.
3337 */
3338#ifndef _SYS_SYSPROTO_H_
3339struct mkdir_args {
3340	char	*path;
3341	int	mode;
3342};
3343#endif
3344int
3345mkdir(td, uap)
3346	struct thread *td;
3347	register struct mkdir_args /* {
3348		char *path;
3349		int mode;
3350	} */ *uap;
3351{
3352
3353	return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode));
3354}
3355
3356int
3357kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode)
3358{
3359	struct mount *mp;
3360	struct vnode *vp;
3361	struct vattr vattr;
3362	int error;
3363	struct nameidata nd;
3364	int vfslocked;
3365
3366restart:
3367	bwillwrite();
3368	NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td);
3369	nd.ni_cnd.cn_flags |= WILLBEDIR;
3370	if ((error = namei(&nd)) != 0)
3371		return (error);
3372	vfslocked = NDHASGIANT(&nd);
3373	vp = nd.ni_vp;
3374	if (vp != NULL) {
3375		NDFREE(&nd, NDF_ONLY_PNBUF);
3376		vrele(vp);
3377		/*
3378		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
3379		 * the strange behaviour of leaving the vnode unlocked
3380		 * if the target is the same vnode as the parent.
3381		 */
3382		if (vp == nd.ni_dvp)
3383			vrele(nd.ni_dvp);
3384		else
3385			vput(nd.ni_dvp);
3386		VFS_UNLOCK_GIANT(vfslocked);
3387		return (EEXIST);
3388	}
3389	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3390		NDFREE(&nd, NDF_ONLY_PNBUF);
3391		vput(nd.ni_dvp);
3392		VFS_UNLOCK_GIANT(vfslocked);
3393		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3394			return (error);
3395		goto restart;
3396	}
3397	VATTR_NULL(&vattr);
3398	vattr.va_type = VDIR;
3399	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3400	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3401	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3402#ifdef MAC
3403	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
3404	    &vattr);
3405	if (error)
3406		goto out;
3407#endif
3408	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3409	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3410#ifdef MAC
3411out:
3412#endif
3413	NDFREE(&nd, NDF_ONLY_PNBUF);
3414	vput(nd.ni_dvp);
3415	if (!error)
3416		vput(nd.ni_vp);
3417	vn_finished_write(mp);
3418	VFS_UNLOCK_GIANT(vfslocked);
3419	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3420	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3421	return (error);
3422}
3423
3424/*
3425 * Remove a directory file.
3426 */
3427#ifndef _SYS_SYSPROTO_H_
3428struct rmdir_args {
3429	char	*path;
3430};
3431#endif
3432int
3433rmdir(td, uap)
3434	struct thread *td;
3435	struct rmdir_args /* {
3436		char *path;
3437	} */ *uap;
3438{
3439
3440	return (kern_rmdir(td, uap->path, UIO_USERSPACE));
3441}
3442
3443int
3444kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg)
3445{
3446	struct mount *mp;
3447	struct vnode *vp;
3448	int error;
3449	struct nameidata nd;
3450	int vfslocked;
3451
3452restart:
3453	bwillwrite();
3454	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td);
3455	if ((error = namei(&nd)) != 0)
3456		return (error);
3457	vfslocked = NDHASGIANT(&nd);
3458	vp = nd.ni_vp;
3459	if (vp->v_type != VDIR) {
3460		error = ENOTDIR;
3461		goto out;
3462	}
3463	/*
3464	 * No rmdir "." please.
3465	 */
3466	if (nd.ni_dvp == vp) {
3467		error = EINVAL;
3468		goto out;
3469	}
3470	/*
3471	 * The root of a mounted filesystem cannot be deleted.
3472	 */
3473	if (vp->v_vflag & VV_ROOT) {
3474		error = EBUSY;
3475		goto out;
3476	}
3477#ifdef MAC
3478	error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp,
3479	    &nd.ni_cnd);
3480	if (error)
3481		goto out;
3482#endif
3483	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3484		NDFREE(&nd, NDF_ONLY_PNBUF);
3485		if (nd.ni_dvp == vp)
3486			vrele(nd.ni_dvp);
3487		else
3488			vput(nd.ni_dvp);
3489		vput(vp);
3490		VFS_UNLOCK_GIANT(vfslocked);
3491		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3492			return (error);
3493		goto restart;
3494	}
3495	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3496	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3497	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3498	vn_finished_write(mp);
3499out:
3500	NDFREE(&nd, NDF_ONLY_PNBUF);
3501	if (nd.ni_dvp == vp)
3502		vrele(nd.ni_dvp);
3503	else
3504		vput(nd.ni_dvp);
3505	vput(vp);
3506	VFS_UNLOCK_GIANT(vfslocked);
3507	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3508	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3509	return (error);
3510}
3511
3512#ifdef COMPAT_43
3513/*
3514 * Read a block of directory entries in a filesystem independent format.
3515 */
3516#ifndef _SYS_SYSPROTO_H_
3517struct ogetdirentries_args {
3518	int	fd;
3519	char	*buf;
3520	u_int	count;
3521	long	*basep;
3522};
3523#endif
3524int
3525ogetdirentries(td, uap)
3526	struct thread *td;
3527	register struct ogetdirentries_args /* {
3528		int fd;
3529		char *buf;
3530		u_int count;
3531		long *basep;
3532	} */ *uap;
3533{
3534	struct vnode *vp;
3535	struct file *fp;
3536	struct uio auio, kuio;
3537	struct iovec aiov, kiov;
3538	struct dirent *dp, *edp;
3539	caddr_t dirbuf;
3540	int error, eofflag, readcnt;
3541	long loff;
3542
3543	/* XXX arbitrary sanity limit on `count'. */
3544	if (uap->count > 64 * 1024)
3545		return (EINVAL);
3546	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3547		return (error);
3548	if ((fp->f_flag & FREAD) == 0) {
3549		fdrop(fp, td);
3550		return (EBADF);
3551	}
3552	vp = fp->f_vnode;
3553unionread:
3554	if (vp->v_type != VDIR) {
3555		fdrop(fp, td);
3556		return (EINVAL);
3557	}
3558	aiov.iov_base = uap->buf;
3559	aiov.iov_len = uap->count;
3560	auio.uio_iov = &aiov;
3561	auio.uio_iovcnt = 1;
3562	auio.uio_rw = UIO_READ;
3563	auio.uio_segflg = UIO_USERSPACE;
3564	auio.uio_td = td;
3565	auio.uio_resid = uap->count;
3566	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3567	loff = auio.uio_offset = fp->f_offset;
3568#ifdef MAC
3569	error = mac_check_vnode_readdir(td->td_ucred, vp);
3570	if (error) {
3571		VOP_UNLOCK(vp, 0, td);
3572		fdrop(fp, td);
3573		return (error);
3574	}
3575#endif
3576#	if (BYTE_ORDER != LITTLE_ENDIAN)
3577		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
3578			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
3579			    NULL, NULL);
3580			fp->f_offset = auio.uio_offset;
3581		} else
3582#	endif
3583	{
3584		kuio = auio;
3585		kuio.uio_iov = &kiov;
3586		kuio.uio_segflg = UIO_SYSSPACE;
3587		kiov.iov_len = uap->count;
3588		MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
3589		kiov.iov_base = dirbuf;
3590		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
3591			    NULL, NULL);
3592		fp->f_offset = kuio.uio_offset;
3593		if (error == 0) {
3594			readcnt = uap->count - kuio.uio_resid;
3595			edp = (struct dirent *)&dirbuf[readcnt];
3596			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
3597#				if (BYTE_ORDER == LITTLE_ENDIAN)
3598					/*
3599					 * The expected low byte of
3600					 * dp->d_namlen is our dp->d_type.
3601					 * The high MBZ byte of dp->d_namlen
3602					 * is our dp->d_namlen.
3603					 */
3604					dp->d_type = dp->d_namlen;
3605					dp->d_namlen = 0;
3606#				else
3607					/*
3608					 * The dp->d_type is the high byte
3609					 * of the expected dp->d_namlen,
3610					 * so must be zero'ed.
3611					 */
3612					dp->d_type = 0;
3613#				endif
3614				if (dp->d_reclen > 0) {
3615					dp = (struct dirent *)
3616					    ((char *)dp + dp->d_reclen);
3617				} else {
3618					error = EIO;
3619					break;
3620				}
3621			}
3622			if (dp >= edp)
3623				error = uiomove(dirbuf, readcnt, &auio);
3624		}
3625		FREE(dirbuf, M_TEMP);
3626	}
3627	VOP_UNLOCK(vp, 0, td);
3628	if (error) {
3629		fdrop(fp, td);
3630		return (error);
3631	}
3632	if (uap->count == auio.uio_resid) {
3633		if (union_dircheckp) {
3634			error = union_dircheckp(td, &vp, fp);
3635			if (error == -1)
3636				goto unionread;
3637			if (error) {
3638				fdrop(fp, td);
3639				return (error);
3640			}
3641		}
3642		/*
3643		 * XXX We could delay dropping the lock above but
3644		 * union_dircheckp complicates things.
3645		 */
3646		vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td);
3647		if ((vp->v_vflag & VV_ROOT) &&
3648		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3649			struct vnode *tvp = vp;
3650			vp = vp->v_mount->mnt_vnodecovered;
3651			VREF(vp);
3652			fp->f_vnode = vp;
3653			fp->f_data = vp;
3654			fp->f_offset = 0;
3655			vput(tvp);
3656			goto unionread;
3657		}
3658		VOP_UNLOCK(vp, 0, td);
3659	}
3660	error = copyout(&loff, uap->basep, sizeof(long));
3661	fdrop(fp, td);
3662	td->td_retval[0] = uap->count - auio.uio_resid;
3663	return (error);
3664}
3665#endif /* COMPAT_43 */
3666
3667/*
3668 * Read a block of directory entries in a filesystem independent format.
3669 */
3670#ifndef _SYS_SYSPROTO_H_
3671struct getdirentries_args {
3672	int	fd;
3673	char	*buf;
3674	u_int	count;
3675	long	*basep;
3676};
3677#endif
3678int
3679getdirentries(td, uap)
3680	struct thread *td;
3681	register struct getdirentries_args /* {
3682		int fd;
3683		char *buf;
3684		u_int count;
3685		long *basep;
3686	} */ *uap;
3687{
3688	struct vnode *vp;
3689	struct file *fp;
3690	struct uio auio;
3691	struct iovec aiov;
3692	int vfslocked;
3693	long loff;
3694	int error, eofflag;
3695
3696	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
3697		return (error);
3698	if ((fp->f_flag & FREAD) == 0) {
3699		fdrop(fp, td);
3700		return (EBADF);
3701	}
3702	vp = fp->f_vnode;
3703unionread:
3704	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
3705	if (vp->v_type != VDIR) {
3706		error = EINVAL;
3707		goto fail;
3708	}
3709	aiov.iov_base = uap->buf;
3710	aiov.iov_len = uap->count;
3711	auio.uio_iov = &aiov;
3712	auio.uio_iovcnt = 1;
3713	auio.uio_rw = UIO_READ;
3714	auio.uio_segflg = UIO_USERSPACE;
3715	auio.uio_td = td;
3716	auio.uio_resid = uap->count;
3717	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3718	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3719	loff = auio.uio_offset = fp->f_offset;
3720#ifdef MAC
3721	error = mac_check_vnode_readdir(td->td_ucred, vp);
3722	if (error == 0)
3723#endif
3724		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
3725		    NULL);
3726	fp->f_offset = auio.uio_offset;
3727	VOP_UNLOCK(vp, 0, td);
3728	if (error)
3729		goto fail;
3730	if (uap->count == auio.uio_resid) {
3731		if (union_dircheckp) {
3732			error = union_dircheckp(td, &vp, fp);
3733			if (error == -1) {
3734				VFS_UNLOCK_GIANT(vfslocked);
3735				goto unionread;
3736			}
3737			if (error)
3738				goto fail;
3739		}
3740		/*
3741		 * XXX We could delay dropping the lock above but
3742		 * union_dircheckp complicates things.
3743		 */
3744		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3745		if ((vp->v_vflag & VV_ROOT) &&
3746		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3747			struct vnode *tvp = vp;
3748			vp = vp->v_mount->mnt_vnodecovered;
3749			VREF(vp);
3750			fp->f_vnode = vp;
3751			fp->f_data = vp;
3752			fp->f_offset = 0;
3753			vput(tvp);
3754			VFS_UNLOCK_GIANT(vfslocked);
3755			goto unionread;
3756		}
3757		VOP_UNLOCK(vp, 0, td);
3758	}
3759	if (uap->basep != NULL) {
3760		error = copyout(&loff, uap->basep, sizeof(long));
3761	}
3762	td->td_retval[0] = uap->count - auio.uio_resid;
3763fail:
3764	VFS_UNLOCK_GIANT(vfslocked);
3765	fdrop(fp, td);
3766	return (error);
3767}
3768#ifndef _SYS_SYSPROTO_H_
3769struct getdents_args {
3770	int fd;
3771	char *buf;
3772	size_t count;
3773};
3774#endif
3775int
3776getdents(td, uap)
3777	struct thread *td;
3778	register struct getdents_args /* {
3779		int fd;
3780		char *buf;
3781		u_int count;
3782	} */ *uap;
3783{
3784	struct getdirentries_args ap;
3785	ap.fd = uap->fd;
3786	ap.buf = uap->buf;
3787	ap.count = uap->count;
3788	ap.basep = NULL;
3789	return (getdirentries(td, &ap));
3790}
3791
3792/*
3793 * Set the mode mask for creation of filesystem nodes.
3794 *
3795 * MP SAFE
3796 */
3797#ifndef _SYS_SYSPROTO_H_
3798struct umask_args {
3799	int	newmask;
3800};
3801#endif
3802int
3803umask(td, uap)
3804	struct thread *td;
3805	struct umask_args /* {
3806		int newmask;
3807	} */ *uap;
3808{
3809	register struct filedesc *fdp;
3810
3811	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
3812	fdp = td->td_proc->p_fd;
3813	td->td_retval[0] = fdp->fd_cmask;
3814	fdp->fd_cmask = uap->newmask & ALLPERMS;
3815	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
3816	return (0);
3817}
3818
3819/*
3820 * Void all references to file by ripping underlying filesystem
3821 * away from vnode.
3822 */
3823#ifndef _SYS_SYSPROTO_H_
3824struct revoke_args {
3825	char	*path;
3826};
3827#endif
3828int
3829revoke(td, uap)
3830	struct thread *td;
3831	register struct revoke_args /* {
3832		char *path;
3833	} */ *uap;
3834{
3835	struct vnode *vp;
3836	struct vattr vattr;
3837	int error;
3838	struct nameidata nd;
3839	int vfslocked;
3840
3841	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE,
3842	    uap->path, td);
3843	if ((error = namei(&nd)) != 0)
3844		return (error);
3845	vfslocked = NDHASGIANT(&nd);
3846	vp = nd.ni_vp;
3847	NDFREE(&nd, NDF_ONLY_PNBUF);
3848	if (vp->v_type != VCHR) {
3849		error = EINVAL;
3850		goto putout;
3851	}
3852#ifdef MAC
3853	error = mac_check_vnode_revoke(td->td_ucred, vp);
3854	if (error)
3855		goto putout;
3856#endif
3857	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
3858	if (error)
3859		goto putout;
3860	VOP_UNLOCK(vp, 0, td);
3861	if (td->td_ucred->cr_uid != vattr.va_uid) {
3862		error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL);
3863		if (error)
3864			goto relout;
3865	}
3866	if (vcount(vp) > 1)
3867		VOP_REVOKE(vp, REVOKEALL);
3868relout:
3869	vrele(vp);
3870	VFS_UNLOCK_GIANT(vfslocked);
3871	return (error);
3872putout:
3873	vput(vp);
3874	VFS_UNLOCK_GIANT(vfslocked);
3875	return (error);
3876}
3877
3878/*
3879 * Convert a user file descriptor to a kernel file entry.
3880 * A reference on the file entry is held upon returning.
3881 */
3882int
3883getvnode(fdp, fd, fpp)
3884	struct filedesc *fdp;
3885	int fd;
3886	struct file **fpp;
3887{
3888	int error;
3889	struct file *fp;
3890
3891	fp = NULL;
3892	if (fdp == NULL)
3893		error = EBADF;
3894	else {
3895		FILEDESC_LOCK(fdp);
3896		if ((u_int)fd >= fdp->fd_nfiles ||
3897		    (fp = fdp->fd_ofiles[fd]) == NULL)
3898			error = EBADF;
3899		else if (fp->f_vnode == NULL) {
3900			fp = NULL;
3901			error = EINVAL;
3902		} else {
3903			fhold(fp);
3904			error = 0;
3905		}
3906		FILEDESC_UNLOCK(fdp);
3907	}
3908	*fpp = fp;
3909	return (error);
3910}
3911
3912/*
3913 * Get (NFS) file handle
3914 */
3915#ifndef _SYS_SYSPROTO_H_
3916struct lgetfh_args {
3917	char	*fname;
3918	fhandle_t *fhp;
3919};
3920#endif
3921int
3922lgetfh(td, uap)
3923	struct thread *td;
3924	register struct lgetfh_args *uap;
3925{
3926	struct nameidata nd;
3927	fhandle_t fh;
3928	register struct vnode *vp;
3929	int vfslocked;
3930	int error;
3931
3932	error = suser(td);
3933	if (error)
3934		return (error);
3935	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE,
3936	    UIO_USERSPACE, uap->fname, td);
3937	error = namei(&nd);
3938	if (error)
3939		return (error);
3940	vfslocked = NDHASGIANT(&nd);
3941	NDFREE(&nd, NDF_ONLY_PNBUF);
3942	vp = nd.ni_vp;
3943	bzero(&fh, sizeof(fh));
3944	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3945	error = VFS_VPTOFH(vp, &fh.fh_fid);
3946	vput(vp);
3947	VFS_UNLOCK_GIANT(vfslocked);
3948	if (error)
3949		return (error);
3950	error = copyout(&fh, uap->fhp, sizeof (fh));
3951	return (error);
3952}
3953
3954#ifndef _SYS_SYSPROTO_H_
3955struct getfh_args {
3956	char	*fname;
3957	fhandle_t *fhp;
3958};
3959#endif
3960int
3961getfh(td, uap)
3962	struct thread *td;
3963	register struct getfh_args *uap;
3964{
3965	struct nameidata nd;
3966	fhandle_t fh;
3967	register struct vnode *vp;
3968	int vfslocked;
3969	int error;
3970
3971	error = suser(td);
3972	if (error)
3973		return (error);
3974	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE,
3975	    UIO_USERSPACE, uap->fname, td);
3976	error = namei(&nd);
3977	if (error)
3978		return (error);
3979	vfslocked = NDHASGIANT(&nd);
3980	NDFREE(&nd, NDF_ONLY_PNBUF);
3981	vp = nd.ni_vp;
3982	bzero(&fh, sizeof(fh));
3983	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
3984	error = VFS_VPTOFH(vp, &fh.fh_fid);
3985	vput(vp);
3986	VFS_UNLOCK_GIANT(vfslocked);
3987	if (error)
3988		return (error);
3989	error = copyout(&fh, uap->fhp, sizeof (fh));
3990	return (error);
3991}
3992
3993/*
3994 * syscall for the rpc.lockd to use to translate a NFS file handle into
3995 * an open descriptor.
3996 *
3997 * warning: do not remove the suser() call or this becomes one giant
3998 * security hole.
3999 */
4000#ifndef _SYS_SYSPROTO_H_
4001struct fhopen_args {
4002	const struct fhandle *u_fhp;
4003	int flags;
4004};
4005#endif
4006int
4007fhopen(td, uap)
4008	struct thread *td;
4009	struct fhopen_args /* {
4010		const struct fhandle *u_fhp;
4011		int flags;
4012	} */ *uap;
4013{
4014	struct proc *p = td->td_proc;
4015	struct mount *mp;
4016	struct vnode *vp;
4017	struct fhandle fhp;
4018	struct vattr vat;
4019	struct vattr *vap = &vat;
4020	struct flock lf;
4021	struct file *fp;
4022	register struct filedesc *fdp = p->p_fd;
4023	int fmode, mode, error, type;
4024	struct file *nfp;
4025	int indx;
4026
4027	error = suser(td);
4028	if (error)
4029		return (error);
4030	fmode = FFLAGS(uap->flags);
4031	/* why not allow a non-read/write open for our lockd? */
4032	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4033		return (EINVAL);
4034	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
4035	if (error)
4036		return(error);
4037	/* find the mount point */
4038	mp = vfs_getvfs(&fhp.fh_fsid);
4039	if (mp == NULL)
4040		return (ESTALE);
4041	/* now give me my vnode, it gets returned to me locked */
4042	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4043	if (error)
4044		return (error);
4045	/*
4046	 * from now on we have to make sure not
4047	 * to forget about the vnode
4048	 * any error that causes an abort must vput(vp)
4049	 * just set error = err and 'goto bad;'.
4050	 */
4051
4052	/*
4053	 * from vn_open
4054	 */
4055	if (vp->v_type == VLNK) {
4056		error = EMLINK;
4057		goto bad;
4058	}
4059	if (vp->v_type == VSOCK) {
4060		error = EOPNOTSUPP;
4061		goto bad;
4062	}
4063	mode = 0;
4064	if (fmode & (FWRITE | O_TRUNC)) {
4065		if (vp->v_type == VDIR) {
4066			error = EISDIR;
4067			goto bad;
4068		}
4069		error = vn_writechk(vp);
4070		if (error)
4071			goto bad;
4072		mode |= VWRITE;
4073	}
4074	if (fmode & FREAD)
4075		mode |= VREAD;
4076	if (fmode & O_APPEND)
4077		mode |= VAPPEND;
4078#ifdef MAC
4079	error = mac_check_vnode_open(td->td_ucred, vp, mode);
4080	if (error)
4081		goto bad;
4082#endif
4083	if (mode) {
4084		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4085		if (error)
4086			goto bad;
4087	}
4088	if (fmode & O_TRUNC) {
4089		VOP_UNLOCK(vp, 0, td);				/* XXX */
4090		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4091			vrele(vp);
4092			return (error);
4093		}
4094		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4095		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4096#ifdef MAC
4097		/*
4098		 * We don't yet have fp->f_cred, so use td->td_ucred, which
4099		 * should be right.
4100		 */
4101		error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp);
4102		if (error == 0) {
4103#endif
4104			VATTR_NULL(vap);
4105			vap->va_size = 0;
4106			error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4107#ifdef MAC
4108		}
4109#endif
4110		vn_finished_write(mp);
4111		if (error)
4112			goto bad;
4113	}
4114	error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1);
4115	if (error)
4116		goto bad;
4117
4118	if (fmode & FWRITE)
4119		vp->v_writecount++;
4120
4121	/*
4122	 * end of vn_open code
4123	 */
4124
4125	if ((error = falloc(td, &nfp, &indx)) != 0) {
4126		if (fmode & FWRITE)
4127			vp->v_writecount--;
4128		goto bad;
4129	}
4130	/* An extra reference on `nfp' has been held for us by falloc(). */
4131	fp = nfp;
4132
4133	nfp->f_vnode = vp;
4134	nfp->f_data = vp;
4135	nfp->f_flag = fmode & FMASK;
4136	nfp->f_ops = &vnops;
4137	nfp->f_type = DTYPE_VNODE;
4138	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4139		lf.l_whence = SEEK_SET;
4140		lf.l_start = 0;
4141		lf.l_len = 0;
4142		if (fmode & O_EXLOCK)
4143			lf.l_type = F_WRLCK;
4144		else
4145			lf.l_type = F_RDLCK;
4146		type = F_FLOCK;
4147		if ((fmode & FNONBLOCK) == 0)
4148			type |= F_WAIT;
4149		VOP_UNLOCK(vp, 0, td);
4150		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
4151			    type)) != 0) {
4152			/*
4153			 * The lock request failed.  Normally close the
4154			 * descriptor but handle the case where someone might
4155			 * have dup()d or close()d it when we weren't looking.
4156			 */
4157			fdclose(fdp, fp, indx, td);
4158
4159			/*
4160			 * release our private reference
4161			 */
4162			fdrop(fp, td);
4163			return(error);
4164		}
4165		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4166		fp->f_flag |= FHASLOCK;
4167	}
4168
4169	VOP_UNLOCK(vp, 0, td);
4170	fdrop(fp, td);
4171	td->td_retval[0] = indx;
4172	return (0);
4173
4174bad:
4175	vput(vp);
4176	return (error);
4177}
4178
4179/*
4180 * Stat an (NFS) file handle.
4181 */
4182#ifndef _SYS_SYSPROTO_H_
4183struct fhstat_args {
4184	struct fhandle *u_fhp;
4185	struct stat *sb;
4186};
4187#endif
4188int
4189fhstat(td, uap)
4190	struct thread *td;
4191	register struct fhstat_args /* {
4192		struct fhandle *u_fhp;
4193		struct stat *sb;
4194	} */ *uap;
4195{
4196	struct stat sb;
4197	fhandle_t fh;
4198	struct mount *mp;
4199	struct vnode *vp;
4200	int error;
4201
4202	error = suser(td);
4203	if (error)
4204		return (error);
4205	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
4206	if (error)
4207		return (error);
4208	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4209		return (ESTALE);
4210	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4211		return (error);
4212	error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td);
4213	vput(vp);
4214	if (error)
4215		return (error);
4216	error = copyout(&sb, uap->sb, sizeof(sb));
4217	return (error);
4218}
4219
4220/*
4221 * Implement fstatfs() for (NFS) file handles.
4222 */
4223#ifndef _SYS_SYSPROTO_H_
4224struct fhstatfs_args {
4225	struct fhandle *u_fhp;
4226	struct statfs *buf;
4227};
4228#endif
4229int
4230fhstatfs(td, uap)
4231	struct thread *td;
4232	struct fhstatfs_args /* {
4233		struct fhandle *u_fhp;
4234		struct statfs *buf;
4235	} */ *uap;
4236{
4237	struct statfs *sp;
4238	struct mount *mp;
4239	struct vnode *vp;
4240	fhandle_t fh;
4241	int error;
4242
4243	error = suser(td);
4244	if (error)
4245		return (error);
4246	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
4247		return (error);
4248	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4249		return (ESTALE);
4250	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4251		return (error);
4252	mp = vp->v_mount;
4253	sp = &mp->mnt_stat;
4254	vput(vp);
4255#ifdef MAC
4256	error = mac_check_mount_stat(td->td_ucred, mp);
4257	if (error)
4258		return (error);
4259#endif
4260	/*
4261	 * Set these in case the underlying filesystem fails to do so.
4262	 */
4263	sp->f_version = STATFS_VERSION;
4264	sp->f_namemax = NAME_MAX;
4265	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4266	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4267		return (error);
4268	return (copyout(sp, uap->buf, sizeof(*sp)));
4269}
4270
4271/*
4272 * Syscall to push extended attribute configuration information into the
4273 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4274 * a command (int cmd), and attribute name and misc data.  For now, the
4275 * attribute name is left in userspace for consumption by the VFS_op.
4276 * It will probably be changed to be copied into sysspace by the
4277 * syscall in the future, once issues with various consumers of the
4278 * attribute code have raised their hands.
4279 *
4280 * Currently this is used only by UFS Extended Attributes.
4281 */
4282int
4283extattrctl(td, uap)
4284	struct thread *td;
4285	struct extattrctl_args /* {
4286		const char *path;
4287		int cmd;
4288		const char *filename;
4289		int attrnamespace;
4290		const char *attrname;
4291	} */ *uap;
4292{
4293	struct vnode *filename_vp;
4294	struct nameidata nd;
4295	struct mount *mp, *mp_writable;
4296	char attrname[EXTATTR_MAXNAMELEN];
4297	int error;
4298
4299	/*
4300	 * uap->attrname is not always defined.  We check again later when we
4301	 * invoke the VFS call so as to pass in NULL there if needed.
4302	 */
4303	if (uap->attrname != NULL) {
4304		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4305		    NULL);
4306		if (error)
4307			return (error);
4308	}
4309
4310	/*
4311	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4312	 * which VFS_EXTATTRCTL() will later release.
4313	 */
4314	filename_vp = NULL;
4315	if (uap->filename != NULL) {
4316		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4317		    uap->filename, td);
4318		error = namei(&nd);
4319		if (error)
4320			return (error);
4321		filename_vp = nd.ni_vp;
4322		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4323	}
4324
4325	/* uap->path is always defined. */
4326	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4327	error = namei(&nd);
4328	if (error) {
4329		if (filename_vp != NULL)
4330			vput(filename_vp);
4331		return (error);
4332	}
4333	mp = nd.ni_vp->v_mount;
4334	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4335	NDFREE(&nd, 0);
4336	if (error) {
4337		if (filename_vp != NULL)
4338			vput(filename_vp);
4339		return (error);
4340	}
4341
4342	error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace,
4343	    uap->attrname != NULL ? attrname : NULL, td);
4344
4345	vn_finished_write(mp_writable);
4346	/*
4347	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4348	 * filename_vp, so vrele it if it is defined.
4349	 */
4350	if (filename_vp != NULL)
4351		vrele(filename_vp);
4352	return (error);
4353}
4354
4355/*-
4356 * Set a named extended attribute on a file or directory
4357 *
4358 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4359 *            kernelspace string pointer "attrname", userspace buffer
4360 *            pointer "data", buffer length "nbytes", thread "td".
4361 * Returns: 0 on success, an error number otherwise
4362 * Locks: none
4363 * References: vp must be a valid reference for the duration of the call
4364 */
4365static int
4366extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4367    void *data, size_t nbytes, struct thread *td)
4368{
4369	struct mount *mp;
4370	struct uio auio;
4371	struct iovec aiov;
4372	ssize_t cnt;
4373	int error;
4374
4375	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4376	if (error)
4377		return (error);
4378	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4379	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4380
4381	aiov.iov_base = data;
4382	aiov.iov_len = nbytes;
4383	auio.uio_iov = &aiov;
4384	auio.uio_iovcnt = 1;
4385	auio.uio_offset = 0;
4386	if (nbytes > INT_MAX) {
4387		error = EINVAL;
4388		goto done;
4389	}
4390	auio.uio_resid = nbytes;
4391	auio.uio_rw = UIO_WRITE;
4392	auio.uio_segflg = UIO_USERSPACE;
4393	auio.uio_td = td;
4394	cnt = nbytes;
4395
4396#ifdef MAC
4397	error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace,
4398	    attrname, &auio);
4399	if (error)
4400		goto done;
4401#endif
4402
4403	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4404	    td->td_ucred, td);
4405	cnt -= auio.uio_resid;
4406	td->td_retval[0] = cnt;
4407
4408done:
4409	VOP_UNLOCK(vp, 0, td);
4410	vn_finished_write(mp);
4411	return (error);
4412}
4413
4414int
4415extattr_set_fd(td, uap)
4416	struct thread *td;
4417	struct extattr_set_fd_args /* {
4418		int fd;
4419		int attrnamespace;
4420		const char *attrname;
4421		void *data;
4422		size_t nbytes;
4423	} */ *uap;
4424{
4425	struct file *fp;
4426	char attrname[EXTATTR_MAXNAMELEN];
4427	int error;
4428
4429	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4430	if (error)
4431		return (error);
4432
4433	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4434	if (error)
4435		return (error);
4436
4437	error = extattr_set_vp(fp->f_vnode, uap->attrnamespace,
4438	    attrname, uap->data, uap->nbytes, td);
4439	fdrop(fp, td);
4440
4441	return (error);
4442}
4443
4444int
4445extattr_set_file(td, uap)
4446	struct thread *td;
4447	struct extattr_set_file_args /* {
4448		const char *path;
4449		int attrnamespace;
4450		const char *attrname;
4451		void *data;
4452		size_t nbytes;
4453	} */ *uap;
4454{
4455	struct nameidata nd;
4456	char attrname[EXTATTR_MAXNAMELEN];
4457	int error;
4458
4459	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4460	if (error)
4461		return (error);
4462
4463	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4464	error = namei(&nd);
4465	if (error)
4466		return (error);
4467	NDFREE(&nd, NDF_ONLY_PNBUF);
4468
4469	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4470	    uap->data, uap->nbytes, td);
4471
4472	vrele(nd.ni_vp);
4473	return (error);
4474}
4475
4476int
4477extattr_set_link(td, uap)
4478	struct thread *td;
4479	struct extattr_set_link_args /* {
4480		const char *path;
4481		int attrnamespace;
4482		const char *attrname;
4483		void *data;
4484		size_t nbytes;
4485	} */ *uap;
4486{
4487	struct nameidata nd;
4488	char attrname[EXTATTR_MAXNAMELEN];
4489	int error;
4490
4491	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4492	if (error)
4493		return (error);
4494
4495	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4496	error = namei(&nd);
4497	if (error)
4498		return (error);
4499	NDFREE(&nd, NDF_ONLY_PNBUF);
4500
4501	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4502	    uap->data, uap->nbytes, td);
4503
4504	vrele(nd.ni_vp);
4505	return (error);
4506}
4507
4508/*-
4509 * Get a named extended attribute on a file or directory
4510 *
4511 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4512 *            kernelspace string pointer "attrname", userspace buffer
4513 *            pointer "data", buffer length "nbytes", thread "td".
4514 * Returns: 0 on success, an error number otherwise
4515 * Locks: none
4516 * References: vp must be a valid reference for the duration of the call
4517 */
4518static int
4519extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4520    void *data, size_t nbytes, struct thread *td)
4521{
4522	struct uio auio, *auiop;
4523	struct iovec aiov;
4524	ssize_t cnt;
4525	size_t size, *sizep;
4526	int error;
4527
4528	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4529	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4530
4531	/*
4532	 * Slightly unusual semantics: if the user provides a NULL data
4533	 * pointer, they don't want to receive the data, just the
4534	 * maximum read length.
4535	 */
4536	auiop = NULL;
4537	sizep = NULL;
4538	cnt = 0;
4539	if (data != NULL) {
4540		aiov.iov_base = data;
4541		aiov.iov_len = nbytes;
4542		auio.uio_iov = &aiov;
4543		auio.uio_offset = 0;
4544		if (nbytes > INT_MAX) {
4545			error = EINVAL;
4546			goto done;
4547		}
4548		auio.uio_resid = nbytes;
4549		auio.uio_rw = UIO_READ;
4550		auio.uio_segflg = UIO_USERSPACE;
4551		auio.uio_td = td;
4552		auiop = &auio;
4553		cnt = nbytes;
4554	} else
4555		sizep = &size;
4556
4557#ifdef MAC
4558	error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace,
4559	    attrname, &auio);
4560	if (error)
4561		goto done;
4562#endif
4563
4564	error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep,
4565	    td->td_ucred, td);
4566
4567	if (auiop != NULL) {
4568		cnt -= auio.uio_resid;
4569		td->td_retval[0] = cnt;
4570	} else
4571		td->td_retval[0] = size;
4572
4573done:
4574	VOP_UNLOCK(vp, 0, td);
4575	return (error);
4576}
4577
4578int
4579extattr_get_fd(td, uap)
4580	struct thread *td;
4581	struct extattr_get_fd_args /* {
4582		int fd;
4583		int attrnamespace;
4584		const char *attrname;
4585		void *data;
4586		size_t nbytes;
4587	} */ *uap;
4588{
4589	struct file *fp;
4590	char attrname[EXTATTR_MAXNAMELEN];
4591	int error;
4592
4593	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4594	if (error)
4595		return (error);
4596
4597	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4598	if (error)
4599		return (error);
4600
4601	error = extattr_get_vp(fp->f_vnode, uap->attrnamespace,
4602	    attrname, uap->data, uap->nbytes, td);
4603
4604	fdrop(fp, td);
4605	return (error);
4606}
4607
4608int
4609extattr_get_file(td, uap)
4610	struct thread *td;
4611	struct extattr_get_file_args /* {
4612		const char *path;
4613		int attrnamespace;
4614		const char *attrname;
4615		void *data;
4616		size_t nbytes;
4617	} */ *uap;
4618{
4619	struct nameidata nd;
4620	char attrname[EXTATTR_MAXNAMELEN];
4621	int error;
4622
4623	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4624	if (error)
4625		return (error);
4626
4627	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4628	error = namei(&nd);
4629	if (error)
4630		return (error);
4631	NDFREE(&nd, NDF_ONLY_PNBUF);
4632
4633	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4634	    uap->data, uap->nbytes, td);
4635
4636	vrele(nd.ni_vp);
4637	return (error);
4638}
4639
4640int
4641extattr_get_link(td, uap)
4642	struct thread *td;
4643	struct extattr_get_link_args /* {
4644		const char *path;
4645		int attrnamespace;
4646		const char *attrname;
4647		void *data;
4648		size_t nbytes;
4649	} */ *uap;
4650{
4651	struct nameidata nd;
4652	char attrname[EXTATTR_MAXNAMELEN];
4653	int error;
4654
4655	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4656	if (error)
4657		return (error);
4658
4659	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4660	error = namei(&nd);
4661	if (error)
4662		return (error);
4663	NDFREE(&nd, NDF_ONLY_PNBUF);
4664
4665	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4666	    uap->data, uap->nbytes, td);
4667
4668	vrele(nd.ni_vp);
4669	return (error);
4670}
4671
4672/*
4673 * extattr_delete_vp(): Delete a named extended attribute on a file or
4674 *                      directory
4675 *
4676 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4677 *            kernelspace string pointer "attrname", proc "p"
4678 * Returns: 0 on success, an error number otherwise
4679 * Locks: none
4680 * References: vp must be a valid reference for the duration of the call
4681 */
4682static int
4683extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4684    struct thread *td)
4685{
4686	struct mount *mp;
4687	int error;
4688
4689	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
4690	if (error)
4691		return (error);
4692	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4693	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4694
4695#ifdef MAC
4696	error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace,
4697	    attrname);
4698	if (error)
4699		goto done;
4700#endif
4701
4702	error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred,
4703	    td);
4704	if (error == EOPNOTSUPP)
4705		error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL,
4706		    td->td_ucred, td);
4707#ifdef MAC
4708done:
4709#endif
4710	VOP_UNLOCK(vp, 0, td);
4711	vn_finished_write(mp);
4712	return (error);
4713}
4714
4715int
4716extattr_delete_fd(td, uap)
4717	struct thread *td;
4718	struct extattr_delete_fd_args /* {
4719		int fd;
4720		int attrnamespace;
4721		const char *attrname;
4722	} */ *uap;
4723{
4724	struct file *fp;
4725	struct vnode *vp;
4726	char attrname[EXTATTR_MAXNAMELEN];
4727	int error;
4728
4729	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4730	if (error)
4731		return (error);
4732
4733	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4734	if (error)
4735		return (error);
4736	vp = fp->f_vnode;
4737
4738	error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td);
4739	fdrop(fp, td);
4740	return (error);
4741}
4742
4743int
4744extattr_delete_file(td, uap)
4745	struct thread *td;
4746	struct extattr_delete_file_args /* {
4747		const char *path;
4748		int attrnamespace;
4749		const char *attrname;
4750	} */ *uap;
4751{
4752	struct nameidata nd;
4753	char attrname[EXTATTR_MAXNAMELEN];
4754	int error;
4755
4756	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4757	if (error)
4758		return(error);
4759
4760	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4761	error = namei(&nd);
4762	if (error)
4763		return(error);
4764	NDFREE(&nd, NDF_ONLY_PNBUF);
4765
4766	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4767	vrele(nd.ni_vp);
4768	return(error);
4769}
4770
4771int
4772extattr_delete_link(td, uap)
4773	struct thread *td;
4774	struct extattr_delete_link_args /* {
4775		const char *path;
4776		int attrnamespace;
4777		const char *attrname;
4778	} */ *uap;
4779{
4780	struct nameidata nd;
4781	char attrname[EXTATTR_MAXNAMELEN];
4782	int error;
4783
4784	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4785	if (error)
4786		return(error);
4787
4788	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4789	error = namei(&nd);
4790	if (error)
4791		return(error);
4792	NDFREE(&nd, NDF_ONLY_PNBUF);
4793
4794	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4795	vrele(nd.ni_vp);
4796	return(error);
4797}
4798
4799/*-
4800 * Retrieve a list of extended attributes on a file or directory.
4801 *
4802 * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace",
4803 *            userspace buffer pointer "data", buffer length "nbytes",
4804 *            thread "td".
4805 * Returns: 0 on success, an error number otherwise
4806 * Locks: none
4807 * References: vp must be a valid reference for the duration of the call
4808 */
4809static int
4810extattr_list_vp(struct vnode *vp, int attrnamespace, void *data,
4811    size_t nbytes, struct thread *td)
4812{
4813	struct uio auio, *auiop;
4814	size_t size, *sizep;
4815	struct iovec aiov;
4816	ssize_t cnt;
4817	int error;
4818
4819	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4820	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4821
4822	auiop = NULL;
4823	sizep = NULL;
4824	cnt = 0;
4825	if (data != NULL) {
4826		aiov.iov_base = data;
4827		aiov.iov_len = nbytes;
4828		auio.uio_iov = &aiov;
4829		auio.uio_offset = 0;
4830		if (nbytes > INT_MAX) {
4831			error = EINVAL;
4832			goto done;
4833		}
4834		auio.uio_resid = nbytes;
4835		auio.uio_rw = UIO_READ;
4836		auio.uio_segflg = UIO_USERSPACE;
4837		auio.uio_td = td;
4838		auiop = &auio;
4839		cnt = nbytes;
4840	} else
4841		sizep = &size;
4842
4843#ifdef MAC
4844	error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace);
4845	if (error)
4846		goto done;
4847#endif
4848
4849	error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep,
4850	    td->td_ucred, td);
4851
4852	if (auiop != NULL) {
4853		cnt -= auio.uio_resid;
4854		td->td_retval[0] = cnt;
4855	} else
4856		td->td_retval[0] = size;
4857
4858done:
4859	VOP_UNLOCK(vp, 0, td);
4860	return (error);
4861}
4862
4863
4864int
4865extattr_list_fd(td, uap)
4866	struct thread *td;
4867	struct extattr_list_fd_args /* {
4868		int fd;
4869		int attrnamespace;
4870		void *data;
4871		size_t nbytes;
4872	} */ *uap;
4873{
4874	struct file *fp;
4875	int error;
4876
4877	error = getvnode(td->td_proc->p_fd, uap->fd, &fp);
4878	if (error)
4879		return (error);
4880
4881	error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data,
4882	    uap->nbytes, td);
4883
4884	fdrop(fp, td);
4885	return (error);
4886}
4887
4888int
4889extattr_list_file(td, uap)
4890	struct thread*td;
4891	struct extattr_list_file_args /* {
4892		const char *path;
4893		int attrnamespace;
4894		void *data;
4895		size_t nbytes;
4896	} */ *uap;
4897{
4898	struct nameidata nd;
4899	int error;
4900
4901	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4902	error = namei(&nd);
4903	if (error)
4904		return (error);
4905	NDFREE(&nd, NDF_ONLY_PNBUF);
4906
4907	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4908	    uap->nbytes, td);
4909
4910	vrele(nd.ni_vp);
4911	return (error);
4912}
4913
4914int
4915extattr_list_link(td, uap)
4916	struct thread*td;
4917	struct extattr_list_link_args /* {
4918		const char *path;
4919		int attrnamespace;
4920		void *data;
4921		size_t nbytes;
4922	} */ *uap;
4923{
4924	struct nameidata nd;
4925	int error;
4926
4927	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td);
4928	error = namei(&nd);
4929	if (error)
4930		return (error);
4931	NDFREE(&nd, NDF_ONLY_PNBUF);
4932
4933	error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data,
4934	    uap->nbytes, td);
4935
4936	vrele(nd.ni_vp);
4937	return (error);
4938}
4939