vfs_syscalls.c revision 95093
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
39 * $FreeBSD: head/sys/kern/vfs_syscalls.c 95093 2002-04-20 01:37:08Z rwatson $
40 */
41
42/* For 4.3 integer FS ID compatibility */
43#include "opt_compat.h"
44#include "opt_ffs.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/sysent.h>
51#include <sys/malloc.h>
52#include <sys/mount.h>
53#include <sys/mutex.h>
54#include <sys/sysproto.h>
55#include <sys/namei.h>
56#include <sys/filedesc.h>
57#include <sys/kernel.h>
58#include <sys/fcntl.h>
59#include <sys/file.h>
60#include <sys/linker.h>
61#include <sys/stat.h>
62#include <sys/sx.h>
63#include <sys/unistd.h>
64#include <sys/vnode.h>
65#include <sys/proc.h>
66#include <sys/dirent.h>
67#include <sys/extattr.h>
68#include <sys/jail.h>
69#include <sys/sysctl.h>
70
71#include <machine/limits.h>
72#include <machine/stdarg.h>
73
74#include <vm/vm.h>
75#include <vm/vm_object.h>
76#include <vm/vm_page.h>
77#include <vm/uma.h>
78
/*
 * Prototypes for helpers that are private to this file.
 */
static int change_dir(struct nameidata *ndp, struct thread *td);
static void checkdirs(struct vnode *olddp, struct vnode *newdp);
static int chroot_refuse_vdir_fds(struct filedesc *fdp);
static int getutimes(const struct timeval *, struct timespec *);
static int setfown(struct thread *td, struct vnode *, uid_t, gid_t);
static int setfmode(struct thread *td, struct vnode *, int);
static int setfflags(struct thread *td, struct vnode *, int);
static int setutimes(struct thread *td, struct vnode *,
    const struct timespec *, int);
static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
    struct thread *td);
static void vfs_freeopts(struct vfsoptlist *opt);
static int vfs_nmount(struct thread *td, int, struct uio *);

/*
 * Mount policy knob, exported below as a read/write sysctl.  While it
 * is zero, vfs_nmount() and vfs_mount() require suser() to succeed.
 */
static int	usermount = 0;	/* if 1, non-root can mount fs. */

/*
 * Function-pointer hook with external linkage.  Nothing in the portion
 * of the file reviewed here assigns or calls it.
 * NOTE(review): presumably installed by the union filesystem -- confirm.
 */
int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *);

/* Publish `usermount' as a read/write integer under the _vfs sysctl node. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
98
99/*
100 * Virtual File System System Calls
101 */
102
103#ifndef _SYS_SYSPROTO_H_
104struct nmount_args {
105	struct iovec    *iovp;
106	unsigned int    iovcnt;
107	int             flags;
108};
109#endif
110/* ARGSUSED */
111int
112nmount(td, uap)
113	struct thread *td;
114	struct nmount_args /* {
115		syscallarg(struct iovec *) iovp;
116		syscallarg(unsigned int) iovcnt;
117		syscallarg(int) flags;
118	} */ *uap;
119{
120	struct uio auio;
121	struct iovec *iov, *needfree;
122	struct iovec aiov[UIO_SMALLIOV];
123	long error, i;
124	u_int iovlen, iovcnt;
125
126	iovcnt = SCARG(uap, iovcnt);
127	iovlen = iovcnt * sizeof (struct iovec);
128	/*
129	 * Check that we have an even number of iovec's
130	 * and that we have at least two options.
131	 */
132	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
133		return (EINVAL);
134
135	if (iovcnt > UIO_SMALLIOV) {
136		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
137		needfree = iov;
138	} else {
139		iov = aiov;
140		needfree = NULL;
141	}
142	auio.uio_iov = iov;
143	auio.uio_iovcnt = iovcnt;
144	auio.uio_rw = UIO_WRITE;
145	auio.uio_segflg = UIO_USERSPACE;
146	auio.uio_td = td;
147	auio.uio_offset = 0;
148	auio.uio_resid = 0;
149	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
150		goto finish;
151	for (i = 0; i < iovcnt; i++) {
152		if (iov->iov_len > INT_MAX - auio.uio_resid) {
153			error = EINVAL;
154			goto finish;
155		}
156		auio.uio_resid += iov->iov_len;
157		iov++;
158	}
159	error = vfs_nmount(td, SCARG(uap, flags), &auio);
160finish:
161	if (needfree != NULL)
162		free(needfree, M_TEMP);
163	return (error);
164}
165
166/*
167 * Release all resources related to the
168 * mount options.
169 */
170static void
171vfs_freeopts(struct vfsoptlist *opt)
172{
173	free(opt->opt, M_MOUNT);
174	free(opt->optbuf, M_MOUNT);
175	free(opt, M_MOUNT);
176}
177
178int
179kernel_mount(iovp, iovcnt, flags)
180	struct iovec *iovp;
181	unsigned int iovcnt;
182	int flags;
183{
184	struct uio auio;
185	struct iovec *iov;
186	int error, i;
187
188	/*
189	 * Check that we have an even number of iovec's
190	 * and that we have at least two options.
191	 */
192	if ((iovcnt & 1) || (iovcnt < 4))
193		return (EINVAL);
194
195	auio.uio_iov = iovp;
196	auio.uio_iovcnt = iovcnt;
197	auio.uio_rw = UIO_WRITE;
198	auio.uio_segflg = UIO_SYSSPACE;
199	auio.uio_offset = 0;
200	auio.uio_td = NULL;
201	auio.uio_resid = 0;
202	iov = iovp;
203	for (i = 0; i < iovcnt; i++) {
204		if (iov->iov_len > INT_MAX - auio.uio_resid) {
205			return (EINVAL);
206		}
207		auio.uio_resid += iov->iov_len;
208		iov++;
209	}
210
211	error = vfs_nmount(curthread, flags, &auio);
212	return (error);
213}
214
215int
216kernel_vmount(int flags, ...)
217{
218	struct iovec *iovp;
219	struct uio auio;
220	va_list ap;
221	unsigned int iovcnt, iovlen, len;
222	const char *cp;
223	char *buf, *pos;
224	size_t n;
225	int error, i;
226
227	len = 0;
228	va_start(ap, flags);
229	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
230		len += strlen(cp) + 1;
231	va_end(ap);
232
233	if (iovcnt < 4 || iovcnt & 1)
234		return (EINVAL);
235
236	iovlen = iovcnt * sizeof (struct iovec);
237	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
238	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
239	pos = buf;
240	va_start(ap, flags);
241	for (i = 0; i < iovcnt; i++) {
242		cp = va_arg(ap, const char *);
243		copystr(cp, pos, len - (pos - buf), &n);
244		iovp[i].iov_base = pos;
245		iovp[i].iov_len = n;
246		pos += n;
247	}
248	va_end(ap);
249
250	auio.uio_iov = iovp;
251	auio.uio_iovcnt = iovcnt;
252	auio.uio_rw = UIO_WRITE;
253	auio.uio_segflg = UIO_SYSSPACE;
254	auio.uio_offset = 0;
255	auio.uio_td = NULL;
256	auio.uio_resid = len;
257
258	error = vfs_nmount(curthread, flags, &auio);
259	FREE(iovp, M_MOUNT);
260	FREE(buf, M_MOUNT);
261	return (error);
262}
263
264/*
265 * vfs_nmount(): actually attempt a filesystem mount.
266 */
267static int
268vfs_nmount(td, fsflags, fsoptions)
269	struct thread *td;
270	int fsflags;		/* Flags common to all filesystems. */
271	struct uio *fsoptions;	/* Options local to the filesystem. */
272{
273	linker_file_t lf;
274	struct vnode *vp;
275	struct mount *mp;
276	struct vfsconf *vfsp;
277	struct iovec *cur;
278	struct vfsoptlist *optlist;
279	struct vfsopt *opt;
280	char *buf, *fstype, *fspath;
281	int error, flag = 0, kern_flag = 0, i, len, optcnt;
282	int offset, iovcnt, fstypelen, fspathlen;
283	struct vattr va;
284	struct nameidata nd;
285
286	/*
287	 * Allocate memory to hold the vfsopt structures.
288	 */
289	iovcnt = fsoptions->uio_iovcnt;
290	optcnt = iovcnt >> 1;
291	opt = malloc(sizeof (struct vfsopt) * optcnt,
292	    M_MOUNT, M_WAITOK | M_ZERO);
293
294	/*
295	 * Count the size of the buffer for options,
296	 * allocate it, and fill in the vfsopt structures.
297	 */
298	cur = fsoptions->uio_iov;
299	len = fsoptions->uio_resid;
300	buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
301
302	optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK);
303	optlist->opt = opt;
304	optlist->optbuf = buf;
305	optlist->optcnt = optcnt;
306
307	offset = i = 0;
308	cur = fsoptions->uio_iov;
309	while (i < optcnt) {
310		opt[i].name = buf + offset;
311		/* Ensure the name of an option is a string. */
312		if (opt[i].name[cur->iov_len - 1] != '\0') {
313			error = EINVAL;
314			goto bad;
315		}
316		offset += cur->iov_len;
317		cur++;
318		opt[i].len = cur->iov_len;
319		/*
320		 * Prevent consumers from trying to
321		 * read the value of a 0 length option
322		 * by setting it to NULL.
323		 */
324		if (opt[i].len == 0)
325			opt[i].value = NULL;
326		else
327			opt[i].value = buf + offset;
328		offset += cur->iov_len;
329		cur++; i++;
330	}
331
332	if ((error = uiomove(buf, len, fsoptions)) != 0)
333		goto bad;
334
335	/*
336	 * We need these two options before the others,
337	 * and they are mandatory for any filesystem.
338	 * Ensure they are NULL terminated as well.
339	 */
340	fstypelen = 0;
341	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
342	if (error || fstype[fstypelen - 1] != '\0') {
343		error = EINVAL;
344		goto bad;
345	}
346	fspathlen = 0;
347	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
348	if (error || fspath[fspathlen - 1] != '\0') {
349		error = EINVAL;
350		goto bad;
351	}
352
353	/*
354	 * Be ultra-paranoid about making sure the type and fspath
355	 * variables will fit in our mp buffers, including the
356	 * terminating NUL.
357	 */
358	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
359		error = ENAMETOOLONG;
360		goto bad;
361	}
362
363	if (usermount == 0) {
364	       	error = suser(td);
365		if (error)
366			goto bad;
367	}
368	/*
369	 * Do not allow NFS export by non-root users.
370	 */
371	if (fsflags & MNT_EXPORTED) {
372		error = suser(td);
373		if (error)
374			goto bad;
375	}
376	/*
377	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
378	 */
379	if (suser(td))
380		fsflags |= MNT_NOSUID | MNT_NODEV;
381	/*
382	 * Get vnode to be covered
383	 */
384	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
385	if ((error = namei(&nd)) != 0)
386		goto bad;
387	NDFREE(&nd, NDF_ONLY_PNBUF);
388	vp = nd.ni_vp;
389	if (fsflags & MNT_UPDATE) {
390		if ((vp->v_flag & VROOT) == 0) {
391			vput(vp);
392			error = EINVAL;
393			goto bad;
394		}
395		mp = vp->v_mount;
396		flag = mp->mnt_flag;
397		kern_flag = mp->mnt_kern_flag;
398		/*
399		 * We only allow the filesystem to be reloaded if it
400		 * is currently mounted read-only.
401		 */
402		if ((fsflags & MNT_RELOAD) &&
403		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
404			vput(vp);
405			error = EOPNOTSUPP;	/* Needs translation */
406			goto bad;
407		}
408		/*
409		 * Only root, or the user that did the original mount is
410		 * permitted to update it.
411		 */
412		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
413			error = suser(td);
414			if (error) {
415				vput(vp);
416				goto bad;
417			}
418		}
419		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
420			vput(vp);
421			error = EBUSY;
422			goto bad;
423		}
424		mtx_lock(&vp->v_interlock);
425		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
426			mtx_unlock(&vp->v_interlock);
427			vfs_unbusy(mp, td);
428			vput(vp);
429			error = EBUSY;
430			goto bad;
431		}
432		vp->v_flag |= VMOUNT;
433		mtx_unlock(&vp->v_interlock);
434		mp->mnt_flag |= fsflags &
435		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
436		VOP_UNLOCK(vp, 0, td);
437		mp->mnt_optnew = optlist;
438		goto update;
439	}
440	/*
441	 * If the user is not root, ensure that they own the directory
442	 * onto which we are attempting to mount.
443	 */
444	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
445	if (error) {
446		vput(vp);
447		goto bad;
448	}
449	if (va.va_uid != td->td_ucred->cr_uid) {
450		error = suser(td);
451		if (error) {
452			vput(vp);
453			goto bad;
454		}
455	}
456	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
457		vput(vp);
458		goto bad;
459	}
460	if (vp->v_type != VDIR) {
461		vput(vp);
462		error = ENOTDIR;
463		goto bad;
464	}
465	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
466		if (!strcmp(vfsp->vfc_name, fstype))
467			break;
468	if (vfsp == NULL) {
469		/* Only load modules for root (very important!). */
470		error = suser(td);
471		if (error) {
472			vput(vp);
473			goto bad;
474		}
475		error = securelevel_gt(td->td_ucred, 0);
476		if (error) {
477			vput(vp);
478			goto bad;
479		}
480		error = linker_load_file(fstype, &lf);
481		if (error || lf == NULL) {
482			vput(vp);
483			if (lf == NULL)
484				error = ENODEV;
485			goto bad;
486		}
487		lf->userrefs++;
488		/* Look up again to see if the VFS was loaded. */
489		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
490			if (!strcmp(vfsp->vfc_name, fstype))
491				break;
492		if (vfsp == NULL) {
493			lf->userrefs--;
494			linker_file_unload(lf);
495			vput(vp);
496			error = ENODEV;
497			goto bad;
498		}
499	}
500	mtx_lock(&vp->v_interlock);
501	if ((vp->v_flag & VMOUNT) != 0 ||
502	    vp->v_mountedhere != NULL) {
503		mtx_unlock(&vp->v_interlock);
504		vput(vp);
505		error = EBUSY;
506		goto bad;
507	}
508	vp->v_flag |= VMOUNT;
509	mtx_unlock(&vp->v_interlock);
510
511	/*
512	 * Allocate and initialize the filesystem.
513	 */
514	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
515	TAILQ_INIT(&mp->mnt_nvnodelist);
516	TAILQ_INIT(&mp->mnt_reservedvnlist);
517	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
518	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
519	mp->mnt_op = vfsp->vfc_vfsops;
520	mp->mnt_vfc = vfsp;
521	vfsp->vfc_refcount++;
522	mp->mnt_stat.f_type = vfsp->vfc_typenum;
523	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
524	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
525	mp->mnt_vnodecovered = vp;
526	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
527	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
528	mp->mnt_iosize_max = DFLTPHYS;
529	VOP_UNLOCK(vp, 0, td);
530
531	mp->mnt_opt = optlist;
532update:
533	/*
534	 * Check if the fs implements the new VFS_NMOUNT()
535	 * function, since the new system call was used.
536	 */
537	if (mp->mnt_op->vfs_mount != NULL) {
538		printf("%s doesn't support the new mount syscall\n",
539		    mp->mnt_vfc->vfc_name);
540		mtx_lock(&vp->v_interlock);
541		vp->v_flag &= ~VMOUNT;
542		mtx_unlock(&vp->v_interlock);
543		if (mp->mnt_flag & MNT_UPDATE)
544			vfs_unbusy(mp, td);
545		else {
546			mp->mnt_vfc->vfc_refcount--;
547			vfs_unbusy(mp, td);
548			free((caddr_t)mp, M_MOUNT);
549		}
550		vput(vp);
551		error = EOPNOTSUPP;
552		goto bad;
553	}
554
555	/*
556	 * Set the mount level flags.
557	 */
558	if (fsflags & MNT_RDONLY)
559		mp->mnt_flag |= MNT_RDONLY;
560	else if (mp->mnt_flag & MNT_RDONLY)
561		mp->mnt_kern_flag |= MNTK_WANTRDWR;
562	mp->mnt_flag &=~ MNT_UPDATEMASK;
563	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
564	/*
565	 * Mount the filesystem.
566	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
567	 * get.  No freeing of cn_pnbuf.
568	 */
569	error = VFS_NMOUNT(mp, &nd, td);
570	if (mp->mnt_flag & MNT_UPDATE) {
571		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
572			mp->mnt_flag &= ~MNT_RDONLY;
573		mp->mnt_flag &=~
574		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
575		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
576		if (error) {
577			mp->mnt_flag = flag;
578			mp->mnt_kern_flag = kern_flag;
579			vfs_freeopts(mp->mnt_optnew);
580		} else {
581			vfs_freeopts(mp->mnt_opt);
582			mp->mnt_opt = mp->mnt_optnew;
583		}
584		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
585			if (mp->mnt_syncer == NULL)
586				error = vfs_allocate_syncvnode(mp);
587		} else {
588			if (mp->mnt_syncer != NULL)
589				vrele(mp->mnt_syncer);
590			mp->mnt_syncer = NULL;
591		}
592		vfs_unbusy(mp, td);
593		mtx_lock(&vp->v_interlock);
594		vp->v_flag &= ~VMOUNT;
595		mtx_unlock(&vp->v_interlock);
596		vrele(vp);
597		return (error);
598	}
599	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
600	/*
601	 * Put the new filesystem on the mount list after root.
602	 */
603	cache_purge(vp);
604	if (!error) {
605		struct vnode *newdp;
606
607		mtx_lock(&vp->v_interlock);
608		vp->v_flag &= ~VMOUNT;
609		vp->v_mountedhere = mp;
610		mtx_unlock(&vp->v_interlock);
611		mtx_lock(&mountlist_mtx);
612		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
613		mtx_unlock(&mountlist_mtx);
614		if (VFS_ROOT(mp, &newdp))
615			panic("mount: lost mount");
616		checkdirs(vp, newdp);
617		vput(newdp);
618		VOP_UNLOCK(vp, 0, td);
619		if ((mp->mnt_flag & MNT_RDONLY) == 0)
620			error = vfs_allocate_syncvnode(mp);
621		vfs_unbusy(mp, td);
622		if ((error = VFS_START(mp, 0, td)) != 0) {
623			vrele(vp);
624			goto bad;
625		}
626	} else {
627		mtx_lock(&vp->v_interlock);
628		vp->v_flag &= ~VMOUNT;
629		mtx_unlock(&vp->v_interlock);
630		mp->mnt_vfc->vfc_refcount--;
631		vfs_unbusy(mp, td);
632		free((caddr_t)mp, M_MOUNT);
633		vput(vp);
634		goto bad;
635	}
636	return (0);
637bad:
638	vfs_freeopts(optlist);
639	return (error);
640}
641
642/*
643 * Old Mount API.
644 */
645#ifndef _SYS_SYSPROTO_H_
646struct mount_args {
647	char	*type;
648	char	*path;
649	int	flags;
650	caddr_t	data;
651};
652#endif
653/* ARGSUSED */
654int
655mount(td, uap)
656	struct thread *td;
657	struct mount_args /* {
658		syscallarg(char *) type;
659		syscallarg(char *) path;
660		syscallarg(int) flags;
661		syscallarg(caddr_t) data;
662	} */ *uap;
663{
664	char *fstype;
665	char *fspath;
666	int error;
667
668	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
669	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
670
671	/*
672	 * vfs_mount() actually takes a kernel string for `type' and
673	 * `path' now, so extract them.
674	 */
675	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
676	if (error)
677		goto finish;
678	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
679	if (error)
680		goto finish;
681	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
682	    SCARG(uap, data));
683finish:
684	free(fstype, M_TEMP);
685	free(fspath, M_TEMP);
686	return (error);
687}
688
689/*
690 * vfs_mount(): actually attempt a filesystem mount.
691 *
692 * This routine is designed to be a "generic" entry point for routines
693 * that wish to mount a filesystem. All parameters except `fsdata' are
694 * pointers into kernel space. `fsdata' is currently still a pointer
695 * into userspace.
696 */
697int
698vfs_mount(td, fstype, fspath, fsflags, fsdata)
699	struct thread *td;
700	const char *fstype;
701	char *fspath;
702	int fsflags;
703	void *fsdata;
704{
705	linker_file_t lf;
706	struct vnode *vp;
707	struct mount *mp;
708	struct vfsconf *vfsp;
709	int error, flag = 0, kern_flag = 0;
710	struct vattr va;
711	struct nameidata nd;
712
713	/*
714	 * Be ultra-paranoid about making sure the type and fspath
715	 * variables will fit in our mp buffers, including the
716	 * terminating NUL.
717	 */
718	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
719		return (ENAMETOOLONG);
720
721	if (usermount == 0) {
722		error = suser(td);
723		if (error)
724			return (error);
725	}
726	/*
727	 * Do not allow NFS export by non-root users.
728	 */
729	if (fsflags & MNT_EXPORTED) {
730		error = suser(td);
731		if (error)
732			return (error);
733	}
734	/*
735	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
736	 */
737	if (suser(td))
738		fsflags |= MNT_NOSUID | MNT_NODEV;
739	/*
740	 * Get vnode to be covered
741	 */
742	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
743	if ((error = namei(&nd)) != 0)
744		return (error);
745	NDFREE(&nd, NDF_ONLY_PNBUF);
746	vp = nd.ni_vp;
747	if (fsflags & MNT_UPDATE) {
748		if ((vp->v_flag & VROOT) == 0) {
749			vput(vp);
750			return (EINVAL);
751		}
752		mp = vp->v_mount;
753		flag = mp->mnt_flag;
754		kern_flag = mp->mnt_kern_flag;
755		/*
756		 * We only allow the filesystem to be reloaded if it
757		 * is currently mounted read-only.
758		 */
759		if ((fsflags & MNT_RELOAD) &&
760		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
761			vput(vp);
762			return (EOPNOTSUPP);	/* Needs translation */
763		}
764		/*
765		 * Only root, or the user that did the original mount is
766		 * permitted to update it.
767		 */
768		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
769			error = suser(td);
770			if (error) {
771				vput(vp);
772				return (error);
773			}
774		}
775		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
776			vput(vp);
777			return (EBUSY);
778		}
779		mtx_lock(&vp->v_interlock);
780		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
781			mtx_unlock(&vp->v_interlock);
782			vfs_unbusy(mp, td);
783			vput(vp);
784			return (EBUSY);
785		}
786		vp->v_flag |= VMOUNT;
787		mtx_unlock(&vp->v_interlock);
788		mp->mnt_flag |= fsflags &
789		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
790		VOP_UNLOCK(vp, 0, td);
791		goto update;
792	}
793	/*
794	 * If the user is not root, ensure that they own the directory
795	 * onto which we are attempting to mount.
796	 */
797	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
798	if (error) {
799		vput(vp);
800		return (error);
801	}
802	if (va.va_uid != td->td_ucred->cr_uid) {
803		error = suser(td);
804		if (error) {
805			vput(vp);
806			return (error);
807		}
808	}
809	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
810		vput(vp);
811		return (error);
812	}
813	if (vp->v_type != VDIR) {
814		vput(vp);
815		return (ENOTDIR);
816	}
817	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
818		if (!strcmp(vfsp->vfc_name, fstype))
819			break;
820	if (vfsp == NULL) {
821		/* Only load modules for root (very important!). */
822		error = suser(td);
823		if (error) {
824			vput(vp);
825			return (error);
826		}
827		error = securelevel_gt(td->td_ucred, 0);
828		if (error) {
829			vput(vp);
830			return (error);
831		}
832		error = linker_load_file(fstype, &lf);
833		if (error || lf == NULL) {
834			vput(vp);
835			if (lf == NULL)
836				error = ENODEV;
837			return (error);
838		}
839		lf->userrefs++;
840		/* Look up again to see if the VFS was loaded. */
841		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
842			if (!strcmp(vfsp->vfc_name, fstype))
843				break;
844		if (vfsp == NULL) {
845			lf->userrefs--;
846			linker_file_unload(lf);
847			vput(vp);
848			return (ENODEV);
849		}
850	}
851	mtx_lock(&vp->v_interlock);
852	if ((vp->v_flag & VMOUNT) != 0 ||
853	    vp->v_mountedhere != NULL) {
854		mtx_unlock(&vp->v_interlock);
855		vput(vp);
856		return (EBUSY);
857	}
858	vp->v_flag |= VMOUNT;
859	mtx_unlock(&vp->v_interlock);
860
861	/*
862	 * Allocate and initialize the filesystem.
863	 */
864	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
865	TAILQ_INIT(&mp->mnt_nvnodelist);
866	TAILQ_INIT(&mp->mnt_reservedvnlist);
867	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
868	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
869	mp->mnt_op = vfsp->vfc_vfsops;
870	mp->mnt_vfc = vfsp;
871	vfsp->vfc_refcount++;
872	mp->mnt_stat.f_type = vfsp->vfc_typenum;
873	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
874	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
875	mp->mnt_vnodecovered = vp;
876	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
877	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
878	mp->mnt_iosize_max = DFLTPHYS;
879	VOP_UNLOCK(vp, 0, td);
880update:
881	/*
882	 * Check if the fs implements the old VFS_MOUNT()
883	 * function, since the old system call was used.
884	 */
885	if (mp->mnt_op->vfs_mount == NULL) {
886		printf("%s doesn't support the old mount syscall\n",
887		    mp->mnt_vfc->vfc_name);
888		mtx_lock(&vp->v_interlock);
889		vp->v_flag &= ~VMOUNT;
890		mtx_unlock(&vp->v_interlock);
891		if (mp->mnt_flag & MNT_UPDATE)
892			vfs_unbusy(mp, td);
893		else {
894			mp->mnt_vfc->vfc_refcount--;
895			vfs_unbusy(mp, td);
896			free((caddr_t)mp, M_MOUNT);
897		}
898		vput(vp);
899		return (EOPNOTSUPP);
900	}
901
902	/*
903	 * Set the mount level flags.
904	 */
905	if (fsflags & MNT_RDONLY)
906		mp->mnt_flag |= MNT_RDONLY;
907	else if (mp->mnt_flag & MNT_RDONLY)
908		mp->mnt_kern_flag |= MNTK_WANTRDWR;
909	mp->mnt_flag &=~ MNT_UPDATEMASK;
910	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
911	/*
912	 * Mount the filesystem.
913	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
914	 * get.  No freeing of cn_pnbuf.
915	 */
916	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
917	if (mp->mnt_flag & MNT_UPDATE) {
918		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
919			mp->mnt_flag &= ~MNT_RDONLY;
920		mp->mnt_flag &=~
921		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
922		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
923		if (error) {
924			mp->mnt_flag = flag;
925			mp->mnt_kern_flag = kern_flag;
926		}
927		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
928			if (mp->mnt_syncer == NULL)
929				error = vfs_allocate_syncvnode(mp);
930		} else {
931			if (mp->mnt_syncer != NULL)
932				vrele(mp->mnt_syncer);
933			mp->mnt_syncer = NULL;
934		}
935		vfs_unbusy(mp, td);
936		mtx_lock(&vp->v_interlock);
937		vp->v_flag &= ~VMOUNT;
938		mtx_unlock(&vp->v_interlock);
939		vrele(vp);
940		return (error);
941	}
942	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
943	/*
944	 * Put the new filesystem on the mount list after root.
945	 */
946	cache_purge(vp);
947	if (!error) {
948		struct vnode *newdp;
949
950		mtx_lock(&vp->v_interlock);
951		vp->v_flag &= ~VMOUNT;
952		vp->v_mountedhere = mp;
953		mtx_unlock(&vp->v_interlock);
954		mtx_lock(&mountlist_mtx);
955		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
956		mtx_unlock(&mountlist_mtx);
957		if (VFS_ROOT(mp, &newdp))
958			panic("mount: lost mount");
959		checkdirs(vp, newdp);
960		vput(newdp);
961		VOP_UNLOCK(vp, 0, td);
962		if ((mp->mnt_flag & MNT_RDONLY) == 0)
963			error = vfs_allocate_syncvnode(mp);
964		vfs_unbusy(mp, td);
965		if ((error = VFS_START(mp, 0, td)) != 0)
966			vrele(vp);
967	} else {
968		mtx_lock(&vp->v_interlock);
969		vp->v_flag &= ~VMOUNT;
970		mtx_unlock(&vp->v_interlock);
971		mp->mnt_vfc->vfc_refcount--;
972		vfs_unbusy(mp, td);
973		free((caddr_t)mp, M_MOUNT);
974		vput(vp);
975	}
976	return (error);
977}
978
979/*
980 * Scan all active processes to see if any of them have a current
981 * or root directory of `olddp'. If so, replace them with the new
982 * mount point.
983 */
984static void
985checkdirs(olddp, newdp)
986	struct vnode *olddp, *newdp;
987{
988	struct filedesc *fdp;
989	struct proc *p;
990	int nrele;
991
992	if (olddp->v_usecount == 1)
993		return;
994	sx_slock(&allproc_lock);
995	LIST_FOREACH(p, &allproc, p_list) {
996		PROC_LOCK(p);
997		fdp = p->p_fd;
998		if (fdp == NULL) {
999			PROC_UNLOCK(p);
1000			continue;
1001		}
1002		nrele = 0;
1003		FILEDESC_LOCK(fdp);
1004		if (fdp->fd_cdir == olddp) {
1005			VREF(newdp);
1006			fdp->fd_cdir = newdp;
1007			nrele++;
1008		}
1009		if (fdp->fd_rdir == olddp) {
1010			VREF(newdp);
1011			fdp->fd_rdir = newdp;
1012			nrele++;
1013		}
1014		FILEDESC_UNLOCK(fdp);
1015		PROC_UNLOCK(p);
1016		while (nrele--)
1017			vrele(olddp);
1018	}
1019	sx_sunlock(&allproc_lock);
1020	if (rootvnode == olddp) {
1021		vrele(rootvnode);
1022		VREF(newdp);
1023		rootvnode = newdp;
1024	}
1025}
1026
1027/*
1028 * Unmount a file system.
1029 *
1030 * Note: unmount takes a path to the vnode mounted on as argument,
1031 * not special file (as before).
1032 */
1033#ifndef _SYS_SYSPROTO_H_
1034struct unmount_args {
1035	char	*path;
1036	int	flags;
1037};
1038#endif
1039/* ARGSUSED */
1040int
1041unmount(td, uap)
1042	struct thread *td;
1043	register struct unmount_args /* {
1044		syscallarg(char *) path;
1045		syscallarg(int) flags;
1046	} */ *uap;
1047{
1048	register struct vnode *vp;
1049	struct mount *mp;
1050	int error;
1051	struct nameidata nd;
1052
1053	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1054	    SCARG(uap, path), td);
1055	if ((error = namei(&nd)) != 0)
1056		return (error);
1057	vp = nd.ni_vp;
1058	NDFREE(&nd, NDF_ONLY_PNBUF);
1059	mp = vp->v_mount;
1060
1061	/*
1062	 * Only root, or the user that did the original mount is
1063	 * permitted to unmount this filesystem.
1064	 */
1065	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1066		error = suser(td);
1067		if (error) {
1068			vput(vp);
1069			return (error);
1070		}
1071	}
1072
1073	/*
1074	 * Don't allow unmounting the root file system.
1075	 */
1076	if (mp->mnt_flag & MNT_ROOTFS) {
1077		vput(vp);
1078		return (EINVAL);
1079	}
1080
1081	/*
1082	 * Must be the root of the filesystem
1083	 */
1084	if ((vp->v_flag & VROOT) == 0) {
1085		vput(vp);
1086		return (EINVAL);
1087	}
1088	vput(vp);
1089	return (dounmount(mp, SCARG(uap, flags), td));
1090}
1091
/*
 * Do the actual file system unmount.
 *
 * mp		mount to take down.
 * flags	unmount flags; MNT_FORCE is the only one examined here,
 *		the full set is passed on to VFS_UNMOUNT().
 * td		calling thread.
 *
 * Returns 0 once the mount has been removed from the mount list and
 * freed.  Returns EBUSY when an unmount of `mp' is already in progress,
 * the lockmgr() error when the mount lock cannot be drained, or the
 * error left by VFS_SYNC()/VFS_UNMOUNT(); in those cases the mount
 * stays in place.
 */
int
dounmount(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct vnode *coveredvp, *fsrootvp;
	int error;
	int async_flag;

	/* Claim the unmount: only one may be in progress per mount. */
	mtx_lock(&mountlist_mtx);
	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		mtx_unlock(&mountlist_mtx);
		return (EBUSY);
	}
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	/* Allow filesystems to detect that a forced unmount is in progress. */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/*
	 * Drain the mount lock.  mountlist_mtx is handed over as the
	 * interlock; LK_NOWAIT is requested unless the unmount is forced.
	 */
	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
	if (error) {
		/* Give up the claim and wake anyone marked as waiting. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup((caddr_t)mp);
		return (error);
	}
	vn_start_write(NULL, &mp, V_WAIT);

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* Remember MNT_ASYNC so it can be put back if the unmount fails. */
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vrele(mp->mnt_syncer);
	/* Move process cdir/rdir refs on fs root to underlying vnode. */
	if (VFS_ROOT(mp, &fsrootvp) == 0) {
		if (mp->mnt_vnodecovered != NULL)
			checkdirs(fsrootvp, mp->mnt_vnodecovered);
		if (fsrootvp == rootvnode) {
			vrele(rootvnode);
			rootvnode = NULL;
		}
		vput(fsrootvp);
	}
	/*
	 * VFS_UNMOUNT() is attempted when the filesystem is read-only,
	 * when VFS_SYNC() succeeded, or when the unmount is forced.
	 * Otherwise `error' keeps the VFS_SYNC() failure.
	 */
	if (((mp->mnt_flag & MNT_RDONLY) ||
	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
	    (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags, td);
	}
	vn_finished_write(mp);
	if (error) {
		/* Undo cdir/rdir and rootvnode changes made above. */
		if (VFS_ROOT(mp, &fsrootvp) == 0) {
			if (mp->mnt_vnodecovered != NULL)
				checkdirs(mp->mnt_vnodecovered, fsrootvp);
			if (rootvnode == NULL) {
				rootvnode = fsrootvp;
				vref(rootvnode);
			}
			vput(fsrootvp);
		}
		/* A writable mount without a syncer vnode gets a new one. */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
			(void) vfs_allocate_syncvnode(mp);
		/* Drop the claim, restore MNT_ASYNC, release the mount lock. */
		mtx_lock(&mountlist_mtx);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
		    &mountlist_mtx, td);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup((caddr_t)mp);
		return (error);
	}
	/* Success: unlink the mount and detach it from the covered vnode. */
	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
		coveredvp->v_mountedhere = NULL;
	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
	lockdestroy(&mp->mnt_lock);
	if (coveredvp != NULL)
		vrele(coveredvp);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup((caddr_t)mp);
	/*
	 * Filesystems without the old vfs_mount() entry point are the
	 * ones vfs_nmount() accepts, and it hangs an option list off
	 * mnt_opt; release that list along with the mount.
	 */
	if (mp->mnt_op->vfs_mount == NULL)
		vfs_freeopts(mp->mnt_opt);
	free((caddr_t)mp, M_MOUNT);
	return (0);
}
1189
1190/*
1191 * Sync each mounted filesystem.
1192 */
1193#ifndef _SYS_SYSPROTO_H_
1194struct sync_args {
1195        int     dummy;
1196};
1197#endif
1198
1199#ifdef DEBUG
1200static int syncprt = 0;
1201SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
1202#endif
1203
1204/* ARGSUSED */
1205int
1206sync(td, uap)
1207	struct thread *td;
1208	struct sync_args *uap;
1209{
1210	struct mount *mp, *nmp;
1211	int asyncflag;
1212
1213	mtx_lock(&mountlist_mtx);
1214	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1215		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1216			nmp = TAILQ_NEXT(mp, mnt_list);
1217			continue;
1218		}
1219		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
1220		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
1221			asyncflag = mp->mnt_flag & MNT_ASYNC;
1222			mp->mnt_flag &= ~MNT_ASYNC;
1223			vfs_msync(mp, MNT_NOWAIT);
1224			VFS_SYNC(mp, MNT_NOWAIT,
1225			    ((td != NULL) ? td->td_ucred : NOCRED), td);
1226			mp->mnt_flag |= asyncflag;
1227			vn_finished_write(mp);
1228		}
1229		mtx_lock(&mountlist_mtx);
1230		nmp = TAILQ_NEXT(mp, mnt_list);
1231		vfs_unbusy(mp, td);
1232	}
1233	mtx_unlock(&mountlist_mtx);
1234#if 0
1235/*
1236 * XXX don't call vfs_bufstats() yet because that routine
1237 * was not imported in the Lite2 merge.
1238 */
1239#ifdef DIAGNOSTIC
1240	if (syncprt)
1241		vfs_bufstats();
1242#endif /* DIAGNOSTIC */
1243#endif
1244	return (0);
1245}
1246
1247/* XXX PRISON: could be per prison flag */
1248static int prison_quotas;
1249#if 0
1250SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
1251#endif
1252
1253/*
1254 * Change filesystem quotas.
1255 */
1256#ifndef _SYS_SYSPROTO_H_
1257struct quotactl_args {
1258	char *path;
1259	int cmd;
1260	int uid;
1261	caddr_t arg;
1262};
1263#endif
1264/* ARGSUSED */
1265int
1266quotactl(td, uap)
1267	struct thread *td;
1268	register struct quotactl_args /* {
1269		syscallarg(char *) path;
1270		syscallarg(int) cmd;
1271		syscallarg(int) uid;
1272		syscallarg(caddr_t) arg;
1273	} */ *uap;
1274{
1275	struct mount *mp;
1276	int error;
1277	struct nameidata nd;
1278
1279	if (jailed(td->td_ucred) && !prison_quotas)
1280		return (EPERM);
1281	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1282	if ((error = namei(&nd)) != 0)
1283		return (error);
1284	NDFREE(&nd, NDF_ONLY_PNBUF);
1285	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
1286	vrele(nd.ni_vp);
1287	if (error)
1288		return (error);
1289	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
1290	    SCARG(uap, arg), td);
1291	vn_finished_write(mp);
1292	return (error);
1293}
1294
1295/*
1296 * Get filesystem statistics.
1297 */
1298#ifndef _SYS_SYSPROTO_H_
1299struct statfs_args {
1300	char *path;
1301	struct statfs *buf;
1302};
1303#endif
1304/* ARGSUSED */
1305int
1306statfs(td, uap)
1307	struct thread *td;
1308	register struct statfs_args /* {
1309		syscallarg(char *) path;
1310		syscallarg(struct statfs *) buf;
1311	} */ *uap;
1312{
1313	register struct mount *mp;
1314	register struct statfs *sp;
1315	int error;
1316	struct nameidata nd;
1317	struct statfs sb;
1318
1319	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1320	if ((error = namei(&nd)) != 0)
1321		return (error);
1322	mp = nd.ni_vp->v_mount;
1323	sp = &mp->mnt_stat;
1324	NDFREE(&nd, NDF_ONLY_PNBUF);
1325	vrele(nd.ni_vp);
1326	error = VFS_STATFS(mp, sp, td);
1327	if (error)
1328		return (error);
1329	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1330	if (suser(td)) {
1331		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1332		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1333		sp = &sb;
1334	}
1335	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1336}
1337
1338/*
1339 * Get filesystem statistics.
1340 */
1341#ifndef _SYS_SYSPROTO_H_
1342struct fstatfs_args {
1343	int fd;
1344	struct statfs *buf;
1345};
1346#endif
1347/* ARGSUSED */
1348int
1349fstatfs(td, uap)
1350	struct thread *td;
1351	register struct fstatfs_args /* {
1352		syscallarg(int) fd;
1353		syscallarg(struct statfs *) buf;
1354	} */ *uap;
1355{
1356	struct file *fp;
1357	struct mount *mp;
1358	register struct statfs *sp;
1359	int error;
1360	struct statfs sb;
1361
1362	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
1363		return (error);
1364	mp = ((struct vnode *)fp->f_data)->v_mount;
1365	fdrop(fp, td);
1366	if (mp == NULL)
1367		return (EBADF);
1368	sp = &mp->mnt_stat;
1369	error = VFS_STATFS(mp, sp, td);
1370	if (error)
1371		return (error);
1372	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1373	if (suser(td)) {
1374		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
1375		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
1376		sp = &sb;
1377	}
1378	return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp)));
1379}
1380
1381/*
1382 * Get statistics on all filesystems.
1383 */
1384#ifndef _SYS_SYSPROTO_H_
1385struct getfsstat_args {
1386	struct statfs *buf;
1387	long bufsize;
1388	int flags;
1389};
1390#endif
1391int
1392getfsstat(td, uap)
1393	struct thread *td;
1394	register struct getfsstat_args /* {
1395		syscallarg(struct statfs *) buf;
1396		syscallarg(long) bufsize;
1397		syscallarg(int) flags;
1398	} */ *uap;
1399{
1400	register struct mount *mp, *nmp;
1401	register struct statfs *sp;
1402	caddr_t sfsp;
1403	long count, maxcount, error;
1404
1405	maxcount = SCARG(uap, bufsize) / sizeof(struct statfs);
1406	sfsp = (caddr_t)SCARG(uap, buf);
1407	count = 0;
1408	mtx_lock(&mountlist_mtx);
1409	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1410		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) {
1411			nmp = TAILQ_NEXT(mp, mnt_list);
1412			continue;
1413		}
1414		if (sfsp && count < maxcount) {
1415			sp = &mp->mnt_stat;
1416			/*
1417			 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1418			 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
1419			 * overrides MNT_WAIT.
1420			 */
1421			if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
1422			    (SCARG(uap, flags) & MNT_WAIT)) &&
1423			    (error = VFS_STATFS(mp, sp, td))) {
1424				mtx_lock(&mountlist_mtx);
1425				nmp = TAILQ_NEXT(mp, mnt_list);
1426				vfs_unbusy(mp, td);
1427				continue;
1428			}
1429			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1430			error = copyout((caddr_t)sp, sfsp, sizeof(*sp));
1431			if (error) {
1432				vfs_unbusy(mp, td);
1433				return (error);
1434			}
1435			sfsp += sizeof(*sp);
1436		}
1437		count++;
1438		mtx_lock(&mountlist_mtx);
1439		nmp = TAILQ_NEXT(mp, mnt_list);
1440		vfs_unbusy(mp, td);
1441	}
1442	mtx_unlock(&mountlist_mtx);
1443	if (sfsp && count > maxcount)
1444		td->td_retval[0] = maxcount;
1445	else
1446		td->td_retval[0] = count;
1447	return (0);
1448}
1449
1450/*
1451 * Change current working directory to a given file descriptor.
1452 */
1453#ifndef _SYS_SYSPROTO_H_
1454struct fchdir_args {
1455	int	fd;
1456};
1457#endif
1458/* ARGSUSED */
1459int
1460fchdir(td, uap)
1461	struct thread *td;
1462	struct fchdir_args /* {
1463		syscallarg(int) fd;
1464	} */ *uap;
1465{
1466	register struct filedesc *fdp = td->td_proc->p_fd;
1467	struct vnode *vp, *tdp, *vpold;
1468	struct mount *mp;
1469	struct file *fp;
1470	int error;
1471
1472	if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1473		return (error);
1474	vp = (struct vnode *)fp->f_data;
1475	VREF(vp);
1476	fdrop(fp, td);
1477	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1478	if (vp->v_type != VDIR)
1479		error = ENOTDIR;
1480	else
1481		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1482	while (!error && (mp = vp->v_mountedhere) != NULL) {
1483		if (vfs_busy(mp, 0, 0, td))
1484			continue;
1485		error = VFS_ROOT(mp, &tdp);
1486		vfs_unbusy(mp, td);
1487		if (error)
1488			break;
1489		vput(vp);
1490		vp = tdp;
1491	}
1492	if (error) {
1493		vput(vp);
1494		return (error);
1495	}
1496	VOP_UNLOCK(vp, 0, td);
1497	FILEDESC_LOCK(fdp);
1498	vpold = fdp->fd_cdir;
1499	fdp->fd_cdir = vp;
1500	FILEDESC_UNLOCK(fdp);
1501	vrele(vpold);
1502	return (0);
1503}
1504
1505/*
1506 * Change current working directory (``.'').
1507 */
1508#ifndef _SYS_SYSPROTO_H_
1509struct chdir_args {
1510	char	*path;
1511};
1512#endif
1513/* ARGSUSED */
1514int
1515chdir(td, uap)
1516	struct thread *td;
1517	struct chdir_args /* {
1518		syscallarg(char *) path;
1519	} */ *uap;
1520{
1521	register struct filedesc *fdp = td->td_proc->p_fd;
1522	int error;
1523	struct nameidata nd;
1524	struct vnode *vp;
1525
1526	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1527	    SCARG(uap, path), td);
1528	if ((error = change_dir(&nd, td)) != 0)
1529		return (error);
1530	NDFREE(&nd, NDF_ONLY_PNBUF);
1531	FILEDESC_LOCK(fdp);
1532	vp = fdp->fd_cdir;
1533	fdp->fd_cdir = nd.ni_vp;
1534	FILEDESC_UNLOCK(fdp);
1535	vrele(vp);
1536	return (0);
1537}
1538
1539/*
1540 * Helper function for raised chroot(2) security function:  Refuse if
1541 * any filedescriptors are open directories.
1542 */
1543static int
1544chroot_refuse_vdir_fds(fdp)
1545	struct filedesc *fdp;
1546{
1547	struct vnode *vp;
1548	struct file *fp;
1549	int fd;
1550
1551	FILEDESC_LOCK(fdp);
1552	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
1553		fp = fget_locked(fdp, fd);
1554		if (fp == NULL)
1555			continue;
1556		if (fp->f_type == DTYPE_VNODE) {
1557			vp = (struct vnode *)fp->f_data;
1558			if (vp->v_type == VDIR) {
1559				FILEDESC_UNLOCK(fdp);
1560				return (EPERM);
1561			}
1562		}
1563	}
1564	FILEDESC_UNLOCK(fdp);
1565	return (0);
1566}
1567
1568/*
1569 * This sysctl determines if we will allow a process to chroot(2) if it
1570 * has a directory open:
1571 *	0: disallowed for all processes.
1572 *	1: allowed for processes that were not already chroot(2)'ed.
1573 *	2: allowed for all processes.
1574 */
1575
1576static int chroot_allow_open_directories = 1;
1577
1578SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
1579     &chroot_allow_open_directories, 0, "");
1580
1581/*
1582 * Change notion of root (``/'') directory.
1583 */
1584#ifndef _SYS_SYSPROTO_H_
1585struct chroot_args {
1586	char	*path;
1587};
1588#endif
1589/* ARGSUSED */
1590int
1591chroot(td, uap)
1592	struct thread *td;
1593	struct chroot_args /* {
1594		syscallarg(char *) path;
1595	} */ *uap;
1596{
1597	register struct filedesc *fdp = td->td_proc->p_fd;
1598	int error;
1599	struct nameidata nd;
1600	struct vnode *vp;
1601
1602	error = suser_cred(td->td_ucred, PRISON_ROOT);
1603	if (error)
1604		return (error);
1605	FILEDESC_LOCK(fdp);
1606	if (chroot_allow_open_directories == 0 ||
1607	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
1608		FILEDESC_UNLOCK(fdp);
1609		error = chroot_refuse_vdir_fds(fdp);
1610	} else
1611		FILEDESC_UNLOCK(fdp);
1612	if (error)
1613		return (error);
1614	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1615	    SCARG(uap, path), td);
1616	if ((error = change_dir(&nd, td)) != 0)
1617		return (error);
1618	NDFREE(&nd, NDF_ONLY_PNBUF);
1619	FILEDESC_LOCK(fdp);
1620	vp = fdp->fd_rdir;
1621	fdp->fd_rdir = nd.ni_vp;
1622	if (!fdp->fd_jdir) {
1623		fdp->fd_jdir = nd.ni_vp;
1624                VREF(fdp->fd_jdir);
1625	}
1626	FILEDESC_UNLOCK(fdp);
1627	vrele(vp);
1628	return (0);
1629}
1630
1631/*
1632 * Common routine for chroot and chdir.
1633 */
1634static int
1635change_dir(ndp, td)
1636	register struct nameidata *ndp;
1637	struct thread *td;
1638{
1639	struct vnode *vp;
1640	int error;
1641
1642	error = namei(ndp);
1643	if (error)
1644		return (error);
1645	vp = ndp->ni_vp;
1646	if (vp->v_type != VDIR)
1647		error = ENOTDIR;
1648	else
1649		error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
1650	if (error)
1651		vput(vp);
1652	else
1653		VOP_UNLOCK(vp, 0, td);
1654	return (error);
1655}
1656
1657/*
1658 * Check permissions, allocate an open file structure,
1659 * and call the device open routine if any.
1660 */
1661#ifndef _SYS_SYSPROTO_H_
1662struct open_args {
1663	char	*path;
1664	int	flags;
1665	int	mode;
1666};
1667#endif
1668int
1669open(td, uap)
1670	struct thread *td;
1671	register struct open_args /* {
1672		syscallarg(char *) path;
1673		syscallarg(int) flags;
1674		syscallarg(int) mode;
1675	} */ *uap;
1676{
1677	struct proc *p = td->td_proc;
1678	struct filedesc *fdp = p->p_fd;
1679	struct file *fp;
1680	struct vnode *vp;
1681	struct vattr vat;
1682	struct mount *mp;
1683	int cmode, flags, oflags;
1684	struct file *nfp;
1685	int type, indx, error;
1686	struct flock lf;
1687	struct nameidata nd;
1688
1689	oflags = SCARG(uap, flags);
1690	if ((oflags & O_ACCMODE) == O_ACCMODE)
1691		return (EINVAL);
1692	flags = FFLAGS(oflags);
1693	error = falloc(td, &nfp, &indx);
1694	if (error)
1695		return (error);
1696	fp = nfp;
1697	FILEDESC_LOCK(fdp);
1698	cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
1699	FILEDESC_UNLOCK(fdp);
1700	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
1701	td->td_dupfd = -indx - 1;		/* XXX check for fdopen */
1702	/*
1703	 * Bump the ref count to prevent another process from closing
1704	 * the descriptor while we are blocked in vn_open()
1705	 */
1706	fhold(fp);
1707	error = vn_open(&nd, &flags, cmode);
1708	if (error) {
1709		/*
1710		 * release our own reference
1711		 */
1712		fdrop(fp, td);
1713
1714		/*
1715		 * handle special fdopen() case.  bleh.  dupfdopen() is
1716		 * responsible for dropping the old contents of ofiles[indx]
1717		 * if it succeeds.
1718		 */
1719		if ((error == ENODEV || error == ENXIO) &&
1720		    td->td_dupfd >= 0 &&		/* XXX from fdopen */
1721		    (error =
1722			dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) {
1723			td->td_retval[0] = indx;
1724			return (0);
1725		}
1726		/*
1727		 * Clean up the descriptor, but only if another thread hadn't
1728		 * replaced or closed it.
1729		 */
1730		FILEDESC_LOCK(fdp);
1731		if (fdp->fd_ofiles[indx] == fp) {
1732			fdp->fd_ofiles[indx] = NULL;
1733			FILEDESC_UNLOCK(fdp);
1734			fdrop(fp, td);
1735		} else
1736			FILEDESC_UNLOCK(fdp);
1737
1738		if (error == ERESTART)
1739			error = EINTR;
1740		return (error);
1741	}
1742	td->td_dupfd = 0;
1743	NDFREE(&nd, NDF_ONLY_PNBUF);
1744	vp = nd.ni_vp;
1745
1746	/*
1747	 * There should be 2 references on the file, one from the descriptor
1748	 * table, and one for us.
1749	 *
1750	 * Handle the case where someone closed the file (via its file
1751	 * descriptor) while we were blocked.  The end result should look
1752	 * like opening the file succeeded but it was immediately closed.
1753	 */
1754	FILEDESC_LOCK(fdp);
1755	FILE_LOCK(fp);
1756	if (fp->f_count == 1) {
1757		KASSERT(fdp->fd_ofiles[indx] != fp,
1758		    ("Open file descriptor lost all refs"));
1759		FILEDESC_UNLOCK(fdp);
1760		FILE_UNLOCK(fp);
1761		VOP_UNLOCK(vp, 0, td);
1762		vn_close(vp, flags & FMASK, fp->f_cred, td);
1763		fdrop(fp, td);
1764		td->td_retval[0] = indx;
1765		return 0;
1766	}
1767
1768	fp->f_data = (caddr_t)vp;
1769	fp->f_flag = flags & FMASK;
1770	fp->f_ops = &vnops;
1771	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
1772	FILEDESC_UNLOCK(fdp);
1773	FILE_UNLOCK(fp);
1774	VOP_UNLOCK(vp, 0, td);
1775	if (flags & (O_EXLOCK | O_SHLOCK)) {
1776		lf.l_whence = SEEK_SET;
1777		lf.l_start = 0;
1778		lf.l_len = 0;
1779		if (flags & O_EXLOCK)
1780			lf.l_type = F_WRLCK;
1781		else
1782			lf.l_type = F_RDLCK;
1783		type = F_FLOCK;
1784		if ((flags & FNONBLOCK) == 0)
1785			type |= F_WAIT;
1786		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0)
1787			goto bad;
1788		fp->f_flag |= FHASLOCK;
1789	}
1790	if (flags & O_TRUNC) {
1791		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
1792			goto bad;
1793		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
1794		VATTR_NULL(&vat);
1795		vat.va_size = 0;
1796		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1797		error = VOP_SETATTR(vp, &vat, td->td_ucred, td);
1798		VOP_UNLOCK(vp, 0, td);
1799		vn_finished_write(mp);
1800		if (error)
1801			goto bad;
1802	}
1803	/* assert that vn_open created a backing object if one is needed */
1804	KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0,
1805		("open: vmio vnode has no backing object after vn_open"));
1806	/*
1807	 * Release our private reference, leaving the one associated with
1808	 * the descriptor table intact.
1809	 */
1810	fdrop(fp, td);
1811	td->td_retval[0] = indx;
1812	return (0);
1813bad:
1814	FILEDESC_LOCK(fdp);
1815	if (fdp->fd_ofiles[indx] == fp) {
1816		fdp->fd_ofiles[indx] = NULL;
1817		FILEDESC_UNLOCK(fdp);
1818		fdrop(fp, td);
1819	} else
1820		FILEDESC_UNLOCK(fdp);
1821	return (error);
1822}
1823
#ifdef COMPAT_43
/*
 * Old creat(2) compatibility entry point: create a file by calling
 * open() with O_WRONLY | O_CREAT | O_TRUNC and the caller's path and
 * mode.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(td, uap)
	struct thread *td;
	register struct ocreat_args /* {
		syscallarg(char *) path;
		syscallarg(int) mode;
	} */ *uap;
{
	struct open_args /* {
		syscallarg(char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */ open_uap;

	/* Build the equivalent open() argument block. */
	SCARG(&open_uap, flags) = O_WRONLY | O_CREAT | O_TRUNC;
	SCARG(&open_uap, path) = SCARG(uap, path);
	SCARG(&open_uap, mode) = SCARG(uap, mode);
	return (open(td, &open_uap));
}
#endif /* COMPAT_43 */
1854
1855/*
1856 * Create a special file.
1857 */
1858#ifndef _SYS_SYSPROTO_H_
1859struct mknod_args {
1860	char	*path;
1861	int	mode;
1862	int	dev;
1863};
1864#endif
1865/* ARGSUSED */
1866int
1867mknod(td, uap)
1868	struct thread *td;
1869	register struct mknod_args /* {
1870		syscallarg(char *) path;
1871		syscallarg(int) mode;
1872		syscallarg(int) dev;
1873	} */ *uap;
1874{
1875	struct vnode *vp;
1876	struct mount *mp;
1877	struct vattr vattr;
1878	int error;
1879	int whiteout = 0;
1880	struct nameidata nd;
1881
1882	switch (SCARG(uap, mode) & S_IFMT) {
1883	case S_IFCHR:
1884	case S_IFBLK:
1885		error = suser(td);
1886		break;
1887	default:
1888		error = suser_cred(td->td_ucred, PRISON_ROOT);
1889		break;
1890	}
1891	if (error)
1892		return (error);
1893restart:
1894	bwillwrite();
1895	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1896	if ((error = namei(&nd)) != 0)
1897		return (error);
1898	vp = nd.ni_vp;
1899	if (vp != NULL) {
1900		vrele(vp);
1901		error = EEXIST;
1902	} else {
1903		VATTR_NULL(&vattr);
1904		FILEDESC_LOCK(td->td_proc->p_fd);
1905		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1906		FILEDESC_UNLOCK(td->td_proc->p_fd);
1907		vattr.va_rdev = SCARG(uap, dev);
1908		whiteout = 0;
1909
1910		switch (SCARG(uap, mode) & S_IFMT) {
1911		case S_IFMT:	/* used by badsect to flag bad sectors */
1912			vattr.va_type = VBAD;
1913			break;
1914		case S_IFCHR:
1915			vattr.va_type = VCHR;
1916			break;
1917		case S_IFBLK:
1918			vattr.va_type = VBLK;
1919			break;
1920		case S_IFWHT:
1921			whiteout = 1;
1922			break;
1923		default:
1924			error = EINVAL;
1925			break;
1926		}
1927	}
1928	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1929		NDFREE(&nd, NDF_ONLY_PNBUF);
1930		vput(nd.ni_dvp);
1931		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1932			return (error);
1933		goto restart;
1934	}
1935	if (!error) {
1936		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
1937		if (whiteout)
1938			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1939		else {
1940			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1941						&nd.ni_cnd, &vattr);
1942			if (error == 0)
1943				vput(nd.ni_vp);
1944		}
1945	}
1946	NDFREE(&nd, NDF_ONLY_PNBUF);
1947	vput(nd.ni_dvp);
1948	vn_finished_write(mp);
1949	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
1950	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
1951	return (error);
1952}
1953
1954/*
1955 * Create a named pipe.
1956 */
1957#ifndef _SYS_SYSPROTO_H_
1958struct mkfifo_args {
1959	char	*path;
1960	int	mode;
1961};
1962#endif
1963/* ARGSUSED */
1964int
1965mkfifo(td, uap)
1966	struct thread *td;
1967	register struct mkfifo_args /* {
1968		syscallarg(char *) path;
1969		syscallarg(int) mode;
1970	} */ *uap;
1971{
1972	struct mount *mp;
1973	struct vattr vattr;
1974	int error;
1975	struct nameidata nd;
1976
1977restart:
1978	bwillwrite();
1979	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
1980	if ((error = namei(&nd)) != 0)
1981		return (error);
1982	if (nd.ni_vp != NULL) {
1983		NDFREE(&nd, NDF_ONLY_PNBUF);
1984		vrele(nd.ni_vp);
1985		vput(nd.ni_dvp);
1986		return (EEXIST);
1987	}
1988	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1989		NDFREE(&nd, NDF_ONLY_PNBUF);
1990		vput(nd.ni_dvp);
1991		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
1992			return (error);
1993		goto restart;
1994	}
1995	VATTR_NULL(&vattr);
1996	vattr.va_type = VFIFO;
1997	FILEDESC_LOCK(td->td_proc->p_fd);
1998	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask;
1999	FILEDESC_UNLOCK(td->td_proc->p_fd);
2000	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2001	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2002	if (error == 0)
2003		vput(nd.ni_vp);
2004	NDFREE(&nd, NDF_ONLY_PNBUF);
2005	vput(nd.ni_dvp);
2006	vn_finished_write(mp);
2007	return (error);
2008}
2009
2010/*
2011 * Make a hard file link.
2012 */
2013#ifndef _SYS_SYSPROTO_H_
2014struct link_args {
2015	char	*path;
2016	char	*link;
2017};
2018#endif
2019/* ARGSUSED */
2020int
2021link(td, uap)
2022	struct thread *td;
2023	register struct link_args /* {
2024		syscallarg(char *) path;
2025		syscallarg(char *) link;
2026	} */ *uap;
2027{
2028	struct vnode *vp;
2029	struct mount *mp;
2030	struct nameidata nd;
2031	int error;
2032
2033	bwillwrite();
2034	NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td);
2035	if ((error = namei(&nd)) != 0)
2036		return (error);
2037	NDFREE(&nd, NDF_ONLY_PNBUF);
2038	vp = nd.ni_vp;
2039	if (vp->v_type == VDIR) {
2040		vrele(vp);
2041		return (EPERM);		/* POSIX */
2042	}
2043	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
2044		vrele(vp);
2045		return (error);
2046	}
2047	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2048	if ((error = namei(&nd)) == 0) {
2049		if (nd.ni_vp != NULL) {
2050			vrele(nd.ni_vp);
2051			error = EEXIST;
2052		} else {
2053			VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2054			VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2055			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2056		}
2057		NDFREE(&nd, NDF_ONLY_PNBUF);
2058		vput(nd.ni_dvp);
2059	}
2060	vrele(vp);
2061	vn_finished_write(mp);
2062	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
2063	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
2064	return (error);
2065}
2066
2067/*
2068 * Make a symbolic link.
2069 */
2070#ifndef _SYS_SYSPROTO_H_
2071struct symlink_args {
2072	char	*path;
2073	char	*link;
2074};
2075#endif
2076/* ARGSUSED */
2077int
2078symlink(td, uap)
2079	struct thread *td;
2080	register struct symlink_args /* {
2081		syscallarg(char *) path;
2082		syscallarg(char *) link;
2083	} */ *uap;
2084{
2085	struct mount *mp;
2086	struct vattr vattr;
2087	char *path;
2088	int error;
2089	struct nameidata nd;
2090
2091	path = uma_zalloc(namei_zone, M_WAITOK);
2092	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
2093		goto out;
2094restart:
2095	bwillwrite();
2096	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td);
2097	if ((error = namei(&nd)) != 0)
2098		goto out;
2099	if (nd.ni_vp) {
2100		NDFREE(&nd, NDF_ONLY_PNBUF);
2101		vrele(nd.ni_vp);
2102		vput(nd.ni_dvp);
2103		error = EEXIST;
2104		goto out;
2105	}
2106	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2107		NDFREE(&nd, NDF_ONLY_PNBUF);
2108		vput(nd.ni_dvp);
2109		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2110			return (error);
2111		goto restart;
2112	}
2113	VATTR_NULL(&vattr);
2114	FILEDESC_LOCK(td->td_proc->p_fd);
2115	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
2116	FILEDESC_UNLOCK(td->td_proc->p_fd);
2117	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2118	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2119	NDFREE(&nd, NDF_ONLY_PNBUF);
2120	if (error == 0)
2121		vput(nd.ni_vp);
2122	vput(nd.ni_dvp);
2123	vn_finished_write(mp);
2124	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
2125	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
2126out:
2127	uma_zfree(namei_zone, path);
2128	return (error);
2129}
2130
2131/*
2132 * Delete a whiteout from the filesystem.
2133 */
2134/* ARGSUSED */
2135int
2136undelete(td, uap)
2137	struct thread *td;
2138	register struct undelete_args /* {
2139		syscallarg(char *) path;
2140	} */ *uap;
2141{
2142	int error;
2143	struct mount *mp;
2144	struct nameidata nd;
2145
2146restart:
2147	bwillwrite();
2148	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2149	    SCARG(uap, path), td);
2150	error = namei(&nd);
2151	if (error)
2152		return (error);
2153
2154	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2155		NDFREE(&nd, NDF_ONLY_PNBUF);
2156		if (nd.ni_vp)
2157			vrele(nd.ni_vp);
2158		vput(nd.ni_dvp);
2159		return (EEXIST);
2160	}
2161	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2162		NDFREE(&nd, NDF_ONLY_PNBUF);
2163		vput(nd.ni_dvp);
2164		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2165			return (error);
2166		goto restart;
2167	}
2168	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2169	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
2170	NDFREE(&nd, NDF_ONLY_PNBUF);
2171	vput(nd.ni_dvp);
2172	vn_finished_write(mp);
2173	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
2174	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
2175	return (error);
2176}
2177
2178/*
2179 * Delete a name from the filesystem.
2180 */
2181#ifndef _SYS_SYSPROTO_H_
2182struct unlink_args {
2183	char	*path;
2184};
2185#endif
2186/* ARGSUSED */
2187int
2188unlink(td, uap)
2189	struct thread *td;
2190	struct unlink_args /* {
2191		syscallarg(char *) path;
2192	} */ *uap;
2193{
2194	struct mount *mp;
2195	struct vnode *vp;
2196	int error;
2197	struct nameidata nd;
2198
2199restart:
2200	bwillwrite();
2201	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td);
2202	if ((error = namei(&nd)) != 0)
2203		return (error);
2204	vp = nd.ni_vp;
2205	if (vp->v_type == VDIR)
2206		error = EPERM;		/* POSIX */
2207	else {
2208		/*
2209		 * The root of a mounted filesystem cannot be deleted.
2210		 *
2211		 * XXX: can this only be a VDIR case?
2212		 */
2213		if (vp->v_flag & VROOT)
2214			error = EBUSY;
2215	}
2216	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2217		NDFREE(&nd, NDF_ONLY_PNBUF);
2218		vrele(vp);
2219		vput(nd.ni_dvp);
2220		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
2221			return (error);
2222		goto restart;
2223	}
2224	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2225	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2226	if (!error) {
2227		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
2228		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
2229	}
2230	NDFREE(&nd, NDF_ONLY_PNBUF);
2231	vput(nd.ni_dvp);
2232	vput(vp);
2233	vn_finished_write(mp);
2234	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
2235	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
2236	return (error);
2237}
2238
2239/*
2240 * Reposition read/write file offset.
2241 */
2242#ifndef _SYS_SYSPROTO_H_
2243struct lseek_args {
2244	int	fd;
2245	int	pad;
2246	off_t	offset;
2247	int	whence;
2248};
2249#endif
2250int
2251lseek(td, uap)
2252	struct thread *td;
2253	register struct lseek_args /* {
2254		syscallarg(int) fd;
2255		syscallarg(int) pad;
2256		syscallarg(off_t) offset;
2257		syscallarg(int) whence;
2258	} */ *uap;
2259{
2260	struct ucred *cred = td->td_ucred;
2261	struct file *fp;
2262	struct vnode *vp;
2263	struct vattr vattr;
2264	off_t offset;
2265	int error, noneg;
2266
2267	if ((error = fget(td, uap->fd, &fp)) != 0)
2268		return (error);
2269	if (fp->f_type != DTYPE_VNODE) {
2270		fdrop(fp, td);
2271		return (ESPIPE);
2272	}
2273	vp = (struct vnode *)fp->f_data;
2274	noneg = (vp->v_type != VCHR);
2275	offset = SCARG(uap, offset);
2276	switch (SCARG(uap, whence)) {
2277	case L_INCR:
2278		if (noneg &&
2279		    (fp->f_offset < 0 ||
2280		     (offset > 0 && fp->f_offset > OFF_MAX - offset)))
2281			return (EOVERFLOW);
2282		offset += fp->f_offset;
2283		break;
2284	case L_XTND:
2285		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2286		error = VOP_GETATTR(vp, &vattr, cred, td);
2287		VOP_UNLOCK(vp, 0, td);
2288		if (error)
2289			return (error);
2290		if (noneg &&
2291		    (vattr.va_size > OFF_MAX ||
2292		     (offset > 0 && vattr.va_size > OFF_MAX - offset)))
2293			return (EOVERFLOW);
2294		offset += vattr.va_size;
2295		break;
2296	case L_SET:
2297		break;
2298	default:
2299		fdrop(fp, td);
2300		return (EINVAL);
2301	}
2302	if (noneg && offset < 0)
2303		return (EINVAL);
2304	fp->f_offset = offset;
2305	*(off_t *)(td->td_retval) = fp->f_offset;
2306	fdrop(fp, td);
2307	return (0);
2308}
2309
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Old lseek(2) compatibility entry point: takes a `long' offset,
 * repackages the arguments as lseek_args and calls lseek().  The `pad'
 * member of the new argument block is left unset.
 */
#ifndef _SYS_SYSPROTO_H_
struct olseek_args {
	int	fd;
	long	offset;
	int	whence;
};
#endif
int
olseek(td, uap)
	struct thread *td;
	register struct olseek_args /* {
		syscallarg(int) fd;
		syscallarg(long) offset;
		syscallarg(int) whence;
	} */ *uap;
{
	struct lseek_args /* {
		syscallarg(int) fd;
		syscallarg(int) pad;
		syscallarg(off_t) offset;
		syscallarg(int) whence;
	} */ lseek_uap;

	SCARG(&lseek_uap, fd) = SCARG(uap, fd);
	SCARG(&lseek_uap, offset) = SCARG(uap, offset);
	SCARG(&lseek_uap, whence) = SCARG(uap, whence);
	return (lseek(td, &lseek_uap));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
2345
2346/*
2347 * Check access permissions using passed credentials.
2348 */
2349static int
2350vn_access(vp, user_flags, cred, td)
2351	struct vnode	*vp;
2352	int		user_flags;
2353	struct ucred	*cred;
2354	struct thread	*td;
2355{
2356	int error, flags;
2357
2358	/* Flags == 0 means only check for existence. */
2359	error = 0;
2360	if (user_flags) {
2361		flags = 0;
2362		if (user_flags & R_OK)
2363			flags |= VREAD;
2364		if (user_flags & W_OK)
2365			flags |= VWRITE;
2366		if (user_flags & X_OK)
2367			flags |= VEXEC;
2368		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
2369			error = VOP_ACCESS(vp, flags, cred, td);
2370	}
2371	return (error);
2372}
2373
2374/*
2375 * Check access permissions using "real" credentials.
2376 */
2377#ifndef _SYS_SYSPROTO_H_
2378struct access_args {
2379	char	*path;
2380	int	flags;
2381};
2382#endif
2383int
2384access(td, uap)
2385	struct thread *td;
2386	register struct access_args /* {
2387		syscallarg(char *) path;
2388		syscallarg(int) flags;
2389	} */ *uap;
2390{
2391	struct ucred *cred, *tmpcred;
2392	register struct vnode *vp;
2393	int error;
2394	struct nameidata nd;
2395
2396	/*
2397	 * Create and modify a temporary credential instead of one that
2398	 * is potentially shared.  This could also mess up socket
2399	 * buffer accounting which can run in an interrupt context.
2400	 *
2401	 * XXX - Depending on how "threads" are finally implemented, it
2402	 * may be better to explicitly pass the credential to namei()
2403	 * rather than to modify the potentially shared process structure.
2404	 */
2405	cred = td->td_ucred;
2406	tmpcred = crdup(cred);
2407	tmpcred->cr_uid = cred->cr_ruid;
2408	tmpcred->cr_groups[0] = cred->cr_rgid;
2409	td->td_ucred = tmpcred;
2410	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2411	    SCARG(uap, path), td);
2412	if ((error = namei(&nd)) != 0)
2413		goto out1;
2414	vp = nd.ni_vp;
2415
2416	error = vn_access(vp, SCARG(uap, flags), tmpcred, td);
2417	NDFREE(&nd, NDF_ONLY_PNBUF);
2418	vput(vp);
2419out1:
2420	td->td_ucred = cred;
2421	crfree(tmpcred);
2422	return (error);
2423}
2424
2425/*
2426 * Check access permissions using "effective" credentials.
2427 */
2428#ifndef _SYS_SYSPROTO_H_
2429struct eaccess_args {
2430	char	*path;
2431	int	flags;
2432};
2433#endif
2434int
2435eaccess(td, uap)
2436	struct thread *td;
2437	register struct eaccess_args /* {
2438		syscallarg(char *) path;
2439		syscallarg(int) flags;
2440	} */ *uap;
2441{
2442	struct nameidata nd;
2443	struct vnode *vp;
2444	int error;
2445
2446	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2447	    SCARG(uap, path), td);
2448	if ((error = namei(&nd)) != 0)
2449		return (error);
2450	vp = nd.ni_vp;
2451
2452	error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td);
2453	NDFREE(&nd, NDF_ONLY_PNBUF);
2454	vput(vp);
2455	return (error);
2456}
2457
2458#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
2459/*
2460 * Get file status; this version follows links.
2461 */
2462#ifndef _SYS_SYSPROTO_H_
2463struct ostat_args {
2464	char	*path;
2465	struct ostat *ub;
2466};
2467#endif
2468/* ARGSUSED */
2469int
2470ostat(td, uap)
2471	struct thread *td;
2472	register struct ostat_args /* {
2473		syscallarg(char *) path;
2474		syscallarg(struct ostat *) ub;
2475	} */ *uap;
2476{
2477	struct stat sb;
2478	struct ostat osb;
2479	int error;
2480	struct nameidata nd;
2481
2482	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2483	    SCARG(uap, path), td);
2484	if ((error = namei(&nd)) != 0)
2485		return (error);
2486	NDFREE(&nd, NDF_ONLY_PNBUF);
2487	error = vn_stat(nd.ni_vp, &sb, td);
2488	vput(nd.ni_vp);
2489	if (error)
2490		return (error);
2491	cvtstat(&sb, &osb);
2492	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2493	return (error);
2494}
2495
2496/*
2497 * Get file status; this version does not follow links.
2498 */
2499#ifndef _SYS_SYSPROTO_H_
2500struct olstat_args {
2501	char	*path;
2502	struct ostat *ub;
2503};
2504#endif
2505/* ARGSUSED */
2506int
2507olstat(td, uap)
2508	struct thread *td;
2509	register struct olstat_args /* {
2510		syscallarg(char *) path;
2511		syscallarg(struct ostat *) ub;
2512	} */ *uap;
2513{
2514	struct vnode *vp;
2515	struct stat sb;
2516	struct ostat osb;
2517	int error;
2518	struct nameidata nd;
2519
2520	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2521	    SCARG(uap, path), td);
2522	if ((error = namei(&nd)) != 0)
2523		return (error);
2524	vp = nd.ni_vp;
2525	error = vn_stat(vp, &sb, td);
2526	NDFREE(&nd, NDF_ONLY_PNBUF);
2527	vput(vp);
2528	if (error)
2529		return (error);
2530	cvtstat(&sb, &osb);
2531	error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb));
2532	return (error);
2533}
2534
2535/*
2536 * Convert from an old to a new stat structure.
2537 */
2538void
2539cvtstat(st, ost)
2540	struct stat *st;
2541	struct ostat *ost;
2542{
2543
2544	ost->st_dev = st->st_dev;
2545	ost->st_ino = st->st_ino;
2546	ost->st_mode = st->st_mode;
2547	ost->st_nlink = st->st_nlink;
2548	ost->st_uid = st->st_uid;
2549	ost->st_gid = st->st_gid;
2550	ost->st_rdev = st->st_rdev;
2551	if (st->st_size < (quad_t)1 << 32)
2552		ost->st_size = st->st_size;
2553	else
2554		ost->st_size = -2;
2555	ost->st_atime = st->st_atime;
2556	ost->st_mtime = st->st_mtime;
2557	ost->st_ctime = st->st_ctime;
2558	ost->st_blksize = st->st_blksize;
2559	ost->st_blocks = st->st_blocks;
2560	ost->st_flags = st->st_flags;
2561	ost->st_gen = st->st_gen;
2562}
2563#endif /* COMPAT_43 || COMPAT_SUNOS */
2564
2565/*
2566 * Get file status; this version follows links.
2567 */
2568#ifndef _SYS_SYSPROTO_H_
2569struct stat_args {
2570	char	*path;
2571	struct stat *ub;
2572};
2573#endif
2574/* ARGSUSED */
2575int
2576stat(td, uap)
2577	struct thread *td;
2578	register struct stat_args /* {
2579		syscallarg(char *) path;
2580		syscallarg(struct stat *) ub;
2581	} */ *uap;
2582{
2583	struct stat sb;
2584	int error;
2585	struct nameidata nd;
2586
2587#ifndef LOOKUP_EXCLUSIVE
2588	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
2589	    UIO_USERSPACE, SCARG(uap, path), td);
2590#else
2591	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2592	    SCARG(uap, path), td);
2593#endif
2594	if ((error = namei(&nd)) != 0)
2595		return (error);
2596	error = vn_stat(nd.ni_vp, &sb, td);
2597	NDFREE(&nd, NDF_ONLY_PNBUF);
2598	vput(nd.ni_vp);
2599	if (error)
2600		return (error);
2601	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2602	return (error);
2603}
2604
2605/*
2606 * Get file status; this version does not follow links.
2607 */
2608#ifndef _SYS_SYSPROTO_H_
2609struct lstat_args {
2610	char	*path;
2611	struct stat *ub;
2612};
2613#endif
2614/* ARGSUSED */
2615int
2616lstat(td, uap)
2617	struct thread *td;
2618	register struct lstat_args /* {
2619		syscallarg(char *) path;
2620		syscallarg(struct stat *) ub;
2621	} */ *uap;
2622{
2623	int error;
2624	struct vnode *vp;
2625	struct stat sb;
2626	struct nameidata nd;
2627
2628	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2629	    SCARG(uap, path), td);
2630	if ((error = namei(&nd)) != 0)
2631		return (error);
2632	vp = nd.ni_vp;
2633	error = vn_stat(vp, &sb, td);
2634	NDFREE(&nd, NDF_ONLY_PNBUF);
2635	vput(vp);
2636	if (error)
2637		return (error);
2638	error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb));
2639	return (error);
2640}
2641
2642/*
2643 * Implementation of the NetBSD stat() function.
2644 * XXX This should probably be collapsed with the FreeBSD version,
2645 * as the differences are only due to vn_stat() clearing spares at
2646 * the end of the structures.  vn_stat could be split to avoid this,
2647 * and thus collapse the following to close to zero code.
2648 */
2649void
2650cvtnstat(sb, nsb)
2651	struct stat *sb;
2652	struct nstat *nsb;
2653{
2654	nsb->st_dev = sb->st_dev;
2655	nsb->st_ino = sb->st_ino;
2656	nsb->st_mode = sb->st_mode;
2657	nsb->st_nlink = sb->st_nlink;
2658	nsb->st_uid = sb->st_uid;
2659	nsb->st_gid = sb->st_gid;
2660	nsb->st_rdev = sb->st_rdev;
2661	nsb->st_atimespec = sb->st_atimespec;
2662	nsb->st_mtimespec = sb->st_mtimespec;
2663	nsb->st_ctimespec = sb->st_ctimespec;
2664	nsb->st_size = sb->st_size;
2665	nsb->st_blocks = sb->st_blocks;
2666	nsb->st_blksize = sb->st_blksize;
2667	nsb->st_flags = sb->st_flags;
2668	nsb->st_gen = sb->st_gen;
2669	nsb->st_qspare[0] = sb->st_qspare[0];
2670	nsb->st_qspare[1] = sb->st_qspare[1];
2671}
2672
2673#ifndef _SYS_SYSPROTO_H_
2674struct nstat_args {
2675	char	*path;
2676	struct nstat *ub;
2677};
2678#endif
2679/* ARGSUSED */
2680int
2681nstat(td, uap)
2682	struct thread *td;
2683	register struct nstat_args /* {
2684		syscallarg(char *) path;
2685		syscallarg(struct nstat *) ub;
2686	} */ *uap;
2687{
2688	struct stat sb;
2689	struct nstat nsb;
2690	int error;
2691	struct nameidata nd;
2692
2693	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2694	    SCARG(uap, path), td);
2695	if ((error = namei(&nd)) != 0)
2696		return (error);
2697	NDFREE(&nd, NDF_ONLY_PNBUF);
2698	error = vn_stat(nd.ni_vp, &sb, td);
2699	vput(nd.ni_vp);
2700	if (error)
2701		return (error);
2702	cvtnstat(&sb, &nsb);
2703	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2704	return (error);
2705}
2706
2707/*
2708 * NetBSD lstat.  Get file status; this version does not follow links.
2709 */
2710#ifndef _SYS_SYSPROTO_H_
2711struct lstat_args {
2712	char	*path;
2713	struct stat *ub;
2714};
2715#endif
2716/* ARGSUSED */
2717int
2718nlstat(td, uap)
2719	struct thread *td;
2720	register struct nlstat_args /* {
2721		syscallarg(char *) path;
2722		syscallarg(struct nstat *) ub;
2723	} */ *uap;
2724{
2725	int error;
2726	struct vnode *vp;
2727	struct stat sb;
2728	struct nstat nsb;
2729	struct nameidata nd;
2730
2731	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2732	    SCARG(uap, path), td);
2733	if ((error = namei(&nd)) != 0)
2734		return (error);
2735	vp = nd.ni_vp;
2736	NDFREE(&nd, NDF_ONLY_PNBUF);
2737	error = vn_stat(vp, &sb, td);
2738	vput(vp);
2739	if (error)
2740		return (error);
2741	cvtnstat(&sb, &nsb);
2742	error = copyout((caddr_t)&nsb, (caddr_t)SCARG(uap, ub), sizeof (nsb));
2743	return (error);
2744}
2745
2746/*
2747 * Get configurable pathname variables.
2748 */
2749#ifndef _SYS_SYSPROTO_H_
2750struct pathconf_args {
2751	char	*path;
2752	int	name;
2753};
2754#endif
2755/* ARGSUSED */
2756int
2757pathconf(td, uap)
2758	struct thread *td;
2759	register struct pathconf_args /* {
2760		syscallarg(char *) path;
2761		syscallarg(int) name;
2762	} */ *uap;
2763{
2764	int error;
2765	struct nameidata nd;
2766
2767	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2768	    SCARG(uap, path), td);
2769	if ((error = namei(&nd)) != 0)
2770		return (error);
2771	NDFREE(&nd, NDF_ONLY_PNBUF);
2772	error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval);
2773	vput(nd.ni_vp);
2774	return (error);
2775}
2776
2777/*
2778 * Return target name of a symbolic link.
2779 */
2780#ifndef _SYS_SYSPROTO_H_
2781struct readlink_args {
2782	char	*path;
2783	char	*buf;
2784	int	count;
2785};
2786#endif
2787/* ARGSUSED */
2788int
2789readlink(td, uap)
2790	struct thread *td;
2791	register struct readlink_args /* {
2792		syscallarg(char *) path;
2793		syscallarg(char *) buf;
2794		syscallarg(int) count;
2795	} */ *uap;
2796{
2797	register struct vnode *vp;
2798	struct iovec aiov;
2799	struct uio auio;
2800	int error;
2801	struct nameidata nd;
2802
2803	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
2804	    SCARG(uap, path), td);
2805	if ((error = namei(&nd)) != 0)
2806		return (error);
2807	NDFREE(&nd, NDF_ONLY_PNBUF);
2808	vp = nd.ni_vp;
2809	if (vp->v_type != VLNK)
2810		error = EINVAL;
2811	else {
2812		aiov.iov_base = SCARG(uap, buf);
2813		aiov.iov_len = SCARG(uap, count);
2814		auio.uio_iov = &aiov;
2815		auio.uio_iovcnt = 1;
2816		auio.uio_offset = 0;
2817		auio.uio_rw = UIO_READ;
2818		auio.uio_segflg = UIO_USERSPACE;
2819		auio.uio_td = td;
2820		auio.uio_resid = SCARG(uap, count);
2821		error = VOP_READLINK(vp, &auio, td->td_ucred);
2822	}
2823	vput(vp);
2824	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
2825	return (error);
2826}
2827
2828/*
2829 * Common implementation code for chflags() and fchflags().
2830 */
2831static int
2832setfflags(td, vp, flags)
2833	struct thread *td;
2834	struct vnode *vp;
2835	int flags;
2836{
2837	int error;
2838	struct mount *mp;
2839	struct vattr vattr;
2840
2841	/*
2842	 * Prevent non-root users from setting flags on devices.  When
2843	 * a device is reused, users can retain ownership of the device
2844	 * if they are allowed to set flags and programs assume that
2845	 * chown can't fail when done as root.
2846	 */
2847	if (vp->v_type == VCHR || vp->v_type == VBLK) {
2848		error = suser_cred(td->td_ucred, PRISON_ROOT);
2849		if (error)
2850			return (error);
2851	}
2852
2853	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2854		return (error);
2855	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2856	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2857	VATTR_NULL(&vattr);
2858	vattr.va_flags = flags;
2859	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2860	VOP_UNLOCK(vp, 0, td);
2861	vn_finished_write(mp);
2862	return (error);
2863}
2864
2865/*
2866 * Change flags of a file given a path name.
2867 */
2868#ifndef _SYS_SYSPROTO_H_
2869struct chflags_args {
2870	char	*path;
2871	int	flags;
2872};
2873#endif
2874/* ARGSUSED */
2875int
2876chflags(td, uap)
2877	struct thread *td;
2878	register struct chflags_args /* {
2879		syscallarg(char *) path;
2880		syscallarg(int) flags;
2881	} */ *uap;
2882{
2883	int error;
2884	struct nameidata nd;
2885
2886	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2887	if ((error = namei(&nd)) != 0)
2888		return (error);
2889	NDFREE(&nd, NDF_ONLY_PNBUF);
2890	error = setfflags(td, nd.ni_vp, SCARG(uap, flags));
2891	vrele(nd.ni_vp);
2892	return error;
2893}
2894
2895/*
2896 * Change flags of a file given a file descriptor.
2897 */
2898#ifndef _SYS_SYSPROTO_H_
2899struct fchflags_args {
2900	int	fd;
2901	int	flags;
2902};
2903#endif
2904/* ARGSUSED */
2905int
2906fchflags(td, uap)
2907	struct thread *td;
2908	register struct fchflags_args /* {
2909		syscallarg(int) fd;
2910		syscallarg(int) flags;
2911	} */ *uap;
2912{
2913	struct file *fp;
2914	int error;
2915
2916	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2917		return (error);
2918	error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags));
2919	fdrop(fp, td);
2920	return (error);
2921}
2922
2923/*
2924 * Common implementation code for chmod(), lchmod() and fchmod().
2925 */
2926static int
2927setfmode(td, vp, mode)
2928	struct thread *td;
2929	struct vnode *vp;
2930	int mode;
2931{
2932	int error;
2933	struct mount *mp;
2934	struct vattr vattr;
2935
2936	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
2937		return (error);
2938	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
2939	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
2940	VATTR_NULL(&vattr);
2941	vattr.va_mode = mode & ALLPERMS;
2942	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
2943	VOP_UNLOCK(vp, 0, td);
2944	vn_finished_write(mp);
2945	return error;
2946}
2947
2948/*
2949 * Change mode of a file given path name.
2950 */
2951#ifndef _SYS_SYSPROTO_H_
2952struct chmod_args {
2953	char	*path;
2954	int	mode;
2955};
2956#endif
2957/* ARGSUSED */
2958int
2959chmod(td, uap)
2960	struct thread *td;
2961	register struct chmod_args /* {
2962		syscallarg(char *) path;
2963		syscallarg(int) mode;
2964	} */ *uap;
2965{
2966	int error;
2967	struct nameidata nd;
2968
2969	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
2970	if ((error = namei(&nd)) != 0)
2971		return (error);
2972	NDFREE(&nd, NDF_ONLY_PNBUF);
2973	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
2974	vrele(nd.ni_vp);
2975	return error;
2976}
2977
2978/*
2979 * Change mode of a file given path name (don't follow links.)
2980 */
2981#ifndef _SYS_SYSPROTO_H_
2982struct lchmod_args {
2983	char	*path;
2984	int	mode;
2985};
2986#endif
2987/* ARGSUSED */
2988int
2989lchmod(td, uap)
2990	struct thread *td;
2991	register struct lchmod_args /* {
2992		syscallarg(char *) path;
2993		syscallarg(int) mode;
2994	} */ *uap;
2995{
2996	int error;
2997	struct nameidata nd;
2998
2999	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3000	if ((error = namei(&nd)) != 0)
3001		return (error);
3002	NDFREE(&nd, NDF_ONLY_PNBUF);
3003	error = setfmode(td, nd.ni_vp, SCARG(uap, mode));
3004	vrele(nd.ni_vp);
3005	return error;
3006}
3007
3008/*
3009 * Change mode of a file given a file descriptor.
3010 */
3011#ifndef _SYS_SYSPROTO_H_
3012struct fchmod_args {
3013	int	fd;
3014	int	mode;
3015};
3016#endif
3017/* ARGSUSED */
3018int
3019fchmod(td, uap)
3020	struct thread *td;
3021	register struct fchmod_args /* {
3022		syscallarg(int) fd;
3023		syscallarg(int) mode;
3024	} */ *uap;
3025{
3026	struct file *fp;
3027	struct vnode *vp;
3028	int error;
3029
3030	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3031		return (error);
3032	vp = (struct vnode *)fp->f_data;
3033	error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode));
3034	fdrop(fp, td);
3035	return (error);
3036}
3037
3038/*
3039 * Common implementation for chown(), lchown(), and fchown()
3040 */
3041static int
3042setfown(td, vp, uid, gid)
3043	struct thread *td;
3044	struct vnode *vp;
3045	uid_t uid;
3046	gid_t gid;
3047{
3048	int error;
3049	struct mount *mp;
3050	struct vattr vattr;
3051
3052	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3053		return (error);
3054	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3055	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3056	VATTR_NULL(&vattr);
3057	vattr.va_uid = uid;
3058	vattr.va_gid = gid;
3059	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3060	VOP_UNLOCK(vp, 0, td);
3061	vn_finished_write(mp);
3062	return error;
3063}
3064
3065/*
3066 * Set ownership given a path name.
3067 */
3068#ifndef _SYS_SYSPROTO_H_
3069struct chown_args {
3070	char	*path;
3071	int	uid;
3072	int	gid;
3073};
3074#endif
3075/* ARGSUSED */
3076int
3077chown(td, uap)
3078	struct thread *td;
3079	register struct chown_args /* {
3080		syscallarg(char *) path;
3081		syscallarg(int) uid;
3082		syscallarg(int) gid;
3083	} */ *uap;
3084{
3085	int error;
3086	struct nameidata nd;
3087
3088	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3089	if ((error = namei(&nd)) != 0)
3090		return (error);
3091	NDFREE(&nd, NDF_ONLY_PNBUF);
3092	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3093	vrele(nd.ni_vp);
3094	return (error);
3095}
3096
3097/*
3098 * Set ownership given a path name, do not cross symlinks.
3099 */
3100#ifndef _SYS_SYSPROTO_H_
3101struct lchown_args {
3102	char	*path;
3103	int	uid;
3104	int	gid;
3105};
3106#endif
3107/* ARGSUSED */
3108int
3109lchown(td, uap)
3110	struct thread *td;
3111	register struct lchown_args /* {
3112		syscallarg(char *) path;
3113		syscallarg(int) uid;
3114		syscallarg(int) gid;
3115	} */ *uap;
3116{
3117	int error;
3118	struct nameidata nd;
3119
3120	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3121	if ((error = namei(&nd)) != 0)
3122		return (error);
3123	NDFREE(&nd, NDF_ONLY_PNBUF);
3124	error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid));
3125	vrele(nd.ni_vp);
3126	return (error);
3127}
3128
3129/*
3130 * Set ownership given a file descriptor.
3131 */
3132#ifndef _SYS_SYSPROTO_H_
3133struct fchown_args {
3134	int	fd;
3135	int	uid;
3136	int	gid;
3137};
3138#endif
3139/* ARGSUSED */
3140int
3141fchown(td, uap)
3142	struct thread *td;
3143	register struct fchown_args /* {
3144		syscallarg(int) fd;
3145		syscallarg(int) uid;
3146		syscallarg(int) gid;
3147	} */ *uap;
3148{
3149	struct file *fp;
3150	struct vnode *vp;
3151	int error;
3152
3153	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3154		return (error);
3155	vp = (struct vnode *)fp->f_data;
3156	error = setfown(td, (struct vnode *)fp->f_data,
3157		SCARG(uap, uid), SCARG(uap, gid));
3158	fdrop(fp, td);
3159	return (error);
3160}
3161
3162/*
3163 * Common implementation code for utimes(), lutimes(), and futimes().
3164 */
3165static int
3166getutimes(usrtvp, tsp)
3167	const struct timeval *usrtvp;
3168	struct timespec *tsp;
3169{
3170	struct timeval tv[2];
3171	int error;
3172
3173	if (usrtvp == NULL) {
3174		microtime(&tv[0]);
3175		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3176		tsp[1] = tsp[0];
3177	} else {
3178		if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0)
3179			return (error);
3180		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
3181		TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
3182	}
3183	return 0;
3184}
3185
3186/*
3187 * Common implementation code for utimes(), lutimes(), and futimes().
3188 */
3189static int
3190setutimes(td, vp, ts, nullflag)
3191	struct thread *td;
3192	struct vnode *vp;
3193	const struct timespec *ts;
3194	int nullflag;
3195{
3196	int error;
3197	struct mount *mp;
3198	struct vattr vattr;
3199
3200	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
3201		return (error);
3202	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3203	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3204	VATTR_NULL(&vattr);
3205	vattr.va_atime = ts[0];
3206	vattr.va_mtime = ts[1];
3207	if (nullflag)
3208		vattr.va_vaflags |= VA_UTIMES_NULL;
3209	error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3210	VOP_UNLOCK(vp, 0, td);
3211	vn_finished_write(mp);
3212	return error;
3213}
3214
3215/*
3216 * Set the access and modification times of a file.
3217 */
3218#ifndef _SYS_SYSPROTO_H_
3219struct utimes_args {
3220	char	*path;
3221	struct	timeval *tptr;
3222};
3223#endif
3224/* ARGSUSED */
3225int
3226utimes(td, uap)
3227	struct thread *td;
3228	register struct utimes_args /* {
3229		syscallarg(char *) path;
3230		syscallarg(struct timeval *) tptr;
3231	} */ *uap;
3232{
3233	struct timespec ts[2];
3234	struct timeval *usrtvp;
3235	int error;
3236	struct nameidata nd;
3237
3238	usrtvp = SCARG(uap, tptr);
3239	if ((error = getutimes(usrtvp, ts)) != 0)
3240		return (error);
3241	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3242	if ((error = namei(&nd)) != 0)
3243		return (error);
3244	NDFREE(&nd, NDF_ONLY_PNBUF);
3245	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3246	vrele(nd.ni_vp);
3247	return (error);
3248}
3249
3250/*
3251 * Set the access and modification times of a file.
3252 */
3253#ifndef _SYS_SYSPROTO_H_
3254struct lutimes_args {
3255	char	*path;
3256	struct	timeval *tptr;
3257};
3258#endif
3259/* ARGSUSED */
3260int
3261lutimes(td, uap)
3262	struct thread *td;
3263	register struct lutimes_args /* {
3264		syscallarg(char *) path;
3265		syscallarg(struct timeval *) tptr;
3266	} */ *uap;
3267{
3268	struct timespec ts[2];
3269	struct timeval *usrtvp;
3270	int error;
3271	struct nameidata nd;
3272
3273	usrtvp = SCARG(uap, tptr);
3274	if ((error = getutimes(usrtvp, ts)) != 0)
3275		return (error);
3276	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3277	if ((error = namei(&nd)) != 0)
3278		return (error);
3279	NDFREE(&nd, NDF_ONLY_PNBUF);
3280	error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL);
3281	vrele(nd.ni_vp);
3282	return (error);
3283}
3284
3285/*
3286 * Set the access and modification times of a file.
3287 */
3288#ifndef _SYS_SYSPROTO_H_
3289struct futimes_args {
3290	int	fd;
3291	struct	timeval *tptr;
3292};
3293#endif
3294/* ARGSUSED */
3295int
3296futimes(td, uap)
3297	struct thread *td;
3298	register struct futimes_args /* {
3299		syscallarg(int ) fd;
3300		syscallarg(struct timeval *) tptr;
3301	} */ *uap;
3302{
3303	struct timespec ts[2];
3304	struct file *fp;
3305	struct timeval *usrtvp;
3306	int error;
3307
3308	usrtvp = SCARG(uap, tptr);
3309	if ((error = getutimes(usrtvp, ts)) != 0)
3310		return (error);
3311	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3312		return (error);
3313	error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL);
3314	fdrop(fp, td);
3315	return (error);
3316}
3317
3318/*
3319 * Truncate a file given its path name.
3320 */
3321#ifndef _SYS_SYSPROTO_H_
3322struct truncate_args {
3323	char	*path;
3324	int	pad;
3325	off_t	length;
3326};
3327#endif
3328/* ARGSUSED */
3329int
3330truncate(td, uap)
3331	struct thread *td;
3332	register struct truncate_args /* {
3333		syscallarg(char *) path;
3334		syscallarg(int) pad;
3335		syscallarg(off_t) length;
3336	} */ *uap;
3337{
3338	struct mount *mp;
3339	struct vnode *vp;
3340	struct vattr vattr;
3341	int error;
3342	struct nameidata nd;
3343
3344	if (uap->length < 0)
3345		return(EINVAL);
3346	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td);
3347	if ((error = namei(&nd)) != 0)
3348		return (error);
3349	vp = nd.ni_vp;
3350	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3351		vrele(vp);
3352		return (error);
3353	}
3354	NDFREE(&nd, NDF_ONLY_PNBUF);
3355	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3356	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3357	if (vp->v_type == VDIR)
3358		error = EISDIR;
3359	else if ((error = vn_writechk(vp)) == 0 &&
3360	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
3361		VATTR_NULL(&vattr);
3362		vattr.va_size = SCARG(uap, length);
3363		error = VOP_SETATTR(vp, &vattr, td->td_ucred, td);
3364	}
3365	vput(vp);
3366	vn_finished_write(mp);
3367	return (error);
3368}
3369
3370/*
3371 * Truncate a file given a file descriptor.
3372 */
3373#ifndef _SYS_SYSPROTO_H_
3374struct ftruncate_args {
3375	int	fd;
3376	int	pad;
3377	off_t	length;
3378};
3379#endif
3380/* ARGSUSED */
3381int
3382ftruncate(td, uap)
3383	struct thread *td;
3384	register struct ftruncate_args /* {
3385		syscallarg(int) fd;
3386		syscallarg(int) pad;
3387		syscallarg(off_t) length;
3388	} */ *uap;
3389{
3390	struct mount *mp;
3391	struct vattr vattr;
3392	struct vnode *vp;
3393	struct file *fp;
3394	int error;
3395
3396	if (uap->length < 0)
3397		return(EINVAL);
3398	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3399		return (error);
3400	if ((fp->f_flag & FWRITE) == 0) {
3401		fdrop(fp, td);
3402		return (EINVAL);
3403	}
3404	vp = (struct vnode *)fp->f_data;
3405	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3406		fdrop(fp, td);
3407		return (error);
3408	}
3409	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3410	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3411	if (vp->v_type == VDIR)
3412		error = EISDIR;
3413	else if ((error = vn_writechk(vp)) == 0) {
3414		VATTR_NULL(&vattr);
3415		vattr.va_size = SCARG(uap, length);
3416		error = VOP_SETATTR(vp, &vattr, fp->f_cred, td);
3417	}
3418	VOP_UNLOCK(vp, 0, td);
3419	vn_finished_write(mp);
3420	fdrop(fp, td);
3421	return (error);
3422}
3423
3424#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
3425/*
3426 * Truncate a file given its path name.
3427 */
3428#ifndef _SYS_SYSPROTO_H_
3429struct otruncate_args {
3430	char	*path;
3431	long	length;
3432};
3433#endif
3434/* ARGSUSED */
3435int
3436otruncate(td, uap)
3437	struct thread *td;
3438	register struct otruncate_args /* {
3439		syscallarg(char *) path;
3440		syscallarg(long) length;
3441	} */ *uap;
3442{
3443	struct truncate_args /* {
3444		syscallarg(char *) path;
3445		syscallarg(int) pad;
3446		syscallarg(off_t) length;
3447	} */ nuap;
3448
3449	SCARG(&nuap, path) = SCARG(uap, path);
3450	SCARG(&nuap, length) = SCARG(uap, length);
3451	return (truncate(td, &nuap));
3452}
3453
3454/*
3455 * Truncate a file given a file descriptor.
3456 */
3457#ifndef _SYS_SYSPROTO_H_
3458struct oftruncate_args {
3459	int	fd;
3460	long	length;
3461};
3462#endif
3463/* ARGSUSED */
3464int
3465oftruncate(td, uap)
3466	struct thread *td;
3467	register struct oftruncate_args /* {
3468		syscallarg(int) fd;
3469		syscallarg(long) length;
3470	} */ *uap;
3471{
3472	struct ftruncate_args /* {
3473		syscallarg(int) fd;
3474		syscallarg(int) pad;
3475		syscallarg(off_t) length;
3476	} */ nuap;
3477
3478	SCARG(&nuap, fd) = SCARG(uap, fd);
3479	SCARG(&nuap, length) = SCARG(uap, length);
3480	return (ftruncate(td, &nuap));
3481}
3482#endif /* COMPAT_43 || COMPAT_SUNOS */
3483
3484/*
3485 * Sync an open file.
3486 */
3487#ifndef _SYS_SYSPROTO_H_
3488struct fsync_args {
3489	int	fd;
3490};
3491#endif
3492/* ARGSUSED */
3493int
3494fsync(td, uap)
3495	struct thread *td;
3496	struct fsync_args /* {
3497		syscallarg(int) fd;
3498	} */ *uap;
3499{
3500	struct vnode *vp;
3501	struct mount *mp;
3502	struct file *fp;
3503	vm_object_t obj;
3504	int error;
3505
3506	GIANT_REQUIRED;
3507
3508	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3509		return (error);
3510	vp = (struct vnode *)fp->f_data;
3511	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
3512		fdrop(fp, td);
3513		return (error);
3514	}
3515	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3516	if (VOP_GETVOBJECT(vp, &obj) == 0) {
3517		vm_object_page_clean(obj, 0, 0, 0);
3518	}
3519	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td);
3520#ifdef SOFTUPDATES
3521	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3522	    error = softdep_fsync(vp);
3523#endif
3524
3525	VOP_UNLOCK(vp, 0, td);
3526	vn_finished_write(mp);
3527	fdrop(fp, td);
3528	return (error);
3529}
3530
3531/*
3532 * Rename files.  Source and destination must either both be directories,
3533 * or both not be directories.  If target is a directory, it must be empty.
3534 */
3535#ifndef _SYS_SYSPROTO_H_
3536struct rename_args {
3537	char	*from;
3538	char	*to;
3539};
3540#endif
3541/* ARGSUSED */
3542int
3543rename(td, uap)
3544	struct thread *td;
3545	register struct rename_args /* {
3546		syscallarg(char *) from;
3547		syscallarg(char *) to;
3548	} */ *uap;
3549{
3550	struct mount *mp;
3551	struct vnode *tvp, *fvp, *tdvp;
3552	struct nameidata fromnd, tond;
3553	int error;
3554
3555	bwillwrite();
3556	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3557	    SCARG(uap, from), td);
3558	if ((error = namei(&fromnd)) != 0)
3559		return (error);
3560	fvp = fromnd.ni_vp;
3561	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
3562		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3563		vrele(fromnd.ni_dvp);
3564		vrele(fvp);
3565		goto out1;
3566	}
3567	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
3568	    UIO_USERSPACE, SCARG(uap, to), td);
3569	if (fromnd.ni_vp->v_type == VDIR)
3570		tond.ni_cnd.cn_flags |= WILLBEDIR;
3571	if ((error = namei(&tond)) != 0) {
3572		/* Translate error code for rename("dir1", "dir2/."). */
3573		if (error == EISDIR && fvp->v_type == VDIR)
3574			error = EINVAL;
3575		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3576		vrele(fromnd.ni_dvp);
3577		vrele(fvp);
3578		goto out1;
3579	}
3580	tdvp = tond.ni_dvp;
3581	tvp = tond.ni_vp;
3582	if (tvp != NULL) {
3583		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3584			error = ENOTDIR;
3585			goto out;
3586		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3587			error = EISDIR;
3588			goto out;
3589		}
3590	}
3591	if (fvp == tdvp)
3592		error = EINVAL;
3593	/*
3594	 * If source is the same as the destination (that is the
3595	 * same inode number with the same name in the same directory),
3596	 * then there is nothing to do.
3597	 */
3598	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
3599	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3600	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3601	      fromnd.ni_cnd.cn_namelen))
3602		error = -1;
3603out:
3604	if (!error) {
3605		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
3606		if (fromnd.ni_dvp != tdvp) {
3607			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3608		}
3609		if (tvp) {
3610			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
3611		}
3612		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3613				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3614		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3615		NDFREE(&tond, NDF_ONLY_PNBUF);
3616	} else {
3617		NDFREE(&fromnd, NDF_ONLY_PNBUF);
3618		NDFREE(&tond, NDF_ONLY_PNBUF);
3619		if (tdvp == tvp)
3620			vrele(tdvp);
3621		else
3622			vput(tdvp);
3623		if (tvp)
3624			vput(tvp);
3625		vrele(fromnd.ni_dvp);
3626		vrele(fvp);
3627	}
3628	vrele(tond.ni_startdir);
3629	vn_finished_write(mp);
3630	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
3631	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
3632	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
3633	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
3634out1:
3635	if (fromnd.ni_startdir)
3636		vrele(fromnd.ni_startdir);
3637	if (error == -1)
3638		return (0);
3639	return (error);
3640}
3641
3642/*
3643 * Make a directory file.
3644 */
3645#ifndef _SYS_SYSPROTO_H_
3646struct mkdir_args {
3647	char	*path;
3648	int	mode;
3649};
3650#endif
3651/* ARGSUSED */
3652int
3653mkdir(td, uap)
3654	struct thread *td;
3655	register struct mkdir_args /* {
3656		syscallarg(char *) path;
3657		syscallarg(int) mode;
3658	} */ *uap;
3659{
3660
3661	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
3662}
3663
3664int
3665vn_mkdir(path, mode, segflg, td)
3666	char *path;
3667	int mode;
3668	enum uio_seg segflg;
3669	struct thread *td;
3670{
3671	struct mount *mp;
3672	struct vnode *vp;
3673	struct vattr vattr;
3674	int error;
3675	struct nameidata nd;
3676
3677restart:
3678	bwillwrite();
3679	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
3680	nd.ni_cnd.cn_flags |= WILLBEDIR;
3681	if ((error = namei(&nd)) != 0)
3682		return (error);
3683	vp = nd.ni_vp;
3684	if (vp != NULL) {
3685		NDFREE(&nd, NDF_ONLY_PNBUF);
3686		vrele(vp);
3687		vput(nd.ni_dvp);
3688		return (EEXIST);
3689	}
3690	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3691		NDFREE(&nd, NDF_ONLY_PNBUF);
3692		vput(nd.ni_dvp);
3693		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3694			return (error);
3695		goto restart;
3696	}
3697	VATTR_NULL(&vattr);
3698	vattr.va_type = VDIR;
3699	FILEDESC_LOCK(td->td_proc->p_fd);
3700	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
3701	FILEDESC_UNLOCK(td->td_proc->p_fd);
3702	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3703	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3704	NDFREE(&nd, NDF_ONLY_PNBUF);
3705	vput(nd.ni_dvp);
3706	if (!error)
3707		vput(nd.ni_vp);
3708	vn_finished_write(mp);
3709	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
3710	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
3711	return (error);
3712}
3713
3714/*
3715 * Remove a directory file.
3716 */
3717#ifndef _SYS_SYSPROTO_H_
3718struct rmdir_args {
3719	char	*path;
3720};
3721#endif
3722/* ARGSUSED */
3723int
3724rmdir(td, uap)
3725	struct thread *td;
3726	struct rmdir_args /* {
3727		syscallarg(char *) path;
3728	} */ *uap;
3729{
3730	struct mount *mp;
3731	struct vnode *vp;
3732	int error;
3733	struct nameidata nd;
3734
3735restart:
3736	bwillwrite();
3737	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3738	    SCARG(uap, path), td);
3739	if ((error = namei(&nd)) != 0)
3740		return (error);
3741	vp = nd.ni_vp;
3742	if (vp->v_type != VDIR) {
3743		error = ENOTDIR;
3744		goto out;
3745	}
3746	/*
3747	 * No rmdir "." please.
3748	 */
3749	if (nd.ni_dvp == vp) {
3750		error = EINVAL;
3751		goto out;
3752	}
3753	/*
3754	 * The root of a mounted filesystem cannot be deleted.
3755	 */
3756	if (vp->v_flag & VROOT) {
3757		error = EBUSY;
3758		goto out;
3759	}
3760	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3761		NDFREE(&nd, NDF_ONLY_PNBUF);
3762		if (nd.ni_dvp == vp)
3763			vrele(nd.ni_dvp);
3764		else
3765			vput(nd.ni_dvp);
3766		vput(vp);
3767		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
3768			return (error);
3769		goto restart;
3770	}
3771	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
3772	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
3773	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3774	vn_finished_write(mp);
3775out:
3776	NDFREE(&nd, NDF_ONLY_PNBUF);
3777	if (nd.ni_dvp == vp)
3778		vrele(nd.ni_dvp);
3779	else
3780		vput(nd.ni_dvp);
3781	vput(vp);
3782	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
3783	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
3784	return (error);
3785}
3786
#ifdef COMPAT_43
/*
 * Old-format variant of getdirentries, built only under COMPAT_43.
 *
 * Reads directory entries from the directory open on uap->fd into the
 * user buffer uap->buf (uap->count bytes, capped at 64KB), copies the
 * offset at which the read started out through uap->basep, and returns
 * the number of bytes transferred in td->td_retval[0].
 *
 * Except for the direct path taken on non-little-endian hosts when
 * mnt_maxsymlinklen <= 0, the entries are first read into a kernel
 * buffer so that the d_type/d_namlen bytes of every entry can be
 * rewritten before the data is copied to the user.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(td, uap)
	struct thread *td;
	register struct ogetdirentries_args /* {
		syscallarg(int) fd;
		syscallarg(char *) buf;
		syscallarg(u_int) count;
		syscallarg(long *) basep;
	} */ *uap;
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *ent, *end;
	caddr_t kbuf;
	int error, eofflag, nread;
	long loff;

	/* XXX arbitrary sanity limit on `count'. */
	if (SCARG(uap, count) > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto drop;
	}
	vp = (struct vnode *)fp->f_data;
unionread:
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto drop;
	}
	/* Describe the user's buffer. */
	aiov.iov_base = SCARG(uap, buf);
	aiov.iov_len = SCARG(uap, count);
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = SCARG(uap, count);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	loff = auio.uio_offset = fp->f_offset;
#if (BYTE_ORDER != LITTLE_ENDIAN)
	if (vp->v_mount->mnt_maxsymlinklen <= 0) {
		/* Read straight into the user's buffer. */
		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = auio.uio_offset;
	} else
#endif
	{
		/* Read into a kernel buffer of the same size first. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = SCARG(uap, count);
		MALLOC(kbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
		kiov.iov_base = kbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
		    NULL, NULL);
		fp->f_offset = kuio.uio_offset;
		if (error == 0) {
			nread = SCARG(uap, count) - kuio.uio_resid;
			end = (struct dirent *)&kbuf[nread];
			ent = (struct dirent *)kbuf;
			while (ent < end) {
#if (BYTE_ORDER == LITTLE_ENDIAN)
				/*
				 * The old layout's low d_namlen byte sits
				 * where we keep d_type, and its high
				 * (must-be-zero) byte where we keep
				 * d_namlen.
				 */
				ent->d_type = ent->d_namlen;
				ent->d_namlen = 0;
#else
				/*
				 * d_type is the high byte of the old
				 * layout's d_namlen, so it has to be
				 * cleared.
				 */
				ent->d_type = 0;
#endif
				/* A zero record length would never advance. */
				if (ent->d_reclen <= 0) {
					error = EIO;
					break;
				}
				ent = (struct dirent *)
				    ((char *)ent + ent->d_reclen);
			}
			/* Copy out only if every entry was walked. */
			if (ent >= end)
				error = uiomove(kbuf, nread, &auio);
		}
		FREE(kbuf, M_TEMP);
	}
	VOP_UNLOCK(vp, 0, td);
	if (error)
		goto drop;
	if (SCARG(uap, count) == auio.uio_resid) {
		/* Nothing was transferred: see whether to read elsewhere. */
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				goto drop;
		}
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			/*
			 * Root of a union mount: switch the descriptor to
			 * the covered vnode and read that from offset 0.
			 */
			struct vnode *oldvp = vp;

			vp = vp->v_mount->mnt_vnodecovered;
			VREF(vp);
			fp->f_data = (caddr_t) vp;
			fp->f_offset = 0;
			vrele(oldvp);
			goto unionread;
		}
	}
	error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
	    sizeof(long));
	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
drop:
	fdrop(fp, td);
	return (error);
}
#endif /* COMPAT_43 */
3927
3928/*
3929 * Read a block of directory entries in a file system independent format.
3930 */
3931#ifndef _SYS_SYSPROTO_H_
3932struct getdirentries_args {
3933	int	fd;
3934	char	*buf;
3935	u_int	count;
3936	long	*basep;
3937};
3938#endif
3939int
3940getdirentries(td, uap)
3941	struct thread *td;
3942	register struct getdirentries_args /* {
3943		syscallarg(int) fd;
3944		syscallarg(char *) buf;
3945		syscallarg(u_int) count;
3946		syscallarg(long *) basep;
3947	} */ *uap;
3948{
3949	struct vnode *vp;
3950	struct file *fp;
3951	struct uio auio;
3952	struct iovec aiov;
3953	long loff;
3954	int error, eofflag;
3955
3956	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3957		return (error);
3958	if ((fp->f_flag & FREAD) == 0) {
3959		fdrop(fp, td);
3960		return (EBADF);
3961	}
3962	vp = (struct vnode *)fp->f_data;
3963unionread:
3964	if (vp->v_type != VDIR) {
3965		fdrop(fp, td);
3966		return (EINVAL);
3967	}
3968	aiov.iov_base = SCARG(uap, buf);
3969	aiov.iov_len = SCARG(uap, count);
3970	auio.uio_iov = &aiov;
3971	auio.uio_iovcnt = 1;
3972	auio.uio_rw = UIO_READ;
3973	auio.uio_segflg = UIO_USERSPACE;
3974	auio.uio_td = td;
3975	auio.uio_resid = SCARG(uap, count);
3976	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
3977	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
3978	loff = auio.uio_offset = fp->f_offset;
3979	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
3980	fp->f_offset = auio.uio_offset;
3981	VOP_UNLOCK(vp, 0, td);
3982	if (error) {
3983		fdrop(fp, td);
3984		return (error);
3985	}
3986	if (SCARG(uap, count) == auio.uio_resid) {
3987		if (union_dircheckp) {
3988			error = union_dircheckp(td, &vp, fp);
3989			if (error == -1)
3990				goto unionread;
3991			if (error) {
3992				fdrop(fp, td);
3993				return (error);
3994			}
3995		}
3996		if ((vp->v_flag & VROOT) &&
3997		    (vp->v_mount->mnt_flag & MNT_UNION)) {
3998			struct vnode *tvp = vp;
3999			vp = vp->v_mount->mnt_vnodecovered;
4000			VREF(vp);
4001			fp->f_data = (caddr_t) vp;
4002			fp->f_offset = 0;
4003			vrele(tvp);
4004			goto unionread;
4005		}
4006	}
4007	if (SCARG(uap, basep) != NULL) {
4008		error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep),
4009		    sizeof(long));
4010	}
4011	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
4012	fdrop(fp, td);
4013	return (error);
4014}
4015#ifndef _SYS_SYSPROTO_H_
4016struct getdents_args {
4017	int fd;
4018	char *buf;
4019	size_t count;
4020};
4021#endif
4022int
4023getdents(td, uap)
4024	struct thread *td;
4025	register struct getdents_args /* {
4026		syscallarg(int) fd;
4027		syscallarg(char *) buf;
4028		syscallarg(u_int) count;
4029	} */ *uap;
4030{
4031	struct getdirentries_args ap;
4032	ap.fd = uap->fd;
4033	ap.buf = uap->buf;
4034	ap.count = uap->count;
4035	ap.basep = NULL;
4036	return getdirentries(td, &ap);
4037}
4038
4039/*
4040 * Set the mode mask for creation of filesystem nodes.
4041 *
4042 * MP SAFE
4043 */
4044#ifndef _SYS_SYSPROTO_H_
4045struct umask_args {
4046	int	newmask;
4047};
4048#endif
4049int
4050umask(td, uap)
4051	struct thread *td;
4052	struct umask_args /* {
4053		syscallarg(int) newmask;
4054	} */ *uap;
4055{
4056	register struct filedesc *fdp;
4057
4058	FILEDESC_LOCK(td->td_proc->p_fd);
4059	fdp = td->td_proc->p_fd;
4060	td->td_retval[0] = fdp->fd_cmask;
4061	fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS;
4062	FILEDESC_UNLOCK(td->td_proc->p_fd);
4063	return (0);
4064}
4065
4066/*
4067 * Void all references to file by ripping underlying filesystem
4068 * away from vnode.
4069 */
4070#ifndef _SYS_SYSPROTO_H_
4071struct revoke_args {
4072	char	*path;
4073};
4074#endif
4075/* ARGSUSED */
4076int
4077revoke(td, uap)
4078	struct thread *td;
4079	register struct revoke_args /* {
4080		syscallarg(char *) path;
4081	} */ *uap;
4082{
4083	struct mount *mp;
4084	struct vnode *vp;
4085	struct vattr vattr;
4086	int error;
4087	struct nameidata nd;
4088
4089	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path),
4090	    td);
4091	if ((error = namei(&nd)) != 0)
4092		return (error);
4093	vp = nd.ni_vp;
4094	NDFREE(&nd, NDF_ONLY_PNBUF);
4095	if (vp->v_type != VCHR) {
4096		vput(vp);
4097		return (EINVAL);
4098	}
4099	error = VOP_GETATTR(vp, &vattr, td->td_ucred, td);
4100	if (error) {
4101		vput(vp);
4102		return (error);
4103	}
4104	VOP_UNLOCK(vp, 0, td);
4105	if (td->td_ucred->cr_uid != vattr.va_uid) {
4106		error = suser_cred(td->td_ucred, PRISON_ROOT);
4107		if (error)
4108			goto out;
4109	}
4110	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4111		goto out;
4112	if (vcount(vp) > 1)
4113		VOP_REVOKE(vp, REVOKEALL);
4114	vn_finished_write(mp);
4115out:
4116	vrele(vp);
4117	return (error);
4118}
4119
4120/*
4121 * Convert a user file descriptor to a kernel file entry.
4122 * The file entry is locked upon returning.
4123 */
4124int
4125getvnode(fdp, fd, fpp)
4126	struct filedesc *fdp;
4127	int fd;
4128	struct file **fpp;
4129{
4130	int error;
4131	struct file *fp;
4132
4133	fp = NULL;
4134	if (fdp == NULL)
4135		error = EBADF;
4136	else {
4137		FILEDESC_LOCK(fdp);
4138		if ((u_int)fd >= fdp->fd_nfiles ||
4139		    (fp = fdp->fd_ofiles[fd]) == NULL)
4140			error = EBADF;
4141		else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
4142			fp = NULL;
4143			error = EINVAL;
4144		} else {
4145			fhold(fp);
4146			error = 0;
4147		}
4148		FILEDESC_UNLOCK(fdp);
4149	}
4150	*fpp = fp;
4151	return (error);
4152}
4153/*
4154 * Get (NFS) file handle
4155 */
4156#ifndef _SYS_SYSPROTO_H_
4157struct getfh_args {
4158	char	*fname;
4159	fhandle_t *fhp;
4160};
4161#endif
4162int
4163getfh(td, uap)
4164	struct thread *td;
4165	register struct getfh_args *uap;
4166{
4167	struct nameidata nd;
4168	fhandle_t fh;
4169	register struct vnode *vp;
4170	int error;
4171
4172	/*
4173	 * Must be super user
4174	 */
4175	error = suser(td);
4176	if (error)
4177		return (error);
4178	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td);
4179	error = namei(&nd);
4180	if (error)
4181		return (error);
4182	NDFREE(&nd, NDF_ONLY_PNBUF);
4183	vp = nd.ni_vp;
4184	bzero(&fh, sizeof(fh));
4185	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4186	error = VFS_VPTOFH(vp, &fh.fh_fid);
4187	vput(vp);
4188	if (error)
4189		return (error);
4190	error = copyout(&fh, uap->fhp, sizeof (fh));
4191	return (error);
4192}
4193
4194/*
4195 * syscall for the rpc.lockd to use to translate a NFS file handle into
4196 * an open descriptor.
4197 *
4198 * warning: do not remove the suser() call or this becomes one giant
4199 * security hole.
4200 */
4201#ifndef _SYS_SYSPROTO_H_
4202struct fhopen_args {
4203	const struct fhandle *u_fhp;
4204	int flags;
4205};
4206#endif
4207int
4208fhopen(td, uap)
4209	struct thread *td;
4210	struct fhopen_args /* {
4211		syscallarg(const struct fhandle *) u_fhp;
4212		syscallarg(int) flags;
4213	} */ *uap;
4214{
4215	struct proc *p = td->td_proc;
4216	struct mount *mp;
4217	struct vnode *vp;
4218	struct fhandle fhp;
4219	struct vattr vat;
4220	struct vattr *vap = &vat;
4221	struct flock lf;
4222	struct file *fp;
4223	register struct filedesc *fdp = p->p_fd;
4224	int fmode, mode, error, type;
4225	struct file *nfp;
4226	int indx;
4227
4228	/*
4229	 * Must be super user
4230	 */
4231	error = suser(td);
4232	if (error)
4233		return (error);
4234
4235	fmode = FFLAGS(SCARG(uap, flags));
4236	/* why not allow a non-read/write open for our lockd? */
4237	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
4238		return (EINVAL);
4239	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
4240	if (error)
4241		return(error);
4242	/* find the mount point */
4243	mp = vfs_getvfs(&fhp.fh_fsid);
4244	if (mp == NULL)
4245		return (ESTALE);
4246	/* now give me my vnode, it gets returned to me locked */
4247	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
4248	if (error)
4249		return (error);
4250 	/*
4251	 * from now on we have to make sure not
4252	 * to forget about the vnode
4253	 * any error that causes an abort must vput(vp)
4254	 * just set error = err and 'goto bad;'.
4255	 */
4256
4257	/*
4258	 * from vn_open
4259	 */
4260	if (vp->v_type == VLNK) {
4261		error = EMLINK;
4262		goto bad;
4263	}
4264	if (vp->v_type == VSOCK) {
4265		error = EOPNOTSUPP;
4266		goto bad;
4267	}
4268	mode = 0;
4269	if (fmode & (FWRITE | O_TRUNC)) {
4270		if (vp->v_type == VDIR) {
4271			error = EISDIR;
4272			goto bad;
4273		}
4274		error = vn_writechk(vp);
4275		if (error)
4276			goto bad;
4277		mode |= VWRITE;
4278	}
4279	if (fmode & FREAD)
4280		mode |= VREAD;
4281	if (mode) {
4282		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
4283		if (error)
4284			goto bad;
4285	}
4286	if (fmode & O_TRUNC) {
4287		VOP_UNLOCK(vp, 0, td);				/* XXX */
4288		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
4289			vrele(vp);
4290			return (error);
4291		}
4292		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4293		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
4294		VATTR_NULL(vap);
4295		vap->va_size = 0;
4296		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
4297		vn_finished_write(mp);
4298		if (error)
4299			goto bad;
4300	}
4301	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
4302	if (error)
4303		goto bad;
4304	/*
4305	 * Make sure that a VM object is created for VMIO support.
4306	 */
4307	if (vn_canvmio(vp) == TRUE) {
4308		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
4309			goto bad;
4310	}
4311	if (fmode & FWRITE)
4312		vp->v_writecount++;
4313
4314	/*
4315	 * end of vn_open code
4316	 */
4317
4318	if ((error = falloc(td, &nfp, &indx)) != 0) {
4319		if (fmode & FWRITE)
4320			vp->v_writecount--;
4321		goto bad;
4322	}
4323	fp = nfp;
4324
4325	/*
4326	 * Hold an extra reference to avoid having fp ripped out
4327	 * from under us while we block in the lock op
4328	 */
4329	fhold(fp);
4330	nfp->f_data = (caddr_t)vp;
4331	nfp->f_flag = fmode & FMASK;
4332	nfp->f_ops = &vnops;
4333	nfp->f_type = DTYPE_VNODE;
4334	if (fmode & (O_EXLOCK | O_SHLOCK)) {
4335		lf.l_whence = SEEK_SET;
4336		lf.l_start = 0;
4337		lf.l_len = 0;
4338		if (fmode & O_EXLOCK)
4339			lf.l_type = F_WRLCK;
4340		else
4341			lf.l_type = F_RDLCK;
4342		type = F_FLOCK;
4343		if ((fmode & FNONBLOCK) == 0)
4344			type |= F_WAIT;
4345		VOP_UNLOCK(vp, 0, td);
4346		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
4347			/*
4348			 * The lock request failed.  Normally close the
4349			 * descriptor but handle the case where someone might
4350			 * have dup()d or close()d it when we weren't looking.
4351			 */
4352			FILEDESC_LOCK(fdp);
4353			if (fdp->fd_ofiles[indx] == fp) {
4354				fdp->fd_ofiles[indx] = NULL;
4355				FILEDESC_UNLOCK(fdp);
4356				fdrop(fp, td);
4357			} else
4358				FILEDESC_UNLOCK(fdp);
4359			/*
4360			 * release our private reference
4361			 */
4362			fdrop(fp, td);
4363			return(error);
4364		}
4365		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4366		fp->f_flag |= FHASLOCK;
4367	}
4368	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
4369		vfs_object_create(vp, td, td->td_ucred);
4370
4371	VOP_UNLOCK(vp, 0, td);
4372	fdrop(fp, td);
4373	td->td_retval[0] = indx;
4374	return (0);
4375
4376bad:
4377	vput(vp);
4378	return (error);
4379}
4380
4381/*
4382 * Stat an (NFS) file handle.
4383 */
4384#ifndef _SYS_SYSPROTO_H_
4385struct fhstat_args {
4386	struct fhandle *u_fhp;
4387	struct stat *sb;
4388};
4389#endif
4390int
4391fhstat(td, uap)
4392	struct thread *td;
4393	register struct fhstat_args /* {
4394		syscallarg(struct fhandle *) u_fhp;
4395		syscallarg(struct stat *) sb;
4396	} */ *uap;
4397{
4398	struct stat sb;
4399	fhandle_t fh;
4400	struct mount *mp;
4401	struct vnode *vp;
4402	int error;
4403
4404	/*
4405	 * Must be super user
4406	 */
4407	error = suser(td);
4408	if (error)
4409		return (error);
4410
4411	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
4412	if (error)
4413		return (error);
4414
4415	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4416		return (ESTALE);
4417	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4418		return (error);
4419	error = vn_stat(vp, &sb, td);
4420	vput(vp);
4421	if (error)
4422		return (error);
4423	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
4424	return (error);
4425}
4426
4427/*
4428 * Implement fstatfs() for (NFS) file handles.
4429 */
4430#ifndef _SYS_SYSPROTO_H_
4431struct fhstatfs_args {
4432	struct fhandle *u_fhp;
4433	struct statfs *buf;
4434};
4435#endif
4436int
4437fhstatfs(td, uap)
4438	struct thread *td;
4439	struct fhstatfs_args /* {
4440		syscallarg(struct fhandle) *u_fhp;
4441		syscallarg(struct statfs) *buf;
4442	} */ *uap;
4443{
4444	struct statfs *sp;
4445	struct mount *mp;
4446	struct vnode *vp;
4447	struct statfs sb;
4448	fhandle_t fh;
4449	int error;
4450
4451	/*
4452	 * Must be super user
4453	 */
4454	error = suser(td);
4455	if (error)
4456		return (error);
4457
4458	if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0)
4459		return (error);
4460
4461	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
4462		return (ESTALE);
4463	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
4464		return (error);
4465	mp = vp->v_mount;
4466	sp = &mp->mnt_stat;
4467	vput(vp);
4468	if ((error = VFS_STATFS(mp, sp, td)) != 0)
4469		return (error);
4470	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
4471	if (suser(td)) {
4472		bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb));
4473		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
4474		sp = &sb;
4475	}
4476	return (copyout(sp, SCARG(uap, buf), sizeof(*sp)));
4477}
4478
4479/*
4480 * Syscall to push extended attribute configuration information into the
4481 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
4482 * a command (int cmd), and attribute name and misc data.  For now, the
4483 * attribute name is left in userspace for consumption by the VFS_op.
4484 * It will probably be changed to be copied into sysspace by the
4485 * syscall in the future, once issues with various consumers of the
4486 * attribute code have raised their hands.
4487 *
4488 * Currently this is used only by UFS Extended Attributes.
4489 */
4490int
4491extattrctl(td, uap)
4492	struct thread *td;
4493	struct extattrctl_args /* {
4494		syscallarg(const char *) path;
4495		syscallarg(int) cmd;
4496		syscallarg(const char *) filename;
4497		syscallarg(int) attrnamespace;
4498		syscallarg(const char *) attrname;
4499	} */ *uap;
4500{
4501	struct vnode *filename_vp;
4502	struct nameidata nd;
4503	struct mount *mp, *mp_writable;
4504	char attrname[EXTATTR_MAXNAMELEN];
4505	int error;
4506
4507	/*
4508	 * uap->attrname is not always defined.  We check again later when we
4509	 * invoke the VFS call so as to pass in NULL there if needed.
4510	 */
4511	if (uap->attrname != NULL) {
4512		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
4513		    NULL);
4514		if (error)
4515			return (error);
4516	}
4517
4518	/*
4519	 * uap->filename is not always defined.  If it is, grab a vnode lock,
4520	 * which VFS_EXTATTRCTL() will later release.
4521	 */
4522	filename_vp = NULL;
4523	if (uap->filename != NULL) {
4524		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
4525		    uap->filename, td);
4526		if ((error = namei(&nd)) != 0)
4527			return (error);
4528		filename_vp = nd.ni_vp;
4529		NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK);
4530	}
4531
4532	/* uap->path is always defined. */
4533	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4534	if ((error = namei(&nd)) != 0) {
4535		if (filename_vp != NULL)
4536			vput(filename_vp);
4537		return (error);
4538	}
4539	mp = nd.ni_vp->v_mount;
4540	error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH);
4541	NDFREE(&nd, 0);
4542	if (error) {
4543		if (filename_vp != NULL)
4544			vput(filename_vp);
4545		return (error);
4546	}
4547
4548	if (uap->attrname != NULL) {
4549		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4550		    uap->attrnamespace, attrname, td);
4551	} else {
4552		error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp,
4553		    uap->attrnamespace, NULL, td);
4554	}
4555
4556	vn_finished_write(mp_writable);
4557	/*
4558	 * VFS_EXTATTRCTL will have unlocked, but not de-ref'd,
4559	 * filename_vp, so vrele it if it is defined.
4560	 */
4561	if (filename_vp != NULL)
4562		vrele(filename_vp);
4563
4564	return (error);
4565}
4566
4567/*-
4568 * Set a named extended attribute on a file or directory
4569 *
4570 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4571 *            kernelspace string pointer "attrname", userspace buffer
4572 *            pointer "data", buffer length "nbytes", thread "td".
4573 * Returns: 0 on success, an error number otherwise
4574 * Locks: none
4575 * References: vp must be a valid reference for the duration of the call
4576 */
4577static int
4578extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4579    void *data, size_t nbytes, struct thread *td)
4580{
4581	struct mount *mp;
4582	struct uio auio;
4583	struct iovec aiov;
4584	ssize_t cnt;
4585	int error;
4586
4587	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4588		return (error);
4589	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4590	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4591
4592	aiov.iov_base = data;
4593	aiov.iov_len = nbytes;
4594	auio.uio_iov = &aiov;
4595	auio.uio_iovcnt = 1;
4596	auio.uio_offset = 0;
4597	if (nbytes > INT_MAX) {
4598		error = EINVAL;
4599		goto done;
4600	}
4601	auio.uio_resid = nbytes;
4602	auio.uio_rw = UIO_WRITE;
4603	auio.uio_segflg = UIO_USERSPACE;
4604	auio.uio_td = td;
4605	cnt = nbytes;
4606
4607	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio,
4608	    td->td_ucred, td);
4609	cnt -= auio.uio_resid;
4610	td->td_retval[0] = cnt;
4611
4612done:
4613	VOP_UNLOCK(vp, 0, td);
4614	vn_finished_write(mp);
4615	return (error);
4616}
4617
4618int
4619extattr_set_file(td, uap)
4620	struct thread *td;
4621	struct extattr_set_file_args /* {
4622		syscallarg(const char *) path;
4623		syscallarg(int) attrnamespace;
4624		syscallarg(const char *) attrname;
4625		syscallarg(void *) data;
4626		syscallarg(size_t) nbytes;
4627	} */ *uap;
4628{
4629	struct nameidata nd;
4630	char attrname[EXTATTR_MAXNAMELEN];
4631	int error;
4632
4633	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4634	if (error)
4635		return (error);
4636
4637	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4638	if ((error = namei(&nd)) != 0)
4639		return (error);
4640	NDFREE(&nd, NDF_ONLY_PNBUF);
4641
4642	error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname,
4643	    uap->data, uap->nbytes, td);
4644
4645	vrele(nd.ni_vp);
4646	return (error);
4647}
4648
4649int
4650extattr_set_fd(td, uap)
4651	struct thread *td;
4652	struct extattr_set_fd_args /* {
4653		syscallarg(int) fd;
4654		syscallarg(int) attrnamespace;
4655		syscallarg(const char *) attrname;
4656		syscallarg(void *) data;
4657		syscallarg(size_t) nbytes;
4658	} */ *uap;
4659{
4660	struct file *fp;
4661	char attrname[EXTATTR_MAXNAMELEN];
4662	int error;
4663
4664	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4665	if (error)
4666		return (error);
4667
4668	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4669		return (error);
4670
4671	error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4672	    attrname, uap->data, uap->nbytes, td);
4673	fdrop(fp, td);
4674
4675	return (error);
4676}
4677
4678/*-
4679 * Get a named extended attribute on a file or directory
4680 *
4681 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4682 *            kernelspace string pointer "attrname", userspace buffer
4683 *            pointer "data", buffer length "nbytes", thread "td".
4684 * Returns: 0 on success, an error number otherwise
4685 * Locks: none
4686 * References: vp must be a valid reference for the duration of the call
4687 */
4688static int
4689extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4690    void *data, size_t nbytes, struct thread *td)
4691{
4692	struct uio auio;
4693	struct iovec aiov;
4694	ssize_t cnt;
4695	size_t size;
4696	int error;
4697
4698	VOP_LEASE(vp, td, td->td_ucred, LEASE_READ);
4699	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4700
4701	/*
4702	 * Slightly unusual semantics: if the user provides a NULL data
4703	 * pointer, they don't want to receive the data, just the
4704	 * maximum read length.
4705	 */
4706	if (data != NULL) {
4707		aiov.iov_base = data;
4708		aiov.iov_len = nbytes;
4709		auio.uio_iov = &aiov;
4710		auio.uio_offset = 0;
4711		if (nbytes > INT_MAX) {
4712			error = EINVAL;
4713			goto done;
4714		}
4715		auio.uio_resid = nbytes;
4716		auio.uio_rw = UIO_READ;
4717		auio.uio_segflg = UIO_USERSPACE;
4718		auio.uio_td = td;
4719		cnt = nbytes;
4720		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, &auio,
4721		    NULL, td->td_ucred, td);
4722		cnt -= auio.uio_resid;
4723		td->td_retval[0] = cnt;
4724	} else {
4725		error = VOP_GETEXTATTR(vp, attrnamespace, attrname, NULL,
4726		    &size, td->td_ucred, td);
4727		td->td_retval[0] = size;
4728	}
4729done:
4730	VOP_UNLOCK(vp, 0, td);
4731	return (error);
4732}
4733
4734int
4735extattr_get_file(td, uap)
4736	struct thread *td;
4737	struct extattr_get_file_args /* {
4738		syscallarg(const char *) path;
4739		syscallarg(int) attrnamespace;
4740		syscallarg(const char *) attrname;
4741		syscallarg(void *) data;
4742		syscallarg(size_t) nbytes;
4743	} */ *uap;
4744{
4745	struct nameidata nd;
4746	char attrname[EXTATTR_MAXNAMELEN];
4747	int error;
4748
4749	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4750	if (error)
4751		return (error);
4752
4753	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4754	if ((error = namei(&nd)) != 0)
4755		return (error);
4756	NDFREE(&nd, NDF_ONLY_PNBUF);
4757
4758	error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname,
4759	    uap->data, uap->nbytes, td);
4760
4761	vrele(nd.ni_vp);
4762	return (error);
4763}
4764
4765int
4766extattr_get_fd(td, uap)
4767	struct thread *td;
4768	struct extattr_get_fd_args /* {
4769		syscallarg(int) fd;
4770		syscallarg(int) attrnamespace;
4771		syscallarg(const char *) attrname;
4772		syscallarg(void *) data;
4773		syscallarg(size_t) nbytes;
4774	} */ *uap;
4775{
4776	struct file *fp;
4777	char attrname[EXTATTR_MAXNAMELEN];
4778	int error;
4779
4780	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4781	if (error)
4782		return (error);
4783
4784	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
4785		return (error);
4786
4787	error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace,
4788	    attrname, uap->data, uap->nbytes, td);
4789
4790	fdrop(fp, td);
4791	return (error);
4792}
4793
4794/*
4795 * extattr_delete_vp(): Delete a named extended attribute on a file or
4796 *                      directory
4797 *
4798 * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace",
4799 *            kernelspace string pointer "attrname", proc "p"
4800 * Returns: 0 on success, an error number otherwise
4801 * Locks: none
4802 * References: vp must be a valid reference for the duration of the call
4803 */
4804static int
4805extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname,
4806    struct thread *td)
4807{
4808	struct mount *mp;
4809	int error;
4810
4811	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
4812		return (error);
4813	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
4814	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
4815
4816	error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred,
4817	    td);
4818
4819	VOP_UNLOCK(vp, 0, td);
4820	vn_finished_write(mp);
4821	return (error);
4822}
4823
4824int
4825extattr_delete_file(td, uap)
4826	struct thread *td;
4827	struct extattr_delete_file_args /* {
4828		syscallarg(const char *) path;
4829		syscallarg(int) attrnamespace;
4830		syscallarg(const char *) attrname;
4831	} */ *uap;
4832{
4833	struct nameidata nd;
4834	char attrname[EXTATTR_MAXNAMELEN];
4835	int error;
4836
4837	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4838	if (error)
4839		return(error);
4840
4841	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td);
4842	if ((error = namei(&nd)) != 0)
4843		return(error);
4844	NDFREE(&nd, NDF_ONLY_PNBUF);
4845
4846	error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td);
4847
4848	vrele(nd.ni_vp);
4849	return(error);
4850}
4851
4852int
4853extattr_delete_fd(td, uap)
4854	struct thread *td;
4855	struct extattr_delete_fd_args /* {
4856		syscallarg(int) fd;
4857		syscallarg(int) attrnamespace;
4858		syscallarg(const char *) attrname;
4859	} */ *uap;
4860{
4861	struct file *fp;
4862	struct vnode *vp;
4863	char attrname[EXTATTR_MAXNAMELEN];
4864	int error;
4865
4866	error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL);
4867	if (error)
4868		return (error);
4869
4870	if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0)
4871		return (error);
4872	vp = (struct vnode *)fp->f_data;
4873
4874	error = extattr_delete_vp((struct vnode *)fp->f_data,
4875	    uap->attrnamespace, attrname, td);
4876
4877	fdrop(fp, td);
4878	return (error);
4879}
4880