1/*	$NetBSD: nfs_serv.c,v 1.163.2.1 2012/09/03 18:57:11 riz Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)nfs_serv.c	8.8 (Berkeley) 7/31/95
35 */
36
37/*
38 * nfs version 2 and 3 server calls to vnode ops
39 * - these routines generally have 3 phases
40 *   1 - break down and validate rpc request in mbuf list
41 *   2 - do the vnode ops for the request
42 *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
43 *   3 - build the rpc reply in an mbuf list
44 *   nb:
45 *	- do not mix the phases, since the nfsm_?? macros can return failures
46 *	  on a bad rpc or similar and do not do any vrele() or vput()'s
47 *
48 *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
49 *	error number iff error != 0 whereas
50 *	returning an error from the server function implies a fatal error
51 *	such as a badly constructed rpc request that should be dropped without
52 *	a reply.
53 *	For Version 3, nfsm_reply() does not return for the error case, since
54 *	most version 3 rpcs return more than the status for error cases.
55 */
56
57#include <sys/cdefs.h>
58__KERNEL_RCSID(0, "$NetBSD: nfs_serv.c,v 1.163.2.1 2012/09/03 18:57:11 riz Exp $");
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/proc.h>
63#include <sys/file.h>
64#include <sys/namei.h>
65#include <sys/vnode.h>
66#include <sys/mount.h>
67#include <sys/socket.h>
68#include <sys/socketvar.h>
69#include <sys/mbuf.h>
70#include <sys/dirent.h>
71#include <sys/stat.h>
72#include <sys/kernel.h>
73#include <sys/hash.h>
74#include <sys/kauth.h>
75#include <sys/module.h>
76#include <sys/syscall.h>
77#include <sys/syscallargs.h>
78#include <sys/syscallvar.h>
79
80#include <uvm/uvm.h>
81
82#include <nfs/nfsproto.h>
83#include <nfs/rpcv2.h>
84#include <nfs/nfs.h>
85#include <nfs/xdr_subs.h>
86#include <nfs/nfsm_subs.h>
87#include <nfs/nfs_var.h>
88
/*
 * Declare this file as the "nfsserver" kernel module of class
 * MODULE_CLASS_MISC.  The third argument names "nfs"; presumably that is
 * the module this one depends on -- confirm against module(9).
 */
MODULE(MODULE_CLASS_MISC, nfsserver, "nfs");

/* Global vars */
/* XDR constants and type translation tables defined elsewhere. */
extern u_int32_t nfs_xdrneg1;
extern u_int32_t nfs_false, nfs_true;
extern const enum vtype nv3tov_type[8];
extern struct nfsstats nfsstats;
extern const nfstype nfsv2_type[9];
extern const nfstype nfsv3_type[9];
/*
 * Write-gathering delay.  nfsrv_writegather() adds this value to a
 * timestamp expressed in microseconds, so the unit here is microseconds.
 */
int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
bool nfsd_use_loan = true;	/* use page-loan for READ OP */

/*
 * Expands to nothing; the call sites in the service routines are kept
 * only as markers.
 */
#define	nqsrv_getl(vp, rw)	/* nothing */

/*
 * System calls provided by this module.  The table is handed to
 * syscall_establish()/syscall_disestablish() by nfsserver_modcmd(); the
 * all-zero entry presumably terminates the list.
 */
static const struct syscall_package nfsserver_syscalls[] = {
	{ SYS_nfssvc, 0, (sy_call_t *)sys_nfssvc },
	{ 0, 0, NULL },
};
107
108static int
109nfsserver_modcmd(modcmd_t cmd, void *arg)
110{
111	extern struct vfs_hooks nfs_export_hooks;	/* XXX */
112	int error;
113
114	switch (cmd) {
115	case MODULE_CMD_INIT:
116		error = syscall_establish(NULL, nfsserver_syscalls);
117		if (error != 0) {
118			return error;
119		}
120		nfs_init();	/* XXX for monolithic kernel */
121		netexport_init();
122		nfsrv_initcache();	/* Init the server request cache */
123		nfsrv_init(0);		/* Init server data structures */
124		vfs_hooks_attach(&nfs_export_hooks);
125		nfs_timer_srvinit(nfsrv_timer);
126		return 0;
127	case MODULE_CMD_FINI:
128		error = syscall_disestablish(NULL, nfsserver_syscalls);
129		if (error != 0) {
130			return error;
131		}
132		/*
133		 * Kill export list before detaching VFS hooks, so we
134		 * we don't leak state due to a concurrent umount().
135		 */
136		netexport_fini();
137		vfs_hooks_detach(&nfs_export_hooks);
138
139		/* Kill timer before server goes away. */
140		nfs_timer_srvfini();
141		nfsrv_fini();
142
143		/* Server uses server cache, so kill cache last. */
144		nfsrv_finicache();
145		nfs_fini();
146		return 0;
147	default:
148		return ENOTTY;
149	}
150}
151
/*
 * nfs v3 access service
 *
 * Reads a file handle and a mask of NFSV3ACCESS_* bits from the request,
 * tests each requested kind of access on the vnode with nfsrv_access(),
 * and replies with post-op attributes followed by the subset of the
 * requested bits that were granted.
 *
 * nfsd - request descriptor: request mbufs (nd_mrep/nd_md), parse
 *	  position (nd_dpos), sender address (nd_nam), credentials
 *	  (nd_cr) and flags (nd_flag).
 * slp  - server socket state, passed on to nfsrv_fhtovp().
 * lwp  - passed on to nfsrv_access().
 * mrq  - not referenced directly here; presumably used by the reply
 *	  macros to hand back the reply chain -- confirm in nfsm_subs.h.
 *
 * The explicit return below yields 0 once a reply has been built.  Per
 * the note at the top of this file, a non-zero return means the request
 * was bad and is dropped without a reply.
 *
 * Several locals (t1, bpos, cp2, mb, mreq, frev, cache, ...) are never
 * named in the code below; presumably the nfsm_* macros use them by name,
 * so do not rename or remove them without checking the macros.
 */
int
nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct mbuf *nam = nfsd->nd_nam;
	char *dpos = nfsd->nd_dpos;
	kauth_cred_t cred = nfsd->nd_cr;
	struct vnode *vp;
	nfsrvfh_t nsfh;
	u_int32_t *tl;
	int32_t t1;
	char *bpos;
	int error = 0, rdonly, cache = 0, getret;
	char *cp2;
	struct mbuf *mb, *mreq;
	struct vattr va;
	u_long inmode, testmode, outmode;
	u_quad_t frev;

	/* Phase 1: pull the file handle and the access mask out of the request. */
	nfsm_srvmtofh(&nsfh);
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam, &rdonly,
	    (nfsd->nd_flag & ND_KERBAUTH), false);
	if (error) {
		/* No vnode: reply with the error and no attributes. */
		nfsm_reply(NFSX_UNSIGNED);
		nfsm_srvpostop_attr(1, (struct vattr *)0);
		return (0);
	}
	/* Phase 2: test each requested bit; failures simply leave it clear. */
	inmode = fxdr_unsigned(u_int32_t, *tl);
	outmode = 0;
	if ((inmode & NFSV3ACCESS_READ) &&
	    nfsrv_access(vp, VREAD, cred, rdonly, lwp, 0) == 0)
		outmode |= NFSV3ACCESS_READ;
	if (vp->v_type != VDIR) {
		/*
		 * Non-directory: MODIFY and EXTEND both require write
		 * access, EXECUTE requires VEXEC.  DELETE and LOOKUP are
		 * never granted on this branch.
		 */
		testmode = inmode & (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
		if (testmode &&
		    nfsrv_access(vp, VWRITE, cred, rdonly, lwp, 0) == 0)
			outmode |= testmode;
		if ((inmode & NFSV3ACCESS_EXECUTE) &&
		    nfsrv_access(vp, VEXEC, cred, rdonly, lwp, 0) == 0)
			outmode |= NFSV3ACCESS_EXECUTE;
	} else {
		/*
		 * Directory: MODIFY, EXTEND and DELETE all require write
		 * access, LOOKUP requires VEXEC.  EXECUTE is never granted.
		 */
		testmode = inmode & (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
		    NFSV3ACCESS_DELETE);
		if (testmode &&
		    nfsrv_access(vp, VWRITE, cred, rdonly, lwp, 0) == 0)
			outmode |= testmode;
		if ((inmode & NFSV3ACCESS_LOOKUP) &&
		    nfsrv_access(vp, VEXEC, cred, rdonly, lwp, 0) == 0)
			outmode |= NFSV3ACCESS_LOOKUP;
	}
	/* Fetch attributes while the vnode is still held, then release it. */
	getret = VOP_GETATTR(vp, &va, cred);
	vput(vp);
	/* Phase 3: post-op attributes, then the granted access mask. */
	nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
	nfsm_srvpostop_attr(getret, &va);
	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(outmode);
	nfsm_srvdone;
}
214
/*
 * nfs getattr service
 *
 * Translates the file handle in the request to a vnode, fetches its
 * attributes with VOP_GETATTR() and returns them in the reply; the
 * attribute block is sized with NFSX_FATTR() according to ND_NFSV3.
 *
 * nfsd - request descriptor (request mbufs, parse position, sender
 *	  address, credentials, flags).
 * slp  - server socket state, passed on to nfsrv_fhtovp().
 * lwp  - not referenced directly in this function.
 * mrq  - not referenced directly here; presumably used by the reply
 *	  macros -- confirm in nfsm_subs.h.
 *
 * The explicit returns below yield 0 once a reply carrying the status
 * has been built.  Per the note at the top of this file, a non-zero
 * return means the request was bad and is dropped without a reply.
 */
int
nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct mbuf *nam = nfsd->nd_nam;
	char *dpos = nfsd->nd_dpos;
	kauth_cred_t cred = nfsd->nd_cr;
	struct nfs_fattr *fp;
	struct vattr va;
	struct vnode *vp;
	nfsrvfh_t nsfh;
	u_int32_t *tl;
	int32_t t1;
	char *bpos;
	int error = 0, rdonly, cache = 0;
	char *cp2;
	struct mbuf *mb, *mreq;
	u_quad_t frev;

	nfsm_srvmtofh(&nsfh);
	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam, &rdonly,
	    (nfsd->nd_flag & ND_KERBAUTH), false);
	if (error) {
		/* Status-only reply: there is no vnode to describe. */
		nfsm_reply(0);
		return (0);
	}
	nqsrv_getl(vp, ND_READ);
	error = VOP_GETATTR(vp, &va, cred);
	/* The vnode is released before any reply macro runs. */
	vput(vp);
	nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
	if (error)
		return (0);	/* the reply already carries the error status */
	nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
	nfsm_srvfillattr(&va, fp);
	nfsm_srvdone;
}
254
/*
 * nfs setattr service
 *
 * Decodes the new attributes from the request (v3 sattr plus optional
 * guard time, or the fixed v2 sattr structure), applies them with
 * VOP_SETATTR() and replies with wcc data (v3) or the resulting
 * attributes (v2).
 *
 * nfsd - request descriptor (request mbufs, parse position, sender
 *	  address, credentials, flags).
 * slp  - server socket state, passed on to nfsrv_fhtovp().
 * lwp  - passed on to nfsrv_access().
 * mrq  - not referenced directly here; presumably used by the reply
 *	  macros -- confirm in nfsm_subs.h.
 *
 * The explicit returns below yield 0 once a reply has been built.  Per
 * the note at the top of this file, a non-zero return means the request
 * was bad and is dropped without a reply.
 *
 * preat_ret and postat_ret start at 1 and are only overwritten by the
 * corresponding VOP_GETATTR() calls, so early error replies pass
 * non-zero values to nfsm_srvwcc_data().
 */
int
nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct mbuf *nam = nfsd->nd_nam;
	char *dpos = nfsd->nd_dpos;
	kauth_cred_t cred = nfsd->nd_cr;
	struct vattr va, preat;
	struct nfsv2_sattr *sp;
	struct nfs_fattr *fp;
	struct vnode *vp;
	nfsrvfh_t nsfh;
	u_int32_t *tl;
	int32_t t1;
	char *bpos;
	int error = 0, rdonly, cache = 0, preat_ret = 1, postat_ret = 1;
	int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
	char *cp2;
	struct mbuf *mb, *mreq;
	u_quad_t frev;
	struct timespec guard;

	memset(&guard, 0, sizeof guard);	/* XXX gcc */

	nfsm_srvmtofh(&nsfh);
	/* Start from an "unset" vattr; only fields the client supplied are filled in. */
	vattr_null(&va);
	if (v3) {
		nfsm_srvsattr(&va);
		/* Optional guard: a flag word, then the time to compare against ctime. */
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		gcheck = fxdr_unsigned(int, *tl);
		if (gcheck) {
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			fxdr_nfsv3time(tl, &guard);
		}
	} else {
		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
		/*
		 * Nah nah nah nah na nah
		 * There is a bug in the Sun client that puts 0xffff in the mode
		 * field of sattr when it should put in 0xffffffff. The u_short
		 * doesn't sign extend.
		 * --> check the low order 2 bytes for 0xffff
		 */
		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
			va.va_mode = nfstov_mode(sp->sa_mode);
		/* A field equal to nfs_xdrneg1 is treated as "not supplied". */
		if (sp->sa_uid != nfs_xdrneg1)
			va.va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
		if (sp->sa_gid != nfs_xdrneg1)
			va.va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
		if (sp->sa_size != nfs_xdrneg1)
			va.va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
		if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
#ifdef notyet
			fxdr_nfsv2time(&sp->sa_atime, &va.va_atime);
#else
			/* Only the seconds are taken from the request; nsec is forced to 0. */
			va.va_atime.tv_sec =
				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
			va.va_atime.tv_nsec = 0;
#endif
		}
		if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
			fxdr_nfsv2time(&sp->sa_mtime, &va.va_mtime);

	}

	/*
	 * Now that we have all the fields, let's do it.
	 */
	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam, &rdonly,
	    (nfsd->nd_flag & ND_KERBAUTH), false);
	if (error) {
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &va);
		return (0);
	}
	nqsrv_getl(vp, ND_WRITE);
	if (v3) {
		/*
		 * Fetch the pre-op attributes.  If a guard was supplied, the
		 * current ctime must match it exactly, otherwise the request
		 * fails with NFSERR_NOT_SYNC.
		 */
		error = preat_ret = VOP_GETATTR(vp, &preat, cred);
		if (!error && gcheck &&
			(preat.va_ctime.tv_sec != guard.tv_sec ||
			 preat.va_ctime.tv_nsec != guard.tv_nsec))
			error = NFSERR_NOT_SYNC;
		if (error) {
			vput(vp);
			nfsm_reply(NFSX_WCCDATA(v3));
			nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &va);
			return (0);
		}
	}

	/*
	 * If the size is being changed write access is required, otherwise
	 * just check for a read only file system.
	 */
	/*
	 * An all-ones va_size means no size was supplied (presumably the
	 * value vattr_null() left there -- confirm).
	 */
	if (va.va_size == ((u_quad_t)((quad_t) -1))) {
		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
	} else {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		} else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
			lwp, 0)) != 0)
			goto out;
	}
	error = VOP_SETATTR(vp, &va, cred);
	/*
	 * va is reused for the post-op attributes.  A getattr failure is
	 * only reported if the setattr itself succeeded.
	 */
	postat_ret = VOP_GETATTR(vp, &va, cred);
	if (!error)
		error = postat_ret;
out:
	vput(vp);
	nfsm_reply(NFSX_WCCORFATTR(v3));
	if (v3) {
		nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &va);
		return (0);
	} else {
		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(&va, fp);
	}
	nfsm_srvdone;
}
381
382/*
383 * nfs lookup rpc
384 */
385int
386nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
387{
388	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
389	struct mbuf *nam = nfsd->nd_nam;
390	char *dpos = nfsd->nd_dpos;
391	kauth_cred_t cred = nfsd->nd_cr;
392	struct nfs_fattr *fp;
393	struct nameidata nd, ind, *ndp = &nd;
394	struct pathbuf *ipb = NULL;
395	struct vnode *vp, *dirp;
396	nfsrvfh_t nsfh;
397	char *cp;
398	u_int32_t *tl;
399	int32_t t1;
400	char *bpos;
401	int error = 0, cache = 0, dirattr_ret = 1;
402	uint32_t len;
403	int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
404	char *cp2;
405	struct mbuf *mb, *mreq;
406	struct vattr va, dirattr;
407	u_quad_t frev;
408
409	nfsm_srvmtofh(&nsfh);
410	nfsm_srvnamesiz(len);
411
412	pubflag = nfs_ispublicfh(&nsfh);
413
414	nd.ni_cnd.cn_cred = cred;
415	nd.ni_cnd.cn_nameiop = LOOKUP;
416	nd.ni_cnd.cn_flags = LOCKLEAF;
417	error = nfs_namei(&nd, &nsfh, len, slp, nam, &md, &dpos,
418		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
419
420	if (!error && pubflag) {
421		if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL &&
422		    (ipb = pathbuf_create(nfs_pub.np_index)) != NULL) {
423			/*
424			 * Setup call to lookup() to see if we can find
425			 * the index file. Arguably, this doesn't belong
426			 * in a kernel.. Ugh.
427			 */
428			ind = nd;
429			VOP_UNLOCK(nd.ni_vp);
430			ind.ni_pathbuf = ipb;
431
432			error = lookup_for_nfsd_index(&ind, nd.ni_vp);
433			if (!error) {
434				/*
435				 * Found an index file. Get rid of
436				 * the old references.
437				 */
438				if (dirp)
439					vrele(dirp);
440				dirp = nd.ni_vp;
441				ndp = &ind;
442			} else
443				error = 0;
444		}
445		/*
446		 * If the public filehandle was used, check that this lookup
447		 * didn't result in a filehandle outside the publicly exported
448		 * filesystem.
449		 */
450
451		if (!error && ndp->ni_vp->v_mount != nfs_pub.np_mount) {
452			vput(nd.ni_vp);
453			error = EPERM;
454		}
455	}
456
457	if (error) {
458		if (nd.ni_pathbuf != NULL) {
459			pathbuf_destroy(nd.ni_pathbuf);
460		}
461		if (ipb != NULL) {
462			pathbuf_destroy(ipb);
463		}
464		if (dirp) {
465			if (v3) {
466				vn_lock(dirp, LK_SHARED | LK_RETRY);
467				dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred);
468				vput(dirp);
469			} else
470				vrele(dirp);
471		}
472		nfsm_reply(NFSX_POSTOPATTR(v3));
473		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
474		return (0);
475	}
476
477	nqsrv_getl(ndp->ni_startdir, ND_READ);
478	pathbuf_destroy(nd.ni_pathbuf);
479	if (ipb != NULL) {
480		pathbuf_destroy(ipb);
481	}
482	vp = ndp->ni_vp;
483	error = nfsrv_composefh(vp, &nsfh, v3);
484	if (!error)
485		error = VOP_GETATTR(vp, &va, cred);
486	vput(vp);
487	if (dirp) {
488		if (v3) {
489			vn_lock(dirp, LK_SHARED | LK_RETRY);
490			dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred);
491			vput(dirp);
492		} else
493			vrele(dirp);
494	}
495	nfsm_reply(NFSX_SRVFH(&nsfh, v3) + NFSX_POSTOPORFATTR(v3) +
496	    NFSX_POSTOPATTR(v3));
497	if (error) {
498		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
499		return (0);
500	}
501	nfsm_srvfhtom(&nsfh, v3);
502	if (v3) {
503		nfsm_srvpostop_attr(0, &va);
504		nfsm_srvpostop_attr(dirattr_ret, &dirattr);
505	} else {
506		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
507		nfsm_srvfillattr(&va, fp);
508	}
509	nfsm_srvdone;
510}
511
/*
 * nfs readlink service
 *
 * Pre-allocates a chain of mbuf clusters totalling NFS_MAXPATHLEN bytes,
 * reads the symbolic link into it with VOP_READLINK(), and appends the
 * chain to the reply after a length word (preceded by post-op
 * attributes for v3).
 *
 * nfsd - request descriptor (request mbufs, parse position, sender
 *	  address, credentials, flags).
 * slp  - server socket state, passed on to nfsrv_fhtovp().
 * lwp  - not referenced directly in this function.
 * mrq  - not referenced directly here; presumably used by the reply
 *	  macros -- confirm in nfsm_subs.h.
 *
 * The explicit returns below yield 0 once a reply has been built.  Per
 * the note at the top of this file, a non-zero return means the request
 * was bad and is dropped without a reply.
 *
 * A vnode that is not a symbolic link yields EINVAL (v3) or ENXIO (v2).
 */
int
nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct mbuf *nam = nfsd->nd_nam;
	char *dpos = nfsd->nd_dpos;
	kauth_cred_t cred = nfsd->nd_cr;
	/* One iovec per mbuf in the data chain built below. */
	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
	struct iovec *ivp = iv;
	struct mbuf *mp;
	u_int32_t *tl;
	int32_t t1;
	char *bpos;
	int error = 0, rdonly, cache = 0, i, padlen, getret;
	uint32_t len;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	/* mp3 is the head of the data chain, mp2 its current tail. */
	struct mbuf *mb, *mp2 = NULL, *mp3 = NULL, *mreq;
	struct vnode *vp;
	struct vattr attr;
	nfsrvfh_t nsfh;
	struct uio io, *uiop = &io;
	u_quad_t frev;

	nfsm_srvmtofh(&nsfh);
	/*
	 * Build the data chain: keep adding clusters until exactly
	 * NFS_MAXPATHLEN bytes are available, trimming the last mbuf, and
	 * describe each mbuf with one iovec.
	 */
	len = 0;
	i = 0;
	while (len < NFS_MAXPATHLEN) {
		mp = m_get(M_WAIT, MT_DATA);
		MCLAIM(mp, &nfs_mowner);
		m_clget(mp, M_WAIT);
		mp->m_len = NFSMSIZ(mp);
		if (len == 0)
			mp3 = mp2 = mp;
		else {
			mp2->m_next = mp;
			mp2 = mp;
		}
		if ((len+mp->m_len) > NFS_MAXPATHLEN) {
			mp->m_len = NFS_MAXPATHLEN-len;
			len = NFS_MAXPATHLEN;
		} else
			len += mp->m_len;
		ivp->iov_base = mtod(mp, void *);
		ivp->iov_len = mp->m_len;
		i++;
		ivp++;
	}
	uiop->uio_iov = iv;
	uiop->uio_iovcnt = i;
	uiop->uio_offset = 0;
	uiop->uio_resid = len;
	uiop->uio_rw = UIO_READ;
	UIO_SETUP_SYSSPACE(uiop);
	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
	if (error) {
		/* The data chain is not part of any reply yet; free it here. */
		m_freem(mp3);
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvpostop_attr(1, (struct vattr *)0);
		return (0);
	}
	if (vp->v_type != VLNK) {
		if (v3)
			error = EINVAL;
		else
			error = ENXIO;
		goto out;
	}
	nqsrv_getl(vp, ND_READ);
	error = VOP_READLINK(vp, uiop, cred);
out:
	getret = VOP_GETATTR(vp, &attr, cred);
	vput(vp);
	/* Must be freed before nfsm_reply(), which may leave the function on error. */
	if (error)
		m_freem(mp3);
	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
	if (v3) {
		nfsm_srvpostop_attr(getret, &attr);
		if (error)
			return (0);
	}
	/* Bytes actually read = bytes offered minus what the uio has left. */
	len -= uiop->uio_resid;
	padlen = nfsm_padlen(len);
	/*
	 * presumably trims the unused tail and adds the XDR padding --
	 * confirm in nfs_zeropad()
	 */
	if (uiop->uio_resid || padlen)
		nfs_zeropad(mp3, uiop->uio_resid, padlen);
	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
	*tl = txdr_unsigned(len);
	/* Hang the link data off the end of the reply. */
	mb->m_next = mp3;
	nfsm_srvdone;
}
606
/*
 * nfs read service
 *
 * Reads up to the requested number of bytes (clamped to
 * NFS_SRVMAXDATA(nfsd) and to the file size) from a regular file and
 * returns them in the reply together with the file attributes.
 *
 * Two data paths exist:
 *  - when nfsd_use_loan is set, the file's pages are loaned with
 *    uvm_loanuobjpages() and mapped read-only into an external mbuf;
 *  - otherwise, or when the kva allocation fails or the loan returns
 *    EBUSY, mbuf clusters are allocated and filled by VOP_READ().
 *
 * nfsd - request descriptor (request mbufs, parse position, sender
 *	  address, credentials, flags).
 * slp  - server socket state; its ns_so is used for the loan kva.
 * lwp  - passed on to nfsrv_access().
 * mrq  - not referenced directly here; presumably used by the reply
 *	  macros -- confirm in nfsm_subs.h.
 *
 * The explicit returns below yield 0 once a reply has been built.  Per
 * the note at the top of this file, a non-zero return means the request
 * was bad and is dropped without a reply.
 */
int
nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct mbuf *nam = nfsd->nd_nam;
	char *dpos = nfsd->nd_dpos;
	kauth_cred_t cred = nfsd->nd_cr;
	struct mbuf *m;
	struct nfs_fattr *fp;
	u_int32_t *tl;
	int32_t t1;
	int i;
	char *bpos;
	int error = 0, rdonly, cache = 0, getret;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	uint32_t reqlen, len, cnt, left;
	int padlen;
	char *cp2;
	struct mbuf *mb, *mreq;
	struct vnode *vp;
	nfsrvfh_t nsfh;
	struct uio io, *uiop = &io;
	struct vattr va;
	off_t off;
	u_quad_t frev;

	nfsm_srvmtofh(&nsfh);
	/* The offset is 64 bits wide in v3 and 32 bits wide in v2. */
	if (v3) {
		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
	} else {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		off = (off_t)fxdr_unsigned(u_int32_t, *tl);
	}
	nfsm_dissect(tl, uint32_t *, NFSX_UNSIGNED);
	reqlen = fxdr_unsigned(uint32_t, *tl);
	reqlen = MIN(reqlen, NFS_SRVMAXDATA(nfsd));
	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
	if (error) {
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvpostop_attr(1, (struct vattr *)0);
		return (0);
	}
	if (vp->v_type != VREG) {
		if (v3)
			error = EINVAL;
		else
			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
	}
	if (!error) {
	    nqsrv_getl(vp, ND_READ);
	    /* Read permission is enough; failing that, execute permission also is. */
	    if ((error = nfsrv_access(vp, VREAD, cred, rdonly, lwp, 1)) != 0)
		error = nfsrv_access(vp, VEXEC, cred, rdonly, lwp, 1);
	}
	getret = VOP_GETATTR(vp, &va, cred);
	if (!error)
		error = getret;
	if (error) {
		vput(vp);
		nfsm_reply(NFSX_POSTOPATTR(v3));
		nfsm_srvpostop_attr(getret, &va);
		return (0);
	}
	/* Clamp the byte count to what lies between off and end of file. */
	if (off >= va.va_size)
		cnt = 0;
	else if ((off + reqlen) > va.va_size)
		cnt = va.va_size - off;
	else
		cnt = reqlen;
	nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
	/*
	 * Reserve the reply header now and remember where the attributes
	 * (fp) and the trailing count words (tl) go; both are filled in
	 * after the read, once the final length is known.
	 */
	if (v3) {
		nfsm_build(tl, u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
		*tl++ = nfs_true;
		fp = (struct nfs_fattr *)tl;
		tl += (NFSX_V3FATTR / sizeof (u_int32_t));
	} else {
		nfsm_build(tl, u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
		fp = (struct nfs_fattr *)tl;
		tl += (NFSX_V2FATTR / sizeof (u_int32_t));
	}
	len = left = cnt;
	if (cnt > 0) {
		if (nfsd_use_loan) {
			struct vm_page **pgpp;
			voff_t pgoff = trunc_page(off);
			int npages;
			vaddr_t lva;

			/* Number of pages spanned by [off, off + cnt). */
			npages = (round_page(off + cnt) - pgoff) >> PAGE_SHIFT;
			KASSERT(npages <= M_EXT_MAXPAGES); /* XXX */

			/* allocate kva for mbuf data */
			lva = sokvaalloc(pgoff, npages << PAGE_SHIFT,
			    slp->ns_so);
			if (lva == 0) {
				/* fall back to VOP_READ */
				goto loan_fail;
			}

			/* allocate mbuf */
			m = m_get(M_WAIT, MT_DATA);
			MCLAIM(m, &nfs_mowner);
			pgpp = m->m_ext.ext_pgs;

			/* loan pages */
			error = uvm_loanuobjpages(&vp->v_uobj, pgoff, npages,
			    pgpp);
			if (error) {
				sokvafree(lva, npages << PAGE_SHIFT);
				m_free(m);
				/* EBUSY falls back to VOP_READ; anything else fails the read. */
				if (error == EBUSY)
					goto loan_fail;
				goto read_error;
			}

			/* associate kva to mbuf */
			/* Data starts at off's offset within the first page. */
			MEXTADD(m, (void *)(lva + ((vaddr_t)off & PAGE_MASK)),
			    cnt, M_MBUF, soloanfree, slp->ns_so);
			m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP;
			m->m_len = cnt;

			/* map pages */
			for (i = 0; i < npages; i++) {
				pmap_kenter_pa(lva, VM_PAGE_TO_PHYS(pgpp[i]),
				    VM_PROT_READ, 0);
				lva += PAGE_SIZE;
			}

			pmap_update(pmap_kernel());

			/* Append the data mbuf to the reply; nothing is left unread. */
			mb->m_next = m;
			mb = m;
			error = 0;
			uiop->uio_resid = 0;
		} else {
			struct iovec *iv;
			struct iovec *iv2;
			struct mbuf *m2;
			int siz;
loan_fail:
			/*
			 * Generate the mbuf list with the uio_iov ref. to it.
			 */
			/*
			 * First pass: use the trailing space of the current
			 * reply mbuf, add clusters until cnt bytes fit, and
			 * count the iovecs needed (i).
			 */
			i = 0;
			m = m2 = mb;
			while (left > 0) {
				siz = min(M_TRAILINGSPACE(m), left);
				if (siz > 0) {
					left -= siz;
					i++;
				}
				if (left > 0) {
					m = m_get(M_WAIT, MT_DATA);
					MCLAIM(m, &nfs_mowner);
					m_clget(m, M_WAIT);
					m->m_len = 0;
					m2->m_next = m;
					m2 = m;
				}
			}
			iv = malloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
			/* iv2 keeps the start of the array for free(); iv is advanced. */
			uiop->uio_iov = iv2 = iv;
			/*
			 * Second pass: walk the same mbufs again, point one
			 * iovec at each free region and account for it in
			 * m_len.
			 */
			m = mb;
			left = cnt;
			i = 0;
			while (left > 0) {
				if (m == NULL)
					panic("nfsrv_read iov");
				siz = min(M_TRAILINGSPACE(m), left);
				if (siz > 0) {
					iv->iov_base = mtod(m, char *) +
					    m->m_len;
					iv->iov_len = siz;
					m->m_len += siz;
					left -= siz;
					iv++;
					i++;
				}
				m = m->m_next;
			}
			uiop->uio_iovcnt = i;
			uiop->uio_offset = off;
			uiop->uio_resid = cnt;
			uiop->uio_rw = UIO_READ;
			UIO_SETUP_SYSSPACE(uiop);
			error = VOP_READ(vp, uiop, IO_NODELOCKED, cred);
			free((void *)iv2, M_TEMP);
		}
read_error:
		/*
		 * On a read error, or if the attributes cannot be re-fetched,
		 * throw away the reply built so far and build an error reply.
		 */
		if (error || (getret = VOP_GETATTR(vp, &va, cred)) != 0){
			if (!error)
				error = getret;
			m_freem(mreq);
			vput(vp);
			nfsm_reply(NFSX_POSTOPATTR(v3));
			nfsm_srvpostop_attr(getret, &va);
			return (0);
		}
	} else {
		uiop->uio_resid = 0;
	}
	vput(vp);
	nfsm_srvfillattr(&va, fp);
	/* Bytes actually read = bytes requested minus what the uio has left. */
	len -= uiop->uio_resid;
	padlen = nfsm_padlen(len);
	if (uiop->uio_resid || padlen)
		nfs_zeropad(mb, uiop->uio_resid, padlen);
	if (v3) {
		/* count */
		*tl++ = txdr_unsigned(len);
		/* eof */
		if (off + len >= va.va_size)
			*tl++ = nfs_true;
		else
			*tl++ = nfs_false;
	}
	/* Final word (both versions): length of the data that follows. */
	*tl = txdr_unsigned(len);
	nfsm_srvdone;
}
830
/*
 * nfs write service
 *
 * Writes the data carried in the request to a regular file with
 * VOP_WRITE(), directly out of the request mbufs, and replies with wcc
 * data, count, commit level and two boottime-derived words (v3) or the
 * file attributes (v2).
 *
 * nfsd - request descriptor (request mbufs, parse position, sender
 *	  address, credentials, flags).
 * slp  - server socket state, passed on to nfsrv_fhtovp().
 * lwp  - passed on to nfsrv_access().
 * mrq  - set to NULL when the descriptor has no request mbufs;
 *	  otherwise presumably filled in by the reply macros -- confirm
 *	  in nfsm_subs.h.
 *
 * The explicit returns below yield 0.  Per the note at the top of this
 * file, a non-zero return means the request was bad and is dropped
 * without a reply.
 *
 * A length that is negative, larger than NFS_MAXDATA, or larger than the
 * data actually present in the request is answered with EIO.
 */
int
nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct mbuf *nam = nfsd->nd_nam;
	char *dpos = nfsd->nd_dpos;
	kauth_cred_t cred = nfsd->nd_cr;
	struct iovec *ivp;
	int i, cnt;
	struct mbuf *mp;
	struct nfs_fattr *fp;
	struct iovec *iv;
	struct vattr va, forat;
	u_int32_t *tl;
	int32_t t1;
	char *bpos;
	int error = 0, rdonly, cache = 0, len, forat_ret = 1;
	int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
	/* v2 has no stability field; it is treated as FILESYNC. */
	int stable = NFSV3WRITE_FILESYNC;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	struct mbuf *mb, *mreq;
	struct vnode *vp;
	nfsrvfh_t nsfh;
	struct uio io, *uiop = &io;
	off_t off;
	u_quad_t frev;

	/* Nothing to parse: report "no reply" and succeed. */
	if (mrep == NULL) {
		*mrq = NULL;
		return (0);
	}
	nfsm_srvmtofh(&nsfh);
	if (v3) {
		/* Five words: 64-bit offset, one skipped word, stable, then length. */
		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
		off = fxdr_hyper(tl);
		tl += 3;
		stable = fxdr_unsigned(int, *tl++);
	} else {
		/* Four words: only the second (offset) and fourth (length) are used. */
		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
		tl += 2;
	}
	retlen = len = fxdr_unsigned(int32_t, *tl);
	cnt = i = 0;

	/*
	 * For NFS Version 2, it is not obvious what a write of zero length
	 * should do, but I might as well be consistent with Version 3,
	 * which is to return ok so long as there are no permission problems.
	 */
	/*
	 * Trim the request chain in place so that only the write data
	 * remains: mbufs before md are emptied, md is advanced past the
	 * parsed header (dpos), and anything beyond len bytes is cut off.
	 * i accumulates the data bytes seen, cnt the number of non-empty
	 * mbufs (the iovec count used below).
	 */
	if (len > 0) {
		zeroing = 1;
		mp = mrep;
		while (mp) {
			if (mp == md) {
				zeroing = 0;
				adjust = dpos - mtod(mp, char *);
				mp->m_len -= adjust;
				if (mp->m_len > 0 && adjust > 0)
					NFSMADV(mp, adjust);
			}
			if (zeroing)
				mp->m_len = 0;
			else if (mp->m_len > 0) {
				i += mp->m_len;
				if (i > len) {
					mp->m_len -= (i - len);
					zeroing	= 1;
				}
				if (mp->m_len > 0)
					cnt++;
			}
			mp = mp->m_next;
		}
	}
	/* Reject oversized, negative, or short (i < len) writes. */
	if (len > NFS_MAXDATA || len < 0 || i < len) {
		error = EIO;
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
		return (0);
	}
	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
	if (error) {
		nfsm_reply(2 * NFSX_UNSIGNED);
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
		return (0);
	}
	/* Pre-op attributes are only needed for the v3 wcc data. */
	if (v3)
		forat_ret = VOP_GETATTR(vp, &forat, cred);
	if (vp->v_type != VREG) {
		if (v3)
			error = EINVAL;
		else
			error = (vp->v_type == VDIR) ? EISDIR : EACCES;
	}
	if (!error) {
		nqsrv_getl(vp, ND_WRITE);
		error = nfsrv_access(vp, VWRITE, cred, rdonly, lwp, 1);
	}
	if (error) {
		vput(vp);
		nfsm_reply(NFSX_WCCDATA(v3));
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
		return (0);
	}

	if (len > 0) {
		/* One iovec per non-empty request mbuf, as counted above. */
		ivp = malloc(cnt * sizeof (struct iovec), M_TEMP, M_WAITOK);
		/* iv keeps the start of the array for free(); ivp is advanced. */
		uiop->uio_iov = iv = ivp;
		uiop->uio_iovcnt = cnt;
		mp = mrep;
		while (mp) {
			if (mp->m_len > 0) {
				ivp->iov_base = mtod(mp, void *);
				ivp->iov_len = mp->m_len;
				ivp++;
			}
			mp = mp->m_next;
		}

		/*
		 * XXX
		 * The IO_METASYNC flag indicates that all metadata (and not
		 * just enough to ensure data integrity) must be written to
		 * stable storage synchronously.
		 * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
		 */
		if (stable == NFSV3WRITE_UNSTABLE)
			ioflags = IO_NODELOCKED;
		else if (stable == NFSV3WRITE_DATASYNC)
			ioflags = (IO_SYNC | IO_NODELOCKED);
		else
			ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
		uiop->uio_resid = len;
		uiop->uio_rw = UIO_WRITE;
		uiop->uio_offset = off;
		UIO_SETUP_SYSSPACE(uiop);
		error = VOP_WRITE(vp, uiop, ioflags, cred);
		nfsstats.srvvop_writes++;
		free(iv, M_TEMP);
	}
	/* A getattr failure is only reported if the write itself succeeded. */
	aftat_ret = VOP_GETATTR(vp, &va, cred);
	vput(vp);
	if (!error)
		error = aftat_ret;
	nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
		2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
	if (v3) {
		nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
		if (error)
			return (0);
		nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		/* The length from the request is echoed as the count written. */
		*tl++ = txdr_unsigned(retlen);
		/* UNSTABLE is echoed back; every other level is reported as FILESYNC. */
		if (stable == NFSV3WRITE_UNSTABLE)
			*tl++ = txdr_unsigned(stable);
		else
			*tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
		/*
		 * Actually, there is no need to txdr these fields,
		 * but it may make the values more human readable,
		 * for debugging purposes.
		 */
		/* presumably the write verifier (cf. NFSX_WRITEVERF above) -- confirm */
		*tl++ = txdr_unsigned(boottime.tv_sec);
		*tl = txdr_unsigned(boottime.tv_nsec / 1000);
	} else {
		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(&va, fp);
	}
	nfsm_srvdone;
}
1006
1007/*
1008 * XXX elad: the original NFSW_SAMECRED() macro also made sure the
1009 *	     two nd_flag fields of the descriptors contained
1010 *	     ND_KERBAUTH.
1011 */
1012static int
1013nfsrv_samecred(kauth_cred_t cred1, kauth_cred_t cred2)
1014{
1015	int i, do_ngroups;
1016
1017	if (kauth_cred_geteuid(cred1) != kauth_cred_geteuid(cred2))
1018		return (0);
1019	if (kauth_cred_ngroups(cred1) != kauth_cred_ngroups(cred2))
1020		return (0);
1021	do_ngroups = kauth_cred_ngroups(cred1);
1022	for (i = 0; i < do_ngroups; i++)
1023		if (kauth_cred_group(cred1, i) !=
1024		    kauth_cred_group(cred2, i))
1025			return (0);
1026
1027	return (1);
1028}
1029
1030static struct nfsrvw_delayhash *
1031nfsrv_nwdelayhash(struct nfssvc_sock *slp, const nfsrvfh_t *nsfh)
1032{
1033	uint32_t hash;
1034
1035	hash = hash32_buf(NFSRVFH_DATA(nsfh), NFSRVFH_SIZE(nsfh),
1036	    HASH32_BUF_INIT);
1037	return &slp->ns_wdelayhashtbl[hash % NFS_WDELAYHASHSIZ];
1038}
1039
1040/*
1041 * NFS write service with write gathering support. Called when
1042 * nfsrvw_procrastinate > 0.
1043 * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
 * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1045 * Jan. 1994.
1046 */
1047int
1048nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
1049{
	/*
	 * Overview (derived from the body below):
	 *  1. If *ndp is non-NULL, parse that write request, trim its mbuf
	 *     chain down to the write data and queue it on slp->ns_tq
	 *     (kept sorted by nd_time) and on the delay hash chain for its
	 *     file handle, coalescing it with contiguous neighbours.
	 *  2. Perform VOP_WRITE() for every queued request whose nd_time has
	 *     passed, and build a reply for it and for every request that
	 *     was coalesced into it.
	 *  3. Hand back at most one finished request through *ndp / *mrq.
	 * The only return statement visible in this function returns 0;
	 * *ndp is left NULL when no reply is ready.
	 * The nfsm_* helpers are macros defined elsewhere; they use the
	 * locals declared here (tl, t1, cp2, mb, bpos, ...) implicitly.
	 */
1050	struct timeval now;
1051	struct iovec *ivp;
1052	struct mbuf *mp;
1053	struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1054	struct nfs_fattr *fp;
1055	int i = 0;
1056	struct iovec *iov;
1057	struct nfsrvw_delayhash *wpp;
1058	kauth_cred_t cred;
1059	struct vattr va, forat;
1060	u_int32_t *tl;
1061	int32_t t1;
1062	char *bpos, *dpos;
1063	int error = 0, rdonly, cache = 0, len = 0, forat_ret = 1;
1064	int ioflags, aftat_ret = 1, adjust, v3, zeroing;
1065	char *cp2;
1066	struct mbuf *mb, *mreq, *mrep, *md;
1067	struct vnode *vp;
1068	struct uio io, *uiop = &io;
1069	u_quad_t frev, cur_usec;
1070
1071	*mrq = NULL;
	/* Phase 1: a new request was handed in; take ownership and queue it. */
1072	if (*ndp) {
1073	    nfsd = *ndp;
1074	    *ndp = NULL;
1075	    mrep = nfsd->nd_mrep;
1076	    md = nfsd->nd_md;
1077	    dpos = nfsd->nd_dpos;
1078	    cred = nfsd->nd_cr;
1079	    v3 = (nfsd->nd_flag & ND_NFSV3);
1080	    LIST_INIT(&nfsd->nd_coalesce);
1081	    nfsd->nd_mreq = NULL;
	    /* Default stability; only the v3 branch below overrides it. */
1082	    nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1083	    getmicrotime(&now);
1084	    cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
	    /*
	     * nd_time is the time (in the same units as cur_usec) before which
	     * phase 2 will not perform the write, giving later requests a
	     * chance to be gathered with this one.
	     */
1085	    nfsd->nd_time = cur_usec + nfsrvw_procrastinate;
1086
1087	    /*
1088	     * Now, get the write header..
1089	     */
1090	    nfsm_srvmtofh(&nfsd->nd_fh);
1091	    if (v3) {
		/*
		 * Five 32-bit words: words 0-1 hold the 64-bit offset, word 2
		 * is skipped, word 3 is the stability level, and word 4 (left
		 * in *tl) is the data length read below.
		 */
1092		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1093		nfsd->nd_off = fxdr_hyper(tl);
1094		tl += 3;
1095		nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1096	    } else {
		/*
		 * Four 32-bit words: word 1 is the offset and word 3 (left in
		 * *tl) is the data length; words 0 and 2 are skipped.
		 */
1097		nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1098		nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1099		tl += 2;
1100	    }
1101	    len = fxdr_unsigned(int32_t, *tl);
1102	    nfsd->nd_len = len;
1103	    nfsd->nd_eoff = nfsd->nd_off + len;
1104
1105	    /*
1106	     * Trim the header out of the mbuf list and trim off any trailing
1107	     * junk so that the mbuf list has only the write data.
1108	     */
	    /*
	     * While "zeroing" is set, mbufs are emptied (m_len = 0): first all
	     * mbufs before md, then everything after len data bytes have been
	     * accounted for.  "i" accumulates the data bytes seen.
	     */
1109	    zeroing = 1;
1110	    i = 0;
1111	    mp = mrep;
1112	    while (mp) {
1113		if (mp == md) {
		    /* First data mbuf: skip the bytes already parsed. */
1114		    zeroing = 0;
1115		    adjust = dpos - mtod(mp, char *);
1116		    mp->m_len -= adjust;
1117		    if (mp->m_len > 0 && adjust > 0)
1118			NFSMADV(mp, adjust);
1119		}
1120		if (zeroing)
1121		    mp->m_len = 0;
1122		else {
1123		    i += mp->m_len;
1124		    if (i > len) {
1125			mp->m_len -= (i - len);
1126			zeroing = 1;
1127		    }
1128		}
1129		mp = mp->m_next;
1130	    }
	    /*
	     * Reject a length that is negative, larger than NFS_MAXDATA, or
	     * larger than the data actually present.  The nfsmout label lives
	     * inside this block; presumably the nfsm_* macros jump to it on a
	     * failure (their definitions are not visible here -- confirm).
	     * The request gets an EIO reply right away: nd_mrep is cleared so
	     * it is kept off the hash chain, and nd_time = 0 sorts it to the
	     * head of the time queue.
	     */
1131	    if (len > NFS_MAXDATA || len < 0  || i < len) {
1132nfsmout:
1133		m_freem(mrep);
1134		error = EIO;
1135		nfsm_writereply(2 * NFSX_UNSIGNED, v3);
1136		if (v3)
1137		    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1138		nfsd->nd_mreq = mreq;
1139		nfsd->nd_mrep = NULL;
1140		nfsd->nd_time = 0;
1141	    }
1142
1143	    /*
1144	     * Add this entry to the hash and time queues.
1145	     */
1146	    owp = NULL;
1147	    mutex_enter(&nfsd_lock);
	    /* Time queue: insert after the last entry with a smaller nd_time. */
1148	    wp = LIST_FIRST(&slp->ns_tq);
1149	    while (wp && wp->nd_time < nfsd->nd_time) {
1150		owp = wp;
1151		wp = LIST_NEXT(wp, nd_tq);
1152	    }
1153	    if (owp) {
1154		LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1155	    } else {
1156		LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1157	    }
	    /* Only requests that still carry data go on the hash chain. */
1158	    if (nfsd->nd_mrep) {
1159		wpp = nfsrv_nwdelayhash(slp, &nfsd->nd_fh);
1160		owp = NULL;
1161		wp = LIST_FIRST(wpp);
		/*
		 * Skip entries for which nfsrv_comparefh() is non-zero
		 * (presumably: a different file handle), then skip entries
		 * that compare equal and start at a lower offset.
		 */
1162		while (wp && nfsrv_comparefh(&nfsd->nd_fh, &wp->nd_fh)) {
1163		    owp = wp;
1164		    wp = LIST_NEXT(wp, nd_hash);
1165		}
1166		while (wp && wp->nd_off < nfsd->nd_off &&
1167		    !nfsrv_comparefh(&nfsd->nd_fh, &wp->nd_fh)) {
1168		    owp = wp;
1169		    wp = LIST_NEXT(wp, nd_hash);
1170		}
1171		if (owp) {
1172		    LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1173
1174		    /*
1175		     * Search the hash list for overlapping entries and
1176		     * coalesce.
1177		     */
		    /*
		     * Starting with the new entry, walk forward while the
		     * entry satisfies NFSW_CONTIG() against owp.  The next
		     * pointer is fetched first because nfsrvw_coalesce()
		     * unlinks nfsd from the hash chain.  Entries with
		     * different credentials are stepped over, not merged.
		     */
1178		    for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1179			wp = LIST_NEXT(nfsd, nd_hash);
1180			if (nfsrv_samecred(owp->nd_cr, nfsd->nd_cr))
1181			    nfsrvw_coalesce(owp, nfsd);
1182		    }
1183		} else {
1184		    LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1185		}
1186	    }
1187	    mutex_exit(&nfsd_lock);
1188	}
1189
1190	/*
1191	 * Now, do VOP_WRITE()s for any one(s) that need to be done now
1192	 * and generate the associated reply mbuf list(s).
1193	 */
1194loop1:
1195	getmicrotime(&now);
1196	cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
1197	mutex_enter(&nfsd_lock);
	/*
	 * Scan the time queue from the head.  Because it is sorted, the scan
	 * stops at the first entry that is not yet due.  Entries that already
	 * have a reply (nd_mreq set) are left for the final phase.
	 */
1198	for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd; nfsd = owp) {
1199		owp = LIST_NEXT(nfsd, nd_tq);
1200		if (nfsd->nd_time > cur_usec)
1201		    break;
1202		if (nfsd->nd_mreq)
1203		    continue;
		/* Unlink the request and drop the lock for the duration of the I/O. */
1204		LIST_REMOVE(nfsd, nd_tq);
1205		LIST_REMOVE(nfsd, nd_hash);
1206		mutex_exit(&nfsd_lock);
1207
1208		mrep = nfsd->nd_mrep;
1209		nfsd->nd_mrep = NULL;
1210		cred = nfsd->nd_cr;
1211		v3 = (nfsd->nd_flag & ND_NFSV3);
1212		forat_ret = aftat_ret = 1;
1213		error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
1214		    nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH),
1215		    false);
1216		if (!error) {
		    /* v3 replies carry pre-operation attributes. */
1217		    if (v3)
1218			forat_ret = VOP_GETATTR(vp, &forat, cred);
		    /* Only regular files may be written. */
1219		    if (vp->v_type != VREG) {
1220			if (v3)
1221			    error = EINVAL;
1222			else
1223			    error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1224		    }
1225		} else
1226		    vp = NULL;
1227		if (!error) {
1228		    nqsrv_getl(vp, ND_WRITE);
1229		    error = nfsrv_access(vp, VWRITE, cred, rdonly, lwp, 1);
1230		}
1231
		/* Map the requested stability level to VOP_WRITE() I/O flags. */
1232		if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1233		    ioflags = IO_NODELOCKED;
1234		else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1235		    ioflags = (IO_SYNC | IO_NODELOCKED);
1236		else
1237		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1238		uiop->uio_rw = UIO_WRITE;
1239		uiop->uio_offset = nfsd->nd_off;
1240		uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1241		UIO_SETUP_SYSSPACE(uiop);
1242		if (uiop->uio_resid > 0) {
		    /*
		     * Build one iovec per non-empty mbuf.  Note that the
		     * array is built even when error is already set; only
		     * the VOP_WRITE() itself is skipped in that case.
		     */
1243		    mp = mrep;
1244		    i = 0;
1245		    while (mp) {
1246			if (mp->m_len > 0)
1247			    i++;
1248			mp = mp->m_next;
1249		    }
1250		    uiop->uio_iovcnt = i;
1251		    iov = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
1252		    uiop->uio_iov = ivp = iov;
1253		    mp = mrep;
1254		    while (mp) {
1255			if (mp->m_len > 0) {
1256			    ivp->iov_base = mtod(mp, void *);
1257			    ivp->iov_len = mp->m_len;
1258			    ivp++;
1259			}
1260			mp = mp->m_next;
1261		    }
1262		    if (!error) {
1263			error = VOP_WRITE(vp, uiop, ioflags, cred);
1264			nfsstats.srvvop_writes++;
1265		    }
1266		    free((void *)iov, M_TEMP);
1267		}
1268		m_freem(mrep);
		/* Post-operation attributes are fetched even after an error. */
1269		if (vp) {
1270		    aftat_ret = VOP_GETATTR(vp, &va, cred);
1271		    vput(vp);
1272		}
1273
1274		/*
1275		 * Loop around generating replies for all write rpcs that have
1276		 * now been completed.
1277		 */
		/*
		 * swp is the request that was written; nfsd visits swp first
		 * and then each request taken off swp->nd_coalesce.  The same
		 * error, v3 flag and attributes (all derived from swp) are
		 * used for every reply built here.
		 */
1278		swp = nfsd;
1279		do {
1280		    if (error) {
1281			nfsm_writereply(NFSX_WCCDATA(v3), v3);
1282			if (v3) {
1283			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1284			}
1285		    } else {
1286			nfsm_writereply(NFSX_PREOPATTR(v3) +
1287			    NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
1288			    NFSX_WRITEVERF(v3), v3);
1289			if (v3) {
			    /*
			     * v3 result: wcc data, this request's own length,
			     * the stability level of the gathered write, and
			     * a two-word verifier taken from boottime.
			     */
1290			    nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
1291			    nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1292			    *tl++ = txdr_unsigned(nfsd->nd_len);
1293			    *tl++ = txdr_unsigned(swp->nd_stable);
1294			    /*
1295			     * Actually, there is no need to txdr these fields,
1296			     * but it may make the values more human readable,
1297			     * for debugging purposes.
1298			     */
1299			    *tl++ = txdr_unsigned(boottime.tv_sec);
1300			    *tl = txdr_unsigned(boottime.tv_nsec / 1000);
1301			} else {
1302			    nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1303			    nfsm_srvfillattr(&va, fp);
1304			}
1305		    }
1306		    nfsd->nd_mreq = mreq;
1307		    if (nfsd->nd_mrep)
1308			panic("nfsrv_write: nd_mrep not free");
1309
1310		    /*
1311		     * Done. Put it at the head of the timer queue so that
1312		     * the final phase can return the reply.
1313		     */
		    /*
		     * swp itself is re-queued after the loop; here only the
		     * coalesced requests are queued, and the next one is
		     * taken off swp's nd_coalesce list (linked via nd_tq).
		     */
1314		    mutex_enter(&nfsd_lock);
1315		    if (nfsd != swp) {
1316			nfsd->nd_time = 0;
1317			LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1318		    }
1319		    nfsd = LIST_FIRST(&swp->nd_coalesce);
1320		    if (nfsd) {
1321			LIST_REMOVE(nfsd, nd_tq);
1322		    }
1323		    mutex_exit(&nfsd_lock);
1324		} while (nfsd);
1325		swp->nd_time = 0;
1326
1327		mutex_enter(&nfsd_lock);
1328		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1329		mutex_exit(&nfsd_lock);
		/* The queue was modified while unlocked, so rescan from the head. */
1330		goto loop1;
1331	}
1332	mutex_exit(&nfsd_lock);
1333	nfs_timer_start();
1334
1335	/*
1336	 * Search for a reply to return.
1337	 */
	/* The first queued request with a reply is unlinked and returned. */
1338	mutex_enter(&nfsd_lock);
1339	LIST_FOREACH(nfsd, &slp->ns_tq, nd_tq) {
1340		if (nfsd->nd_mreq) {
1341		    LIST_REMOVE(nfsd, nd_tq);
1342		    *mrq = nfsd->nd_mreq;
1343		    *ndp = nfsd;
1344		    break;
1345		}
1346	}
1347	mutex_exit(&nfsd_lock);
1348	return (0);
1349}
1350
1351/*
1352 * Coalesce the write request nfsd into owp. To do this we must:
1353 * - remove nfsd from the queues
1354 * - merge nfsd->nd_mrep into owp->nd_mrep
1355 * - update the nd_eoff and nd_stable for owp
1356 * - put nfsd on owp's nd_coalesce list
1357 * NB: The caller must hold nfsd_lock (asserted at the top of the body).
 *
 * owp:  the earlier request that absorbs the data and stays queued.
 * nfsd: the request being absorbed; on return its nd_mrep is NULL and it
 *       is linked (through nd_tq) on owp->nd_coalesce.
1358 */
1359void
1360nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1361{
1362        int overlap;
1363        struct mbuf *mp;
1364	struct nfsrv_descript *m;
1365
1366	KASSERT(mutex_owned(&nfsd_lock));
1367
1368        LIST_REMOVE(nfsd, nd_hash);
1369        LIST_REMOVE(nfsd, nd_tq);
	/*
	 * If nfsd extends past the end of owp, drop the bytes owp already
	 * covers from the front of nfsd's data, append the remainder to the
	 * tail of owp's mbuf chain and extend owp.  Otherwise nfsd lies
	 * entirely inside owp's range and its data is simply freed.
	 */
1370        if (owp->nd_eoff < nfsd->nd_eoff) {
1371            overlap = owp->nd_eoff - nfsd->nd_off;
	    /* A negative overlap would mean a gap between the two requests. */
1372            if (overlap < 0)
1373                panic("nfsrv_coalesce: bad off");
1374            if (overlap > 0)
1375                m_adj(nfsd->nd_mrep, overlap);
1376            mp = owp->nd_mrep;
1377            while (mp->m_next)
1378                mp = mp->m_next;
1379            mp->m_next = nfsd->nd_mrep;
1380            owp->nd_eoff = nfsd->nd_eoff;
1381        } else
1382            m_freem(nfsd->nd_mrep);
1383        nfsd->nd_mrep = NULL;
	/*
	 * The merged write takes the stricter of the two stability levels:
	 * FILESYNC always wins, and DATASYNC upgrades UNSTABLE.
	 */
1384        if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1385            owp->nd_stable = NFSV3WRITE_FILESYNC;
1386        else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1387            owp->nd_stable == NFSV3WRITE_UNSTABLE)
1388            owp->nd_stable = NFSV3WRITE_DATASYNC;
	/* nd_tq is reused as the linkage for the nd_coalesce list. */
1389        LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1390 	/*
1391 	 * nfsd might hold coalesce elements! Move them to owp.
1392 	 * Otherwise, requests may be lost and clients will be stuck.
1393 	 */
1394	while ((m = LIST_FIRST(&nfsd->nd_coalesce)) != NULL) {
1395		LIST_REMOVE(m, nd_tq);
1396		LIST_INSERT_HEAD(&owp->nd_coalesce, m, nd_tq);
1397	}
1398}
1399
1400/*
1401 * nfs create service
1402 * now does a truncate to 0 length via. setattr if it already exists
 *
 * Serves both protocol versions (v3 is taken from nfsd->nd_flag).
 * nfsd supplies the request mbufs and credentials; slp and lwp are passed
 * through to the lookup and access helpers.  The reply is produced by the
 * nfsm_reply() macro (defined elsewhere; presumably it stores the reply
 * through mrq -- confirm).
 * Returns 0 on the paths that build a full reply; the nfsmout path and the
 * kauth failure path return the current value of error.
 *
 * "abort" tracks whether the lookup state from nfs_namei() still has to be
 * released (VOP_ABORTOP plus dropping ni_dvp / ni_vp) if the function bails
 * out through nfsmout.
1403 */
1404int
1405nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
1406{
1407	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1408	struct mbuf *nam = nfsd->nd_nam;
1409	char *dpos = nfsd->nd_dpos;
1410	kauth_cred_t cred = nfsd->nd_cr;
1411	struct nfs_fattr *fp;
1412	struct vattr va, dirfor, diraft;
1413	struct nfsv2_sattr *sp;
1414	u_int32_t *tl;
1415	struct nameidata nd;
1416	char *cp;
1417	int32_t t1;
1418	char *bpos;
1419	int error = 0, cache = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1420	int rdev = 0, abort = 0;
1421	int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
1422	char *cp2;
1423	struct mbuf *mb, *mreq;
1424	struct vnode *vp = NULL, *dirp = NULL;
1425	nfsrvfh_t nsfh;
1426	u_quad_t frev, tempsize;
1427	u_char cverf[NFSX_V3CREATEVERF];
1428
1429	nd.ni_cnd.cn_nameiop = 0;
	/* Parse the directory file handle and the name length. */
1430	nfsm_srvmtofh(&nsfh);
1431	nfsm_srvnamesiz(len);
1432	nd.ni_cnd.cn_cred = cred;
1433	nd.ni_cnd.cn_nameiop = CREATE;
1434	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
1435	error = nfs_namei(&nd, &nsfh, len, slp, nam, &md, &dpos,
1436		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
	/* v3 replies carry the directory's attributes before the operation. */
1437	if (dirp && v3) {
1438		dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred);
1439	}
1440	if (error) {
1441		nfsm_reply(NFSX_WCCDATA(v3));
1442		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1443		if (dirp)
1444			vrele(dirp);
1445		if (nd.ni_pathbuf != NULL) {
1446			pathbuf_destroy(nd.ni_pathbuf);
1447			nd.ni_pathbuf = NULL;
1448		}
1449		return (0);
1450	}
	/* From here on nfsmout has to undo the lookup. */
1451	abort = 1;
1452	vattr_null(&va);
1453	if (v3) {
1454		va.va_mode = 0;
1455		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1456		how = fxdr_unsigned(int, *tl);
1457		switch (how) {
1458		case NFSV3CREATE_GUARDED:
1459			if (nd.ni_vp) {
1460				error = EEXIST;
1461				break;
1462			}
			/* FALLTHROUGH: target absent, parse attributes as for UNCHECKED */
1463		case NFSV3CREATE_UNCHECKED:
1464			nfsm_srvsattr(&va);
1465			break;
1466		case NFSV3CREATE_EXCLUSIVE:
			/* Save the client's verifier; no attributes follow it. */
1467			nfsm_dissect(cp, void *, NFSX_V3CREATEVERF);
1468			memcpy(cverf, cp, NFSX_V3CREATEVERF);
1469			exclusive_flag = 1;
1470			break;
1471		};
1472		va.va_type = VREG;
1473	} else {
		/*
		 * v2: the type is derived from the mode bits, and sa_size is
		 * read as the size for regular files or as the device number
		 * for character/block/fifo nodes.
		 */
1474		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
1475		va.va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1476		if (va.va_type == VNON)
1477			va.va_type = VREG;
1478		va.va_mode = nfstov_mode(sp->sa_mode);
1479		switch (va.va_type) {
1480		case VREG:
1481			tsize = fxdr_unsigned(int32_t, sp->sa_size);
1482			if (tsize != -1)
1483				va.va_size = (u_quad_t)tsize;
1484			break;
1485		case VCHR:
1486		case VBLK:
1487		case VFIFO:
1488			rdev = fxdr_unsigned(int32_t, sp->sa_size);
1489			break;
1490		default:
1491			break;
1492		};
1493	}
1494
1495	/*
1496	 * Iff doesn't exist, create it
1497	 * otherwise just truncate to 0 length
1498	 *   should I set the mode too ??
1499	 */
1500	if (nd.ni_vp == NULL) {
1501		if (va.va_type == VREG || va.va_type == VSOCK) {
1502			nqsrv_getl(nd.ni_dvp, ND_WRITE);
1503			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va);
1504			if (!error) {
				/*
				 * Exclusive create: record the client's
				 * verifier in the new file's access time.
				 * Clearing exclusive_flag tells the check
				 * further down that the file was created by
				 * this call.
				 */
1505				if (exclusive_flag) {
1506					exclusive_flag = 0;
1507					vattr_null(&va);
1508					/*
1509					 * XXX
1510					 * assuming NFSX_V3CREATEVERF
1511					 * == sizeof(nfstime3)
1512					 */
1513					fxdr_nfsv3time(cverf, &va.va_atime);
1514					error = VOP_SETATTR(nd.ni_vp, &va,
1515						cred);
1516				}
1517			}
1518		} else if (va.va_type == VCHR || va.va_type == VBLK ||
1519			va.va_type == VFIFO) {
			/* A character device with rdev 0xffffffff is treated as a fifo. */
1520			if (va.va_type == VCHR && rdev == 0xffffffff)
1521				va.va_type = VFIFO;
			/* Creating real device nodes requires mknod authorization. */
1522			if (va.va_type != VFIFO &&
1523			    (error = kauth_authorize_system(cred,
1524			    KAUTH_SYSTEM_MKNOD, 0, NULL, NULL, NULL))) {
1525				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1526				vput(nd.ni_dvp);
1527				abort = 0;
1528				nfsm_reply(0);
1529				if (nd.ni_pathbuf != NULL) {
1530					pathbuf_destroy(nd.ni_pathbuf);
1531					nd.ni_pathbuf = NULL;
1532				}
1533				return (error);
1534			} else
1535				va.va_rdev = (dev_t)rdev;
1536			nqsrv_getl(nd.ni_dvp, ND_WRITE);
1537			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd,
1538			    &va);
1539			if (error) {
1540				nfsm_reply(0);
1541			}
1542		} else {
			/* Any other type cannot be created through this call. */
1543			VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1544			if (nd.ni_pathbuf != NULL) {
1545				pathbuf_destroy(nd.ni_pathbuf);
1546				nd.ni_pathbuf = NULL;
1547			}
1548			vput(nd.ni_dvp);
1549			error = ENXIO;
1550			abort = 0;
1551		}
1552		vp = nd.ni_vp;
1553	} else {
		/*
		 * The name already exists: abandon the create, release the
		 * parent (vrele when it is the same vnode as the target,
		 * vput otherwise) and, if a size was supplied, set it after
		 * a write-access check.
		 */
1554		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1555		if (nd.ni_pathbuf != NULL) {
1556			pathbuf_destroy(nd.ni_pathbuf);
1557			nd.ni_pathbuf = NULL;
1558		}
1559		vp = nd.ni_vp;
1560		if (nd.ni_dvp == vp)
1561			vrele(nd.ni_dvp);
1562		else
1563			vput(nd.ni_dvp);
1564		abort = 0;
		/* va_size == -1 presumably means "not set" (vattr_null default -- confirm). */
1565		if (!error && va.va_size != -1) {
1566			error = nfsrv_access(vp, VWRITE, cred,
1567			    (nd.ni_cnd.cn_flags & RDONLY), lwp, 0);
1568			if (!error) {
1569				nqsrv_getl(vp, ND_WRITE);
1570				tempsize = va.va_size;
1571				vattr_null(&va);
1572				va.va_size = tempsize;
1573				error = VOP_SETATTR(vp, &va, cred);
1574			}
1575		}
1576		if (error)
1577			vput(vp);
1578	}
	/* On success build the new file handle and fetch the attributes. */
1579	if (!error) {
1580		error = nfsrv_composefh(vp, &nsfh, v3);
1581		if (!error)
1582			error = VOP_GETATTR(vp, &va, cred);
1583		vput(vp);
1584	}
1585	if (v3) {
		/*
		 * exclusive_flag is still set only when the file already
		 * existed: the request succeeds only if the stored access
		 * time matches the client's verifier.
		 */
1586		if (exclusive_flag && !error) {
1587			/*
1588			 * XXX assuming NFSX_V3CREATEVERF == sizeof(nfstime3)
1589			 */
1590			char oldverf[NFSX_V3CREATEVERF];
1591
1592			txdr_nfsv3time(&va.va_atime, oldverf);
1593			if (memcmp(cverf, oldverf, NFSX_V3CREATEVERF))
1594				error = EEXIST;
1595		}
1596		if (dirp) {
1597			vn_lock(dirp, LK_SHARED | LK_RETRY);
1598			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1599			VOP_UNLOCK(dirp);
1600		}
1601	}
1602	if (dirp) {
1603		vrele(dirp);
1604		dirp = NULL;
1605	}
1606	if (nd.ni_pathbuf != NULL) {
1607		pathbuf_destroy(nd.ni_pathbuf);
1608		nd.ni_pathbuf = NULL;
1609	}
	/* Everything has been released; nfsmout must not release it again. */
1610	abort = 0;
1611	nfsm_reply(NFSX_SRVFH(&nsfh, v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
1612	if (v3) {
1613		if (!error) {
1614			nfsm_srvpostop_fh(&nsfh);
1615			nfsm_srvpostop_attr(0, &va);
1616		}
1617		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1618	} else {
1619		nfsm_srvfhtom(&nsfh, v3);
1620		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
1621		nfsm_srvfillattr(&va, fp);
1622	}
1623	return (0);
	/*
	 * Bail-out target, presumably reached from the nfsm_* macros
	 * (definitions not visible here): release whatever is still held.
	 */
1624nfsmout:
1625	if (dirp)
1626		vrele(dirp);
1627	if (abort) {
1628		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1629		if (nd.ni_dvp == nd.ni_vp)
1630			vrele(nd.ni_dvp);
1631		else
1632			vput(nd.ni_dvp);
1633		if (nd.ni_vp)
1634			vput(nd.ni_vp);
1635	}
1636	if (nd.ni_pathbuf != NULL) {
1637		pathbuf_destroy(nd.ni_pathbuf);
1638		nd.ni_pathbuf = NULL;
1639	}
1640	return (error);
1641}
1642
1643/*
1644 * nfs v3 mknod service
 *
 * Creates a character device, block device, socket or fifo named by the
 * request in the directory given by the request's file handle.  Any other
 * type is rejected with NFSERR_BADTYPE and an existing name with EEXIST.
 * The reply is built with the v3 layouts unconditionally (the size macros
 * and nfsrv_composefh() are called with constant 1/true).
 * Returns 0 once a reply has been built; the nfsmout path returns error.
 *
 * Note that "abort" is used both as an int flag (lookup state still needs
 * releasing at nfsmout) and as a goto label; C keeps labels in a separate
 * name space.
1645 */
1646int
1647nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
1648{
1649	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1650	struct mbuf *nam = nfsd->nd_nam;
1651	char *dpos = nfsd->nd_dpos;
1652	kauth_cred_t cred = nfsd->nd_cr;
1653	struct vattr va, dirfor, diraft;
1654	u_int32_t *tl;
1655	struct nameidata nd;
1656	int32_t t1;
1657	char *bpos;
1658	int error = 0, cache = 0, len, dirfor_ret = 1, diraft_ret = 1;
1659	int abort = 0;
1660	u_int32_t major, minor;
1661	enum vtype vtyp;
1662	char *cp2;
1663	struct mbuf *mb, *mreq;
1664	struct vnode *vp, *dirp = (struct vnode *)0;
1665	nfsrvfh_t nsfh;
1666	u_quad_t frev;
1667
1668	nd.ni_cnd.cn_nameiop = 0;
	/* Parse the directory file handle and the name length, then look up. */
1669	nfsm_srvmtofh(&nsfh);
1670	nfsm_srvnamesiz(len);
1671	nd.ni_cnd.cn_cred = cred;
1672	nd.ni_cnd.cn_nameiop = CREATE;
1673	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
1674	error = nfs_namei(&nd, &nsfh, len, slp, nam, &md, &dpos,
1675		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
	/* Directory attributes before the operation, for the wcc data. */
1676	if (dirp)
1677		dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred);
1678	if (error) {
1679		nfsm_reply(NFSX_WCCDATA(1));
1680		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1681		if (dirp)
1682			vrele(dirp);
1683		if (nd.ni_pathbuf != NULL) {
1684			pathbuf_destroy(nd.ni_pathbuf);
1685			nd.ni_pathbuf = NULL;
1686		}
1687		return (0);
1688	}
	/* From here on nfsmout has to undo the lookup. */
1689	abort = 1;
1690	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1691	vtyp = nfsv3tov_type(*tl);
1692	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1693		error = NFSERR_BADTYPE;
1694		goto abort;
1695	}
1696	vattr_null(&va);
1697	va.va_mode = 0;
1698	nfsm_srvsattr(&va);
1699	if (vtyp == VCHR || vtyp == VBLK) {
1700		dev_t rdev;
1701
		/*
		 * Device nodes carry a major/minor pair.  Reject values
		 * that do not survive a round trip through makedev().
		 */
1702		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1703		major = fxdr_unsigned(u_int32_t, *tl++);
1704		minor = fxdr_unsigned(u_int32_t, *tl);
1705		rdev = makedev(major, minor);
1706		if (major(rdev) != major || minor(rdev) != minor) {
1707			error = EINVAL;
1708			goto abort;
1709		}
1710		va.va_rdev = rdev;
1711	}
1712
1713	/*
1714	 * Iff doesn't exist, create it.
1715	 */
1716	if (nd.ni_vp) {
1717		error = EEXIST;
		/*
		 * Shared failure path (also entered by the gotos above):
		 * abandon the operation and release the parent, the target
		 * (if any) and the path buffer, then build the reply.
		 */
1718abort:
1719		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1720		if (nd.ni_dvp == nd.ni_vp)
1721			vrele(nd.ni_dvp);
1722		else
1723			vput(nd.ni_dvp);
1724		if (nd.ni_vp)
1725			vput(nd.ni_vp);
1726		if (nd.ni_pathbuf != NULL) {
1727			pathbuf_destroy(nd.ni_pathbuf);
1728			nd.ni_pathbuf = NULL;
1729		}
1730		goto out;
1731	}
1732	va.va_type = vtyp;
	/* Sockets go through VOP_CREATE(); everything else through VOP_MKNOD(). */
1733	if (vtyp == VSOCK) {
1734		nqsrv_getl(nd.ni_dvp, ND_WRITE);
1735		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va);
1736	} else {
		/* Creating real device nodes requires mknod authorization. */
1737		if (va.va_type != VFIFO &&
1738		    (error = kauth_authorize_system(cred,
1739		    KAUTH_SYSTEM_MKNOD, 0, NULL, NULL, NULL))) {
1740			VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1741			vput(nd.ni_dvp);
1742			goto out;
1743		}
1744		nqsrv_getl(nd.ni_dvp, ND_WRITE);
1745		error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va);
1746		if (error)
1747			goto out;
1748	}
1749out:
1750	vp = nd.ni_vp;
	/* On success build the new file handle and fetch the attributes. */
1751	if (!error) {
1752		error = nfsrv_composefh(vp, &nsfh, true);
1753		if (!error)
1754			error = VOP_GETATTR(vp, &va, cred);
1755		vput(vp);
1756	}
	/* Directory attributes after the operation, for the wcc data. */
1757	if (dirp) {
1758		vn_lock(dirp, LK_SHARED | LK_RETRY);
1759		diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1760		VOP_UNLOCK(dirp);
1761		vrele(dirp);
1762		dirp = NULL;
1763	}
1764	if (nd.ni_pathbuf != NULL) {
1765		pathbuf_destroy(nd.ni_pathbuf);
1766		nd.ni_pathbuf = NULL;
1767	}
	/* Everything has been released; nfsmout must not release it again. */
1768	abort = 0;
1769	nfsm_reply(NFSX_SRVFH(&nsfh, true) + NFSX_POSTOPATTR(1) +
1770	    NFSX_WCCDATA(1));
1771	if (!error) {
1772		nfsm_srvpostop_fh(&nsfh);
1773		nfsm_srvpostop_attr(0, &va);
1774	}
1775	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1776	return (0);
	/*
	 * Bail-out target, presumably reached from the nfsm_* macros
	 * (definitions not visible here): release whatever is still held.
	 */
1777nfsmout:
1778	if (abort) {
1779		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1780		if (nd.ni_dvp == nd.ni_vp)
1781			vrele(nd.ni_dvp);
1782		else
1783			vput(nd.ni_dvp);
1784		if (nd.ni_vp)
1785			vput(nd.ni_vp);
1786	}
1787	if (nd.ni_pathbuf != NULL) {
1788		pathbuf_destroy(nd.ni_pathbuf);
1789		nd.ni_pathbuf = NULL;
1790	}
1791	if (dirp)
1792		vrele(dirp);
1793	return (error);
1794}
1795
1796/*
1797 * nfs remove service
 *
 * Looks up the name from the request in the directory given by the
 * request's file handle and removes it with VOP_REMOVE().  Directories are
 * refused with EPERM and the root of a mounted file system with EBUSY.
 * For v3 the directory attributes before and after the operation are put
 * in the reply.  The v3 path returns 0 explicitly; the tail of the function
 * (including the nfsmout label, presumably) comes from the nfsm_srvdone
 * macro, which is defined elsewhere.
1798 */
1799int
1800nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
1801{
1802	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1803	struct mbuf *nam = nfsd->nd_nam;
1804	char *dpos = nfsd->nd_dpos;
1805	kauth_cred_t cred = nfsd->nd_cr;
1806	struct nameidata nd;
1807	u_int32_t *tl;
1808	int32_t t1;
1809	char *bpos;
1810	int error = 0, cache = 0, len, dirfor_ret = 1, diraft_ret = 1;
1811	int v3 = (nfsd->nd_flag & ND_NFSV3);
1812	char *cp2;
1813	struct mbuf *mb, *mreq;
1814	struct vnode *vp, *dirp;
1815	struct vattr dirfor, diraft;
1816	nfsrvfh_t nsfh;
1817	u_quad_t frev;
1818
1819#ifndef nolint
1820	vp = (struct vnode *)0;
1821#endif
	/* Parse the directory file handle and the name length, then look up. */
1822	nfsm_srvmtofh(&nsfh);
1823	nfsm_srvnamesiz(len);
1824	nd.ni_cnd.cn_cred = cred;
1825	nd.ni_cnd.cn_nameiop = DELETE;
1826	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
1827	error = nfs_namei(&nd, &nsfh, len, slp, nam, &md, &dpos,
1828		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
	/*
	 * v3: directory attributes before the operation.  dirp is locked
	 * around the call only when the parent and the target are the same
	 * vnode.
	 */
1829	if (error == 0 && dirp && v3) {
1830		if (nd.ni_dvp == nd.ni_vp)
1831			vn_lock(dirp, LK_SHARED | LK_RETRY);
1832		dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred);
1833		if (nd.ni_dvp == nd.ni_vp)
1834			VOP_UNLOCK(dirp);
1835	}
1836	if (!error) {
1837		vp = nd.ni_vp;
		/* Directories are not removed through this call. */
1838		if (vp->v_type == VDIR) {
1839			error = EPERM;
1840			goto out;
1841		}
1842		/*
1843		 * The root of a mounted filesystem cannot be deleted.
1844		 */
1845		if (vp->v_vflag & VV_ROOT) {
1846			error = EBUSY;
1847		}
1848out:
		/*
		 * Either perform the removal or abandon the operation and
		 * release the parent (vrele when it is the same vnode as
		 * the target, vput otherwise) and the target.
		 */
1849		if (!error) {
1850			nqsrv_getl(nd.ni_dvp, ND_WRITE);
1851			nqsrv_getl(vp, ND_WRITE);
1852			error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1853		} else {
1854			VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1855			if (nd.ni_dvp == vp)
1856				vrele(nd.ni_dvp);
1857			else
1858				vput(nd.ni_dvp);
1859			vput(vp);
1860		}
1861	}
1862	if (nd.ni_pathbuf != NULL) {
1863		pathbuf_destroy(nd.ni_pathbuf);
1864		nd.ni_pathbuf = NULL;
1865	}
	/* v3: directory attributes after the operation; then drop dirp. */
1866	if (dirp) {
1867		if (v3) {
1868			vn_lock(dirp, LK_SHARED | LK_RETRY);
1869			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
1870			VOP_UNLOCK(dirp);
1871		}
1872		vrele(dirp);
1873	}
1874	nfsm_reply(NFSX_WCCDATA(v3));
1875	if (v3) {
1876		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
1877		return (0);
1878	}
1879	nfsm_srvdone;
1880}
1881
1882/*
1883 * nfs rename service
 *
 * Looks up the source name, takes the file system's rename lock, looks the
 * source up again with relookup(), looks up the destination name, applies a
 * series of sanity checks and finally calls VOP_RENAME().  For v3 the
 * attributes of both directories before and after the operation are put in
 * the reply.
 * Returns 0 once a reply has been built; the nfsmout path returns error.
 *
 * Bookkeeping used by nfsmout: a non-zero cn_nameiop in fromnd / tond marks
 * lookup state that still has to be released, and a non-NULL localfs marks
 * a rename lock that is still held.
1884 */
1885int
1886nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
1887{
1888	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
1889	struct mbuf *nam = nfsd->nd_nam;
1890	char *dpos = nfsd->nd_dpos;
1891	kauth_cred_t cred = nfsd->nd_cr;
1892	u_int32_t *tl;
1893	int32_t t1;
1894	char *bpos;
1895	int error = 0, cache = 0, fdirfor_ret = 1, fdiraft_ret = 1;
1896	uint32_t len, len2;
1897	int tdirfor_ret = 1, tdiraft_ret = 1;
1898	int v3 = (nfsd->nd_flag & ND_NFSV3);
1899	char *cp2;
1900	struct mbuf *mb, *mreq;
1901	struct nameidata fromnd, tond;
1902	struct vnode *fvp, *tvp, *tdvp;
1903	struct vnode *fdirp = NULL, *tdirp = NULL;
1904	struct mount *localfs = NULL;
1905	struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
1906	nfsrvfh_t fnsfh, tnsfh;
1907	u_quad_t frev;
1908	uid_t saved_uid;
1909
1910#ifndef nolint
1911	fvp = (struct vnode *)0;
1912#endif
1913	fromnd.ni_cnd.cn_nameiop = 0;
1914	tond.ni_cnd.cn_nameiop = 0;
	/* Parse the source directory file handle and the source name length. */
1915	nfsm_srvmtofh(&fnsfh);
1916	nfsm_srvnamesiz(len);
1917	/*
1918	 * Remember our original uid so that we can reset cr_uid before
1919	 * the second nfs_namei() call, in case it is remapped.
1920	 */
1921	saved_uid = kauth_cred_geteuid(cred);
1922	fromnd.ni_cnd.cn_cred = cred;
1923	fromnd.ni_cnd.cn_nameiop = DELETE;
1924	fromnd.ni_cnd.cn_flags = LOCKPARENT | INRENAME;
1925	error = nfs_namei(&fromnd, &fnsfh, len, slp, nam, &md,
1926		&dpos, &fdirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
	/*
	 * v3: source directory attributes before the operation.  fdirp is
	 * locked around the call only when the parent and the source are
	 * the same vnode.
	 */
1927	if (error == 0 && fdirp && v3) {
1928		if (fromnd.ni_dvp == fromnd.ni_vp)
1929			vn_lock(fdirp, LK_SHARED | LK_RETRY);
1930		fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred);
1931		if (fromnd.ni_dvp == fromnd.ni_vp)
1932			VOP_UNLOCK(fdirp);
1933	}
1934	if (error) {
		/* Nothing to undo at nfsmout if the reply macros bail out. */
1935		fromnd.ni_cnd.cn_nameiop = 0;
1936		nfsm_reply(2 * NFSX_WCCDATA(v3));
1937		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
1938		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
1939		if (fdirp)
1940			vrele(fdirp);
1941		if (fromnd.ni_pathbuf != NULL) {
1942			pathbuf_destroy(fromnd.ni_pathbuf);
1943		}
1944		return (0);
1945	}
	/* Drop the lock on the source directory unless it is the source itself. */
1946	if (fromnd.ni_dvp != fromnd.ni_vp) {
1947		VOP_UNLOCK(fromnd.ni_dvp);
1948	}
1949	fvp = fromnd.ni_vp;
1950
	/* Serialize with other renames on the source's file system. */
1951	localfs = fvp->v_mount;
1952	error = VFS_RENAMELOCK_ENTER(localfs);
1953	if (error) {
1954		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
1955		vrele(fromnd.ni_dvp);
1956		vrele(fvp);
1957		goto out1;
1958	}
1959
1960	/* Copied, regrettably, from vfs_syscalls.c (q.v.) */
	/*
	 * Release the source found by the first lookup and look it up again
	 * now that the rename lock is held.  "." and ".." are refused.
	 */
1961	vrele(fvp);
1962	if ((fromnd.ni_cnd.cn_namelen == 1 &&
1963	     fromnd.ni_cnd.cn_nameptr[0] == '.') ||
1964	    (fromnd.ni_cnd.cn_namelen == 2 &&
1965	     fromnd.ni_cnd.cn_nameptr[0] == '.' &&
1966	     fromnd.ni_cnd.cn_nameptr[1] == '.')) {
1967		error = EINVAL;
1968		VFS_RENAMELOCK_EXIT(localfs);
1969		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
1970		vrele(fromnd.ni_dvp);
1971		goto out1;
1972	}
1973	vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
1974	error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd, 0);
1975	if (error) {
1976		VOP_UNLOCK(fromnd.ni_dvp);
1977		VFS_RENAMELOCK_EXIT(localfs);
1978		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
1979		vrele(fromnd.ni_dvp);
1980		goto out1;
1981	}
	/* Keep the references to source and parent but drop their locks. */
1982	VOP_UNLOCK(fromnd.ni_vp);
1983	if (fromnd.ni_dvp != fromnd.ni_vp)
1984		VOP_UNLOCK(fromnd.ni_dvp);
1985	fvp = fromnd.ni_vp;
1986
	/* Parse the destination directory file handle and name length. */
1987	nfsm_srvmtofh(&tnsfh);
1988	if (v3) {
1989		nfsm_dissect(tl, uint32_t *, NFSX_UNSIGNED);
1990		len2 = fxdr_unsigned(uint32_t, *tl);
1991		/* len2 will be checked by nfs_namei */
1992	}
1993	else {
1994		/* NFSv2 */
1995		nfsm_strsiz(len2, NFS_MAXNAMLEN);
1996	}
	/* Undo any uid remapping done during the first lookup. */
1997	kauth_cred_seteuid(cred, saved_uid);
1998	tond.ni_cnd.cn_cred = cred;
1999	tond.ni_cnd.cn_nameiop = RENAME;
2000	tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | INRENAME;
2001	error = nfs_namei(&tond, &tnsfh, len2, slp, nam, &md,
2002		&dpos, &tdirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
	/* v3: destination directory attributes before the operation. */
2003	if (tdirp && v3) {
2004		tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred);
2005	}
2006	if (error) {
2007		VFS_RENAMELOCK_EXIT(localfs);
2008		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2009		vrele(fromnd.ni_dvp);
2010		vrele(fvp);
2011		goto out1;
2012	}
2013	tdvp = tond.ni_dvp;
2014	tvp = tond.ni_vp;
	/*
	 * Sanity checks.  Each failure selects a version-specific error code
	 * (v3 and v2 replies use different codes for the same condition).
	 */
2015	if (tvp != NULL) {
		/* Source and existing target must both be, or both not be, directories. */
2016		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2017			if (v3)
2018				error = EEXIST;
2019			else
2020				error = EISDIR;
2021			goto out;
2022		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2023			if (v3)
2024				error = EEXIST;
2025			else
2026				error = ENOTDIR;
2027			goto out;
2028		}
		/* The target directory must not be a mount point. */
2029		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
2030			if (v3)
2031				error = EXDEV;
2032			else
2033				error = ENOTEMPTY;
2034			goto out;
2035		}
2036	}
	/* The source directory must not be a mount point. */
2037	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
2038		if (v3)
2039			error = EXDEV;
2040		else
2041			error = ENOTEMPTY;
2042		goto out;
2043	}
	/* Source and destination must be on the same mount. */
2044	if (fvp->v_mount != tdvp->v_mount) {
2045		if (v3)
2046			error = EXDEV;
2047		else
2048			error = ENOTEMPTY;
2049		goto out;
2050	}
	/*
	 * The source must not be the destination directory itself.  There is
	 * no "goto out" here, so the check below still runs and may replace
	 * this error with -1.
	 */
2051	if (fvp == tdvp) {
2052		if (v3)
2053			error = EINVAL;
2054		else
2055			error = ENOTEMPTY;
2056	}
2057	/*
2058	 * If source is the same as the destination (that is the
2059	 * same vnode with the same name in the same directory),
2060	 * then there is nothing to do.
2061	 */
	/* -1 is an internal marker; it is turned into success below. */
2062	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
2063	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
2064	    !memcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
2065	      fromnd.ni_cnd.cn_namelen))
2066		error = -1;
2067out:
	/*
	 * Either perform the rename, or abandon both lookups and release
	 * the destination parent/target and the source parent/source.  The
	 * rename lock is dropped on both paths.
	 */
2068	if (!error) {
2069		nqsrv_getl(fromnd.ni_dvp, ND_WRITE);
2070		nqsrv_getl(tdvp, ND_WRITE);
2071		if (tvp) {
2072			nqsrv_getl(tvp, ND_WRITE);
2073		}
2074		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
2075				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
2076		VFS_RENAMELOCK_EXIT(localfs);
2077	} else {
2078		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
2079		if (tdvp == tvp)
2080			vrele(tdvp);
2081		else
2082			vput(tdvp);
2083		if (tvp)
2084			vput(tvp);
2085		VFS_RENAMELOCK_EXIT(localfs);
2086		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2087		vrele(fromnd.ni_dvp);
2088		vrele(fvp);
2089		if (error == -1)
2090			error = 0;
2091	}
2092	if (tond.ni_pathbuf != NULL) {
2093		pathbuf_destroy(tond.ni_pathbuf);
2094		tond.ni_pathbuf = NULL;
2095	}
2096	tond.ni_cnd.cn_nameiop = 0;
2097out1:
	/* v3: attributes of both directories after the operation. */
2098	if (fdirp) {
2099		if (v3) {
2100			vn_lock(fdirp, LK_SHARED | LK_RETRY);
2101			fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred);
2102			VOP_UNLOCK(fdirp);
2103		}
2104		vrele(fdirp);
2105		fdirp = NULL;
2106	}
2107	if (tdirp) {
2108		if (v3) {
2109			vn_lock(tdirp, LK_SHARED | LK_RETRY);
2110			tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred);
2111			VOP_UNLOCK(tdirp);
2112		}
2113		vrele(tdirp);
2114		tdirp = NULL;
2115	}
	/*
	 * Clear the bookkeeping so that nfsmout (if the reply macros jump
	 * there) does not release anything a second time.
	 */
2116	pathbuf_destroy(fromnd.ni_pathbuf);
2117	fromnd.ni_pathbuf = NULL;
2118	fromnd.ni_cnd.cn_nameiop = 0;
2119	localfs = NULL;
2120	nfsm_reply(2 * NFSX_WCCDATA(v3));
2121	if (v3) {
2122		nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
2123		nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
2124	}
2125	return (0);
2126
	/*
	 * Bail-out target, presumably reached from the nfsm_* macros
	 * (definitions not visible here): release whatever the bookkeeping
	 * says is still held.
	 */
2127nfsmout:
2128	if (fdirp)
2129		vrele(fdirp);
2130#ifdef notdef
2131	if (tdirp)
2132		vrele(tdirp);
2133#endif
2134	if (tond.ni_cnd.cn_nameiop) {
2135		if (tond.ni_pathbuf != NULL) {
2136			pathbuf_destroy(tond.ni_pathbuf);
2137			tond.ni_pathbuf = NULL;
2138		}
2139	}
2140	if (localfs) {
2141		VFS_RENAMELOCK_EXIT(localfs);
2142	}
2143	if (fromnd.ni_cnd.cn_nameiop) {
2144		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
2145		if (fromnd.ni_pathbuf != NULL) {
2146			pathbuf_destroy(fromnd.ni_pathbuf);
2147			fromnd.ni_pathbuf = NULL;
2148		}
2149		vrele(fromnd.ni_dvp);
2150		vrele(fvp);
2151	}
2152	return (error);
2153}
2154
2155/*
2156 * nfs link service
 *
 * Creates a hard link: the first file handle in the request names the
 * existing object, the second names the directory that receives the new
 * name.  Directories are refused with EPERM, an existing name with EEXIST
 * and a link across mounts with EXDEV.  For v3 the reply carries the
 * attributes of the linked object and the directory attributes before and
 * after the operation.  The v3 path returns 0 explicitly; the tail of the
 * function (including the nfsmout label, presumably) comes from the
 * nfsm_srvdone macro, which is defined elsewhere.
2157 */
2158int
2159nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
2160{
2161	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2162	struct mbuf *nam = nfsd->nd_nam;
2163	char *dpos = nfsd->nd_dpos;
2164	kauth_cred_t cred = nfsd->nd_cr;
2165	struct nameidata nd;
2166	u_int32_t *tl;
2167	int32_t t1;
2168	char *bpos;
2169	int error = 0, rdonly, cache = 0, len, dirfor_ret = 1, diraft_ret = 1;
2170	int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
2171	char *cp2;
2172	struct mbuf *mb, *mreq;
2173	struct vnode *vp, *xp, *dirp = (struct vnode *)0;
2174	struct vattr dirfor, diraft, at;
2175	nfsrvfh_t nsfh, dnsfh;
2176	u_quad_t frev;
2177
	/*
	 * The VDIR check below jumps to out1 before nfs_namei() has touched
	 * nd, and out1 inspects nd.ni_pathbuf.  Start it out as NULL so that
	 * path never hands an uninitialized pointer to pathbuf_destroy().
	 */
	nd.ni_pathbuf = NULL;
	/* Parse both file handles and the length of the new name. */
2178	nfsm_srvmtofh(&nsfh);
2179	nfsm_srvmtofh(&dnsfh);
2180	nfsm_srvnamesiz(len);
	/* Resolve the object to be linked. */
2181	error = nfsrv_fhtovp(&nsfh, false, &vp, cred, slp, nam,
2182		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
2183	if (error) {
2184		nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2185		nfsm_srvpostop_attr(getret, &at);
2186		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2187		return (0);
2188	}
	/* Hard links to directories are refused. */
2189	if (vp->v_type == VDIR) {
2190		error = EPERM;
2191		goto out1;
2192	}
	/* Look up the new name in the target directory. */
2193	nd.ni_cnd.cn_cred = cred;
2194	nd.ni_cnd.cn_nameiop = CREATE;
2195	nd.ni_cnd.cn_flags = LOCKPARENT;
2196	error = nfs_namei(&nd, &dnsfh, len, slp, nam, &md, &dpos,
2197		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
	/* v3: directory attributes before the operation. */
2198	if (dirp && v3) {
2199		dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred);
2200	}
2201	if (error)
2202		goto out1;
	/* The new name must not exist yet. */
2203	xp = nd.ni_vp;
2204	if (xp != NULL) {
2205		error = EEXIST;
2206		goto out;
2207	}
	/* The object and the target directory must be on the same mount. */
2208	xp = nd.ni_dvp;
2209	if (vp->v_mount != xp->v_mount)
2210		error = EXDEV;
2211out:
	/*
	 * Either create the link, or abandon the operation and release the
	 * parent (vrele when it is the same vnode as the existing target,
	 * vput otherwise) and the existing target, if any.
	 */
2212	if (!error) {
2213		nqsrv_getl(vp, ND_WRITE);
2214		nqsrv_getl(xp, ND_WRITE);
2215		error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2216	} else {
2217		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2218		if (nd.ni_dvp == nd.ni_vp)
2219			vrele(nd.ni_dvp);
2220		else
2221			vput(nd.ni_dvp);
2222		if (nd.ni_vp)
2223			vrele(nd.ni_vp);
2224	}
2225out1:
	/* v3: attributes of the linked object for the reply. */
2226	if (v3) {
2227		vn_lock(vp, LK_SHARED | LK_RETRY);
2228		getret = VOP_GETATTR(vp, &at, cred);
2229		VOP_UNLOCK(vp);
2230	}
	/* v3: directory attributes after the operation; then drop dirp. */
2231	if (dirp) {
2232		if (v3) {
2233			vn_lock(dirp, LK_SHARED | LK_RETRY);
2234			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2235			VOP_UNLOCK(dirp);
2236		}
2237		vrele(dirp);
2238	}
2239	vrele(vp);
2240	if (nd.ni_pathbuf != NULL) {
2241		pathbuf_destroy(nd.ni_pathbuf);
2242		nd.ni_pathbuf = NULL;
2243	}
2244	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
2245	if (v3) {
2246		nfsm_srvpostop_attr(getret, &at);
2247		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2248		return (0);
2249	}
2250	nfsm_srvdone;
2251}
2252
2253/*
2254 * nfs symbolic link service
2255 */
2256int
2257nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
2258{
2259	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2260	struct mbuf *nam = nfsd->nd_nam;
2261	char *dpos = nfsd->nd_dpos;
2262	kauth_cred_t cred = nfsd->nd_cr;
2263	struct vattr va, dirfor, diraft;
2264	struct nameidata nd;
2265	u_int32_t *tl;
2266	int32_t t1;
2267	struct nfsv2_sattr *sp;
2268	char *bpos, *pathcp = NULL, *cp2;
2269	struct uio io;
2270	struct iovec iv;
2271	int error = 0, cache = 0, dirfor_ret = 1, diraft_ret = 1, abort = 0;
2272	uint32_t len, len2;
2273	int v3 = (nfsd->nd_flag & ND_NFSV3);
2274	struct mbuf *mb, *mreq;
2275	struct vnode *dirp = (struct vnode *)0;
2276	nfsrvfh_t nsfh;
2277	u_quad_t frev;
2278
2279	nd.ni_cnd.cn_nameiop = 0;
2280	nfsm_srvmtofh(&nsfh);
2281	nfsm_srvnamesiz(len);
2282	nd.ni_cnd.cn_cred = cred;
2283	nd.ni_cnd.cn_nameiop = CREATE;
2284	nd.ni_cnd.cn_flags = LOCKPARENT;
2285	error = nfs_namei(&nd, &nsfh, len, slp, nam, &md, &dpos,
2286		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
2287	if (dirp && v3) {
2288		dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred);
2289	}
2290	if (error)
2291		goto out;
2292	abort = 1;
2293	vattr_null(&va);
2294	va.va_type = VLNK;
2295	if (v3) {
2296		va.va_mode = 0;
2297		nfsm_srvsattr(&va);
2298		nfsm_dissect(tl, uint32_t *, NFSX_UNSIGNED);
2299		len2 = fxdr_unsigned(uint32_t, *tl);
2300		if (len2 > PATH_MAX) {
2301			/* XXX should check _PC_NO_TRUNC */
2302			error = ENAMETOOLONG;
2303			goto abortop;
2304		}
2305	}
2306	else {
2307		/* NFSv2 */
2308		nfsm_strsiz(len2, NFS_MAXPATHLEN);
2309	}
2310	pathcp = malloc(len2 + 1, M_TEMP, M_WAITOK);
2311	iv.iov_base = pathcp;
2312	iv.iov_len = len2;
2313	io.uio_resid = len2;
2314	io.uio_offset = 0;
2315	io.uio_iov = &iv;
2316	io.uio_iovcnt = 1;
2317	io.uio_rw = UIO_READ;
2318	UIO_SETUP_SYSSPACE(&io);
2319	nfsm_mtouio(&io, len2);
2320	if (!v3) {
2321		nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2322		va.va_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
2323	}
2324	*(pathcp + len2) = '\0';
2325	if (nd.ni_vp) {
2326		error = EEXIST;
2327abortop:
2328		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2329		if (nd.ni_dvp == nd.ni_vp)
2330			vrele(nd.ni_dvp);
2331		else
2332			vput(nd.ni_dvp);
2333		if (nd.ni_vp)
2334			vrele(nd.ni_vp);
2335		goto out;
2336	}
2337	nqsrv_getl(nd.ni_dvp, ND_WRITE);
2338	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va, pathcp);
2339	if (!error) {
2340	    if (v3) {
2341		error = nfsrv_composefh(nd.ni_vp, &nsfh, v3);
2342		if (!error)
2343		    error = VOP_GETATTR(nd.ni_vp, &va, cred);
2344		vput(nd.ni_vp);
2345	    } else {
2346		vput(nd.ni_vp);
2347	    }
2348	}
2349out:
2350	if (pathcp)
2351		free(pathcp, M_TEMP);
2352	if (dirp) {
2353		if (v3) {
2354			vn_lock(dirp, LK_SHARED | LK_RETRY);
2355			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2356			VOP_UNLOCK(dirp);
2357		}
2358		vrele(dirp);
2359		dirp = NULL;
2360	}
2361	if (nd.ni_pathbuf != NULL) {
2362		pathbuf_destroy(nd.ni_pathbuf);
2363		nd.ni_pathbuf = NULL;
2364	}
2365	abort = 0;
2366	nfsm_reply(NFSX_SRVFH(&nsfh, v3) + NFSX_POSTOPATTR(v3) +
2367	    NFSX_WCCDATA(v3));
2368	if (v3) {
2369		if (!error) {
2370			nfsm_srvpostop_fh(&nsfh);
2371			nfsm_srvpostop_attr(0, &va);
2372		}
2373		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2374	}
2375	return (0);
2376nfsmout:
2377	if (abort) {
2378		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2379		if (nd.ni_dvp == nd.ni_vp)
2380			vrele(nd.ni_dvp);
2381		else
2382			vput(nd.ni_dvp);
2383		if (nd.ni_vp)
2384			vrele(nd.ni_vp);
2385		if (nd.ni_pathbuf != NULL) {
2386			pathbuf_destroy(nd.ni_pathbuf);
2387			nd.ni_pathbuf = NULL;
2388		}
2389	}
2390	if (dirp)
2391		vrele(dirp);
2392	if (pathcp)
2393		free(pathcp, M_TEMP);
2394	return (error);
2395}
2396
/*
 * nfs mkdir service
 *
 * Serves a MKDIR request.  The parent file handle and new name are looked
 * up with nfs_namei() (CREATE, parent locked); the requested attributes
 * are decoded into va; VOP_MKDIR() is called when the name does not
 * already exist (EEXIST otherwise).  On success the new directory's file
 * handle and attributes are fetched for the reply.
 *
 * "abort" is set while a successful lookup is outstanding so that a
 * decode failure landing on nfsmout knows it must run VOP_ABORTOP() and
 * release the vnodes.  dirp and abort are reset before the final
 * nfsm_reply(), presumably because that macro can branch to nfsmout.
 *
 * Returns 0 once a reply has been assembled, otherwise the decode error.
 */
int
nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
{
	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
	struct mbuf *nam = nfsd->nd_nam;
	char *dpos = nfsd->nd_dpos;
	kauth_cred_t cred = nfsd->nd_cr;
	struct vattr va, dirfor, diraft;
	struct nfs_fattr *fp;
	struct nameidata nd;
	char *cp;
	u_int32_t *tl;
	int32_t t1;
	char *bpos;
	int error = 0, cache = 0, len, dirfor_ret = 1, diraft_ret = 1;
	int abort = 0;
	int v3 = (nfsd->nd_flag & ND_NFSV3);
	char *cp2;
	struct mbuf *mb, *mreq;
	struct vnode *vp, *dirp = (struct vnode *)0;
	nfsrvfh_t nsfh;
	u_quad_t frev;

	nfsm_srvmtofh(&nsfh);
	nfsm_srvnamesiz(len);
	nd.ni_cnd.cn_cred = cred;
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT;
	error = nfs_namei(&nd, &nsfh, len, slp, nam, &md, &dpos,
		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
	/* Sample the directory attributes before it is modified (v3 wcc). */
	if (dirp && v3) {
		dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred);
	}
	if (error) {
		/* Lookup failed: reply with the error and wcc data only. */
		if (nd.ni_pathbuf != NULL) {
			pathbuf_destroy(nd.ni_pathbuf);
			nd.ni_pathbuf = NULL;
		}
		nfsm_reply(NFSX_WCCDATA(v3));
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
		if (dirp)
			vrele(dirp);
		return (0);
	}
	/* From here on a decode failure must undo the lookup (see nfsmout). */
	abort = 1;
	vattr_null(&va);
	if (v3) {
		va.va_mode = 0;
		nfsm_srvsattr(&va);
	} else {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		va.va_mode = nfstov_mode(*tl++);
	}
	va.va_type = VDIR;
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: undo the lookup and fail. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		error = EEXIST;
		goto out;
	}
	nqsrv_getl(nd.ni_dvp, ND_WRITE);
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va);
	if (!error) {
		/* Collect the handle and attributes of the new directory. */
		vp = nd.ni_vp;
		error = nfsrv_composefh(vp, &nsfh, v3);
		if (!error)
			error = VOP_GETATTR(vp, &va, cred);
		vput(vp);
	}
out:
	if (dirp) {
		if (v3) {
			vn_lock(dirp, LK_SHARED | LK_RETRY);
			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
			VOP_UNLOCK(dirp);
		}
		vrele(dirp);
		/* Cleared so that nfsmout does not release it again. */
		dirp = NULL;
	}
	if (nd.ni_pathbuf != NULL) {
		pathbuf_destroy(nd.ni_pathbuf);
		nd.ni_pathbuf = NULL;
	}
	/* Nothing is left for nfsmout to abort. */
	abort = 0;
	nfsm_reply(NFSX_SRVFH(&nsfh, v3) + NFSX_POSTOPATTR(v3) +
	    NFSX_WCCDATA(v3));
	if (v3) {
		if (!error) {
			nfsm_srvpostop_fh(&nsfh);
			nfsm_srvpostop_attr(0, &va);
		}
		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
	} else {
		nfsm_srvfhtom(&nsfh, v3);
		nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR);
		nfsm_srvfillattr(&va, fp);
	}
	return (0);
nfsmout:
	/* Reached from the nfsm_* macros; release whatever is still held. */
	if (abort) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == nd.ni_vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (nd.ni_vp)
			vrele(nd.ni_vp);
		if (nd.ni_pathbuf != NULL) {
			pathbuf_destroy(nd.ni_pathbuf);
			nd.ni_pathbuf = NULL;
		}
	}
	if (dirp)
		vrele(dirp);
	return (error);
}
2521
2522/*
2523 * nfs rmdir service
2524 */
2525int
2526nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
2527{
2528	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2529	struct mbuf *nam = nfsd->nd_nam;
2530	char *dpos = nfsd->nd_dpos;
2531	kauth_cred_t cred = nfsd->nd_cr;
2532	u_int32_t *tl;
2533	int32_t t1;
2534	char *bpos;
2535	int error = 0, cache = 0, len, dirfor_ret = 1, diraft_ret = 1;
2536	int v3 = (nfsd->nd_flag & ND_NFSV3);
2537	char *cp2;
2538	struct mbuf *mb, *mreq;
2539	struct vnode *vp, *dirp = (struct vnode *)0;
2540	struct vattr dirfor, diraft;
2541	nfsrvfh_t nsfh;
2542	struct nameidata nd;
2543	u_quad_t frev;
2544
2545	nfsm_srvmtofh(&nsfh);
2546	nfsm_srvnamesiz(len);
2547	nd.ni_cnd.cn_cred = cred;
2548	nd.ni_cnd.cn_nameiop = DELETE;
2549	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
2550	error = nfs_namei(&nd, &nsfh, len, slp, nam, &md, &dpos,
2551		&dirp, lwp, (nfsd->nd_flag & ND_KERBAUTH), false);
2552	if (dirp && v3) {
2553		dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred);
2554	}
2555	if (error) {
2556		if (nd.ni_pathbuf != NULL) {
2557			pathbuf_destroy(nd.ni_pathbuf);
2558			nd.ni_pathbuf = NULL;
2559		}
2560		nfsm_reply(NFSX_WCCDATA(v3));
2561		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2562		if (dirp)
2563			vrele(dirp);
2564		return (0);
2565	}
2566	vp = nd.ni_vp;
2567	if (vp->v_type != VDIR) {
2568		error = ENOTDIR;
2569		goto out;
2570	}
2571	/*
2572	 * No rmdir "." please.
2573	 */
2574	if (nd.ni_dvp == vp) {
2575		error = EINVAL;
2576		goto out;
2577	}
2578	/*
2579	 * The root of a mounted filesystem cannot be deleted.
2580	 */
2581	if (vp->v_vflag & VV_ROOT)
2582		error = EBUSY;
2583out:
2584	if (!error) {
2585		nqsrv_getl(nd.ni_dvp, ND_WRITE);
2586		nqsrv_getl(vp, ND_WRITE);
2587		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2588	} else {
2589		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2590		if (nd.ni_dvp == nd.ni_vp)
2591			vrele(nd.ni_dvp);
2592		else
2593			vput(nd.ni_dvp);
2594		vput(vp);
2595	}
2596	if (nd.ni_pathbuf != NULL) {
2597		pathbuf_destroy(nd.ni_pathbuf);
2598		nd.ni_pathbuf = NULL;
2599	}
2600	if (dirp) {
2601		if (v3) {
2602			vn_lock(dirp, LK_SHARED | LK_RETRY);
2603			diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
2604			VOP_UNLOCK(dirp);
2605		}
2606		vrele(dirp);
2607	}
2608	nfsm_reply(NFSX_WCCDATA(v3));
2609	if (v3) {
2610		nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
2611		return (0);
2612	}
2613	nfsm_srvdone;
2614}
2615
2616/*
2617 * nfs readdir service
2618 * - mallocs what it thinks is enough to read
2619 *	count rounded up to a multiple of NFS_SRVDIRBLKSIZ <= NFS_MAXREADDIR
2620 * - calls VOP_READDIR()
2621 * - loops around building the reply
2622 *	if the output generated exceeds count break out of loop
2623 *	The nfsm_clget macro is used here so that the reply will be packed
2624 *	tightly in mbuf clusters.
2625 * - it only knows that it has encountered eof when the VOP_READDIR()
2626 *	reads nothing
2627 * - as such one readdir rpc will return eof false although you are there
2628 *	and then the next will return eof
 * - it trims out records with d_fileno == 0
 *	this doesn't matter for Unix clients, but such records might
 *	confuse clients for other operating systems.
2632 * - it trims out records with d_type == DT_WHT
2633 *	these cannot be seen through NFS (unless we extend the protocol)
2634 * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2635 *	than requested, but this may not apply to all filesystems. For
2636 *	example, client NFS does not { although it is never remote mounted
2637 *	anyhow }
2638 *     The alternate call nfsrv_readdirplus() does lookups as well.
2639 * PS: The NFS protocol spec. does not clarify what the "count" byte
2640 *	argument is a count of.. just name strings and file id's or the
2641 *	entire reply rpc or ...
2642 *	I tried just file name and id sizes and it confused the Sun client,
2643 *	so I am using the full rpc size now. The "paranoia.." comment refers
2644 *	to including the status longwords that are not a part of the dir.
2645 *	"entry" structures, but are in the rpc.
2646 */
2647
/*
 * Granularity of directory reads: the readdir services round the
 * client's requested size up to a multiple of this before reading.
 */
#define	NFS_SRVDIRBLKSIZ	1024

/*
 * Fixed-size tail of a readdirplus entry.  nfsrv_readdirplus() fills one
 * of these per entry and copies sizeof(struct flrep) bytes into the reply
 * right after the padded name; the file handle bytes follow it.
 */
struct flrep {
	nfsuint64 fl_off;		/* entry cookie, XDR encoded */
	u_int32_t fl_postopok;		/* "attributes follow" flag */
	struct nfs_fattr fl_fattr; /* XXX: must be of fattr3 size */
	u_int32_t fl_fhok;		/* "file handle follows" flag */
	u_int32_t fl_fhsize;		/* file handle length, XDR encoded */
	/* handle comes here, filled in dynamically */
};
2658
2659int
2660nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
2661{
2662	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2663	struct mbuf *nam = nfsd->nd_nam;
2664	char *dpos = nfsd->nd_dpos;
2665	kauth_cred_t cred = nfsd->nd_cr;
2666	char *bp, *be;
2667	struct mbuf *mp;
2668	struct dirent *dp;
2669	char *cp;
2670	u_int32_t *tl;
2671	int32_t t1;
2672	char *bpos;
2673	struct mbuf *mb, *mreq, *mp2;
2674	char *cpos, *cend, *cp2, *rbuf;
2675	struct vnode *vp;
2676	struct vattr at;
2677	nfsrvfh_t nsfh;
2678	struct uio io;
2679	struct iovec iv;
2680	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2681	int siz, cnt, fullsiz, eofflag, rdonly, cache = 0, ncookies;
2682	int v3 = (nfsd->nd_flag & ND_NFSV3);
2683	u_quad_t frev, off, toff, verf;
2684	off_t *cookies = NULL, *cookiep;
2685	nfsuint64 jar;
2686
2687	nfsm_srvmtofh(&nsfh);
2688	if (v3) {
2689		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
2690		toff = fxdr_hyper(tl);
2691		tl += 2;
2692		verf = fxdr_hyper(tl);
2693		tl += 2;
2694	} else {
2695		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2696		toff = fxdr_unsigned(u_quad_t, *tl++);
2697	}
2698	off = toff;
2699	cnt = fxdr_unsigned(int, *tl);
2700	siz = ((cnt + NFS_SRVDIRBLKSIZ - 1) & ~(NFS_SRVDIRBLKSIZ - 1));
2701	xfer = NFS_SRVMAXDATA(nfsd);
2702	if (siz > xfer)
2703		siz = xfer;
2704	fullsiz = siz;
2705	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
2706		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
2707	if (!error && vp->v_type != VDIR) {
2708		error = ENOTDIR;
2709		vput(vp);
2710	}
2711	if (error) {
2712		nfsm_reply(NFSX_UNSIGNED);
2713		nfsm_srvpostop_attr(getret, &at);
2714		return (0);
2715	}
2716	nqsrv_getl(vp, ND_READ);
2717	if (v3) {
2718		error = getret = VOP_GETATTR(vp, &at, cred);
2719#ifdef NFS3_STRICTVERF
2720		/*
2721		 * XXX This check is too strict for Solaris 2.5 clients.
2722		 */
2723		if (!error && toff && verf != at.va_filerev)
2724			error = NFSERR_BAD_COOKIE;
2725#endif
2726	}
2727	if (!error)
2728		error = nfsrv_access(vp, VEXEC, cred, rdonly, lwp, 0);
2729	if (error) {
2730		vput(vp);
2731		nfsm_reply(NFSX_POSTOPATTR(v3));
2732		nfsm_srvpostop_attr(getret, &at);
2733		return (0);
2734	}
2735	VOP_UNLOCK(vp);
2736	rbuf = malloc(siz, M_TEMP, M_WAITOK);
2737again:
2738	iv.iov_base = rbuf;
2739	iv.iov_len = fullsiz;
2740	io.uio_iov = &iv;
2741	io.uio_iovcnt = 1;
2742	io.uio_offset = (off_t)off;
2743	io.uio_resid = fullsiz;
2744	io.uio_rw = UIO_READ;
2745	UIO_SETUP_SYSSPACE(&io);
2746	eofflag = 0;
2747	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2748
2749	error = VOP_READDIR(vp, &io, cred, &eofflag, &cookies, &ncookies);
2750
2751	off = (off_t)io.uio_offset;
2752	if (!cookies && !error)
2753		error = NFSERR_PERM;
2754	if (v3) {
2755		getret = VOP_GETATTR(vp, &at, cred);
2756		if (!error)
2757			error = getret;
2758	}
2759
2760	VOP_UNLOCK(vp);
2761	if (error) {
2762		vrele(vp);
2763		free((void *)rbuf, M_TEMP);
2764		if (cookies)
2765			free((void *)cookies, M_TEMP);
2766		nfsm_reply(NFSX_POSTOPATTR(v3));
2767		nfsm_srvpostop_attr(getret, &at);
2768		return (0);
2769	}
2770	if (io.uio_resid) {
2771		siz -= io.uio_resid;
2772
2773		/*
2774		 * If nothing read, return eof
2775		 * rpc reply
2776		 */
2777		if (siz == 0) {
2778			vrele(vp);
2779			nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
2780				2 * NFSX_UNSIGNED);
2781			if (v3) {
2782				nfsm_srvpostop_attr(getret, &at);
2783				nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
2784				txdr_hyper(at.va_filerev, tl);
2785				tl += 2;
2786			} else
2787				nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2788			*tl++ = nfs_false;
2789			*tl = nfs_true;
2790			free((void *)rbuf, M_TEMP);
2791			free((void *)cookies, M_TEMP);
2792			return (0);
2793		}
2794	}
2795
2796	/*
2797	 * Check for degenerate cases of nothing useful read.
2798	 * If so go try again
2799	 */
2800	cpos = rbuf;
2801	cend = rbuf + siz;
2802	dp = (struct dirent *)cpos;
2803	cookiep = cookies;
2804
2805	while (cpos < cend && ncookies > 0 &&
2806		(dp->d_fileno == 0 || dp->d_type == DT_WHT)) {
2807		cpos += dp->d_reclen;
2808		dp = (struct dirent *)cpos;
2809		cookiep++;
2810		ncookies--;
2811	}
2812	if (cpos >= cend || ncookies == 0) {
2813		toff = off;
2814		siz = fullsiz;
2815		free(cookies, M_TEMP);
2816		cookies = NULL;
2817		goto again;
2818	}
2819
2820	len = 3 * NFSX_UNSIGNED;	/* paranoia, probably can be 0 */
2821	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
2822	if (v3) {
2823		nfsm_srvpostop_attr(getret, &at);
2824		nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2825		txdr_hyper(at.va_filerev, tl);
2826	}
2827	mp = mp2 = mb;
2828	bp = bpos;
2829	be = bp + M_TRAILINGSPACE(mp);
2830
2831	/* Loop through the records and build reply */
2832	while (cpos < cend && ncookies > 0) {
2833		if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
2834			nlen = dp->d_namlen;
2835			rem = nfsm_rndup(nlen)-nlen;
2836			len += (4 * NFSX_UNSIGNED + nlen + rem);
2837			if (v3)
2838				len += 2 * NFSX_UNSIGNED;
2839			if (len > cnt) {
2840				eofflag = 0;
2841				break;
2842			}
2843			/*
2844			 * Build the directory record xdr from
2845			 * the dirent entry.
2846			 */
2847			nfsm_clget;
2848			*tl = nfs_true;
2849			bp += NFSX_UNSIGNED;
2850			if (v3) {
2851				nfsm_clget;
2852				*tl = txdr_unsigned(dp->d_fileno >> 32);
2853				bp += NFSX_UNSIGNED;
2854			}
2855			nfsm_clget;
2856			*tl = txdr_unsigned(dp->d_fileno);
2857			bp += NFSX_UNSIGNED;
2858			nfsm_clget;
2859			*tl = txdr_unsigned(nlen);
2860			bp += NFSX_UNSIGNED;
2861
2862			/* And loop around copying the name */
2863			xfer = nlen;
2864			cp = dp->d_name;
2865			while (xfer > 0) {
2866				nfsm_clget;
2867				if ((bp+xfer) > be)
2868					tsiz = be-bp;
2869				else
2870					tsiz = xfer;
2871				memcpy(bp, cp, tsiz);
2872				bp += tsiz;
2873				xfer -= tsiz;
2874				if (xfer > 0)
2875					cp += tsiz;
2876			}
2877			/* And null pad to an int32_t boundary */
2878			for (i = 0; i < rem; i++)
2879				*bp++ = '\0';
2880			nfsm_clget;
2881
2882			/* Finish off the record */
2883			txdr_hyper(*cookiep, &jar);
2884			if (v3) {
2885				*tl = jar.nfsuquad[0];
2886				bp += NFSX_UNSIGNED;
2887				nfsm_clget;
2888			}
2889			*tl = jar.nfsuquad[1];
2890			bp += NFSX_UNSIGNED;
2891		}
2892		cpos += dp->d_reclen;
2893		dp = (struct dirent *)cpos;
2894		cookiep++;
2895		ncookies--;
2896	}
2897	vrele(vp);
2898	nfsm_clget;
2899	*tl = nfs_false;
2900	bp += NFSX_UNSIGNED;
2901	nfsm_clget;
2902	if (eofflag)
2903		*tl = nfs_true;
2904	else
2905		*tl = nfs_false;
2906	bp += NFSX_UNSIGNED;
2907	if (mp != mb) {
2908		if (bp < be)
2909			mp->m_len = bp - mtod(mp, char *);
2910	} else
2911		mp->m_len += bp - bpos;
2912	free((void *)rbuf, M_TEMP);
2913	free((void *)cookies, M_TEMP);
2914	nfsm_srvdone;
2915}
2916
2917int
2918nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
2919{
2920	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
2921	struct mbuf *nam = nfsd->nd_nam;
2922	char *dpos = nfsd->nd_dpos;
2923	kauth_cred_t cred = nfsd->nd_cr;
2924	char *bp, *be;
2925	struct mbuf *mp;
2926	struct dirent *dp;
2927	char *cp;
2928	u_int32_t *tl;
2929	int32_t t1;
2930	char *bpos;
2931	struct mbuf *mb, *mreq, *mp2;
2932	char *cpos, *cend, *cp2, *rbuf;
2933	struct vnode *vp, *nvp;
2934	struct flrep fl;
2935	nfsrvfh_t nsfh;
2936	struct uio io;
2937	struct iovec iv;
2938	struct vattr va, at, *vap = &va;
2939	struct nfs_fattr *fp;
2940	int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2941	int siz, cnt, fullsiz, eofflag, rdonly, cache = 0, dirlen, ncookies;
2942	u_quad_t frev, off, toff, verf;
2943	off_t *cookies = NULL, *cookiep;
2944
2945	nfsm_srvmtofh(&nsfh);
2946	nfsm_dissect(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
2947	toff = fxdr_hyper(tl);
2948	tl += 2;
2949	verf = fxdr_hyper(tl);
2950	tl += 2;
2951	siz = fxdr_unsigned(int, *tl++);
2952	cnt = fxdr_unsigned(int, *tl);
2953	off = toff;
2954	siz = ((siz + NFS_SRVDIRBLKSIZ - 1) & ~(NFS_SRVDIRBLKSIZ - 1));
2955	xfer = NFS_SRVMAXDATA(nfsd);
2956	if (siz > xfer)
2957		siz = xfer;
2958	fullsiz = siz;
2959	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
2960		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
2961	if (!error && vp->v_type != VDIR) {
2962		error = ENOTDIR;
2963		vput(vp);
2964	}
2965	if (error) {
2966		nfsm_reply(NFSX_UNSIGNED);
2967		nfsm_srvpostop_attr(getret, &at);
2968		return (0);
2969	}
2970	error = getret = VOP_GETATTR(vp, &at, cred);
2971#ifdef NFS3_STRICTVERF
2972	/*
2973	 * XXX This check is too strict for Solaris 2.5 clients.
2974	 */
2975	if (!error && toff && verf != at.va_filerev)
2976		error = NFSERR_BAD_COOKIE;
2977#endif
2978	if (!error) {
2979		nqsrv_getl(vp, ND_READ);
2980		error = nfsrv_access(vp, VEXEC, cred, rdonly, lwp, 0);
2981	}
2982	if (error) {
2983		vput(vp);
2984		nfsm_reply(NFSX_V3POSTOPATTR);
2985		nfsm_srvpostop_attr(getret, &at);
2986		return (0);
2987	}
2988	VOP_UNLOCK(vp);
2989
2990	rbuf = malloc(siz, M_TEMP, M_WAITOK);
2991again:
2992	iv.iov_base = rbuf;
2993	iv.iov_len = fullsiz;
2994	io.uio_iov = &iv;
2995	io.uio_iovcnt = 1;
2996	io.uio_offset = (off_t)off;
2997	io.uio_resid = fullsiz;
2998	io.uio_rw = UIO_READ;
2999	UIO_SETUP_SYSSPACE(&io);
3000	eofflag = 0;
3001
3002	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3003
3004	error = VOP_READDIR(vp, &io, cred, &eofflag, &cookies, &ncookies);
3005
3006	off = (u_quad_t)io.uio_offset;
3007	getret = VOP_GETATTR(vp, &at, cred);
3008
3009	VOP_UNLOCK(vp);
3010
3011	/*
3012	 * If the VGET operation doesn't work for this filesystem,
3013	 * we can't support readdirplus. Returning NOTSUPP should
3014	 * make clients fall back to plain readdir.
3015	 * There's no need to check for VPTOFH as well, we wouldn't
3016	 * even be here otherwise.
3017	 */
3018	if (!getret) {
3019		if ((getret = VFS_VGET(vp->v_mount, at.va_fileid, &nvp)))
3020			getret = (getret == EOPNOTSUPP) ?
3021				NFSERR_NOTSUPP : NFSERR_IO;
3022		else
3023			vput(nvp);
3024	}
3025
3026	if (!cookies && !error)
3027		error = NFSERR_PERM;
3028	if (!error)
3029		error = getret;
3030	if (error) {
3031		vrele(vp);
3032		if (cookies)
3033			free((void *)cookies, M_TEMP);
3034		free((void *)rbuf, M_TEMP);
3035		nfsm_reply(NFSX_V3POSTOPATTR);
3036		nfsm_srvpostop_attr(getret, &at);
3037		return (0);
3038	}
3039	if (io.uio_resid) {
3040		siz -= io.uio_resid;
3041
3042		/*
3043		 * If nothing read, return eof
3044		 * rpc reply
3045		 */
3046		if (siz == 0) {
3047			vrele(vp);
3048			nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3049				2 * NFSX_UNSIGNED);
3050			nfsm_srvpostop_attr(getret, &at);
3051			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
3052			txdr_hyper(at.va_filerev, tl);
3053			tl += 2;
3054			*tl++ = nfs_false;
3055			*tl = nfs_true;
3056			free((void *)cookies, M_TEMP);
3057			free((void *)rbuf, M_TEMP);
3058			return (0);
3059		}
3060	}
3061
3062	/*
3063	 * Check for degenerate cases of nothing useful read.
3064	 * If so go try again
3065	 */
3066	cpos = rbuf;
3067	cend = rbuf + siz;
3068	dp = (struct dirent *)cpos;
3069	cookiep = cookies;
3070
3071	while (cpos < cend && ncookies > 0 &&
3072		(dp->d_fileno == 0 || dp->d_type == DT_WHT)) {
3073		cpos += dp->d_reclen;
3074		dp = (struct dirent *)cpos;
3075		cookiep++;
3076		ncookies--;
3077	}
3078	if (cpos >= cend || ncookies == 0) {
3079		toff = off;
3080		siz = fullsiz;
3081		free(cookies, M_TEMP);
3082		cookies = NULL;
3083		goto again;
3084	}
3085
3086	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED;
3087	nfsm_reply(cnt);
3088	nfsm_srvpostop_attr(getret, &at);
3089	nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
3090	txdr_hyper(at.va_filerev, tl);
3091	mp = mp2 = mb;
3092	bp = bpos;
3093	be = bp + M_TRAILINGSPACE(mp);
3094
3095	/* Loop through the records and build reply */
3096	while (cpos < cend && ncookies > 0) {
3097		if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
3098			nfsrvfh_t nnsfh;
3099
3100			nlen = dp->d_namlen;
3101			rem = nfsm_rndup(nlen)-nlen;
3102
3103			/*
3104			 * For readdir_and_lookup get the vnode using
3105			 * the file number.
3106			 */
3107			if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))
3108				goto invalid;
3109			if (nfsrv_composefh(nvp, &nnsfh, true)) {
3110				vput(nvp);
3111				goto invalid;
3112			}
3113			if (VOP_GETATTR(nvp, vap, cred)) {
3114				vput(nvp);
3115				goto invalid;
3116			}
3117			vput(nvp);
3118
3119			/*
3120			 * If either the dircount or maxcount will be
3121			 * exceeded, get out now. Both of these lengths
3122			 * are calculated conservatively, including all
3123			 * XDR overheads.
3124			 */
3125			len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3126				NFSX_V3POSTOPATTR);
3127			dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3128			if (len > cnt || dirlen > fullsiz) {
3129				eofflag = 0;
3130				break;
3131			}
3132
3133			/*
3134			 * Build the directory record xdr from
3135			 * the dirent entry.
3136			 */
3137			fp = (struct nfs_fattr *)&fl.fl_fattr;
3138			nfsm_srvfillattr(vap, fp);
3139			fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3140			fl.fl_fhok = nfs_true;
3141			fl.fl_postopok = nfs_true;
3142			txdr_hyper(*cookiep, fl.fl_off.nfsuquad);
3143
3144			nfsm_clget;
3145			*tl = nfs_true;
3146			bp += NFSX_UNSIGNED;
3147			nfsm_clget;
3148			*tl = txdr_unsigned(dp->d_fileno >> 32);
3149			bp += NFSX_UNSIGNED;
3150			nfsm_clget;
3151			*tl = txdr_unsigned(dp->d_fileno);
3152			bp += NFSX_UNSIGNED;
3153			nfsm_clget;
3154			*tl = txdr_unsigned(nlen);
3155			bp += NFSX_UNSIGNED;
3156
3157			/* And loop around copying the name */
3158			xfer = nlen;
3159			cp = dp->d_name;
3160			while (xfer > 0) {
3161				nfsm_clget;
3162				if ((bp + xfer) > be)
3163					tsiz = be - bp;
3164				else
3165					tsiz = xfer;
3166				memcpy(bp, cp, tsiz);
3167				bp += tsiz;
3168				xfer -= tsiz;
3169				if (xfer > 0)
3170					cp += tsiz;
3171			}
3172			/* And null pad to an int32_t boundary */
3173			for (i = 0; i < rem; i++)
3174				*bp++ = '\0';
3175
3176			/*
3177			 * Now copy the flrep structure out.
3178			 */
3179			xfer = sizeof(struct flrep);
3180			cp = (void *)&fl;
3181			while (xfer > 0) {
3182				nfsm_clget;
3183				if ((bp + xfer) > be)
3184					tsiz = be - bp;
3185				else
3186					tsiz = xfer;
3187				memcpy(bp, cp, tsiz);
3188				bp += tsiz;
3189				xfer -= tsiz;
3190				if (xfer > 0)
3191					cp += tsiz;
3192			}
3193
3194			/*
3195			 * ... and filehandle.
3196			 */
3197			xfer = NFSRVFH_SIZE(&nnsfh);
3198			cp = NFSRVFH_DATA(&nnsfh);
3199			while (xfer > 0) {
3200				nfsm_clget;
3201				if ((bp + xfer) > be)
3202					tsiz = be - bp;
3203				else
3204					tsiz = xfer;
3205				memcpy(bp, cp, tsiz);
3206				bp += tsiz;
3207				xfer -= tsiz;
3208				if (xfer > 0)
3209					cp += tsiz;
3210			}
3211		}
3212invalid:
3213		cpos += dp->d_reclen;
3214		dp = (struct dirent *)cpos;
3215		cookiep++;
3216		ncookies--;
3217	}
3218	vrele(vp);
3219	nfsm_clget;
3220	*tl = nfs_false;
3221	bp += NFSX_UNSIGNED;
3222	nfsm_clget;
3223	if (eofflag)
3224		*tl = nfs_true;
3225	else
3226		*tl = nfs_false;
3227	bp += NFSX_UNSIGNED;
3228	if (mp != mb) {
3229		if (bp < be)
3230			mp->m_len = bp - mtod(mp, char *);
3231	} else
3232		mp->m_len += bp - bpos;
3233	free((void *)cookies, M_TEMP);
3234	free((void *)rbuf, M_TEMP);
3235	nfsm_srvdone;
3236}
3237
3238/*
3239 * nfs commit service
3240 */
3241int
3242nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
3243{
3244	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3245	struct mbuf *nam = nfsd->nd_nam;
3246	char *dpos = nfsd->nd_dpos;
3247	kauth_cred_t cred = nfsd->nd_cr;
3248	struct vattr bfor, aft;
3249	struct vnode *vp;
3250	nfsrvfh_t nsfh;
3251	u_int32_t *tl;
3252	int32_t t1;
3253	char *bpos;
3254	int error = 0, rdonly, for_ret = 1, aft_ret = 1, cache = 0;
3255	uint32_t cnt;
3256	char *cp2;
3257	struct mbuf *mb, *mreq;
3258	u_quad_t frev, off, end;
3259
3260	nfsm_srvmtofh(&nsfh);
3261	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
3262
3263	off = fxdr_hyper(tl);
3264	tl += 2;
3265	cnt = fxdr_unsigned(uint32_t, *tl);
3266	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
3267		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
3268	if (error) {
3269		nfsm_reply(2 * NFSX_UNSIGNED);
3270		nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3271		return (0);
3272	}
3273	for_ret = VOP_GETATTR(vp, &bfor, cred);
3274	end = (cnt > 0) ? off + cnt : vp->v_size;
3275	if (end < off || end > vp->v_size)
3276		end = vp->v_size;
3277	if (off < vp->v_size)
3278		error = VOP_FSYNC(vp, cred, FSYNC_WAIT, off, end);
3279	/* else error == 0, from nfsrv_fhtovp() */
3280	aft_ret = VOP_GETATTR(vp, &aft, cred);
3281	vput(vp);
3282	nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
3283	nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
3284	if (!error) {
3285		nfsm_build(tl, u_int32_t *, NFSX_V3WRITEVERF);
3286		*tl++ = txdr_unsigned(boottime.tv_sec);
3287		*tl = txdr_unsigned(boottime.tv_nsec / 1000);
3288	} else {
3289		return (0);
3290	}
3291	nfsm_srvdone;
3292}
3293
3294/*
3295 * nfs statfs service
3296 */
3297int
3298nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
3299{
3300	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3301	struct mbuf *nam = nfsd->nd_nam;
3302	char *dpos = nfsd->nd_dpos;
3303	kauth_cred_t cred = nfsd->nd_cr;
3304	struct statvfs *sf = NULL;
3305	struct nfs_statfs *sfp;
3306	u_int32_t *tl;
3307	int32_t t1;
3308	char *bpos;
3309	int error = 0, rdonly, cache = 0, getret = 1;
3310	int v3 = (nfsd->nd_flag & ND_NFSV3);
3311	char *cp2;
3312	struct mbuf *mb, *mreq;
3313	struct vnode *vp;
3314	struct vattr at;
3315	nfsrvfh_t nsfh;
3316	u_quad_t frev, tval;
3317
3318	nfsm_srvmtofh(&nsfh);
3319	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
3320		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
3321	if (error) {
3322		nfsm_reply(NFSX_UNSIGNED);
3323		nfsm_srvpostop_attr(getret, &at);
3324		return (0);
3325	}
3326	sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK);
3327	error = VFS_STATVFS(vp->v_mount, sf);
3328	getret = VOP_GETATTR(vp, &at, cred);
3329	vput(vp);
3330	nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
3331	if (v3)
3332		nfsm_srvpostop_attr(getret, &at);
3333	if (error) {
3334		free(sf, M_TEMP);
3335		return (0);
3336	}
3337	nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3));
3338	if (v3) {
3339		tval = (u_quad_t)((quad_t)sf->f_blocks * (quad_t)sf->f_frsize);
3340		txdr_hyper(tval, &sfp->sf_tbytes);
3341		tval = (u_quad_t)((quad_t)sf->f_bfree * (quad_t)sf->f_frsize);
3342		txdr_hyper(tval, &sfp->sf_fbytes);
3343		tval = (u_quad_t)((quad_t)sf->f_bavail * (quad_t)sf->f_frsize);
3344		txdr_hyper(tval, &sfp->sf_abytes);
3345		tval = (u_quad_t)sf->f_files;
3346		txdr_hyper(tval, &sfp->sf_tfiles);
3347		tval = (u_quad_t)sf->f_ffree;
3348		txdr_hyper(tval, &sfp->sf_ffiles);
3349		txdr_hyper(tval, &sfp->sf_afiles);
3350		sfp->sf_invarsec = 0;
3351	} else {
3352		sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3353		sfp->sf_bsize = txdr_unsigned(sf->f_frsize);
3354		sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3355		sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3356		sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3357	}
3358nfsmout:
3359	if (sf)
3360	    free(sf, M_TEMP);
3361	return error;
3362}
3363
3364/*
3365 * nfs fsinfo service
3366 */
3367int
3368nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
3369{
3370	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3371	struct mbuf *nam = nfsd->nd_nam;
3372	char *dpos = nfsd->nd_dpos;
3373	kauth_cred_t cred = nfsd->nd_cr;
3374	u_int32_t *tl;
3375	struct nfsv3_fsinfo *sip;
3376	int32_t t1;
3377	char *bpos;
3378	int error = 0, rdonly, cache = 0, getret = 1;
3379	uint32_t maxdata;
3380	char *cp2;
3381	struct mbuf *mb, *mreq;
3382	struct vnode *vp;
3383	struct vattr at;
3384	nfsrvfh_t nsfh;
3385	u_quad_t frev, maxfsize;
3386	struct statvfs *sb;
3387
3388	nfsm_srvmtofh(&nsfh);
3389	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
3390		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
3391	if (error) {
3392		nfsm_reply(NFSX_UNSIGNED);
3393		nfsm_srvpostop_attr(getret, &at);
3394		return (0);
3395	}
3396
3397	/* XXX Try to make a guess on the max file size. */
3398	sb = malloc(sizeof(*sb), M_TEMP, M_WAITOK);
3399	VFS_STATVFS(vp->v_mount, sb);
3400	maxfsize = (u_quad_t)0x80000000 * sb->f_frsize - 1;
3401	free(sb, M_TEMP);
3402
3403	getret = VOP_GETATTR(vp, &at, cred);
3404	vput(vp);
3405	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
3406	nfsm_srvpostop_attr(getret, &at);
3407	nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO);
3408
3409	/*
3410	 * XXX
3411	 * There should be file system VFS OP(s) to get this information.
3412	 * For now, assume ufs.
3413	 */
3414	if (slp->ns_so->so_type == SOCK_DGRAM)
3415		maxdata = NFS_MAXDGRAMDATA;
3416	else
3417		maxdata = NFS_MAXDATA;
3418	sip->fs_rtmax = txdr_unsigned(maxdata);
3419	sip->fs_rtpref = txdr_unsigned(maxdata);
3420	sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3421	sip->fs_wtmax = txdr_unsigned(maxdata);
3422	sip->fs_wtpref = txdr_unsigned(maxdata);
3423	sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3424	sip->fs_dtpref = txdr_unsigned(maxdata);
3425	txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3426	sip->fs_timedelta.nfsv3_sec = 0;
3427	sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3428	sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3429		NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3430		NFSV3FSINFO_CANSETTIME);
3431	nfsm_srvdone;
3432}
3433
3434/*
3435 * nfs pathconf service
3436 */
3437int
3438nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct lwp *lwp, struct mbuf **mrq)
3439{
3440	struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
3441	struct mbuf *nam = nfsd->nd_nam;
3442	char *dpos = nfsd->nd_dpos;
3443	kauth_cred_t cred = nfsd->nd_cr;
3444	u_int32_t *tl;
3445	struct nfsv3_pathconf *pc;
3446	int32_t t1;
3447	char *bpos;
3448	int error = 0, rdonly, cache = 0, getret = 1;
3449	register_t linkmax, namemax, chownres, notrunc;
3450	char *cp2;
3451	struct mbuf *mb, *mreq;
3452	struct vnode *vp;
3453	struct vattr at;
3454	nfsrvfh_t nsfh;
3455	u_quad_t frev;
3456
3457	nfsm_srvmtofh(&nsfh);
3458	error = nfsrv_fhtovp(&nsfh, 1, &vp, cred, slp, nam,
3459		 &rdonly, (nfsd->nd_flag & ND_KERBAUTH), false);
3460	if (error) {
3461		nfsm_reply(NFSX_UNSIGNED);
3462		nfsm_srvpostop_attr(getret, &at);
3463		return (0);
3464	}
3465	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3466	if (!error)
3467		error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3468	if (!error)
3469		error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3470	if (!error)
3471		error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3472	getret = VOP_GETATTR(vp, &at, cred);
3473	vput(vp);
3474	nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
3475	nfsm_srvpostop_attr(getret, &at);
3476	if (error)
3477		return (0);
3478	nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF);
3479
3480	pc->pc_linkmax = txdr_unsigned(linkmax);
3481	pc->pc_namemax = txdr_unsigned(namemax);
3482	pc->pc_notrunc = txdr_unsigned(notrunc);
3483	pc->pc_chownrestricted = txdr_unsigned(chownres);
3484
3485	/*
3486	 * These should probably be supported by VOP_PATHCONF(), but
3487	 * until msdosfs is exportable (why would you want to?), the
3488	 * Unix defaults should be ok.
3489	 */
3490	pc->pc_caseinsensitive = nfs_false;
3491	pc->pc_casepreserving = nfs_true;
3492	nfsm_srvdone;
3493}
3494
3495/*
3496 * Null operation, used by clients to ping server
3497 */
3498/* ARGSUSED */
3499int
3500nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3501    struct lwp *lwp, struct mbuf **mrq)
3502{
3503	struct mbuf *mrep = nfsd->nd_mrep;
3504	char *bpos;
3505	int error = NFSERR_RETVOID, cache = 0;
3506	struct mbuf *mb, *mreq;
3507	u_quad_t frev;
3508
3509	nfsm_reply(0);
3510nfsmout:
3511	return (0);
3512}
3513
3514/*
3515 * No operation, used for obsolete procedures
3516 */
3517/* ARGSUSED */
3518int
3519nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3520    struct lwp *lwp, struct mbuf **mrq)
3521{
3522	struct mbuf *mrep = nfsd->nd_mrep;
3523	char *bpos;
3524	int error, cache = 0;
3525	struct mbuf *mb, *mreq;
3526	u_quad_t frev;
3527
3528	if (nfsd->nd_repstat)
3529		error = nfsd->nd_repstat;
3530	else
3531		error = EPROCUNAVAIL;
3532	nfsm_reply(0);
3533nfsmout:
3534	return (0);
3535}
3536
3537/*
3538 * Perform access checking for vnodes obtained from file handles that would
3539 * refer to files already opened by a Unix client. You cannot just use
3540 * vn_writechk() and VOP_ACCESS() for two reasons.
3541 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3542 * 2 - The owner is to be given access irrespective of mode bits for some
3543 *     operations, so that processes that chmod after opening a file don't
3544 *     break. I don't like this because it opens a security hole, but since
3545 *     the nfs server opens a security hole the size of a barn door anyhow,
3546 *     what the heck.
3547 *
3548 * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3549 * will return EPERM instead of EACCES. EPERM is always an error.
3550 */
3551int
3552nfsrv_access(struct vnode *vp, int flags, kauth_cred_t cred, int rdonly, struct lwp *lwp, int override)
3553{
3554	struct vattr vattr;
3555	int error;
3556	if (flags & VWRITE) {
3557		/* Just vn_writechk() changed to check rdonly */
3558		/*
3559		 * Disallow write attempts on read-only file systems;
3560		 * unless the file is a socket or a block or character
3561		 * device resident on the file system.
3562		 */
3563		if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
3564			switch (vp->v_type) {
3565			case VREG:
3566			case VDIR:
3567			case VLNK:
3568				return (EROFS);
3569			default:
3570				break;
3571			}
3572		}
3573
3574		/*
3575		 * If the vnode is in use as a process's text,
3576		 * we can't allow writing.
3577		 */
3578		if (vp->v_iflag & VI_TEXT)
3579			return (ETXTBSY);
3580	}
3581	error = VOP_GETATTR(vp, &vattr, cred);
3582	if (error)
3583		return (error);
3584	error = VOP_ACCESS(vp, flags, cred);
3585	/*
3586	 * Allow certain operations for the owner (reads and writes
3587	 * on files that are already open).
3588	 */
3589	if (override && error == EACCES && kauth_cred_geteuid(cred) == vattr.va_uid)
3590		error = 0;
3591	return error;
3592}
3593