1/*
2 * Copyright (c) 2000-2011 Apple Inc.  All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 *    must display the following acknowledgement:
46 *	This product includes software developed by the University of
47 *	California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)nfs_serv.c	8.7 (Berkeley) 5/14/95
65 * FreeBSD-Id: nfs_serv.c,v 1.52 1997/10/28 15:59:05 bde Exp $
66 */
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/proc.h>
71#include <sys/kauth.h>
72#include <sys/unistd.h>
73#include <sys/malloc.h>
74#include <sys/vnode.h>
75#include <sys/mount_internal.h>
76#include <sys/socket.h>
77#include <sys/socketvar.h>
78#include <sys/kpi_mbuf.h>
79#include <sys/dirent.h>
80#include <sys/stat.h>
81#include <sys/kernel.h>
82#include <sys/ubc.h>
83#include <sys/vnode_internal.h>
84#include <sys/uio_internal.h>
85#include <libkern/OSAtomic.h>
86#include <sys/fsevents.h>
87#include <kern/thread_call.h>
88
89#include <sys/vm.h>
90#include <sys/vmparam.h>
91
92#include <netinet/in.h>
93
94#include <nfs/nfsproto.h>
95#include <nfs/rpcv2.h>
96#include <nfs/nfs.h>
97#include <nfs/xdr_subs.h>
98#include <nfs/nfsm_subs.h>
99#include <nfs/nfsrvcache.h>
100#include <nfs/nfs_gss.h>
101
102#if NFSSERVER
103
104/*
105 * NFS server globals
106 */
107
/* nfsd thread counters -- presumably the current and maximum number of nfsd threads (not referenced in this part of the file; confirm against the users) */
int nfsd_thread_count = 0;
int nfsd_thread_max = 0;
/* nfsd lock group and mutex; both are allocated in nfsrv_init() */
lck_grp_t *nfsd_lck_grp;
lck_mtx_t *nfsd_mutex;
/* nfsd tail queues; set up with TAILQ_INIT() in nfsrv_init() */
struct nfsd_head nfsd_head, nfsd_queue;

/* lock groups for per-socket (slp) rwlocks and mutexes; allocated in nfsrv_init() */
lck_grp_t *nfsrv_slp_rwlock_group;
lck_grp_t *nfsrv_slp_mutex_group;
/* server socket tail queues; each one is set up with TAILQ_INIT() in nfsrv_init() */
struct nfsrv_sockhead nfsrv_socklist, nfsrv_deadsocklist, nfsrv_sockwg,
			nfsrv_sockwait, nfsrv_sockwork;
/* UDP server sockets (names suggest IPv4 and IPv6); reset to NULL in nfsrv_init() */
struct nfsrv_sock *nfsrv_udpsock = NULL;
struct nfsrv_sock *nfsrv_udp6sock = NULL;

/* NFS exports */
struct nfsrv_expfs_list nfsrv_exports;		/* export list; LIST_INIT()ed in nfsrv_init() */
struct nfsrv_export_hashhead *nfsrv_export_hashtbl = NULL;
int nfsrv_export_hash_size = NFSRVEXPHASHSZ;
u_long nfsrv_export_hash;
lck_grp_t *nfsrv_export_rwlock_group;		/* allocated in nfsrv_init() */
lck_rw_t nfsrv_export_rwlock;			/* initialized in nfsrv_init() */

#if CONFIG_FSE
/* NFS server file modification event generator */
struct nfsrv_fmod_hashhead *nfsrv_fmod_hashtbl;	/* created by hashinit() in nfsrv_init() */
u_long nfsrv_fmod_hash;				/* filled in by that same hashinit() call */
lck_grp_t *nfsrv_fmod_grp;
lck_mtx_t *nfsrv_fmod_mutex;			/* held by nfsrv_fmod_timer() while it scans the hash table */
static int nfsrv_fmod_timer_on = 0;		/* set by nfsrv_fmod_timer() when it re-arms itself */
int nfsrv_fsevents_enabled = 1;			/* when zero, nfsrv_fmod_timer() skips add_fsevent() */
#endif

/* NFS server timers (thread calls allocated in nfsrv_init()) */
#if CONFIG_FSE
thread_call_t	nfsrv_fmod_timer_call;
#endif
thread_call_t	nfsrv_deadsock_timer_call;
thread_call_t	nfsrv_wg_timer_call;
int nfsrv_wg_timer_on;

/* globals for the active user list */
uint32_t nfsrv_user_stat_enabled = 1;
uint32_t nfsrv_user_stat_node_count = 0;
uint32_t nfsrv_user_stat_max_idle_sec = NFSRV_USER_STAT_DEF_IDLE_SEC;
uint32_t nfsrv_user_stat_max_nodes = NFSRV_USER_STAT_DEF_MAX_NODES;
lck_grp_t *nfsrv_active_user_mutex_group;	/* allocated in nfsrv_init() */

/* presumably the write-gathering delays: NFSRV_WGATHERDELAY scaled by 1000 (units not visible here -- TODO confirm) */
int nfsrv_wg_delay = NFSRV_WGATHERDELAY * 1000;
int nfsrv_wg_delay_v3 = 0;

int nfsrv_async = 0;

/* forward declarations for helpers used by the request handlers in this file */
int nfsrv_authorize(vnode_t,vnode_t,kauth_action_t,vfs_context_t,struct nfs_export_options*,int);
int nfsrv_wg_coalesce(struct nfsrv_descript *, struct nfsrv_descript *);
void nfsrv_modified(vnode_t, vfs_context_t);

/* kernel routines defined outside this file */
extern void IOSleep(int);
extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
165
166/*
167 * Initialize the data structures for the server.
168 */
169
/*
 * Initialization state of the NFS server.  nfsrv_init() moves
 * nfsrv_initted from NOT_INITIALIZED to INITIALIZING with a
 * compare-and-swap and stores INITIALIZED once setup is finished.
 */
#define NFSRV_NOT_INITIALIZED	0
#define NFSRV_INITIALIZING	1
#define NFSRV_INITIALIZED	2
static volatile UInt32 nfsrv_initted = NFSRV_NOT_INITIALIZED;
174
175int
176nfsrv_is_initialized(void)
177{
178	return (nfsrv_initted == NFSRV_INITIALIZED);
179}
180
181void
182nfsrv_init(void)
183{
184	/* make sure we init only once */
185	if (!OSCompareAndSwap(NFSRV_NOT_INITIALIZED, NFSRV_INITIALIZING, &nfsrv_initted)) {
186		/* wait until initialization is complete */
187		while (!nfsrv_is_initialized())
188			IOSleep(500);
189		return;
190	}
191
192	if (sizeof (struct nfsrv_sock) > NFS_SVCALLOC)
193		printf("struct nfsrv_sock bloated (> %dbytes)\n",NFS_SVCALLOC);
194
195	/* init nfsd mutex */
196	nfsd_lck_grp = lck_grp_alloc_init("nfsd", LCK_GRP_ATTR_NULL);
197	nfsd_mutex = lck_mtx_alloc_init(nfsd_lck_grp, LCK_ATTR_NULL);
198
199	/* init slp rwlock */
200	nfsrv_slp_rwlock_group = lck_grp_alloc_init("nfsrv-slp-rwlock", LCK_GRP_ATTR_NULL);
201	nfsrv_slp_mutex_group  = lck_grp_alloc_init("nfsrv-slp-mutex", LCK_GRP_ATTR_NULL);
202
203	/* init export data structures */
204	LIST_INIT(&nfsrv_exports);
205	nfsrv_export_rwlock_group = lck_grp_alloc_init("nfsrv-export-rwlock", LCK_GRP_ATTR_NULL);
206	lck_rw_init(&nfsrv_export_rwlock, nfsrv_export_rwlock_group, LCK_ATTR_NULL);
207
208	/* init active user list mutex structures */
209	nfsrv_active_user_mutex_group = lck_grp_alloc_init("nfs-active-user-mutex", LCK_GRP_ATTR_NULL);
210
211	/* init nfs server request cache mutex */
212	nfsrv_reqcache_lck_grp = lck_grp_alloc_init("nfsrv_reqcache", LCK_GRP_ATTR_NULL);
213	nfsrv_reqcache_mutex = lck_mtx_alloc_init(nfsrv_reqcache_lck_grp, LCK_ATTR_NULL);
214
215#if CONFIG_FSE
216	/* init NFS server file modified event generation */
217	nfsrv_fmod_hashtbl = hashinit(NFSRVFMODHASHSZ, M_TEMP, &nfsrv_fmod_hash);
218	nfsrv_fmod_grp = lck_grp_alloc_init("nfsrv_fmod", LCK_GRP_ATTR_NULL);
219	nfsrv_fmod_mutex = lck_mtx_alloc_init(nfsrv_fmod_grp, LCK_ATTR_NULL);
220#endif
221
222	/* initialize NFS server timer callouts */
223#if CONFIG_FSE
224	nfsrv_fmod_timer_call = thread_call_allocate(nfsrv_fmod_timer, NULL);
225#endif
226	nfsrv_deadsock_timer_call = thread_call_allocate(nfsrv_deadsock_timer, NULL);
227	nfsrv_wg_timer_call = thread_call_allocate(nfsrv_wg_timer, NULL);
228
229	/* Init server data structures */
230	TAILQ_INIT(&nfsrv_socklist);
231	TAILQ_INIT(&nfsrv_sockwait);
232	TAILQ_INIT(&nfsrv_sockwork);
233	TAILQ_INIT(&nfsrv_deadsocklist);
234	TAILQ_INIT(&nfsrv_sockwg);
235	TAILQ_INIT(&nfsd_head);
236	TAILQ_INIT(&nfsd_queue);
237	nfsrv_udpsock = NULL;
238	nfsrv_udp6sock = NULL;
239
240	/* Setup the up-call handling */
241	nfsrv_uc_init();
242
243	/* initialization complete */
244	nfsrv_initted = NFSRV_INITIALIZED;
245}
246
247
248/*
249 *
250 * NFS version 2 and 3 server request processing functions
251 *
252 * These functions take the following parameters:
253 *
254 *      struct nfsrv_descript *nd - the NFS request descriptor
255 *      struct nfsrv_sock *slp    - the NFS socket the request came in on
256 *      vfs_context_t ctx         - VFS context
257 *      mbuf_t *mrepp             - pointer to hold the reply mbuf list
258 *
259 * These routines generally have 3 phases:
260 *
261 *   1 - break down and validate the RPC request in the mbuf chain
262 *       provided in nd->nd_nmreq.
263 *   2 - perform the vnode operations for the request
264 *       (many are very similar to syscalls in vfs_syscalls.c and
265 *       should therefore be kept in sync with those implementations)
266 *   3 - build the RPC reply in an mbuf chain (nmrep) and return the mbuf chain
267 *
268 */
269
/*
 * nfs v3 access service
 *
 * Decodes a file handle and a requested access mask from the request,
 * then clears every requested NFS_ACCESS_* bit (except DELETE) that the
 * checks below refuse.  The reply carries the post-op attributes and,
 * when the reply status is zero, the resulting mask.
 *
 * Parameters:
 *	nd    - the NFS request descriptor (request chain in nd->nd_nmreq)
 *	slp   - the NFS socket the request came in on
 *	ctx   - VFS context used for the authorization and getattr calls
 *	mrepp - receives the head of the reply mbuf chain, or NULL on error
 *
 * Returns the value left in "error" after the reply is assembled.
 * A failure of the operation itself is stored in nd->nd_repstat.
 */
int
nfsrv_access(
	struct nfsrv_descript *nd,
	struct nfsrv_sock *slp,
	vfs_context_t ctx,
	mbuf_t *mrepp)
{
	struct nfsm_chain *nmreq, nmrep;
	vnode_t vp;
	int error, attrerr;
	struct vnode_attr vattr;
	struct nfs_filehandle nfh;
	u_int32_t nfsmode;
	kauth_action_t testaction;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;

	error = 0;
	attrerr = ENOENT;	/* stays ENOENT unless the getattr below runs */
	nfsmode = 0;
	nmreq = &nd->nd_nmreq;
	nfsm_chain_null(&nmrep);
	*mrepp = NULL;
	vp = NULL;

	/*
	 * Phase 1: pull the file handle and access mask out of the request.
	 * NOTE(review): nfsmerr_if()/nfsmout_if() are macros defined elsewhere;
	 * presumably they branch to the nfsmerr/nfsmout labels when their
	 * argument is non-zero.
	 */
	nfsm_chain_get_fh_ptr(error, nmreq, NFS_VER3, nfh.nfh_fhp, nfh.nfh_len);
	nfsm_chain_get_32(error, nmreq, nfsmode);
	nfsmerr_if(error);
	/* translate the handle into a vnode plus its export and export options */
	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
	nfsmerr_if(error);

	/* update export stats */
	NFSStatAdd64(&nx->nx_stats.ops, 1);

	/* update active user stats */
	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);

	error = nfsrv_credcheck(nd, ctx, nx, nxo);
	nfsmerr_if(error);

	/*
	 * Each NFS mode bit is tested separately.
	 *
	 * XXX this code is nominally correct, but returns a pessimistic
	 *     rather than optimistic result.  It will be necessary to add
	 *     an NFS-specific interface to the vnode_authorize code to
	 *     obtain good performance in the optimistic mode.
	 */
	if (nfsmode & NFS_ACCESS_READ) {
		/* READ maps to listing for directories, reading data otherwise */
		testaction = vnode_isdir(vp) ? KAUTH_VNODE_LIST_DIRECTORY : KAUTH_VNODE_READ_DATA;
		if (nfsrv_authorize(vp, NULL, testaction, ctx, nxo, 0))
			nfsmode &= ~NFS_ACCESS_READ;
	}
	/* LOOKUP is cleared for non-directories and for directories that refuse search */
	if ((nfsmode & NFS_ACCESS_LOOKUP) &&
	    (!vnode_isdir(vp) ||
	    nfsrv_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx, nxo, 0)))
		nfsmode &= ~NFS_ACCESS_LOOKUP;
	if (nfsmode & NFS_ACCESS_MODIFY) {
		if (vnode_isdir(vp)) {
			testaction =
			    KAUTH_VNODE_ADD_FILE |
			    KAUTH_VNODE_ADD_SUBDIRECTORY |
			    KAUTH_VNODE_DELETE_CHILD;
		} else {
			testaction =
			    KAUTH_VNODE_WRITE_DATA;
		}
		if (nfsrv_authorize(vp, NULL, testaction, ctx, nxo, 0))
			nfsmode &= ~NFS_ACCESS_MODIFY;
	}
	if (nfsmode & NFS_ACCESS_EXTEND) {
		if (vnode_isdir(vp)) {
			testaction =
			    KAUTH_VNODE_ADD_FILE |
			    KAUTH_VNODE_ADD_SUBDIRECTORY;
		} else {
			testaction =
			    KAUTH_VNODE_WRITE_DATA |
			    KAUTH_VNODE_APPEND_DATA;
		}
		if (nfsrv_authorize(vp, NULL, testaction, ctx, nxo, 0))
			nfsmode &= ~NFS_ACCESS_EXTEND;
	}

	/*
	 * Note concerning NFS_ACCESS_DELETE:
	 * For hard links, the answer may be wrong if the vnode
	 * has multiple parents with different permissions.
	 * Also, some clients (e.g. MacOSX 10.3) may incorrectly
	 * interpret the missing/cleared DELETE bit.
	 * So we'll just leave the DELETE bit alone.  At worst,
	 * we're telling the client it might be able to do
	 * something it really can't.
	 */

	/* EXECUTE is always cleared for directories, otherwise it follows the execute check */
	if ((nfsmode & NFS_ACCESS_EXECUTE) &&
	    (vnode_isdir(vp) ||
	    nfsrv_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx, nxo, 0)))
		nfsmode &= ~NFS_ACCESS_EXECUTE;

	/* get postop attributes */
	nfsm_srv_vattr_init(&vattr, NFS_VER3);
	attrerr = vnode_getattr(vp, &vattr, ctx);

nfsmerr:
	/* assemble reply */
	nd->nd_repstat = error;	/* operation status travels in the reply */
	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(NFS_VER3) + NFSX_UNSIGNED);
	nfsmout_if(error);
	*mrepp = nmrep.nmc_mhead;
	nfsmout_on_status(nd, error);
	nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &vattr);
	/* the access mask is only added when the operation succeeded */
	if (!nd->nd_repstat)
		nfsm_chain_add_32(error, &nmrep, nfsmode);
nfsmout:
	nfsm_chain_build_done(error, &nmrep);
	/* drop the vnode reference obtained via nfsrv_fhtovp() */
	if (vp)
		vnode_put(vp);
	if (error) {
		/* reply could not be built: discard it and hand back nothing */
		nfsm_chain_cleanup(&nmrep);
		*mrepp = NULL;
	}
	return (error);
}
397
/*
 * nfs getattr service
 *
 * Decodes a file handle from the request, fetches the attributes of the
 * vnode it names and returns them in the reply.
 *
 * Parameters:
 *	nd    - the NFS request descriptor (request chain in nd->nd_nmreq)
 *	slp   - the NFS socket the request came in on
 *	ctx   - VFS context used for the getattr call
 *	mrepp - receives the head of the reply mbuf chain, or NULL on error
 *
 * Returns the value left in "error" after the reply is assembled.
 * A failure of the operation itself is stored in nd->nd_repstat.
 */
int
nfsrv_getattr(
	struct nfsrv_descript *nd,
	struct nfsrv_sock *slp,
	vfs_context_t ctx,
	mbuf_t *mrepp)
{
	struct nfsm_chain *nmreq, nmrep;
	struct vnode_attr vattr;
	vnode_t vp;
	int error;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;

	error = 0;
	nmreq = &nd->nd_nmreq;
	nfsm_chain_null(&nmrep);
	*mrepp = NULL;
	vp = NULL;

	/*
	 * NOTE(review): nfsmerr_if()/nfsmout_if() are macros defined elsewhere;
	 * presumably they branch to the nfsmerr/nfsmout labels when their
	 * argument is non-zero.
	 */
	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
	nfsmerr_if(error);
	/* translate the handle into a vnode plus its export and export options */
	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
	nfsmerr_if(error);

	/* update export stats */
	NFSStatAdd64(&nx->nx_stats.ops, 1);

	/* update active user stats */
	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);

	error = nfsrv_credcheck(nd, ctx, nx, nxo);
	nfsmerr_if(error);

	nfsm_srv_vattr_init(&vattr, nd->nd_vers);
	error = vnode_getattr(vp, &vattr, ctx);
	/* done with the vnode; clear vp so the cleanup below does not put it again */
	vnode_put(vp);
	vp = NULL;

nfsmerr:
	/* assemble reply */
	nd->nd_repstat = error;	/* operation status travels in the reply */
	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_FATTR(nd->nd_vers));
	nfsmout_if(error);
	*mrepp = nmrep.nmc_mhead;
	/* attributes are only added when the operation succeeded */
	nfsmout_if(nd->nd_repstat);
	error = nfsm_chain_add_fattr(nd, &nmrep, &vattr);
nfsmout:
	nfsm_chain_build_done(error, &nmrep);
	/* still holding the vnode if we bailed out before the getattr */
	if (vp)
		vnode_put(vp);
	if (error) {
		/* reply could not be built: discard it and hand back nothing */
		nfsm_chain_cleanup(&nmrep);
		*mrepp = NULL;
	}
	return (error);
}
459
/*
 * nfs setattr service
 *
 * Decodes a file handle, the new attributes and (NFSv3 only) an optional
 * guard time from the request, authorizes the change and applies it with
 * vnode_setattr().  NFSv3 replies carry wcc data (pre/post attributes);
 * other versions carry the post-change attributes.
 *
 * Parameters:
 *	nd    - the NFS request descriptor (request chain in nd->nd_nmreq)
 *	slp   - the NFS socket the request came in on
 *	ctx   - VFS context used for the vnode operations
 *	mrepp - receives the head of the reply mbuf chain, or NULL on error
 *
 * Returns the value left in "error" after the reply is assembled.
 * A failure of the operation itself is stored in nd->nd_repstat.
 */
int
nfsrv_setattr(
	struct nfsrv_descript *nd,
	struct nfsrv_sock *slp,
	vfs_context_t ctx,
	mbuf_t *mrepp)
{
	struct nfsm_chain *nmreq, nmrep;
	struct vnode_attr preattr, postattr;
	struct vnode_attr vattr, *vap = &vattr;
	vnode_t vp;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	int error, preattrerr, postattrerr, gcheck;
	struct nfs_filehandle nfh;
	struct timespec guard = { 0, 0 };
	kauth_action_t action;
	uid_t saved_uid;

	error = 0;
	preattrerr = postattrerr = ENOENT;	/* "no attributes" until a getattr succeeds */
	gcheck = 0;
	nmreq = &nd->nd_nmreq;
	nfsm_chain_null(&nmrep);
	*mrepp = NULL;
	vp = NULL;

	/*
	 * NOTE(review): nfsmerr_if()/nfsmout_if() are macros defined elsewhere;
	 * presumably they branch to the nfsmerr/nfsmout labels when their
	 * argument is non-zero.
	 */
	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
	nfsmerr_if(error);

	VATTR_INIT(vap);
	error = nfsm_chain_get_sattr(nd, nmreq, vap);
	if (nd->nd_vers == NFS_VER3) {
		/* NFSv3 requests carry a guard flag, followed by a guard time when the flag is set */
		nfsm_chain_get_32(error, nmreq, gcheck);
		if (gcheck)
			nfsm_chain_get_time(error, nmreq, nd->nd_vers, guard.tv_sec, guard.tv_nsec);
	}
	nfsmerr_if(error);

	/*
	 * Save the original credential UID in case they are
	 * mapped and we need to map the IDs in the attributes.
	 */
	saved_uid = kauth_cred_getuid(nd->nd_cr);

	/*
	 * Now that we have all the fields, let's do it.
	 */
	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
	nfsmerr_if(error);

	/* update export stats */
	NFSStatAdd64(&nx->nx_stats.ops, 1);

	/* update active user stats */
	nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);

	error = nfsrv_credcheck(nd, ctx, nx, nxo);
	nfsmerr_if(error);

	if (nd->nd_vers == NFS_VER3) {
		/* fetch the pre-operation attributes for the wcc data */
		nfsm_srv_pre_vattr_init(&preattr);
		error = preattrerr = vnode_getattr(vp, &preattr, ctx);
		/*
		 * Guard check: when the client supplied a guard time and the
		 * change time is available, they must match exactly.
		 */
		if (!error && gcheck && VATTR_IS_SUPPORTED(&preattr, va_change_time) &&
			(preattr.va_change_time.tv_sec != guard.tv_sec ||
			 preattr.va_change_time.tv_nsec != guard.tv_nsec))
			error = NFSERR_NOT_SYNC;
		/* incomplete pre-op attributes are reported as not available */
		if (!preattrerr && !VATTR_ALL_SUPPORTED(&preattr))
			preattrerr = ENOENT;
		nfsmerr_if(error);
	}

	/*
	 * If the credentials were mapped, we should
	 * map the same values in the attributes.
	 */
	if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nd->nd_cr) != saved_uid)) {
		int ismember;
		VATTR_SET(vap, va_uid, kauth_cred_getuid(nd->nd_cr));
		/* fall back to the credential's gid if the membership check fails or says "no" */
		if (kauth_cred_ismember_gid(nd->nd_cr, vap->va_gid, &ismember) || !ismember)
			VATTR_SET(vap, va_gid, kauth_cred_getgid(nd->nd_cr));
	}

	/* Authorize the attribute changes.  */
	error = vnode_authattr(vp, vap, &action, ctx);
	if (!error)
		error = nfsrv_authorize(vp, NULL, action, ctx, nxo, 0);

	/* set the new attributes */
	if (!error)
		error = vnode_setattr(vp, vap, ctx);

	/*
	 * Post-op attributes are fetched on success, and for NFSv3 even
	 * after a failure.  A getattr failure only becomes the operation's
	 * error when nothing failed earlier.
	 */
	if (!error || (nd->nd_vers == NFS_VER3)) {
		nfsm_srv_vattr_init(&postattr, nd->nd_vers);
		postattrerr = vnode_getattr(vp, &postattr, ctx);
		if (!error)
			error = postattrerr;
	}

nfsmerr:
	/* the vnode is not needed for building the reply */
	if (vp)
		vnode_put(vp);

	/* assemble reply */
	nd->nd_repstat = error;	/* operation status travels in the reply */
	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCORFATTR(nd->nd_vers));
	nfsmout_if(error);
	*mrepp = nmrep.nmc_mhead;
	nfsmout_on_status(nd, error);
	if (nd->nd_vers == NFS_VER3)
		nfsm_chain_add_wcc_data(error, nd, &nmrep,
			preattrerr, &preattr, postattrerr, &postattr);
	else
		error = nfsm_chain_add_fattr(nd, &nmrep, &postattr);
nfsmout:
	nfsm_chain_build_done(error, &nmrep);
	if (error) {
		/* reply could not be built: discard it and hand back nothing */
		nfsm_chain_cleanup(&nmrep);
		*mrepp = NULL;
	}
	return (error);
}
585
586/*
587 * nfs lookup rpc
588 */
589int
590nfsrv_lookup(
591	struct nfsrv_descript *nd,
592	struct nfsrv_sock *slp,
593	vfs_context_t ctx,
594	mbuf_t *mrepp)
595{
596	struct nameidata ni;
597	vnode_t vp, dirp = NULL;
598	struct nfs_filehandle dnfh, nfh;
599	struct nfs_export *nx = NULL;
600	struct nfs_export_options *nxo;
601	int error, attrerr, dirattrerr, isdotdot;
602	uint32_t len = 0;
603	uid_t saved_uid;
604	struct vnode_attr va, dirattr, *vap = &va;
605	struct nfsm_chain *nmreq, nmrep;
606
607	error = 0;
608	attrerr = dirattrerr = ENOENT;
609	nmreq = &nd->nd_nmreq;
610	nfsm_chain_null(&nmrep);
611	saved_uid = kauth_cred_getuid(nd->nd_cr);
612
613	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, dnfh.nfh_fhp, dnfh.nfh_len);
614	nfsm_chain_get_32(error, nmreq, len);
615	nfsm_name_len_check(error, nd, len);
616	nfsmerr_if(error);
617
618	ni.ni_cnd.cn_nameiop = LOOKUP;
619#if CONFIG_TRIGGERS
620	ni.ni_op = OP_LOOKUP;
621#endif
622	ni.ni_cnd.cn_flags = LOCKLEAF;
623	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
624	isdotdot = ((len == 2) && (ni.ni_cnd.cn_pnbuf[0] == '.') && (ni.ni_cnd.cn_pnbuf[1] == '.'));
625	if (!error) {
626		error = nfsrv_namei(nd, ctx, &ni, &dnfh, &dirp, &nx, &nxo);
627		if (nx != NULL) {
628			/* update export stats */
629			NFSStatAdd64(&nx->nx_stats.ops, 1);
630
631			/* update active user stats */
632			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);
633		}
634	}
635
636	if (dirp) {
637		if (nd->nd_vers == NFS_VER3) {
638			nfsm_srv_vattr_init(&dirattr, NFS_VER3);
639			dirattrerr = vnode_getattr(dirp, &dirattr, ctx);
640		}
641		vnode_put(dirp);
642	}
643	nfsmerr_if(error);
644
645	nameidone(&ni);
646
647	vp = ni.ni_vp;
648	error = nfsrv_vptofh(nx, nd->nd_vers, (isdotdot ? &dnfh : NULL), vp, ctx, &nfh);
649	if (!error) {
650		nfsm_srv_vattr_init(vap, nd->nd_vers);
651		attrerr = vnode_getattr(vp, vap, ctx);
652	}
653	vnode_put(vp);
654
655nfsmerr:
656	/* assemble reply */
657	nd->nd_repstat = error;
658	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) +
659			NFSX_POSTOPORFATTR(nd->nd_vers) + NFSX_POSTOPATTR(nd->nd_vers));
660	nfsmout_if(error);
661	*mrepp = nmrep.nmc_mhead;
662	if (nd->nd_repstat) {
663		if (nd->nd_vers == NFS_VER3)
664			nfsm_chain_add_postop_attr(error, nd, &nmrep, dirattrerr, &dirattr);
665		goto nfsmout;
666	}
667	nfsm_chain_add_fh(error, &nmrep, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
668	if (nd->nd_vers == NFS_VER3) {
669		nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, vap);
670		nfsm_chain_add_postop_attr(error, nd, &nmrep, dirattrerr, &dirattr);
671	} else if (!error) {
672		error = nfsm_chain_add_fattr(nd, &nmrep, vap);
673	}
674nfsmout:
675	nfsm_chain_build_done(error, &nmrep);
676	if (error) {
677		nfsm_chain_cleanup(&nmrep);
678		*mrepp = NULL;
679	}
680	return (error);
681}
682
/*
 * nfs readlink service
 *
 * Decodes a file handle from the request, reads the symbolic link it
 * names directly into a freshly allocated mbuf list via VNOP_READLINK()
 * and appends that list to the reply.
 *
 * Parameters:
 *	nd    - the NFS request descriptor (request chain in nd->nd_nmreq)
 *	slp   - the NFS socket the request came in on
 *	ctx   - VFS context used for the vnode operations
 *	mrepp - receives the head of the reply mbuf chain, or NULL on error
 *
 * Returns the value left in "error" after the reply is assembled.
 * A failure of the operation itself is stored in nd->nd_repstat.
 */
int
nfsrv_readlink(
	struct nfsrv_descript *nd,
	struct nfsrv_sock *slp,
	vfs_context_t ctx,
	mbuf_t *mrepp)
{
	int error, mpcnt, tlen, len, attrerr;
	vnode_t vp;
	struct vnode_attr vattr;
	struct nfs_filehandle nfh;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct nfsm_chain *nmreq, nmrep;
	mbuf_t mpath, mp;
	uio_t auio = NULL;
	/* on-stack uio storage sized for up to 4 iovecs; a larger one is malloc'd below if needed */
	char uio_buf[ UIO_SIZEOF(4) ];
	char *uio_bufp = &uio_buf[0];
	int uio_buflen = UIO_SIZEOF(4);

	error = 0;
	attrerr = ENOENT;	/* "no attributes" until a getattr succeeds */
	nmreq = &nd->nd_nmreq;
	nfsm_chain_null(&nmrep);
	mpath = NULL;
	vp = NULL;
	len = NFS_MAXPATHLEN;

	/*
	 * NOTE(review): nfsmerr_if()/nfsmout_if() are macros defined elsewhere;
	 * presumably they branch to the nfsmerr/nfsmout labels when their
	 * argument is non-zero.
	 */
	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
	nfsmerr_if(error);

	/* get mbuf list to hold symlink path */
	error = nfsm_mbuf_get_list(len, &mpath, &mpcnt);
	nfsmerr_if(error);
	if (mpcnt > 4) {
		/* more mbufs than the on-stack uio can describe: allocate a bigger one */
		uio_buflen = UIO_SIZEOF(mpcnt);
		MALLOC(uio_bufp, char*, uio_buflen, M_TEMP, M_WAITOK);
		if (!uio_bufp)
			error = ENOMEM;
		nfsmerr_if(error);
	}
	auio = uio_createwithbuffer(mpcnt, 0, UIO_SYSSPACE, UIO_READ, uio_bufp, uio_buflen);
	if (!auio)
		error = ENOMEM;
	nfsmerr_if(error);

	/* describe every mbuf's data area to the uio so the link is read straight into them */
	for (mp = mpath; mp; mp = mbuf_next(mp))
		uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(mp)), mbuf_len(mp));

	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
	nfsmerr_if(error);

	/* update export stats */
	NFSStatAdd64(&nx->nx_stats.ops, 1);

	/* update active user stats */
	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);

	error = nfsrv_credcheck(nd, ctx, nx, nxo);
	nfsmerr_if(error);

	/* only symbolic links can be read; the error code depends on the protocol version */
	if (vnode_vtype(vp) != VLNK) {
		if (nd->nd_vers == NFS_VER3)
			error = EINVAL;
		else
			error = ENXIO;
	}

	if (!error)
		error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx, nxo, 0);
	if (!error)
		error = VNOP_READLINK(vp, auio, ctx);
	if (vp) {
		/* NFSv3 returns post-op attributes even when the readlink failed */
		if (nd->nd_vers == NFS_VER3) {
			nfsm_srv_vattr_init(&vattr, NFS_VER3);
			attrerr = vnode_getattr(vp, &vattr, ctx);
		}
		vnode_put(vp);
		vp = NULL;
	}
	if (error) {
		/* nothing to send back: release the path mbufs now */
		mbuf_freem(mpath);
		mpath = NULL;
	}

nfsmerr:
	/* assemble reply */
	nd->nd_repstat = error;	/* operation status travels in the reply */
	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + NFSX_UNSIGNED);
	nfsmout_if(error);
	*mrepp = nmrep.nmc_mhead;
	nfsmout_on_status(nd, error);
	if (nd->nd_vers == NFS_VER3)
		nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &vattr);
	if (error || nd->nd_repstat) {
		nfsm_chain_build_done(error, &nmrep);
		goto nfsmout;
	}
	if (auio && (uio_resid(auio) > 0)) {
		/* the link was shorter than the buffer: trim the unused space off the mbuf list */
		len -= uio_resid(auio);
		tlen = nfsm_rndup(len);
		nfsm_adj(mpath, NFS_MAXPATHLEN-tlen, tlen-len);
	}
	nfsm_chain_add_32(error, &nmrep, len);
	nfsm_chain_build_done(error, &nmrep);
	nfsmout_if(error);
	/* hang the path mbufs off the end of the reply; on success the reply owns them */
	error = mbuf_setnext(nmrep.nmc_mcur, mpath);
	if (!error)
		mpath = NULL;
nfsmout:
	if (vp)
		vnode_put(vp);
	if (mpath)
		mbuf_freem(mpath);
	/* only free the uio storage if it was replaced by a malloc'd buffer */
	if (uio_bufp != &uio_buf[0])
		FREE(uio_bufp, M_TEMP);
	if (error) {
		/* reply could not be built: discard it and hand back nothing */
		nfsm_chain_cleanup(&nmrep);
		*mrepp = NULL;
	}
	return (error);
}
808
809/*
810 * nfs read service
811 */
812int
813nfsrv_read(
814	struct nfsrv_descript *nd,
815	struct nfsrv_sock *slp,
816	vfs_context_t ctx,
817	mbuf_t *mrepp)
818{
819	int error, attrerr, mreadcnt;
820	uint32_t reqlen, maxlen, count, len, tlen, left;
821	mbuf_t mread, m;
822	vnode_t vp;
823	struct nfs_filehandle nfh;
824	struct nfs_export *nx;
825	struct nfs_export_options *nxo;
826	uio_t auio = NULL;
827	char *uio_bufp = NULL;
828	struct vnode_attr vattr, *vap = &vattr;
829	off_t off;
830	uid_t saved_uid;
831	char uio_buf[ UIO_SIZEOF(0) ];
832	struct nfsm_chain *nmreq, nmrep;
833
834	error = 0;
835	attrerr = ENOENT;
836	nmreq = &nd->nd_nmreq;
837	nfsm_chain_null(&nmrep);
838	mread = NULL;
839	vp = NULL;
840	len = reqlen = 0;
841	saved_uid = kauth_cred_getuid(nd->nd_cr);
842
843	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
844	nfsmerr_if(error);
845	if (nd->nd_vers == NFS_VER3)
846		nfsm_chain_get_64(error, nmreq, off);
847	else
848		nfsm_chain_get_32(error, nmreq, off);
849	nfsm_chain_get_32(error, nmreq, reqlen);
850	maxlen = NFSRV_NDMAXDATA(nd);
851	if (reqlen > maxlen)
852		reqlen = maxlen;
853	nfsmerr_if(error);
854	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
855	nfsmerr_if(error);
856
857	/* update export stats */
858	NFSStatAdd64(&nx->nx_stats.ops, 1);
859
860	error = nfsrv_credcheck(nd, ctx, nx, nxo);
861	nfsmerr_if(error);
862
863	if (vnode_vtype(vp) != VREG) {
864		if (nd->nd_vers == NFS_VER3)
865			error = EINVAL;
866		else
867			error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES;
868	}
869
870	if (!error) {
871	    if ((error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx, nxo, 1)))
872		error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx, nxo, 1);
873	}
874	nfsm_srv_vattr_init(vap, nd->nd_vers);
875	attrerr = vnode_getattr(vp, vap, ctx);
876	if (!error)
877		error = attrerr;
878	nfsmerr_if(error);
879
880	if ((u_quad_t)off >= vap->va_data_size)
881		count = 0;
882	else if (((u_quad_t)off + reqlen) > vap->va_data_size)
883		count = nfsm_rndup(vap->va_data_size - off);
884	else
885		count = reqlen;
886
887	len = left = count;
888	if (count > 0) {
889		/* get mbuf list to hold read data */
890		error = nfsm_mbuf_get_list(count, &mread, &mreadcnt);
891		nfsmerr_if(error);
892		MALLOC(uio_bufp, char *, UIO_SIZEOF(mreadcnt), M_TEMP, M_WAITOK);
893		if (uio_bufp)
894			auio = uio_createwithbuffer(mreadcnt, off, UIO_SYSSPACE,
895					UIO_READ, uio_bufp, UIO_SIZEOF(mreadcnt));
896		if (!uio_bufp || !auio) {
897			error = ENOMEM;
898			goto errorexit;
899		}
900		for (m = mread; m; m = mbuf_next(m))
901			uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mbuf_len(m));
902		error = VNOP_READ(vp, auio, IO_NODELOCKED, ctx);
903	} else {
904		auio = uio_createwithbuffer(0, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
905		if (!auio) {
906			error = ENOMEM;
907			goto errorexit;
908		}
909	}
910
911errorexit:
912	if (!error || (nd->nd_vers == NFS_VER3)) {
913		nfsm_srv_vattr_init(vap, nd->nd_vers);
914		attrerr = vnode_getattr(vp, vap, ctx);
915		if (!error && (nd->nd_vers == NFS_VER2))
916			error = attrerr; /* NFSv2 must have attributes to return */
917	}
918	nfsmerr_if(error);
919
920	vnode_put(vp);
921	vp = NULL;
922
923	/* trim off any data not actually read */
924	len -= uio_resid(auio);
925	tlen = nfsm_rndup(len);
926	if (count != tlen || tlen != len)
927		nfsm_adj(mread, count - tlen, tlen - len);
928
929nfsmerr:
930	/* assemble reply */
931	nd->nd_repstat = error;
932	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPORFATTR(nd->nd_vers) + 3 * NFSX_UNSIGNED);
933	nfsmout_if(error);
934	*mrepp = nmrep.nmc_mhead;
935	nfsmout_on_status(nd, error);
936	if (nd->nd_vers == NFS_VER3)
937		nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, vap);
938	if (error || nd->nd_repstat) {
939		nfsm_chain_build_done(error, &nmrep);
940		goto nfsmout;
941	}
942	if (nd->nd_vers == NFS_VER3) {
943		nfsm_chain_add_32(error, &nmrep, len);
944		nfsm_chain_add_32(error, &nmrep, (len < reqlen) ? TRUE : FALSE);
945	} else {
946		error = nfsm_chain_add_fattr(nd, &nmrep, vap);
947	}
948	nfsm_chain_add_32(error, &nmrep, len);
949	nfsm_chain_build_done(error, &nmrep);
950	nfsmout_if(error);
951	error = mbuf_setnext(nmrep.nmc_mcur, mread);
952	if (!error)
953		mread = NULL;
954
955	/* update export stats */
956	NFSStatAdd64(&nx->nx_stats.bytes_read, len);
957
958	/* update active user stats */
959	nfsrv_update_user_stat(nx, nd, saved_uid, 1, len, 0);
960nfsmout:
961	if (vp)
962		vnode_put(vp);
963	if (mread)
964		mbuf_freem(mread);
965	if (uio_bufp != NULL)
966		FREE(uio_bufp, M_TEMP);
967	if (error) {
968		nfsm_chain_cleanup(&nmrep);
969		*mrepp = NULL;
970	}
971	return (error);
972}
973
974#if CONFIG_FSE
975/*
976 * NFS File modification reporting
977 *
978 * When the contents of a file are changed, a "content modified"
979 * fsevent needs to be issued.  Normally this would be done at
980 * file close time.  This is difficult for NFS because the protocol
981 * has no "close" operation.  The client sends a stream of write
982 * requests that just stop.  So we keep a hash table full of
983 * vnodes that have been written to recently, and issue a
984 * "content modified" fsevent only if there are no writes to
985 * a vnode for nfsrv_fmod_pendtime milliseconds.
986 */
/* count of vnodes being written to; nfsrv_fmod_timer() subtracts the entries it fires */
int nfsrv_fmod_pending;
/* msec to wait after the last write before firing (converted to a deadline in nfsrv_fmod_timer()) */
int nfsrv_fmod_pendtime = 1000;
/* msec min interval between callbacks; lower bound for the timer's re-arm interval */
int nfsrv_fmod_min_interval = 100;
990
991/*
992 * This function is called via the kernel's callout
993 * mechanism.  Calls are made only when there are
994 * vnodes pending a fsevent creation, and no more
995 * frequently than every nfsrv_fmod_min_interval ms.
996 */
997void
998nfsrv_fmod_timer(__unused void *param0, __unused void *param1)
999{
1000	struct nfsrv_fmod_hashhead *headp, firehead;
1001	struct nfsrv_fmod *fp, *nfp, *pfp;
1002	uint64_t timenow, next_deadline;
1003	int interval = 0, i, fmod_fire;
1004
1005	LIST_INIT(&firehead);
1006	lck_mtx_lock(nfsrv_fmod_mutex);
1007again:
1008	clock_get_uptime(&timenow);
1009	clock_interval_to_deadline(nfsrv_fmod_pendtime, 1000 * 1000,
1010		&next_deadline);
1011
1012	/*
1013	 * Scan all the hash chains
1014	 */
1015	fmod_fire = 0;
1016	for (i = 0; i < NFSRVFMODHASHSZ; i++) {
1017		/*
1018		 * For each hash chain, look for an entry
1019		 * that has exceeded the deadline.
1020		 */
1021		headp = &nfsrv_fmod_hashtbl[i];
1022		LIST_FOREACH(fp, headp, fm_link) {
1023			if (timenow >= fp->fm_deadline)
1024				break;
1025			if (fp->fm_deadline < next_deadline)
1026				next_deadline = fp->fm_deadline;
1027		}
1028
1029		/*
1030		 * If we have an entry that's exceeded the
1031		 * deadline, then the same is true for all
1032		 * following entries in the chain, since they're
1033		 * sorted in time order.
1034		 */
1035		pfp = NULL;
1036		while (fp) {
1037			/* move each entry to the fire list */
1038			nfp = LIST_NEXT(fp, fm_link);
1039			LIST_REMOVE(fp, fm_link);
1040			fmod_fire++;
1041			if (pfp)
1042				LIST_INSERT_AFTER(pfp, fp, fm_link);
1043			else
1044				LIST_INSERT_HEAD(&firehead, fp, fm_link);
1045			pfp = fp;
1046			fp = nfp;
1047		}
1048	}
1049
1050	if (fmod_fire) {
1051		lck_mtx_unlock(nfsrv_fmod_mutex);
1052		/*
1053		 * Fire off the content modified fsevent for each
1054		 * entry and free it.
1055		 */
1056		LIST_FOREACH_SAFE(fp, &firehead, fm_link, nfp) {
1057			if (nfsrv_fsevents_enabled) {
1058				fp->fm_context.vc_thread = current_thread();
1059				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
1060					FSE_ARG_VNODE, fp->fm_vp,
1061					FSE_ARG_DONE);
1062			}
1063			vnode_put(fp->fm_vp);
1064			kauth_cred_unref(&fp->fm_context.vc_ucred);
1065			LIST_REMOVE(fp, fm_link);
1066			FREE(fp, M_TEMP);
1067		}
1068		lck_mtx_lock(nfsrv_fmod_mutex);
1069		nfsrv_fmod_pending -= fmod_fire;
1070		goto again;
1071	}
1072
1073	/*
1074	 * If there are still pending entries, set up another
1075	 * callout to handle them later. Set the timeout deadline
1076	 * so that the callout happens when the oldest pending
1077	 * entry is ready to send its fsevent.
1078	 */
1079	if (nfsrv_fmod_pending > 0) {
1080		interval = (next_deadline - timenow) / (1000 * 1000);
1081		if (interval < nfsrv_fmod_min_interval)
1082			interval = nfsrv_fmod_min_interval;
1083	}
1084
1085	nfsrv_fmod_timer_on = interval > 0;
1086	if (nfsrv_fmod_timer_on)
1087		nfs_interval_timer_start(nfsrv_fmod_timer_call, interval);
1088
1089	lck_mtx_unlock(nfsrv_fmod_mutex);
1090}
1091
1092/*
1093 * When a vnode has been written to, enter it in the hash
1094 * table of vnodes pending creation of an fsevent. If the
1095 * callout timer isn't already running, schedule a callback
1096 * for nfsrv_fmod_pendtime msec from now.
1097 */
1098void
1099nfsrv_modified(vnode_t vp, vfs_context_t ctx)
1100{
1101	uint64_t deadline;
1102	struct nfsrv_fmod *fp;
1103	struct nfsrv_fmod_hashhead *head;
1104
1105	lck_mtx_lock(nfsrv_fmod_mutex);
1106
1107	/*
1108	 * Compute the time in the future when the
1109	 * content modified fsevent is to be issued.
1110	 */
1111	clock_interval_to_deadline(nfsrv_fmod_pendtime, 1000 * 1000, &deadline);
1112
1113	/*
1114	 * Check if there's already a file content change fsevent
1115	 * pending for this vnode.  If there is, update its
1116	 * timestamp and make sure it's at the front of the hash chain.
1117	 */
1118	head = &nfsrv_fmod_hashtbl[NFSRVFMODHASH(vp)];
1119	LIST_FOREACH(fp, head, fm_link) {
1120		if (vp == fp->fm_vp) {
1121			fp->fm_deadline = deadline;
1122			if (fp != LIST_FIRST(head)) {
1123				LIST_REMOVE(fp, fm_link);
1124				LIST_INSERT_HEAD(head, fp, fm_link);
1125			}
1126			lck_mtx_unlock(nfsrv_fmod_mutex);
1127			return;
1128		}
1129	}
1130
1131	/*
1132	 * First content change fsevent for this vnode.
1133	 * Allocate a new file mod entry and add it
1134	 * on the front of the hash chain.
1135	 */
1136	if (vnode_get(vp) != 0)
1137		goto done;
1138	MALLOC(fp, struct nfsrv_fmod *, sizeof(*fp), M_TEMP, M_WAITOK);
1139	if (fp == NULL) {
1140		vnode_put(vp);
1141		goto done;
1142	}
1143	fp->fm_vp = vp;
1144	kauth_cred_ref(vfs_context_ucred(ctx));
1145	fp->fm_context = *ctx;
1146	fp->fm_deadline = deadline;
1147	LIST_INSERT_HEAD(head, fp, fm_link);
1148
1149	/*
1150	 * If added to an empty hash table, then set the
1151	 * callout timer to go off after nfsrv_fmod_pendtime.
1152	 */
1153	nfsrv_fmod_pending++;
1154	if (!nfsrv_fmod_timer_on) {
1155		nfsrv_fmod_timer_on = 1;
1156		nfs_interval_timer_start(nfsrv_fmod_timer_call,
1157			nfsrv_fmod_pendtime);
1158	}
1159done:
1160	lck_mtx_unlock(nfsrv_fmod_mutex);
1161	return;
1162}
1163#endif /* CONFIG_FSE */
1164
1165/*
1166 * nfs write service
1167 */
1168int
1169nfsrv_write(
1170	struct nfsrv_descript *nd,
1171	struct nfsrv_sock *slp,
1172	vfs_context_t ctx,
1173	mbuf_t *mrepp)
1174{
1175	struct vnode_attr preattr, postattr;
1176	int error, preattrerr, postattrerr;
1177	int ioflags, len, retlen;
1178	int mlen, mcount;
1179	int stable = NFS_WRITE_FILESYNC;
1180	mbuf_t m;
1181	vnode_t vp;
1182	struct nfs_filehandle nfh;
1183	struct nfs_export *nx;
1184	struct nfs_export_options *nxo;
1185	uio_t auio = NULL;
1186	char *uio_bufp = NULL;
1187	off_t off;
1188	uid_t saved_uid;
1189	struct nfsm_chain *nmreq, nmrep;
1190
1191	if (nd->nd_nmreq.nmc_mhead == NULL) {
1192		*mrepp = NULL;
1193		return (0);
1194	}
1195
1196	error = 0;
1197	preattrerr = postattrerr = ENOENT;
1198	saved_uid = kauth_cred_getuid(nd->nd_cr);
1199	nmreq = &nd->nd_nmreq;
1200	nfsm_chain_null(&nmrep);
1201	vp = NULL;
1202	len = retlen = 0;
1203
1204	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
1205	nfsmerr_if(error);
1206	if (nd->nd_vers == NFS_VER3) {
1207		nfsm_chain_get_64(error, nmreq, off);
1208		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
1209		nfsm_chain_get_32(error, nmreq, stable);
1210	} else {
1211		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
1212		nfsm_chain_get_32(error, nmreq, off);
1213		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
1214		if (nfsrv_async)
1215	    		stable = NFS_WRITE_UNSTABLE;
1216	}
1217	nfsm_chain_get_32(error, nmreq, len);
1218	nfsmerr_if(error);
1219	retlen = len;
1220
1221	/*
1222	 * For NFS Version 2, it is not obvious what a write of zero length
1223	 * should do, but I might as well be consistent with Version 3,
1224	 * which is to return ok so long as there are no permission problems.
1225	 */
1226
1227	if (len > 0) {
1228		error = nfsm_chain_trim_data(nmreq, len, &mlen);
1229		nfsmerr_if(error);
1230	} else {
1231		mlen = 0;
1232	}
1233	if ((len > NFSRV_MAXDATA) || (len < 0) || (mlen < len)) {
1234		error = EIO;
1235		goto nfsmerr;
1236	}
1237	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
1238	nfsmerr_if(error);
1239
1240	/* update export stats */
1241	NFSStatAdd64(&nx->nx_stats.ops, 1);
1242
1243	error = nfsrv_credcheck(nd, ctx, nx, nxo);
1244	nfsmerr_if(error);
1245
1246	if (nd->nd_vers == NFS_VER3) {
1247		nfsm_srv_pre_vattr_init(&preattr);
1248		preattrerr = vnode_getattr(vp, &preattr, ctx);
1249	}
1250	if (vnode_vtype(vp) != VREG) {
1251		if (nd->nd_vers == NFS_VER3)
1252			error = EINVAL;
1253		else
1254			error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES;
1255	}
1256	if (!error)
1257		error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx, nxo, 1);
1258	nfsmerr_if(error);
1259
1260	if (len > 0) {
1261		for (mcount=0, m=nmreq->nmc_mcur; m; m = mbuf_next(m))
1262			if (mbuf_len(m) > 0)
1263				mcount++;
1264		MALLOC(uio_bufp, char *, UIO_SIZEOF(mcount), M_TEMP, M_WAITOK);
1265		if (uio_bufp)
1266			auio = uio_createwithbuffer(mcount, off, UIO_SYSSPACE, UIO_WRITE, uio_bufp, UIO_SIZEOF(mcount));
1267		if (!uio_bufp || !auio)
1268			error = ENOMEM;
1269		nfsmerr_if(error);
1270		for (m = nmreq->nmc_mcur; m; m = mbuf_next(m))
1271			if ((mlen = mbuf_len(m)) > 0)
1272				uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mlen);
1273		/*
1274		 * XXX The IO_METASYNC flag indicates that all metadata (and not just
1275		 * enough to ensure data integrity) mus be written to stable storage
1276		 * synchronously.  (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1277		 */
1278		if (stable == NFS_WRITE_UNSTABLE)
1279			ioflags = IO_NODELOCKED;
1280		else if (stable == NFS_WRITE_DATASYNC)
1281			ioflags = (IO_SYNC | IO_NODELOCKED);
1282		else
1283			ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1284
1285		error = VNOP_WRITE(vp, auio, ioflags, ctx);
1286		OSAddAtomic64(1, &nfsstats.srvvop_writes);
1287
1288		/* update export stats */
1289		NFSStatAdd64(&nx->nx_stats.bytes_written, len);
1290
1291		/* update active user stats */
1292		nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, len);
1293
1294#if CONFIG_FSE
1295		if (nfsrv_fsevents_enabled && !error && need_fsevent(FSE_CONTENT_MODIFIED, vp))
1296			nfsrv_modified(vp, ctx);
1297#endif
1298	}
1299	nfsm_srv_vattr_init(&postattr, nd->nd_vers);
1300	postattrerr = vnode_getattr(vp, &postattr, ctx);
1301	if (!error && (nd->nd_vers == NFS_VER2))
1302		error = postattrerr; /* NFSv2 must have attributes to return */
1303	vnode_put(vp);
1304	vp = NULL;
1305
1306nfsmerr:
1307	/* assemble reply */
1308	nd->nd_repstat = error;
1309	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_PREOPATTR(nd->nd_vers) +
1310			NFSX_POSTOPORFATTR(nd->nd_vers) + 2 * NFSX_UNSIGNED +
1311			NFSX_WRITEVERF(nd->nd_vers));
1312	nfsmout_if(error);
1313	*mrepp = nmrep.nmc_mhead;
1314	nfsmout_on_status(nd, error);
1315	if (nd->nd_vers == NFS_VER3) {
1316		nfsm_chain_add_wcc_data(error, nd, &nmrep,
1317			preattrerr, &preattr, postattrerr, &postattr);
1318		nfsmout_if(error || nd->nd_repstat);
1319		nfsm_chain_add_32(error, &nmrep, retlen);
1320		/* If nfsrv_async is set, then pretend the write was FILESYNC. */
1321		if ((stable == NFS_WRITE_UNSTABLE) && !nfsrv_async)
1322			nfsm_chain_add_32(error, &nmrep, stable);
1323		else
1324			nfsm_chain_add_32(error, &nmrep, NFS_WRITE_FILESYNC);
1325		/* write verifier */
1326		nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_sec);
1327		nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_usec);
1328	} else {
1329		error = nfsm_chain_add_fattr(nd, &nmrep, &postattr);
1330	}
1331nfsmout:
1332	nfsm_chain_build_done(error, &nmrep);
1333	if (vp)
1334		vnode_put(vp);
1335	if (uio_bufp != NULL)
1336		FREE(uio_bufp, M_TEMP);
1337	if (error) {
1338		nfsm_chain_cleanup(&nmrep);
1339		*mrepp = NULL;
1340	}
1341	return (error);
1342}
1343
/*
 * NFS write service with write gathering support. Called when
 * nfsrv_wg_delay > 0.
 * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
 * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
 * Jan. 1994.
 */

/*
 * Select the socket's write-delay hash chain for a file: the first
 * 32 bits found at f, modulo NFS_WDELAYHASHSIZ, index ns_wdelayhashtbl.
 */
#define	NWDELAYHASH(sock, f) \
	(&(sock)->ns_wdelayhashtbl[(*((u_int32_t *)(f))) % NFS_WDELAYHASHSIZ])
/*
 * These macros compare nfsrv_descript structures.
 * NFSW_CONTIG: true when n starts at or before the end offset of o
 * and both requests carry matching file handles.
 */
#define NFSW_CONTIG(o, n) \
		(((o)->nd_eoff >= (n)->nd_off) && nfsrv_fhmatch(&(o)->nd_fh, &(n)->nd_fh))
/*
 * NFSW_SAMECRED: true when the two requests' credentials are
 * byte-for-byte identical.
 *
 * XXX The following is an incorrect comparison; it fails to take into account
 * XXX scoping of MAC labels, but we currently lack KPI for credential
 * XXX comparisons.
 */
#define NFSW_SAMECRED(o, n) \
	(!bcmp((caddr_t)(o)->nd_cr, (caddr_t)(n)->nd_cr, \
		sizeof (struct ucred)))
1365
/*
 * Write service with write gathering.
 *
 * When *ndp is non-NULL on entry it is a new write request: its header
 * is decoded, it is given a due time (now plus the gather delay) and it
 * is queued on the socket's time queue (ns_tq, ordered by nd_time) and
 * on the per-file hash chain, where it may be coalesced with contiguous
 * requests that carry the same credential.  *ndp is set to NULL.
 *
 * Then every queued request that is due and has no reply yet is written
 * with VNOP_WRITE(), and replies are built for it and for each request
 * coalesced into it.  Finished requests are parked at the head of ns_tq
 * with nd_time == 1.
 *
 * Finally the first queued request that has a reply is unlinked and
 * handed back through *ndp / *mrepp, ns_wgtime is set to the due time
 * of the head of ns_tq (0 if the queue is empty), and the socket is
 * added to or removed from nfsrv_sockwg accordingly (starting the
 * write-gather timer if needed).
 *
 * This function always returns 0.
 */
int
nfsrv_writegather(
	struct nfsrv_descript **ndp,
	struct nfsrv_sock *slp,
	vfs_context_t ctx,
	mbuf_t *mrepp)
{
	struct nfsrv_descript *nd, *wp, *owp, *swp;
	struct nfs_export *nx;
	struct nfs_export_options *nxo;
	struct nfsrv_wg_delayhash *wpp;
	uid_t saved_uid;
	struct vnode_attr preattr, postattr;
	int error, mlen, i, ioflags, tlen;
	int preattrerr, postattrerr;
	vnode_t vp;
	mbuf_t m;
	uio_t auio = NULL;
	char *uio_bufp = NULL;
	u_quad_t cur_usec;
	struct timeval now;
	struct nfsm_chain *nmreq, nmrep;

	error = 0;
	preattrerr = postattrerr = ENOENT;
	nfsm_chain_null(&nmrep);
	vp = NULL;

	*mrepp = NULL;
	if (*ndp) {
	    /* a new request: take it over from the caller and time-stamp it */
	    nd = *ndp;
	    *ndp = NULL;
	    nmreq = &nd->nd_nmreq;
	    LIST_INIT(&nd->nd_coalesce);
	    nd->nd_mrep = NULL;
	    nd->nd_stable = NFS_WRITE_FILESYNC;
	    microuptime(&now);
	    cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
	    /* the write becomes due once the gather delay has elapsed */
	    nd->nd_time = cur_usec +
		((nd->nd_vers == NFS_VER3) ? nfsrv_wg_delay_v3 : nfsrv_wg_delay);

	    /* Now, get the write header... */
	    nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nd->nd_fh.nfh_fhp, nd->nd_fh.nfh_len);
	    /* XXX shouldn't we be checking for invalid FHs before doing any more work? */
	    nfsmerr_if(error);
	    if (nd->nd_vers == NFS_VER3) {
		    nfsm_chain_get_64(error, nmreq, nd->nd_off);
		    nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		    nfsm_chain_get_32(error, nmreq, nd->nd_stable);
	    } else {
		    nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		    nfsm_chain_get_32(error, nmreq, nd->nd_off);
		    nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
		    if (nfsrv_async)
			    nd->nd_stable = NFS_WRITE_UNSTABLE;
	    }
	    nfsm_chain_get_32(error, nmreq, nd->nd_len);
	    nfsmerr_if(error);
	    nd->nd_eoff = nd->nd_off + nd->nd_len;

	    if (nd->nd_len > 0) {
		    error = nfsm_chain_trim_data(nmreq, nd->nd_len, &mlen);
		    nfsmerr_if(error);
	    } else {
		mlen = 0;
	    }

	    if ((nd->nd_len > NFSRV_MAXDATA) || (nd->nd_len < 0)  || (mlen < nd->nd_len)) {
		error = EIO;
		/*
		 * Error exit for the decode steps above (and the target of
		 * the nfsmerr_if() in the reply loop further down): build
		 * an error reply right away and make the request
		 * immediately due by setting nd_time to 1.
		 */
nfsmerr:
		nd->nd_repstat = error;
		error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers));
		if (!error) {
		    nd->nd_mrep = nmrep.nmc_mhead;
		    if (nd->nd_vers == NFS_VER3)
			nfsm_chain_add_wcc_data(error, nd, &nmrep,
				preattrerr, &preattr, postattrerr, &postattr);
		}
		nfsm_chain_build_done(error, &nmrep);
		nd->nd_time = 1;
	    }

	    /*
	     * Add this entry to the hash and time queues.
	     */
	    lck_mtx_lock(&slp->ns_wgmutex);
	    /* time queue: insert after the last entry with an earlier nd_time */
	    owp = NULL;
	    wp = slp->ns_tq.lh_first;
	    while (wp && wp->nd_time < nd->nd_time) {
		owp = wp;
		wp = wp->nd_tq.le_next;
	    }
	    if (owp) {
		LIST_INSERT_AFTER(owp, nd, nd_tq);
	    } else {
		LIST_INSERT_HEAD(&slp->ns_tq, nd, nd_tq);
	    }
	    if (!error) {
		/*
		 * Hash chain: skip to the first entry for the same file,
		 * then past the entries for that file with a lower offset.
		 */
		wpp = NWDELAYHASH(slp, nd->nd_fh.nfh_fid);
		owp = NULL;
		wp = wpp->lh_first;
		while (wp && !nfsrv_fhmatch(&nd->nd_fh, &wp->nd_fh)) {
		    owp = wp;
		    wp = wp->nd_hash.le_next;
		}
		while (wp && (wp->nd_off < nd->nd_off) &&
		    nfsrv_fhmatch(&nd->nd_fh, &wp->nd_fh)) {
		    owp = wp;
		    wp = wp->nd_hash.le_next;
		}
		if (owp) {
		    LIST_INSERT_AFTER(owp, nd, nd_hash);
		    /*
		     * Search the hash list for overlapping entries and
		     * coalesce.
		     */
		    /* the value returned by nfsrv_wg_coalesce() is not checked here */
		    for(; nd && NFSW_CONTIG(owp, nd); nd = wp) {
			wp = nd->nd_hash.le_next;
			if (NFSW_SAMECRED(owp, nd))
			    nfsrv_wg_coalesce(owp, nd);
		    }
		} else {
		    LIST_INSERT_HEAD(wpp, nd, nd_hash);
		}
	    }
	} else {
	    /* no new request: just service the queues */
	    lck_mtx_lock(&slp->ns_wgmutex);
	}

	/*
	 * Now, do VNOP_WRITE()s for any one(s) that need to be done now
	 * and generate the associated reply mbuf list(s).
	 */
loop1:
	microuptime(&now);
	cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec;
	for (nd = slp->ns_tq.lh_first; nd; nd = owp) {
		owp = nd->nd_tq.le_next;
		/* ns_tq is ordered by nd_time: nothing further on is due yet */
		if (nd->nd_time > cur_usec)
		    break;
		/* already has a reply; it will be returned by the final phase */
		if (nd->nd_mrep)
		    continue;
		LIST_REMOVE(nd, nd_tq);
		LIST_REMOVE(nd, nd_hash);
		nmreq = &nd->nd_nmreq;
		preattrerr = postattrerr = ENOENT;

		/* save the incoming uid before mapping, */
		/* for updating active user stats later */
		saved_uid = kauth_cred_getuid(nd->nd_cr);

		error = nfsrv_fhtovp(&nd->nd_fh, nd, &vp, &nx, &nxo);
		if (!error) {
		    /* update per-export stats */
		    NFSStatAdd64(&nx->nx_stats.ops, 1);

		    error = nfsrv_credcheck(nd, ctx, nx, nxo);
		    if (error)
		    	vnode_put(vp);
		}
		if (!error) {
		    if (nd->nd_vers == NFS_VER3) {
			nfsm_srv_pre_vattr_init(&preattr);
			preattrerr = vnode_getattr(vp, &preattr, ctx);
		    }
		    /* only regular files may be written */
		    if (vnode_vtype(vp) != VREG) {
			if (nd->nd_vers == NFS_VER3)
			    error = EINVAL;
			else
			    error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES;
		    }
		} else
		    vp = NULL;
		if (!error)
		    error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx, nxo, 1);

		if (nd->nd_stable == NFS_WRITE_UNSTABLE)
		    ioflags = IO_NODELOCKED;
		else if (nd->nd_stable == NFS_WRITE_DATASYNC)
		    ioflags = (IO_SYNC | IO_NODELOCKED);
		else
		    ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);

		if (!error && ((nd->nd_eoff - nd->nd_off) > 0)) {
		    /* one iovec per non-empty mbuf in the (possibly merged) request */
		    for (i=0, m=nmreq->nmc_mhead; m; m = mbuf_next(m))
			if (mbuf_len(m) > 0)
			    i++;

		    MALLOC(uio_bufp, char *, UIO_SIZEOF(i), M_TEMP, M_WAITOK);
		    if (uio_bufp)
			auio = uio_createwithbuffer(i, nd->nd_off, UIO_SYSSPACE,
						UIO_WRITE, uio_bufp, UIO_SIZEOF(i));
		    if (!uio_bufp || !auio)
			error = ENOMEM;
		    if (!error) {
			for (m = nmreq->nmc_mhead; m; m = mbuf_next(m))
			    if ((tlen = mbuf_len(m)) > 0)
				uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), tlen);
			error = VNOP_WRITE(vp, auio, ioflags, ctx);
			OSAddAtomic64(1, &nfsstats.srvvop_writes);

			/* update export stats */
			NFSStatAdd64(&nx->nx_stats.bytes_written, nd->nd_len);
			/* update active user stats */
			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, nd->nd_len);

#if CONFIG_FSE
			if (nfsrv_fsevents_enabled && !error && need_fsevent(FSE_CONTENT_MODIFIED, vp))
				nfsrv_modified(vp, ctx);
#endif
		    }
		    if (uio_bufp) {
			FREE(uio_bufp, M_TEMP);
			uio_bufp = NULL;
		    }
		}
		if (vp) {
		    nfsm_srv_vattr_init(&postattr, nd->nd_vers);
		    postattrerr = vnode_getattr(vp, &postattr, ctx);
		    vnode_put(vp);
		}

		/*
		 * Loop around generating replies for all write rpcs that have
		 * now been completed.
		 */
		/* swp is the request that was written; nd walks swp and its coalesce list */
		swp = nd;
		do {
		    if (error) {
			nd->nd_repstat = error;
			error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers));
			if (!error && (nd->nd_vers == NFS_VER3)) {
			    nfsm_chain_add_wcc_data(error, nd, &nmrep,
				    preattrerr, &preattr, postattrerr, &postattr);
			}
		    } else {
			nd->nd_repstat = error;
			error = nfsrv_rephead(nd, slp, &nmrep, NFSX_PREOPATTR(nd->nd_vers) +
			    NFSX_POSTOPORFATTR(nd->nd_vers) + 2 * NFSX_UNSIGNED +
			    NFSX_WRITEVERF(nd->nd_vers));
			if (!error && (nd->nd_vers == NFS_VER3)) {
			    nfsm_chain_add_wcc_data(error, nd, &nmrep,
				    preattrerr, &preattr, postattrerr, &postattr);
			    nfsm_chain_add_32(error, &nmrep, nd->nd_len);
			    nfsm_chain_add_32(error, &nmrep, nd->nd_stable);
			    /* write verifier */
			    nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_sec);
			    nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_usec);
			} else if (!error) {
			    error = nfsm_chain_add_fattr(nd, &nmrep, &postattr);
			}
		    }
		    nfsm_chain_build_done(error, &nmrep);
		    /*
		     * NOTE(review): this jumps to the nfsmerr label inside the
		     * "if (*ndp)" block above.  That path falls through to
		     * lck_mtx_lock(&slp->ns_wgmutex), yet ns_wgmutex is already
		     * held here and nd is currently off both queues -- confirm
		     * that this path is safe.
		     */
		    nfsmerr_if(error);
		    nd->nd_mrep = nmrep.nmc_mhead;

		    /*
		     * Done. Put it at the head of the timer queue so that
		     * the final phase can return the reply.
		     */
		    if (nd != swp) {
			nd->nd_time = 1;
			LIST_INSERT_HEAD(&slp->ns_tq, nd, nd_tq);
		    }
		    /* coalesced requests are linked onto nd_coalesce through nd_tq */
		    nd = swp->nd_coalesce.lh_first;
		    if (nd) {
			LIST_REMOVE(nd, nd_tq);
		    }
		} while (nd);
		swp->nd_time = 1;
		LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
		/* the queue was modified: rescan it from the top with a fresh time */
		goto loop1;
	}

	/*
	 * Search for a reply to return.
	 */
	for (nd = slp->ns_tq.lh_first; nd; nd = nd->nd_tq.le_next)
		if (nd->nd_mrep) {
		    LIST_REMOVE(nd, nd_tq);
		    *mrepp = nd->nd_mrep;
		    *ndp = nd;
		    break;
		}
	/* next due time for this socket, or 0 when nothing is queued */
	slp->ns_wgtime = slp->ns_tq.lh_first ? slp->ns_tq.lh_first->nd_time : 0;
	lck_mtx_unlock(&slp->ns_wgmutex);

	/*
	 * If we've just created a write pending gather,
	 * start the timer to check on it soon to make sure
	 * the write will be completed.
	 *
	 * Add/Remove the socket in the nfsrv_sockwg queue as needed.
	 */
	lck_mtx_lock(nfsd_mutex);
	if (slp->ns_wgtime) {
		/* tqe_next == SLPNOLIST marks a socket that is not on nfsrv_sockwg */
		if (slp->ns_wgq.tqe_next == SLPNOLIST) {
			TAILQ_INSERT_HEAD(&nfsrv_sockwg, slp, ns_wgq);
		}
		if (!nfsrv_wg_timer_on) {
			nfsrv_wg_timer_on = 1;
			nfs_interval_timer_start(nfsrv_wg_timer_call,
				NFSRV_WGATHERDELAY);
		}
	} else if (slp->ns_wgq.tqe_next != SLPNOLIST) {
		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
		slp->ns_wgq.tqe_next = SLPNOLIST;
	}
	lck_mtx_unlock(nfsd_mutex);

	return (0);
}
1678
1679/*
1680 * Coalesce the write request nd into owp. To do this we must:
1681 * - remove nd from the queues
1682 * - merge nd->nd_nmreq into owp->nd_nmreq
1683 * - update the nd_eoff and nd_stable for owp
1684 * - put nd on owp's nd_coalesce list
1685 */
1686int
1687nfsrv_wg_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nd)
1688{
1689	int overlap, error;
1690	mbuf_t mp, mpnext;
1691	struct nfsrv_descript *p;
1692
1693	LIST_REMOVE(nd, nd_hash);
1694	LIST_REMOVE(nd, nd_tq);
1695	if (owp->nd_eoff < nd->nd_eoff) {
1696		overlap = owp->nd_eoff - nd->nd_off;
1697		if (overlap < 0)
1698			return (EIO);
1699		if (overlap > 0)
1700			mbuf_adj(nd->nd_nmreq.nmc_mhead, overlap);
1701		mp = owp->nd_nmreq.nmc_mhead;
1702		while ((mpnext = mbuf_next(mp)))
1703			mp = mpnext;
1704		error = mbuf_setnext(mp, nd->nd_nmreq.nmc_mhead);
1705		if (error)
1706			return (error);
1707		owp->nd_eoff = nd->nd_eoff;
1708	} else {
1709		mbuf_freem(nd->nd_nmreq.nmc_mhead);
1710	}
1711	nd->nd_nmreq.nmc_mhead = NULL;
1712	nd->nd_nmreq.nmc_mcur = NULL;
1713	if (nd->nd_stable == NFS_WRITE_FILESYNC)
1714		owp->nd_stable = NFS_WRITE_FILESYNC;
1715	else if ((nd->nd_stable == NFS_WRITE_DATASYNC) &&
1716		 (owp->nd_stable == NFS_WRITE_UNSTABLE))
1717		owp->nd_stable = NFS_WRITE_DATASYNC;
1718	LIST_INSERT_HEAD(&owp->nd_coalesce, nd, nd_tq);
1719
1720	/*
1721	 * If nd had anything else coalesced into it, transfer them
1722	 * to owp, otherwise their replies will never get sent.
1723	 */
1724	while ((p = nd->nd_coalesce.lh_first)) {
1725		LIST_REMOVE(p, nd_tq);
1726		LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1727	}
1728	return (0);
1729}
1730
1731/*
1732 * Scan the write gathering queues for writes that need to be
1733 * completed now.
1734 */
1735void
1736nfsrv_wg_timer(__unused void *param0, __unused void *param1)
1737{
1738	struct timeval now;
1739	uint64_t cur_usec, next_usec;
1740	int interval;
1741	struct nfsrv_sock *slp;
1742	int writes_pending = 0;
1743
1744	microuptime(&now);
1745	cur_usec = (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_usec;
1746	next_usec = cur_usec + (NFSRV_WGATHERDELAY * 1000);
1747
1748	lck_mtx_lock(nfsd_mutex);
1749	TAILQ_FOREACH(slp, &nfsrv_sockwg, ns_wgq) {
1750		if (slp->ns_wgtime) {
1751			writes_pending++;
1752			if (slp->ns_wgtime <= cur_usec) {
1753				lck_rw_lock_exclusive(&slp->ns_rwlock);
1754				slp->ns_flag |= SLP_DOWRITES;
1755				lck_rw_done(&slp->ns_rwlock);
1756				nfsrv_wakenfsd(slp);
1757				continue;
1758			}
1759			if (slp->ns_wgtime < next_usec)
1760				next_usec = slp->ns_wgtime;
1761		}
1762	}
1763
1764	if (writes_pending == 0) {
1765		nfsrv_wg_timer_on = 0;
1766		lck_mtx_unlock(nfsd_mutex);
1767		return;
1768	}
1769	lck_mtx_unlock(nfsd_mutex);
1770
1771	/*
1772	 * Return the number of msec to wait again
1773	 */
1774	interval = (next_usec - cur_usec) / 1000;
1775	if (interval < 1)
1776		interval = 1;
1777	nfs_interval_timer_start(nfsrv_wg_timer_call, interval);
1778}
1779
/*
 * Sort the num group IDs in list into increasing numerical order,
 * in place.
 * (Insertion sort by Chris Torek, who was grossed out by the bubble sort
 *  that used to be here.)
 */
void
nfsrv_group_sort(gid_t *list, int num)
{
	int pos, slot;
	gid_t key;

	for (pos = 1; pos < num; pos++) {
		key = list[pos];
		/* shift larger values up one place to open a slot for key */
		slot = pos;
		while (slot > 0 && key < list[slot - 1]) {
			list[slot] = list[slot - 1];
			slot--;
		}
		list[slot] = key;
	}
}
1800
1801/*
1802 * nfs create service
1803 * now does a truncate to 0 length via. setattr if it already exists
1804 */
1805int
1806nfsrv_create(
1807	struct nfsrv_descript *nd,
1808	struct nfsrv_sock *slp,
1809	vfs_context_t ctx,
1810	mbuf_t *mrepp)
1811{
1812	struct vnode_attr dpreattr, dpostattr, postattr;
1813	struct vnode_attr va, *vap = &va;
1814	struct nameidata ni;
1815	int error, rdev, dpreattrerr, dpostattrerr, postattrerr;
1816	int how, exclusive_flag;
1817	uint32_t len = 0, cnflags;
1818	vnode_t vp, dvp, dirp;
1819	struct nfs_filehandle nfh;
1820	struct nfs_export *nx = NULL;
1821	struct nfs_export_options *nxo;
1822	u_quad_t tempsize;
1823	u_char cverf[NFSX_V3CREATEVERF];
1824	uid_t saved_uid;
1825	struct nfsm_chain *nmreq, nmrep;
1826
1827	error = 0;
1828	dpreattrerr = dpostattrerr = postattrerr = ENOENT;
1829	nmreq = &nd->nd_nmreq;
1830	nfsm_chain_null(&nmrep);
1831	vp = dvp = dirp = NULL;
1832	exclusive_flag = 0;
1833	ni.ni_cnd.cn_nameiop = 0;
1834	rdev = 0;
1835
1836	saved_uid = kauth_cred_getuid(nd->nd_cr);
1837
1838	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
1839	nfsm_chain_get_32(error, nmreq, len);
1840	nfsm_name_len_check(error, nd, len);
1841	nfsmerr_if(error);
1842
1843	ni.ni_cnd.cn_nameiop = CREATE;
1844#if CONFIG_TRIGGERS
1845	ni.ni_op = OP_LINK;
1846#endif
1847	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
1848	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
1849	if (!error) {
1850		error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo);
1851		if (nx != NULL) {
1852			/* update export stats */
1853			NFSStatAdd64(&nx->nx_stats.ops, 1);
1854
1855			/* update active user stats */
1856			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);
1857		}
1858	}
1859	if (dirp) {
1860		if (nd->nd_vers == NFS_VER3) {
1861			nfsm_srv_pre_vattr_init(&dpreattr);
1862			dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx);
1863		} else {
1864			vnode_put(dirp);
1865			dirp = NULL;
1866		}
1867	}
1868
1869	if (error) {
1870		ni.ni_cnd.cn_nameiop = 0;
1871		goto nfsmerr;
1872	}
1873
1874	dvp = ni.ni_dvp;
1875	vp = ni.ni_vp;
1876	VATTR_INIT(vap);
1877
1878	if (nd->nd_vers == NFS_VER3) {
1879		nfsm_chain_get_32(error, nmreq, how);
1880		nfsmerr_if(error);
1881		switch (how) {
1882		case NFS_CREATE_GUARDED:
1883			if (vp) {
1884				error = EEXIST;
1885				break;
1886			}
1887		case NFS_CREATE_UNCHECKED:
1888			error = nfsm_chain_get_sattr(nd, nmreq, vap);
1889			break;
1890		case NFS_CREATE_EXCLUSIVE:
1891			nfsm_chain_get_opaque(error, nmreq, NFSX_V3CREATEVERF, cverf);
1892			exclusive_flag = 1;
1893			if (vp == NULL)
1894				VATTR_SET(vap, va_mode, 0);
1895			break;
1896		};
1897		VATTR_SET(vap, va_type, VREG);
1898	} else {
1899	        enum vtype v_type;
1900
1901		error = nfsm_chain_get_sattr(nd, nmreq, vap);
1902		nfsmerr_if(error);
1903		v_type = vap->va_type;
1904		if (v_type == VNON)
1905			v_type = VREG;
1906		VATTR_SET(vap, va_type, v_type);
1907
1908		switch (v_type) {
1909		case VCHR:
1910		case VBLK:
1911		case VFIFO:
1912			rdev = vap->va_data_size;
1913			VATTR_CLEAR_ACTIVE(vap, va_data_size);
1914			break;
1915		default:
1916			break;
1917		};
1918	}
1919	nfsmerr_if(error);
1920
1921	/*
1922	 * If it doesn't exist, create it
1923	 * otherwise just truncate to 0 length
1924	 *   should I set the mode too ??
1925	 */
1926	if (vp == NULL) {
1927	        kauth_acl_t xacl = NULL;
1928
1929		/* authorize before creating */
1930		error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0);
1931
1932		/* construct ACL and handle inheritance */
1933		if (!error) {
1934			error = kauth_acl_inherit(dvp,
1935			    NULL,
1936			    &xacl,
1937			    0 /* !isdir */,
1938			    ctx);
1939
1940			if (!error && xacl != NULL)
1941			        VATTR_SET(vap, va_acl, xacl);
1942		}
1943		VATTR_CLEAR_ACTIVE(vap, va_data_size);
1944		VATTR_CLEAR_ACTIVE(vap, va_access_time);
1945		/*
1946		 * Server policy is to alway use the mapped rpc credential for
1947		 * file system object creation. This has the nice side effect of
1948		 * enforcing BSD creation semantics
1949		 */
1950		VATTR_CLEAR_ACTIVE(vap, va_uid);
1951		VATTR_CLEAR_ACTIVE(vap, va_gid);
1952
1953		/* validate new-file security information */
1954		if (!error)
1955			error = vnode_authattr_new(dvp, vap, 0, ctx);
1956
1957		if (vap->va_type == VREG || vap->va_type == VSOCK) {
1958
1959			if (!error)
1960				error = VNOP_CREATE(dvp, &vp, &ni.ni_cnd, vap, ctx);
1961
1962			if (!error && !VATTR_ALL_SUPPORTED(vap))
1963			        /*
1964				 * If some of the requested attributes weren't handled by the VNOP,
1965				 * use our fallback code.
1966				 */
1967				error = vnode_setattr_fallback(vp, vap, ctx);
1968
1969			if (xacl != NULL)
1970				kauth_acl_free(xacl);
1971
1972			if (!error) {
1973				if (exclusive_flag) {
1974					exclusive_flag = 0;
1975					VATTR_INIT(vap);
1976					bcopy(cverf, (caddr_t)&vap->va_access_time,
1977						NFSX_V3CREATEVERF);
1978					VATTR_SET_ACTIVE(vap, va_access_time);
1979					// skip authorization, as this is an
1980					// NFS internal implementation detail.
1981					error = vnode_setattr(vp, vap, ctx);
1982				}
1983
1984#if CONFIG_FSE
1985				if (nfsrv_fsevents_enabled && need_fsevent(FSE_CREATE_FILE, vp)) {
1986				        add_fsevent(FSE_CREATE_FILE, ctx,
1987						    FSE_ARG_VNODE, vp,
1988						    FSE_ARG_DONE);
1989				}
1990#endif
1991			}
1992
1993		} else if (vap->va_type == VCHR || vap->va_type == VBLK ||
1994			vap->va_type == VFIFO) {
1995			if (vap->va_type == VCHR && rdev == (int)0xffffffff)
1996				VATTR_SET(vap, va_type, VFIFO);
1997			if (vap->va_type != VFIFO) {
1998				error = suser(nd->nd_cr, NULL);
1999				nfsmerr_if(error);
2000			}
2001			VATTR_SET(vap, va_rdev, (dev_t)rdev);
2002
2003			error = VNOP_MKNOD(dvp, &vp, &ni.ni_cnd, vap, ctx);
2004
2005			if (xacl != NULL)
2006				kauth_acl_free(xacl);
2007
2008			nfsmerr_if(error);
2009
2010			if (vp) {
2011				vnode_recycle(vp);
2012				vnode_put(vp);
2013				vp = NULL;
2014			}
2015			ni.ni_cnd.cn_nameiop = LOOKUP;
2016#if CONFIG_TRIGGERS
2017			ni.ni_op = OP_LOOKUP;
2018#endif
2019			ni.ni_cnd.cn_flags &= ~LOCKPARENT;
2020			ni.ni_cnd.cn_context = ctx;
2021			ni.ni_startdir = dvp;
2022			ni.ni_usedvp   = dvp;
2023			cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */
2024			while ((error = lookup(&ni)) == ERECYCLE) {
2025				ni.ni_cnd.cn_flags = cnflags;
2026				ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf;
2027				ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp;
2028			}
2029			if (!error) {
2030				if (ni.ni_cnd.cn_flags & ISSYMLINK)
2031					error = EINVAL;
2032				vp = ni.ni_vp;
2033			}
2034			nfsmerr_if(error);
2035		} else {
2036			error = ENXIO;
2037		}
2038		/*
2039		 * nameidone has to happen before we vnode_put(dvp)
2040		 * since it may need to release the fs_nodelock on the dvp
2041		 */
2042		nameidone(&ni);
2043		ni.ni_cnd.cn_nameiop = 0;
2044
2045		vnode_put(dvp);
2046	} else {
2047	        /*
2048		 * nameidone has to happen before we vnode_put(dvp)
2049		 * since it may need to release the fs_nodelock on the dvp
2050		 */
2051	        nameidone(&ni);
2052		ni.ni_cnd.cn_nameiop = 0;
2053
2054		vnode_put(dvp);
2055
2056		if (!error && VATTR_IS_ACTIVE(vap, va_data_size)) {
2057			error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA,
2058			    ctx, nxo, 0);
2059			if (!error) {
2060				tempsize = vap->va_data_size;
2061				VATTR_INIT(vap);
2062				VATTR_SET(vap, va_data_size, tempsize);
2063				error = vnode_setattr(vp, vap, ctx);
2064			}
2065		}
2066	}
2067	if (!error) {
2068		error = nfsrv_vptofh(nx, nd->nd_vers, NULL, vp, ctx, &nfh);
2069		if (!error) {
2070			nfsm_srv_vattr_init(&postattr, nd->nd_vers);
2071			postattrerr = vnode_getattr(vp, &postattr, ctx);
2072			if (nd->nd_vers == NFS_VER2)
2073				error = postattrerr;
2074		}
2075	}
2076	if (vp)
2077	        vnode_put(vp);
2078
2079	if (nd->nd_vers == NFS_VER3) {
2080		if (exclusive_flag && !error &&
2081		    bcmp(cverf, &postattr.va_access_time, NFSX_V3CREATEVERF))
2082			error = EEXIST;
2083		nfsm_srv_vattr_init(&dpostattr, NFS_VER3);
2084		dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx);
2085		vnode_put(dirp);
2086		dirp = NULL;
2087	}
2088
2089nfsmerr:
2090	/* assemble reply */
2091	nd->nd_repstat = error;
2092	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) +
2093			NFSX_FATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers));
2094	nfsmout_if(error);
2095	*mrepp = nmrep.nmc_mhead;
2096	nfsmout_on_status(nd, error);
2097	if (nd->nd_vers == NFS_VER3) {
2098		if (!nd->nd_repstat) {
2099			nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len);
2100			nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr);
2101		}
2102		nfsm_chain_add_wcc_data(error, nd, &nmrep,
2103			dpreattrerr, &dpreattr, dpostattrerr, &dpostattr);
2104	} else {
2105		nfsm_chain_add_fh(error, &nmrep, NFS_VER2, nfh.nfh_fhp, nfh.nfh_len);
2106		if (!error)
2107			error = nfsm_chain_add_fattr(nd, &nmrep, &postattr);
2108	}
2109nfsmout:
2110	nfsm_chain_build_done(error, &nmrep);
2111	if (ni.ni_cnd.cn_nameiop) {
2112	        /*
2113		 * nameidone has to happen before we vnode_put(dvp)
2114		 * since it may need to release the fs_nodelock on the dvp
2115		 */
2116		nameidone(&ni);
2117
2118		if (vp)
2119			vnode_put(vp);
2120		vnode_put(dvp);
2121	}
2122	if (dirp)
2123		vnode_put(dirp);
2124	if (error) {
2125		nfsm_chain_cleanup(&nmrep);
2126		*mrepp = NULL;
2127	}
2128	return (error);
2129}
2130
2131/*
2132 * nfs v3 mknod service
2133 */
2134int
2135nfsrv_mknod(
2136	struct nfsrv_descript *nd,
2137	struct nfsrv_sock *slp,
2138	vfs_context_t ctx,
2139	mbuf_t *mrepp)
2140{
2141	struct vnode_attr dpreattr, dpostattr, postattr;
2142	struct vnode_attr va, *vap = &va;
2143	struct nameidata ni;
2144	int error, dpreattrerr, dpostattrerr, postattrerr;
2145	uint32_t len = 0, cnflags;
2146	u_int32_t major = 0, minor = 0;
2147	enum vtype vtyp;
2148	nfstype nvtype;
2149	vnode_t vp, dvp, dirp;
2150	struct nfs_filehandle nfh;
2151	struct nfs_export *nx = NULL;
2152	struct nfs_export_options *nxo;
2153	uid_t saved_uid;
2154	kauth_acl_t xacl = NULL;
2155	struct nfsm_chain *nmreq, nmrep;
2156
2157	error = 0;
2158	dpreattrerr = dpostattrerr = postattrerr = ENOENT;
2159	nmreq = &nd->nd_nmreq;
2160	nfsm_chain_null(&nmrep);
2161	vp = dvp = dirp = NULL;
2162	ni.ni_cnd.cn_nameiop = 0;
2163
2164	saved_uid = kauth_cred_getuid(nd->nd_cr);
2165
2166	nfsm_chain_get_fh_ptr(error, nmreq, NFS_VER3, nfh.nfh_fhp, nfh.nfh_len);
2167	nfsm_chain_get_32(error, nmreq, len);
2168	nfsm_name_len_check(error, nd, len);
2169	nfsmerr_if(error);
2170
2171	ni.ni_cnd.cn_nameiop = CREATE;
2172#if CONFIG_TRIGGERS
2173	ni.ni_op = OP_LINK;
2174#endif
2175	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
2176	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
2177	if (!error) {
2178		error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo);
2179		if (nx != NULL) {
2180			/* update export stats */
2181			NFSStatAdd64(&nx->nx_stats.ops, 1);
2182
2183			/* update active user stats */
2184			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);
2185		}
2186	}
2187	if (dirp) {
2188		nfsm_srv_pre_vattr_init(&dpreattr);
2189		dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx);
2190	}
2191	if (error) {
2192		ni.ni_cnd.cn_nameiop = 0;
2193		goto nfsmerr;
2194	}
2195
2196	dvp = ni.ni_dvp;
2197	vp = ni.ni_vp;
2198
2199	nfsm_chain_get_32(error, nmreq, nvtype);
2200	nfsmerr_if(error);
2201	vtyp = nfstov_type(nvtype, NFS_VER3);
2202	if (!error && (vtyp != VCHR) && (vtyp != VBLK) && (vtyp != VSOCK) && (vtyp != VFIFO)) {
2203		error = NFSERR_BADTYPE;
2204		goto out;
2205	}
2206
2207	VATTR_INIT(vap);
2208	error = nfsm_chain_get_sattr(nd, nmreq, vap);
2209	if ((vtyp == VCHR) || (vtyp == VBLK)) {
2210		nfsm_chain_get_32(error, nmreq, major);
2211		nfsm_chain_get_32(error, nmreq, minor);
2212		nfsmerr_if(error);
2213		VATTR_SET(vap, va_rdev, makedev(major, minor));
2214	}
2215	nfsmerr_if(error);
2216
2217	/*
2218	 * If it doesn't exist, create it.
2219	 */
2220	if (vp) {
2221		error = EEXIST;
2222		goto out;
2223	}
2224	VATTR_SET(vap, va_type, vtyp);
2225
2226	/* authorize before creating */
2227	error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0);
2228
2229	/* construct ACL and handle inheritance */
2230	if (!error) {
2231		error = kauth_acl_inherit(dvp,
2232		    NULL,
2233		    &xacl,
2234		    0 /* !isdir */,
2235		    ctx);
2236
2237		if (!error && xacl != NULL)
2238		        VATTR_SET(vap, va_acl, xacl);
2239	}
2240	VATTR_CLEAR_ACTIVE(vap, va_data_size);
2241	VATTR_CLEAR_ACTIVE(vap, va_access_time);
2242	/*
2243	 * Server policy is to alway use the mapped rpc credential for
2244	 * file system object creation. This has the nice side effect of
2245	 * enforcing BSD creation semantics
2246	 */
2247	VATTR_CLEAR_ACTIVE(vap, va_uid);
2248	VATTR_CLEAR_ACTIVE(vap, va_gid);
2249
2250	/* validate new-file security information */
2251	if (!error)
2252		error = vnode_authattr_new(dvp, vap, 0, ctx);
2253
2254	if (error)
2255		goto out1;
2256
2257	if (vtyp == VSOCK) {
2258		error = VNOP_CREATE(dvp, &vp, &ni.ni_cnd, vap, ctx);
2259
2260		if (!error && !VATTR_ALL_SUPPORTED(vap))
2261		        /*
2262			 * If some of the requested attributes weren't handled by the VNOP,
2263			 * use our fallback code.
2264			 */
2265			error = vnode_setattr_fallback(vp, vap, ctx);
2266	} else {
2267		if (vtyp != VFIFO && (error = suser(nd->nd_cr, (u_short *)0)))
2268			goto out1;
2269		if ((error = VNOP_MKNOD(dvp, &vp, &ni.ni_cnd, vap, ctx)))
2270			goto out1;
2271		if (vp) {
2272			vnode_recycle(vp);
2273			vnode_put(vp);
2274			vp = NULL;
2275		}
2276		ni.ni_cnd.cn_nameiop = LOOKUP;
2277#if CONFIG_TRIGGERS
2278		ni.ni_op = OP_LOOKUP;
2279#endif
2280		ni.ni_cnd.cn_flags &= ~LOCKPARENT;
2281		ni.ni_cnd.cn_context = vfs_context_current();
2282		ni.ni_startdir = dvp;
2283		ni.ni_usedvp   = dvp;
2284		cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */
2285		while ((error = lookup(&ni)) == ERECYCLE) {
2286			ni.ni_cnd.cn_flags = cnflags;
2287			ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf;
2288			ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp;
2289		}
2290		if (!error) {
2291		        vp = ni.ni_vp;
2292			if (ni.ni_cnd.cn_flags & ISSYMLINK)
2293			        error = EINVAL;
2294		}
2295	}
2296out1:
2297	if (xacl != NULL)
2298		kauth_acl_free(xacl);
2299out:
2300	/*
2301	 * nameidone has to happen before we vnode_put(dvp)
2302	 * since it may need to release the fs_nodelock on the dvp
2303	 */
2304	nameidone(&ni);
2305	ni.ni_cnd.cn_nameiop = 0;
2306
2307	vnode_put(dvp);
2308	dvp = NULL;
2309
2310	if (!error) {
2311		error = nfsrv_vptofh(nx, NFS_VER3, NULL, vp, ctx, &nfh);
2312		if (!error) {
2313			nfsm_srv_vattr_init(&postattr, NFS_VER3);
2314			postattrerr = vnode_getattr(vp, &postattr, ctx);
2315		}
2316	}
2317	if (vp) {
2318		vnode_put(vp);
2319		vp = NULL;
2320	}
2321
2322	nfsm_srv_vattr_init(&dpostattr, NFS_VER3);
2323	dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx);
2324	vnode_put(dirp);
2325	dirp = NULL;
2326
2327nfsmerr:
2328	/* assemble reply */
2329	nd->nd_repstat = error;
2330	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(NFS_VER3, &nfh) +
2331			NFSX_POSTOPATTR(NFS_VER3) + NFSX_WCCDATA(NFS_VER3));
2332	nfsmout_if(error);
2333	*mrepp = nmrep.nmc_mhead;
2334	nfsmout_on_status(nd, error);
2335	if (!nd->nd_repstat) {
2336		nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len);
2337		nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr);
2338	}
2339	nfsm_chain_add_wcc_data(error, nd, &nmrep,
2340		dpreattrerr, &dpreattr, dpostattrerr, &dpostattr);
2341nfsmout:
2342	nfsm_chain_build_done(error, &nmrep);
2343	if (ni.ni_cnd.cn_nameiop) {
2344	        /*
2345		 * nameidone has to happen before we vnode_put(dvp)
2346		 * since it may need to release the fs_nodelock on the dvp
2347		 */
2348		nameidone(&ni);
2349
2350		if (vp)
2351			vnode_put(vp);
2352		vnode_put(dvp);
2353	}
2354	if (dvp)
2355		vnode_put(dvp);
2356	if (vp)
2357		vnode_put(vp);
2358	if (dirp)
2359		vnode_put(dirp);
2360	if (error) {
2361		nfsm_chain_cleanup(&nmrep);
2362		*mrepp = NULL;
2363	}
2364	return (error);
2365}
2366
2367/*
2368 * nfs remove service
2369 */
2370int
2371nfsrv_remove(
2372	struct nfsrv_descript *nd,
2373	struct nfsrv_sock *slp,
2374	vfs_context_t ctx,
2375	mbuf_t *mrepp)
2376{
2377	struct nameidata ni;
2378	int error, dpreattrerr, dpostattrerr;
2379	uint32_t len = 0;
2380	uid_t saved_uid;
2381	vnode_t vp, dvp, dirp = NULL;
2382	struct vnode_attr dpreattr, dpostattr;
2383	struct nfs_filehandle nfh;
2384	struct nfs_export *nx = NULL;
2385	struct nfs_export_options *nxo;
2386	struct nfsm_chain *nmreq, nmrep;
2387
2388	error = 0;
2389	dpreattrerr = dpostattrerr = ENOENT;
2390	saved_uid = kauth_cred_getuid(nd->nd_cr);
2391	dvp = vp = dirp = NULL;
2392	nmreq = &nd->nd_nmreq;
2393	nfsm_chain_null(&nmrep);
2394
2395	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
2396	nfsm_chain_get_32(error, nmreq, len);
2397	nfsm_name_len_check(error, nd, len);
2398	nfsmerr_if(error);
2399
2400	ni.ni_cnd.cn_nameiop = DELETE;
2401#if CONFIG_TRIGGERS
2402	ni.ni_op = OP_UNLINK;
2403#endif
2404	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
2405	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
2406	if (!error) {
2407		error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo);
2408		if (nx != NULL) {
2409			/* update export stats */
2410			NFSStatAdd64(&nx->nx_stats.ops, 1);
2411
2412			/* update active user stats */
2413			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);
2414		}
2415	}
2416	if (dirp) {
2417	        if (nd->nd_vers == NFS_VER3) {
2418			nfsm_srv_pre_vattr_init(&dpreattr);
2419			dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx);
2420		} else {
2421			vnode_put(dirp);
2422			dirp = NULL;
2423		}
2424	}
2425
2426	if (!error) {
2427		dvp = ni.ni_dvp;
2428		vp = ni.ni_vp;
2429
2430		if (vnode_vtype(vp) == VDIR)
2431			error = EPERM;		/* POSIX */
2432		else if (vnode_isvroot(vp))
2433		        /*
2434			 * The root of a mounted filesystem cannot be deleted.
2435			 */
2436			error = EBUSY;
2437		else
2438			error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx, nxo, 0);
2439
2440		if (!error) {
2441#if CONFIG_FSE
2442			char     *path = NULL;
2443			int       plen;
2444			fse_info  finfo;
2445
2446			if (nfsrv_fsevents_enabled && need_fsevent(FSE_DELETE, dvp)) {
2447				plen = MAXPATHLEN;
2448				if ((path = get_pathbuff()) && !vn_getpath(vp, path, &plen)) {
2449					get_fse_info(vp, &finfo, ctx);
2450				} else if (path) {
2451					release_pathbuff(path);
2452					path = NULL;
2453				}
2454			}
2455#endif
2456		    	error = VNOP_REMOVE(dvp, vp, &ni.ni_cnd, 0, ctx);
2457
2458#if CONFIG_FSE
2459			if (path) {
2460				if (!error)
2461					add_fsevent(FSE_DELETE, ctx,
2462						    FSE_ARG_STRING, plen, path,
2463						    FSE_ARG_FINFO, &finfo,
2464						    FSE_ARG_DONE);
2465			        release_pathbuff(path);
2466			}
2467#endif
2468		}
2469
2470		/*
2471		 * nameidone has to happen before we vnode_put(dvp)
2472		 * since it may need to release the fs_nodelock on the dvp
2473		 */
2474		nameidone(&ni);
2475
2476		vnode_put(vp);
2477	        vnode_put(dvp);
2478	}
2479
2480nfsmerr:
2481	if (dirp) {
2482		nfsm_srv_vattr_init(&dpostattr, nd->nd_vers);
2483	        dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx);
2484		vnode_put(dirp);
2485	}
2486
2487	/* assemble reply */
2488	nd->nd_repstat = error;
2489	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers));
2490	nfsmout_if(error);
2491	*mrepp = nmrep.nmc_mhead;
2492	nfsmout_on_status(nd, error);
2493	if (nd->nd_vers == NFS_VER3)
2494		nfsm_chain_add_wcc_data(error, nd, &nmrep,
2495			dpreattrerr, &dpreattr, dpostattrerr, &dpostattr);
2496nfsmout:
2497	nfsm_chain_build_done(error, &nmrep);
2498	if (error) {
2499		nfsm_chain_cleanup(&nmrep);
2500		*mrepp = NULL;
2501	}
2502	return (error);
2503}
2504
2505/*
2506 * nfs rename service
2507 */
2508int
2509nfsrv_rename(
2510	struct nfsrv_descript *nd,
2511	struct nfsrv_sock *slp,
2512	vfs_context_t ctx,
2513	mbuf_t *mrepp)
2514{
2515	kauth_cred_t saved_cred = NULL;
2516	uid_t saved_uid;
2517	int error;
2518	uint32_t fromlen, tolen;
2519	int fdpreattrerr, fdpostattrerr;
2520	int tdpreattrerr, tdpostattrerr;
2521	char *frompath = NULL, *topath = NULL;
2522	struct nameidata fromni, toni;
2523	vnode_t fvp, tvp, tdvp, fdvp, fdirp, tdirp;
2524	struct vnode_attr fdpreattr, fdpostattr;
2525	struct vnode_attr tdpreattr, tdpostattr;
2526	struct nfs_filehandle fnfh, tnfh;
2527	struct nfs_export *fnx, *tnx;
2528	struct nfs_export_options *fnxo, *tnxo;
2529	enum vtype fvtype, tvtype;
2530	int holding_mntlock;
2531	mount_t locked_mp;
2532	struct nfsm_chain *nmreq, nmrep;
2533	char *from_name, *to_name;
2534#if CONFIG_FSE
2535	int from_len=0, to_len=0;
2536	fse_info from_finfo, to_finfo;
2537#endif
2538	u_char didstats = 0;
2539	const char *oname;
2540
2541	error = 0;
2542	fdpreattrerr = fdpostattrerr = ENOENT;
2543	tdpreattrerr = tdpostattrerr = ENOENT;
2544	saved_uid = kauth_cred_getuid(nd->nd_cr);
2545	fromlen = tolen = 0;
2546	frompath = topath = NULL;
2547	fdirp = tdirp = NULL;
2548	nmreq = &nd->nd_nmreq;
2549	nfsm_chain_null(&nmrep);
2550
2551	/*
2552	 * these need to be set before calling any code
2553	 * that they may take us out through the error path.
2554	 */
2555	holding_mntlock = 0;
2556	fvp = tvp = NULL;
2557	fdvp = tdvp = NULL;
2558	locked_mp = NULL;
2559
2560	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, fnfh.nfh_fhp, fnfh.nfh_len);
2561	nfsm_chain_get_32(error, nmreq, fromlen);
2562	nfsm_name_len_check(error, nd, fromlen);
2563	nfsmerr_if(error);
2564	error = nfsm_chain_get_path_namei(nmreq, fromlen, &fromni);
2565	nfsmerr_if(error);
2566	frompath = fromni.ni_cnd.cn_pnbuf;
2567
2568	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, tnfh.nfh_fhp, tnfh.nfh_len);
2569	nfsm_chain_get_32(error, nmreq, tolen);
2570	nfsm_name_len_check(error, nd, tolen);
2571	nfsmerr_if(error);
2572	error = nfsm_chain_get_path_namei(nmreq, tolen, &toni);
2573	nfsmerr_if(error);
2574	topath = toni.ni_cnd.cn_pnbuf;
2575
2576	/*
2577	 * Remember our original uid so that we can reset cr_uid before
2578	 * the second nfsrv_namei() call, in case it is remapped.
2579	 */
2580	saved_cred = nd->nd_cr;
2581	kauth_cred_ref(saved_cred);
2582retry:
2583	fromni.ni_cnd.cn_nameiop = DELETE;
2584#if CONFIG_TRIGGERS
2585	fromni.ni_op = OP_UNLINK;
2586#endif
2587	fromni.ni_cnd.cn_flags = WANTPARENT;
2588
2589	fromni.ni_cnd.cn_pnbuf = frompath;
2590	frompath = NULL;
2591	fromni.ni_cnd.cn_pnlen = MAXPATHLEN;
2592	fromni.ni_cnd.cn_flags |= HASBUF;
2593
2594	error = nfsrv_namei(nd, ctx, &fromni, &fnfh, &fdirp, &fnx, &fnxo);
2595	if (error)
2596		goto out;
2597	fdvp = fromni.ni_dvp;
2598	fvp  = fromni.ni_vp;
2599
2600	if (fdirp) {
2601		if (nd->nd_vers == NFS_VER3) {
2602			nfsm_srv_pre_vattr_init(&fdpreattr);
2603			fdpreattrerr = vnode_getattr(fdirp, &fdpreattr, ctx);
2604		} else {
2605			vnode_put(fdirp);
2606			fdirp = NULL;
2607		}
2608	}
2609	fvtype = vnode_vtype(fvp);
2610
2611	/* reset credential if it was remapped */
2612	if (nd->nd_cr != saved_cred) {
2613		kauth_cred_ref(saved_cred);
2614		kauth_cred_unref(&nd->nd_cr);
2615		ctx->vc_ucred = nd->nd_cr = saved_cred;
2616	}
2617
2618	toni.ni_cnd.cn_nameiop = RENAME;
2619#if CONFIG_TRIGGERS
2620	toni.ni_op = OP_RENAME;
2621#endif
2622	toni.ni_cnd.cn_flags = WANTPARENT;
2623
2624	toni.ni_cnd.cn_pnbuf = topath;
2625	topath = NULL;
2626	toni.ni_cnd.cn_pnlen = MAXPATHLEN;
2627	toni.ni_cnd.cn_flags |= HASBUF;
2628
2629	if (fvtype == VDIR)
2630		toni.ni_cnd.cn_flags |= WILLBEDIR;
2631
2632	tnx = NULL;
2633	error = nfsrv_namei(nd, ctx, &toni, &tnfh, &tdirp, &tnx, &tnxo);
2634	if (error) {
2635		/*
2636		 * Translate error code for rename("dir1", "dir2/.").
2637		 */
2638	        if (error == EISDIR && fvtype == VDIR) {
2639		        if (nd->nd_vers == NFS_VER3)
2640			        error = EINVAL;
2641			else
2642			        error = ENOTEMPTY;
2643		}
2644		goto out;
2645	}
2646	tdvp = toni.ni_dvp;
2647	tvp  = toni.ni_vp;
2648
2649	if (!didstats) {
2650		/* update export stats once only */
2651		if (tnx != NULL) {
2652			/* update export stats */
2653			NFSStatAdd64(&tnx->nx_stats.ops, 1);
2654
2655			/* update active user stats */
2656			nfsrv_update_user_stat(tnx, nd, saved_uid, 1, 0, 0);
2657			didstats = 1;
2658		}
2659	}
2660
2661	if (tdirp) {
2662		if (nd->nd_vers == NFS_VER3) {
2663			nfsm_srv_pre_vattr_init(&tdpreattr);
2664			tdpreattrerr = vnode_getattr(tdirp, &tdpreattr, ctx);
2665		} else {
2666			vnode_put(tdirp);
2667			tdirp = NULL;
2668		}
2669	}
2670
2671	if (tvp != NULL) {
2672		tvtype = vnode_vtype(tvp);
2673
2674		if (fvtype == VDIR && tvtype != VDIR) {
2675			if (nd->nd_vers == NFS_VER3)
2676				error = EEXIST;
2677			else
2678				error = EISDIR;
2679			goto out;
2680		} else if (fvtype != VDIR && tvtype == VDIR) {
2681			if (nd->nd_vers == NFS_VER3)
2682				error = EEXIST;
2683			else
2684				error = ENOTDIR;
2685			goto out;
2686		}
2687		if (tvtype == VDIR && vnode_mountedhere(tvp)) {
2688			if (nd->nd_vers == NFS_VER3)
2689				error = EXDEV;
2690			else
2691				error = ENOTEMPTY;
2692			goto out;
2693		}
2694	}
2695	if (fvp == tdvp) {
2696		if (nd->nd_vers == NFS_VER3)
2697			error = EINVAL;
2698		else
2699			error = ENOTEMPTY;
2700		goto out;
2701	}
2702
2703	/*
2704	 * Authorization.
2705	 *
2706	 * If tvp is a directory and not the same as fdvp, or tdvp is not the same as fdvp,
2707	 * the node is moving between directories and we need rights to remove from the
2708	 * old and add to the new.
2709	 *
2710	 * If tvp already exists and is not a directory, we need to be allowed to delete it.
2711	 *
2712	 * Note that we do not inherit when renaming.  XXX this needs to be revisited to
2713	 * implement the deferred-inherit bit.
2714	 */
2715	{
2716		int moving = 0;
2717
2718		error = 0;
2719		if ((tvp != NULL) && vnode_isdir(tvp)) {
2720			if (tvp != fdvp)
2721				moving = 1;
2722		} else if (tdvp != fdvp) {
2723			moving = 1;
2724		}
2725		if (moving) {
2726			/* moving out of fdvp, must have delete rights */
2727			if ((error = nfsrv_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx, fnxo, 0)) != 0)
2728				goto auth_exit;
2729			/* moving into tdvp or tvp, must have rights to add */
2730			if ((error = nfsrv_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
2731				 NULL,
2732				 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
2733				 ctx, tnxo, 0)) != 0)
2734				goto auth_exit;
2735		} else {
2736			/* node staying in same directory, must be allowed to add new name */
2737			if ((error = nfsrv_authorize(fdvp, NULL,
2738				 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
2739				 ctx, fnxo, 0)) != 0)
2740				goto auth_exit;
2741		}
2742		/* overwriting tvp */
2743		if ((tvp != NULL) && !vnode_isdir(tvp) &&
2744		    ((error = nfsrv_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx, tnxo, 0)) != 0))
2745			goto auth_exit;
2746
2747		/* XXX more checks? */
2748
2749auth_exit:
2750		/* authorization denied */
2751		if (error != 0)
2752			goto out;
2753	}
2754
2755	if ((vnode_mount(fvp) != vnode_mount(tdvp)) ||
2756	    (tvp && (vnode_mount(fvp) != vnode_mount(tvp)))) {
2757		if (nd->nd_vers == NFS_VER3)
2758			error = EXDEV;
2759		else
2760			error = ENOTEMPTY;
2761		goto out;
2762	}
2763	/*
2764	 * The following edge case is caught here:
2765	 * (to cannot be a descendent of from)
2766	 *
2767	 *       o fdvp
2768	 *      /
2769	 *     /
2770	 *    o fvp
2771	 *     \
2772	 *      \
2773	 *       o tdvp
2774	 *      /
2775	 *     /
2776	 *    o tvp
2777	 */
2778	if (tdvp->v_parent == fvp) {
2779		if (nd->nd_vers == NFS_VER3)
2780			error = EXDEV;
2781		else
2782			error = ENOTEMPTY;
2783		goto out;
2784	}
2785	if (fvtype == VDIR && vnode_mountedhere(fvp)) {
2786		if (nd->nd_vers == NFS_VER3)
2787			error = EXDEV;
2788		else
2789			error = ENOTEMPTY;
2790		goto out;
2791	}
2792	/*
2793	 * If source is the same as the destination (that is the
2794	 * same vnode) then there is nothing to do...
2795	 * EXCEPT if the underlying file system supports case
2796	 * insensitivity and is case preserving.  In this case
2797	 * the file system needs to handle the special case of
2798	 * getting the same vnode as target (fvp) and source (tvp).
2799	 *
2800	 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
2801	 * and _PC_CASE_PRESERVING can have this exception, and they need to
2802	 * handle the special case of getting the same vnode as target and
2803	 * source.  NOTE: Then the target is unlocked going into vnop_rename,
2804	 * so not to cause locking problems. There is a single reference on tvp.
2805	 *
2806	 * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE
2807	 * that correct behaviour then is just to remove the source (link)
2808	 */
2809	if ((fvp == tvp) && (fdvp == tdvp)) {
2810		if (fromni.ni_cnd.cn_namelen == toni.ni_cnd.cn_namelen &&
2811	       	    !bcmp(fromni.ni_cnd.cn_nameptr, toni.ni_cnd.cn_nameptr,
2812			  fromni.ni_cnd.cn_namelen)) {
2813			goto out;
2814		}
2815	}
2816
2817	if (holding_mntlock && vnode_mount(fvp) != locked_mp) {
2818	        /*
2819		 * we're holding a reference and lock
2820		 * on locked_mp, but it no longer matches
2821		 * what we want to do... so drop our hold
2822		 */
2823		mount_unlock_renames(locked_mp);
2824		mount_drop(locked_mp, 0);
2825	        holding_mntlock = 0;
2826	}
2827	if (tdvp != fdvp && fvtype == VDIR) {
2828	        /*
2829		 * serialize renames that re-shape
2830		 * the tree... if holding_mntlock is
2831		 * set, then we're ready to go...
2832		 * otherwise we
2833		 * first need to drop the iocounts
2834		 * we picked up, second take the
2835		 * lock to serialize the access,
2836		 * then finally start the lookup
2837		 * process over with the lock held
2838		 */
2839	        if (!holding_mntlock) {
2840		        /*
2841			 * need to grab a reference on
2842			 * the mount point before we
2843			 * drop all the iocounts... once
2844			 * the iocounts are gone, the mount
2845			 * could follow
2846			 */
2847			locked_mp = vnode_mount(fvp);
2848			mount_ref(locked_mp, 0);
2849
2850			/* make a copy of to path to pass to nfsrv_namei() again */
2851			MALLOC_ZONE(topath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
2852			if (topath)
2853				bcopy(toni.ni_cnd.cn_pnbuf, topath, tolen + 1);
2854
2855			/*
2856			 * nameidone has to happen before we vnode_put(tdvp)
2857			 * since it may need to release the fs_nodelock on the tdvp
2858			 */
2859			nameidone(&toni);
2860
2861			if (tvp)
2862			        vnode_put(tvp);
2863			vnode_put(tdvp);
2864
2865			/* make a copy of from path to pass to nfsrv_namei() again */
2866			MALLOC_ZONE(frompath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
2867			if (frompath)
2868				bcopy(fromni.ni_cnd.cn_pnbuf, frompath, fromlen + 1);
2869
2870			/*
2871			 * nameidone has to happen before we vnode_put(fdvp)
2872			 * since it may need to release the fs_nodelock on the fdvp
2873			 */
2874			nameidone(&fromni);
2875
2876			vnode_put(fvp);
2877			vnode_put(fdvp);
2878
2879			if (fdirp) {
2880			        vnode_put(fdirp);
2881				fdirp = NULL;
2882			}
2883			if (tdirp) {
2884			        vnode_put(tdirp);
2885				tdirp = NULL;
2886			}
2887			mount_lock_renames(locked_mp);
2888			holding_mntlock = 1;
2889
2890			fvp = tvp = NULL;
2891			fdvp = tdvp = NULL;
2892
2893			fdpreattrerr = tdpreattrerr = ENOENT;
2894
2895			if (!topath || !frompath) {
2896				/* we couldn't allocate a path, so bail */
2897				error = ENOMEM;
2898				goto out;
2899			}
2900
2901			/* reset credential if it was remapped */
2902			if (nd->nd_cr != saved_cred) {
2903				kauth_cred_ref(saved_cred);
2904				kauth_cred_unref(&nd->nd_cr);
2905				ctx->vc_ucred = nd->nd_cr = saved_cred;
2906			}
2907
2908			goto retry;
2909		}
2910	} else {
2911	        /*
2912		 * when we dropped the iocounts to take
2913		 * the lock, we allowed the identity of
2914		 * the various vnodes to change... if they did,
2915		 * we may no longer be dealing with a rename
2916		 * that reshapes the tree... once we're holding
2917		 * the iocounts, the vnodes can't change type
2918		 * so we're free to drop the lock at this point
2919		 * and continue on
2920		 */
2921	        if (holding_mntlock) {
2922			mount_unlock_renames(locked_mp);
2923			mount_drop(locked_mp, 0);
2924		        holding_mntlock = 0;
2925		}
2926	}
2927
2928	// save these off so we can later verify that fvp is the same
2929	vnode_t oparent;
2930	oname   = fvp->v_name;
2931	oparent = fvp->v_parent;
2932
2933	/*
2934	 * If generating an fsevent, then
2935	 * stash any pre-rename info we may need.
2936	 */
2937#if CONFIG_FSE
2938	if (nfsrv_fsevents_enabled && need_fsevent(FSE_RENAME, fvp)) {
2939		int from_truncated = 0, to_truncated = 0;
2940
2941	        get_fse_info(fvp, &from_finfo, ctx);
2942		if (tvp)
2943		        get_fse_info(tvp, &to_finfo, ctx);
2944
2945	        from_name = get_pathbuff();
2946		if (from_name) {
2947			from_len = safe_getpath(fdvp, fromni.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
2948		}
2949
2950		to_name = from_name ? get_pathbuff() : NULL;
2951		if (to_name) {
2952			to_len = safe_getpath(tdvp, toni.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
2953		}
2954
2955		if (from_truncated || to_truncated) {
2956			from_finfo.mode |= FSE_TRUNCATED_PATH;
2957		}
2958
2959	} else {
2960	        from_name = NULL;
2961	        to_name   = NULL;
2962	}
2963#else /* CONFIG_FSE */
2964	from_name = NULL;
2965	to_name   = NULL;
2966#endif /* CONFIG_FSE */
2967
2968	error = VNOP_RENAME(fromni.ni_dvp, fromni.ni_vp, &fromni.ni_cnd,
2969			    toni.ni_dvp, toni.ni_vp, &toni.ni_cnd, ctx);
2970	/*
2971	 * fix up name & parent pointers.  note that we first
2972	 * check that fvp has the same name/parent pointers it
2973	 * had before the rename call... this is a 'weak' check
2974	 * at best...
2975	 */
2976	if (oname == fvp->v_name && oparent == fvp->v_parent) {
2977		int update_flags;
2978		update_flags = VNODE_UPDATE_NAME;
2979		if (fdvp != tdvp)
2980			update_flags |= VNODE_UPDATE_PARENT;
2981		vnode_update_identity(fvp, tdvp, toni.ni_cnd.cn_nameptr,
2982			toni.ni_cnd.cn_namelen, toni.ni_cnd.cn_hash, update_flags);
2983	}
2984
2985	/*
2986	 * If the rename is OK and we've got the paths
2987	 * then add an fsevent.
2988	 */
2989#if CONFIG_FSE
2990	if (nfsrv_fsevents_enabled && !error && from_name && to_name) {
2991	        if (tvp) {
2992		        add_fsevent(FSE_RENAME, ctx,
2993				    FSE_ARG_STRING, from_len, from_name,
2994				    FSE_ARG_FINFO, &from_finfo,
2995				    FSE_ARG_STRING, to_len, to_name,
2996				    FSE_ARG_FINFO, &to_finfo,
2997				    FSE_ARG_DONE);
2998		} else {
2999		        add_fsevent(FSE_RENAME, ctx,
3000				    FSE_ARG_STRING, from_len, from_name,
3001				    FSE_ARG_FINFO, &from_finfo,
3002				    FSE_ARG_STRING, to_len, to_name,
3003				    FSE_ARG_DONE);
3004		}
3005	}
3006	if (from_name)
3007	        release_pathbuff(from_name);
3008	if (to_name)
3009	        release_pathbuff(to_name);
3010#endif /* CONFIG_FSE */
3011	from_name = to_name = NULL;
3012
3013out:
3014	if (holding_mntlock) {
3015	        mount_unlock_renames(locked_mp);
3016		mount_drop(locked_mp, 0);
3017		holding_mntlock = 0;
3018	}
3019	if (tdvp) {
3020	        /*
3021		 * nameidone has to happen before we vnode_put(tdvp)
3022		 * since it may need to release the fs_nodelock on the tdvp
3023		 */
3024		nameidone(&toni);
3025		if (tvp)
3026		        vnode_put(tvp);
3027	        vnode_put(tdvp);
3028
3029		tdvp = NULL;
3030	}
3031	if (fdvp) {
3032		/*
3033		 * nameidone has to happen before we vnode_put(fdvp)
3034		 * since it may need to release the fs_nodelock on the fdvp
3035		 */
3036		nameidone(&fromni);
3037
3038		if (fvp)
3039		        vnode_put(fvp);
3040	        vnode_put(fdvp);
3041
3042		fdvp = NULL;
3043	}
3044	if (fdirp) {
3045		nfsm_srv_vattr_init(&fdpostattr, nd->nd_vers);
3046		fdpostattrerr = vnode_getattr(fdirp, &fdpostattr, ctx);
3047		vnode_put(fdirp);
3048		fdirp = NULL;
3049	}
3050	if (tdirp) {
3051		nfsm_srv_vattr_init(&tdpostattr, nd->nd_vers);
3052		tdpostattrerr = vnode_getattr(tdirp, &tdpostattr, ctx);
3053		vnode_put(tdirp);
3054		tdirp = NULL;
3055	}
3056
3057nfsmerr:
3058	/* assemble reply */
3059	nd->nd_repstat = error;
3060	error = nfsrv_rephead(nd, slp, &nmrep, 2 * NFSX_WCCDATA(nd->nd_vers));
3061	nfsmout_if(error);
3062	*mrepp = nmrep.nmc_mhead;
3063	nfsmout_on_status(nd, error);
3064	if (nd->nd_vers == NFS_VER3) {
3065		nfsm_chain_add_wcc_data(error, nd, &nmrep,
3066			fdpreattrerr, &fdpreattr, fdpostattrerr, &fdpostattr);
3067		nfsm_chain_add_wcc_data(error, nd, &nmrep,
3068			tdpreattrerr, &tdpreattr, tdpostattrerr, &tdpostattr);
3069	}
3070nfsmout:
3071	nfsm_chain_build_done(error, &nmrep);
3072	if (holding_mntlock) {
3073	        mount_unlock_renames(locked_mp);
3074		mount_drop(locked_mp, 0);
3075	}
3076	if (tdvp) {
3077		/*
3078		 * nameidone has to happen before we vnode_put(tdvp)
3079		 * since it may need to release the fs_nodelock on the tdvp
3080		 */
3081		nameidone(&toni);
3082
3083		if (tvp)
3084		        vnode_put(tvp);
3085	        vnode_put(tdvp);
3086	}
3087	if (fdvp) {
3088		/*
3089		 * nameidone has to happen before we vnode_put(fdvp)
3090		 * since it may need to release the fs_nodelock on the fdvp
3091		 */
3092		nameidone(&fromni);
3093
3094		if (fvp)
3095		        vnode_put(fvp);
3096	        vnode_put(fdvp);
3097	}
3098	if (fdirp)
3099		vnode_put(fdirp);
3100	if (tdirp)
3101		vnode_put(tdirp);
3102	if (frompath)
3103		FREE_ZONE(frompath, MAXPATHLEN, M_NAMEI);
3104	if (topath)
3105		FREE_ZONE(topath, MAXPATHLEN, M_NAMEI);
3106	if (saved_cred)
3107		kauth_cred_unref(&saved_cred);
3108	if (error) {
3109		nfsm_chain_cleanup(&nmrep);
3110		*mrepp = NULL;
3111	}
3112	return (error);
3113}
3114
3115/*
3116 * nfs link service
3117 */
3118int
3119nfsrv_link(
3120	struct nfsrv_descript *nd,
3121	struct nfsrv_sock *slp,
3122	vfs_context_t ctx,
3123	mbuf_t *mrepp)
3124{
3125	struct nameidata ni;
3126	int error, dpreattrerr, dpostattrerr, attrerr;
3127	uint32_t len = 0;
3128	vnode_t vp, xp, dvp, dirp;
3129	struct vnode_attr dpreattr, dpostattr, attr;
3130	struct nfs_filehandle nfh, dnfh;
3131	struct nfs_export *nx;
3132	struct nfs_export_options *nxo;
3133	struct nfsm_chain *nmreq, nmrep;
3134
3135	error = 0;
3136	dpreattrerr = dpostattrerr = attrerr = ENOENT;
3137	vp = xp = dvp = dirp = NULL;
3138	nmreq = &nd->nd_nmreq;
3139	nfsm_chain_null(&nmrep);
3140
3141	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
3142	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, dnfh.nfh_fhp, dnfh.nfh_len);
3143	nfsm_chain_get_32(error, nmreq, len);
3144	nfsm_name_len_check(error, nd, len);
3145	nfsmerr_if(error);
3146	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
3147	nfsmerr_if(error);
3148
3149	/* update export stats */
3150	NFSStatAdd64(&nx->nx_stats.ops, 1);
3151
3152	/* update active user stats */
3153	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);
3154
3155	error = nfsrv_credcheck(nd, ctx, nx, nxo);
3156	nfsmerr_if(error);
3157
3158	/* we're not allowed to link to directories... */
3159	if (vnode_vtype(vp) == VDIR) {
3160		error = EPERM;		/* POSIX */
3161		goto out;
3162	}
3163
3164	/* ...or to anything that kauth doesn't want us to (eg. immutable items) */
3165	if ((error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx, nxo, 0)) != 0)
3166		goto out;
3167
3168	ni.ni_cnd.cn_nameiop = CREATE;
3169#if CONFIG_TRIGGERS
3170	ni.ni_op = OP_LINK;
3171#endif
3172	ni.ni_cnd.cn_flags = LOCKPARENT;
3173	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
3174	if (!error)
3175		error = nfsrv_namei(nd, ctx, &ni, &dnfh, &dirp, &nx, &nxo);
3176	if (dirp) {
3177		if (nd->nd_vers == NFS_VER3) {
3178			nfsm_srv_pre_vattr_init(&dpreattr);
3179			dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx);
3180		} else {
3181			vnode_put(dirp);
3182			dirp = NULL;
3183		}
3184	}
3185	if (error)
3186		goto out;
3187	dvp = ni.ni_dvp;
3188	xp = ni.ni_vp;
3189
3190	if (xp != NULL)
3191		error = EEXIST;
3192	else if (vnode_mount(vp) != vnode_mount(dvp))
3193		error = EXDEV;
3194	else
3195		error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0);
3196
3197	if (!error)
3198		error = VNOP_LINK(vp, dvp, &ni.ni_cnd, ctx);
3199
3200#if CONFIG_FSE
3201	if (nfsrv_fsevents_enabled && !error && need_fsevent(FSE_CREATE_FILE, dvp)) {
3202		char *target_path = NULL;
3203		int plen, truncated=0;
3204		fse_info finfo;
3205
3206		/* build the path to the new link file */
3207		target_path = get_pathbuff();
3208		if (target_path) {
3209			plen = safe_getpath(dvp, ni.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
3210
3211			if (get_fse_info(vp, &finfo, ctx) == 0) {
3212				if (truncated) {
3213					finfo.mode |= FSE_TRUNCATED_PATH;
3214				}
3215				add_fsevent(FSE_CREATE_FILE, ctx,
3216					    FSE_ARG_STRING, plen, target_path,
3217					    FSE_ARG_FINFO, &finfo,
3218					    FSE_ARG_DONE);
3219			}
3220
3221			release_pathbuff(target_path);
3222		}
3223	}
3224#endif
3225
3226	/*
3227	 * nameidone has to happen before we vnode_put(dvp)
3228	 * since it may need to release the fs_nodelock on the dvp
3229	 */
3230	nameidone(&ni);
3231
3232	if (xp)
3233		vnode_put(xp);
3234	vnode_put(dvp);
3235out:
3236	if (nd->nd_vers == NFS_VER3) {
3237		nfsm_srv_vattr_init(&attr, NFS_VER3);
3238		attrerr = vnode_getattr(vp, &attr, ctx);
3239	}
3240	if (dirp) {
3241		nfsm_srv_vattr_init(&dpostattr, nd->nd_vers);
3242		dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx);
3243		vnode_put(dirp);
3244		dirp = NULL;
3245	}
3246	vnode_put(vp);
3247	vp = NULL;
3248
3249nfsmerr:
3250	/* assemble reply */
3251	nd->nd_repstat = error;
3252	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers));
3253	nfsmout_if(error);
3254	*mrepp = nmrep.nmc_mhead;
3255	nfsmout_on_status(nd, error);
3256	if (nd->nd_vers == NFS_VER3) {
3257		nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
3258		nfsm_chain_add_wcc_data(error, nd, &nmrep,
3259			dpreattrerr, &dpreattr, dpostattrerr, &dpostattr);
3260	}
3261nfsmout:
3262	nfsm_chain_build_done(error, &nmrep);
3263	if (vp)
3264		vnode_put(vp);
3265	if (error) {
3266		nfsm_chain_cleanup(&nmrep);
3267		*mrepp = NULL;
3268	}
3269	return (error);
3270}
3271
3272/*
3273 * nfs symbolic link service
3274 */
3275int
3276nfsrv_symlink(
3277	struct nfsrv_descript *nd,
3278	struct nfsrv_sock *slp,
3279	vfs_context_t ctx,
3280	mbuf_t *mrepp)
3281{
3282	struct vnode_attr dpreattr, dpostattr, postattr;
3283	struct vnode_attr va, *vap = &va;
3284	struct nameidata ni;
3285	int error, dpreattrerr, dpostattrerr, postattrerr;
3286	uint32_t len = 0, linkdatalen, cnflags;
3287	uid_t saved_uid;
3288	char *linkdata;
3289	vnode_t vp, dvp, dirp;
3290	struct nfs_filehandle nfh;
3291	struct nfs_export *nx = NULL;
3292	struct nfs_export_options *nxo;
3293	uio_t auio = NULL;
3294	char uio_buf[ UIO_SIZEOF(1) ];
3295	struct nfsm_chain *nmreq, nmrep;
3296
3297	error = 0;
3298	dpreattrerr = dpostattrerr = postattrerr = ENOENT;
3299	nmreq = &nd->nd_nmreq;
3300	nfsm_chain_null(&nmrep);
3301	linkdata = NULL;
3302	dirp = NULL;
3303
3304	saved_uid = kauth_cred_getuid(nd->nd_cr);
3305
3306	ni.ni_cnd.cn_nameiop = 0;
3307	vp = dvp = NULL;
3308
3309	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
3310	nfsm_chain_get_32(error, nmreq, len);
3311	nfsm_name_len_check(error, nd, len);
3312	nfsmerr_if(error);
3313
3314	ni.ni_cnd.cn_nameiop = CREATE;
3315#if CONFIG_TRIGGERS
3316	ni.ni_op = OP_LINK;
3317#endif
3318	ni.ni_cnd.cn_flags = LOCKPARENT;
3319	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
3320	if (!error) {
3321		error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo);
3322		if (nx != NULL) {
3323			/* update export stats */
3324			NFSStatAdd64(&nx->nx_stats.ops, 1);
3325
3326			/* update active user stats */
3327			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);
3328		}
3329	}
3330	if (dirp) {
3331		if (nd->nd_vers == NFS_VER3) {
3332			nfsm_srv_pre_vattr_init(&dpreattr);
3333			dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx);
3334		} else {
3335			vnode_put(dirp);
3336			dirp = NULL;
3337		}
3338	}
3339	if (error) {
3340		ni.ni_cnd.cn_nameiop = 0;
3341		goto out1;
3342	}
3343	dvp = ni.ni_dvp;
3344	vp = ni.ni_vp;
3345
3346	VATTR_INIT(vap);
3347	if (nd->nd_vers == NFS_VER3)
3348		error = nfsm_chain_get_sattr(nd, nmreq, vap);
3349	nfsm_chain_get_32(error, nmreq, linkdatalen);
3350	if (!error && (((nd->nd_vers == NFS_VER2) && (linkdatalen > NFS_MAXPATHLEN)) ||
3351			((nd->nd_vers == NFS_VER3) && (linkdatalen > MAXPATHLEN))))
3352		error = NFSERR_NAMETOL;
3353	nfsmerr_if(error);
3354	MALLOC(linkdata, caddr_t, linkdatalen + 1, M_TEMP, M_WAITOK);
3355	if (linkdata)
3356		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
3357					&uio_buf[0], sizeof(uio_buf));
3358	if (!linkdata || !auio) {
3359		error = ENOMEM;
3360		goto out;
3361	}
3362	uio_addiov(auio, CAST_USER_ADDR_T(linkdata), linkdatalen);
3363	error = nfsm_chain_get_uio(nmreq, linkdatalen, auio);
3364	if (!error && (nd->nd_vers == NFS_VER2))
3365		error = nfsm_chain_get_sattr(nd, nmreq, vap);
3366	nfsmerr_if(error);
3367	*(linkdata + linkdatalen) = '\0';
3368	if (vp) {
3369		error = EEXIST;
3370		goto out;
3371	}
3372
3373	VATTR_SET(vap, va_type, VLNK);
3374	VATTR_CLEAR_ACTIVE(vap, va_data_size);
3375	VATTR_CLEAR_ACTIVE(vap, va_access_time);
3376	/*
3377	 * Server policy is to alway use the mapped rpc credential for
3378	 * file system object creation. This has the nice side effect of
3379	 * enforcing BSD creation semantics
3380	 */
3381	VATTR_CLEAR_ACTIVE(vap, va_uid);
3382	VATTR_CLEAR_ACTIVE(vap, va_gid);
3383
3384	/* authorize before creating */
3385	error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0);
3386
3387	/* validate given attributes */
3388	if (!error)
3389		error = vnode_authattr_new(dvp, vap, 0, ctx);
3390
3391	if (!error)
3392		error = VNOP_SYMLINK(dvp, &vp, &ni.ni_cnd, vap, linkdata, ctx);
3393
3394	if (!error && (nd->nd_vers == NFS_VER3)) {
3395		if (vp == NULL) {
3396			ni.ni_cnd.cn_nameiop = LOOKUP;
3397#if CONFIG_TRIGGERS
3398			ni.ni_op = OP_LOOKUP;
3399#endif
3400			ni.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
3401			ni.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
3402			ni.ni_cnd.cn_context = ctx;
3403			ni.ni_startdir = dvp;
3404			ni.ni_usedvp   = dvp;
3405			cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */
3406			while ((error = lookup(&ni)) == ERECYCLE) {
3407				ni.ni_cnd.cn_flags = cnflags;
3408				ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf;
3409				ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp;
3410			}
3411			if (!error)
3412			        vp = ni.ni_vp;
3413		}
3414		if (!error) {
3415			error = nfsrv_vptofh(nx, NFS_VER3, NULL, vp, ctx, &nfh);
3416			if (!error) {
3417				nfsm_srv_vattr_init(&postattr, NFS_VER3);
3418				postattrerr = vnode_getattr(vp, &postattr, ctx);
3419			}
3420		}
3421	}
3422
3423#if CONFIG_FSE
3424	if (nfsrv_fsevents_enabled && !error && vp) {
3425		add_fsevent(FSE_CREATE_FILE, ctx,
3426			    FSE_ARG_VNODE, vp,
3427			    FSE_ARG_DONE);
3428	}
3429#endif
3430out:
3431	/*
3432	 * nameidone has to happen before we vnode_put(dvp)
3433	 * since it may need to release the fs_nodelock on the dvp
3434	 */
3435	nameidone(&ni);
3436	ni.ni_cnd.cn_nameiop = 0;
3437	if (vp)
3438	        vnode_put(vp);
3439	vnode_put(dvp);
3440out1:
3441	if (linkdata) {
3442		FREE(linkdata, M_TEMP);
3443		linkdata = NULL;
3444	}
3445	if (dirp) {
3446		nfsm_srv_vattr_init(&dpostattr, nd->nd_vers);
3447		dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx);
3448		vnode_put(dirp);
3449		dirp = NULL;
3450	}
3451
3452nfsmerr:
3453	/* assemble reply */
3454	nd->nd_repstat = error;
3455	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) +
3456			NFSX_POSTOPATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers));
3457	nfsmout_if(error);
3458	*mrepp = nmrep.nmc_mhead;
3459	nfsmout_on_status(nd, error);
3460	if (nd->nd_vers == NFS_VER3) {
3461		if (!nd->nd_repstat) {
3462			nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len);
3463			nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr);
3464		}
3465		nfsm_chain_add_wcc_data(error, nd, &nmrep,
3466			dpreattrerr, &dpreattr, dpostattrerr, &dpostattr);
3467	}
3468nfsmout:
3469	nfsm_chain_build_done(error, &nmrep);
3470	if (ni.ni_cnd.cn_nameiop) {
3471	        /*
3472		 * nameidone has to happen before we vnode_put(dvp)
3473		 * since it may need to release the fs_nodelock on the dvp
3474		 */
3475		nameidone(&ni);
3476
3477		if (vp)
3478			vnode_put(vp);
3479		vnode_put(dvp);
3480	}
3481	if (dirp)
3482		vnode_put(dirp);
3483	if (linkdata)
3484		FREE(linkdata, M_TEMP);
3485	if (error) {
3486		nfsm_chain_cleanup(&nmrep);
3487		*mrepp = NULL;
3488	}
3489	return (error);
3490}
3491
3492/*
3493 * nfs mkdir service
3494 */
3495
3496int
3497nfsrv_mkdir(
3498	struct nfsrv_descript *nd,
3499	struct nfsrv_sock *slp,
3500	vfs_context_t ctx,
3501	mbuf_t *mrepp)
3502{
3503	struct vnode_attr dpreattr, dpostattr, postattr;
3504	struct vnode_attr va, *vap = &va;
3505	struct nameidata ni;
3506	int error, dpreattrerr, dpostattrerr, postattrerr;
3507	uint32_t len = 0;
3508	vnode_t vp, dvp, dirp;
3509	struct nfs_filehandle nfh;
3510	struct nfs_export *nx = NULL;
3511	struct nfs_export_options *nxo;
3512	uid_t saved_uid;
3513	kauth_acl_t xacl = NULL;
3514	struct nfsm_chain *nmreq, nmrep;
3515
3516	error = 0;
3517	dpreattrerr = dpostattrerr = postattrerr = ENOENT;
3518	nmreq = &nd->nd_nmreq;
3519	nfsm_chain_null(&nmrep);
3520
3521	saved_uid = kauth_cred_getuid(nd->nd_cr);
3522
3523	ni.ni_cnd.cn_nameiop = 0;
3524	vp = dvp = dirp = NULL;
3525
3526	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
3527	nfsm_chain_get_32(error, nmreq, len);
3528	nfsm_name_len_check(error, nd, len);
3529	nfsmerr_if(error);
3530
3531	ni.ni_cnd.cn_nameiop = CREATE;
3532#if CONFIG_TRIGGERS
3533	ni.ni_op = OP_LINK;
3534#endif
3535	ni.ni_cnd.cn_flags = LOCKPARENT;
3536	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
3537	if (!error) {
3538		error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo);
3539		if (nx != NULL) {
3540			/* update export stats */
3541			NFSStatAdd64(&nx->nx_stats.ops, 1);
3542
3543			/* update active user stats */
3544			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);
3545		}
3546	}
3547	if (dirp) {
3548		if (nd->nd_vers == NFS_VER3) {
3549			nfsm_srv_pre_vattr_init(&dpreattr);
3550			dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx);
3551		} else {
3552			vnode_put(dirp);
3553			dirp = NULL;
3554		}
3555	}
3556	if (error) {
3557		ni.ni_cnd.cn_nameiop = 0;
3558		goto nfsmerr;
3559	}
3560	dvp = ni.ni_dvp;
3561	vp = ni.ni_vp;
3562
3563	VATTR_INIT(vap);
3564	error = nfsm_chain_get_sattr(nd, nmreq, vap);
3565	nfsmerr_if(error);
3566	VATTR_SET(vap, va_type, VDIR);
3567
3568	if (vp != NULL) {
3569	        /*
3570		 * nameidone has to happen before we vnode_put(dvp)
3571		 * since it may need to release the fs_nodelock on the dvp
3572		 */
3573	        nameidone(&ni);
3574		vnode_put(dvp);
3575		vnode_put(vp);
3576		error = EEXIST;
3577		goto out;
3578	}
3579
3580	error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx, nxo, 0);
3581
3582	/* construct ACL and handle inheritance */
3583	if (!error) {
3584		error = kauth_acl_inherit(dvp,
3585		    NULL,
3586		    &xacl,	/* isdir */
3587		    1,
3588		    ctx);
3589
3590		if (!error && xacl != NULL)
3591		        VATTR_SET(vap, va_acl, xacl);
3592	}
3593
3594	VATTR_CLEAR_ACTIVE(vap, va_data_size);
3595	VATTR_CLEAR_ACTIVE(vap, va_access_time);
3596	/*
3597	 * We don't support the S_ISGID bit for directories. Solaris and other
3598	 * SRV4 derived systems might set this to get BSD semantics, which we enforce
3599	 * any ways.
3600	 */
3601	if (VATTR_IS_ACTIVE(vap, va_mode))
3602		vap->va_mode &= ~S_ISGID;
3603	/*
3604	 * Server policy is to alway use the mapped rpc credential for
3605	 * file system object creation. This has the nice side effect of
3606	 * enforcing BSD creation semantics
3607	 */
3608	VATTR_CLEAR_ACTIVE(vap, va_uid);
3609	VATTR_CLEAR_ACTIVE(vap, va_gid);
3610
3611	/* validate new-file security information */
3612	if (!error)
3613		error = vnode_authattr_new(dvp, vap, 0, ctx);
3614	/*
3615	 * vnode_authattr_new can return errors other than EPERM, but that's not going to
3616	 * sit well with our clients so we map all errors to EPERM.
3617         */
3618	if (error)
3619		error = EPERM;
3620
3621	if (!error)
3622		error = VNOP_MKDIR(dvp, &vp, &ni.ni_cnd, vap, ctx);
3623
3624#if CONFIG_FSE
3625	if (nfsrv_fsevents_enabled && !error)
3626		add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
3627#endif
3628
3629	if (!error && !VATTR_ALL_SUPPORTED(vap))
3630	        /*
3631		 * If some of the requested attributes weren't handled by the VNOP,
3632		 * use our fallback code.
3633		 */
3634		error = vnode_setattr_fallback(vp, vap, ctx);
3635
3636	if (xacl != NULL)
3637		kauth_acl_free(xacl);
3638
3639	if (!error) {
3640		error = nfsrv_vptofh(nx, nd->nd_vers, NULL, vp, ctx, &nfh);
3641		if (!error) {
3642			nfsm_srv_vattr_init(&postattr, nd->nd_vers);
3643			postattrerr = vnode_getattr(vp, &postattr, ctx);
3644			if (nd->nd_vers == NFS_VER2)
3645				error = postattrerr;
3646		}
3647		vnode_put(vp);
3648		vp = NULL;
3649	}
3650	/*
3651	 * nameidone has to happen before we vnode_put(dvp)
3652	 * since it may need to release the fs_nodelock on the dvp
3653	 */
3654	nameidone(&ni);
3655	vnode_put(dvp);
3656out:
3657	ni.ni_cnd.cn_nameiop = 0;
3658
3659	if (dirp) {
3660		nfsm_srv_vattr_init(&dpostattr, nd->nd_vers);
3661		dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx);
3662		vnode_put(dirp);
3663		dirp = NULL;
3664	}
3665
3666nfsmerr:
3667	/* assemble reply */
3668	nd->nd_repstat = error;
3669	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) +
3670			NFSX_POSTOPATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers));
3671	nfsmout_if(error);
3672	*mrepp = nmrep.nmc_mhead;
3673	nfsmout_on_status(nd, error);
3674	if (nd->nd_vers == NFS_VER3) {
3675		if (!nd->nd_repstat) {
3676			nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len);
3677			nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr);
3678		}
3679		nfsm_chain_add_wcc_data(error, nd, &nmrep,
3680			dpreattrerr, &dpreattr, dpostattrerr, &dpostattr);
3681	} else {
3682		nfsm_chain_add_fh(error, &nmrep, NFS_VER2, nfh.nfh_fhp, nfh.nfh_len);
3683		if (!error)
3684			error = nfsm_chain_add_fattr(nd, &nmrep, &postattr);
3685	}
3686nfsmout:
3687	nfsm_chain_build_done(error, &nmrep);
3688	if (ni.ni_cnd.cn_nameiop) {
3689	        /*
3690		 * nameidone has to happen before we vnode_put(dvp)
3691		 * since it may need to release the fs_nodelock on the dvp
3692		 */
3693		nameidone(&ni);
3694		vnode_put(dvp);
3695		if (vp)
3696			vnode_put(vp);
3697	}
3698	if (dirp)
3699		vnode_put(dirp);
3700	if (error) {
3701		nfsm_chain_cleanup(&nmrep);
3702		*mrepp = NULL;
3703	}
3704	return (error);
3705}
3706
3707/*
3708 * nfs rmdir service
3709 */
3710int
3711nfsrv_rmdir(
3712	struct nfsrv_descript *nd,
3713	struct nfsrv_sock *slp,
3714	vfs_context_t ctx,
3715	mbuf_t *mrepp)
3716{
3717	int error, dpreattrerr, dpostattrerr;
3718	uint32_t len = 0;
3719	uid_t saved_uid;
3720	vnode_t vp, dvp, dirp;
3721	struct vnode_attr dpreattr, dpostattr;
3722	struct nfs_filehandle nfh;
3723	struct nfs_export *nx = NULL;
3724	struct nfs_export_options *nxo;
3725	struct nameidata ni;
3726	struct nfsm_chain *nmreq, nmrep;
3727
3728	error = 0;
3729	dpreattrerr = dpostattrerr = ENOENT;
3730	saved_uid = kauth_cred_getuid(nd->nd_cr);
3731	nmreq = &nd->nd_nmreq;
3732	nfsm_chain_null(&nmrep);
3733
3734	vp = dvp = dirp = NULL;
3735
3736	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
3737	nfsm_chain_get_32(error, nmreq, len);
3738	nfsm_name_len_check(error, nd, len);
3739	nfsmerr_if(error);
3740
3741	ni.ni_cnd.cn_nameiop = DELETE;
3742#if CONFIG_TRIGGERS
3743	ni.ni_op = OP_UNLINK;
3744#endif
3745	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
3746	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
3747	if (!error) {
3748		error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo);
3749		if (nx != NULL) {
3750			/* update export stats */
3751			NFSStatAdd64(&nx->nx_stats.ops, 1);
3752
3753			/* update active user stats */
3754			nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0);
3755		}
3756	}
3757	if (dirp) {
3758	        if (nd->nd_vers == NFS_VER3) {
3759			nfsm_srv_pre_vattr_init(&dpreattr);
3760			dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx);
3761		} else {
3762			vnode_put(dirp);
3763			dirp = NULL;
3764		}
3765	}
3766	nfsmerr_if(error);
3767
3768	dvp = ni.ni_dvp;
3769	vp = ni.ni_vp;
3770
3771	if (vnode_vtype(vp) != VDIR) {
3772		error = ENOTDIR;
3773		goto out;
3774	}
3775	/*
3776	 * No rmdir "." please.
3777	 */
3778	if (dvp == vp) {
3779		error = EINVAL;
3780		goto out;
3781	}
3782	/*
3783	 * The root of a mounted filesystem cannot be deleted.
3784	 */
3785	if (vnode_isvroot(vp))
3786		error = EBUSY;
3787	if (!error)
3788		error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx, nxo, 0);
3789	if (!error) {
3790#if CONFIG_FSE
3791		char     *path = NULL;
3792		int       plen;
3793		fse_info  finfo;
3794
3795		if (nfsrv_fsevents_enabled && need_fsevent(FSE_DELETE, dvp)) {
3796			plen = MAXPATHLEN;
3797		        if ((path = get_pathbuff()) && !vn_getpath(vp, path, &plen)) {
3798				get_fse_info(vp, &finfo, ctx);
3799			} else if (path) {
3800				release_pathbuff(path);
3801				path = NULL;
3802			}
3803		}
3804#endif /* CONFIG_FSE */
3805
3806		error = VNOP_RMDIR(dvp, vp, &ni.ni_cnd, ctx);
3807
3808#if CONFIG_FSE
3809		if (path) {
3810			if (!error)
3811				add_fsevent(FSE_DELETE, ctx,
3812					FSE_ARG_STRING, plen, path,
3813					FSE_ARG_FINFO, &finfo,
3814					FSE_ARG_DONE);
3815		        release_pathbuff(path);
3816		}
3817#endif /* CONFIG_FSE */
3818	}
3819out:
3820	/*
3821	 * nameidone has to happen before we vnode_put(dvp)
3822	 * since it may need to release the fs_nodelock on the dvp
3823	 */
3824	nameidone(&ni);
3825
3826	vnode_put(dvp);
3827	vnode_put(vp);
3828
3829	if (dirp) {
3830		nfsm_srv_vattr_init(&dpostattr, nd->nd_vers);
3831		dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx);
3832		vnode_put(dirp);
3833		dirp = NULL;
3834	}
3835
3836nfsmerr:
3837	/* assemble reply */
3838	nd->nd_repstat = error;
3839	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers));
3840	nfsmout_if(error);
3841	*mrepp = nmrep.nmc_mhead;
3842	nfsmout_on_status(nd, error);
3843	if (nd->nd_vers == NFS_VER3)
3844		nfsm_chain_add_wcc_data(error, nd, &nmrep,
3845			dpreattrerr, &dpreattr, dpostattrerr, &dpostattr);
3846nfsmout:
3847	nfsm_chain_build_done(error, &nmrep);
3848	if (dirp)
3849		vnode_put(dirp);
3850	if (error) {
3851		nfsm_chain_cleanup(&nmrep);
3852		*mrepp = NULL;
3853	}
3854	return (error);
3855}
3856
3857/*
3858 * nfs readdir service
3859 * - mallocs what it thinks is enough to read
3860 *	count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
3861 * - calls VNOP_READDIR()
3862 * - loops around building the reply
3863 *	if the output generated exceeds count break out of loop
3864 *	The nfsm_clget macro is used here so that the reply will be packed
3865 *	tightly in mbuf clusters.
3866 * - it only knows that it has encountered eof when the VNOP_READDIR()
3867 *	reads nothing
3868 * - as such one readdir rpc will return eof false although you are there
3869 *	and then the next will return eof
 * - it trims out records with d_fileno == 0
 *	these don't matter to Unix clients, but they might confuse clients
 *	on other operating systems.
3873 * NB: It is tempting to set eof to true if the VNOP_READDIR() reads less
3874 *	than requested, but this may not apply to all filesystems. For
3875 *	example, client NFS does not { although it is never remote mounted
3876 *	anyhow }
3877 *     The alternate call nfsrv_readdirplus() does lookups as well.
3878 * PS:  The XNFS protocol spec clearly describes what the "count"s arguments
3879 *      are supposed to cover.  For readdir, the count is the total number of
3880 *      bytes included in everything from the directory's postopattr through
3881 *      the EOF flag.  For readdirplus, the maxcount is the same, and the
3882 *      dircount includes all that except for the entry attributes and handles.
3883 */
3884int
3885nfsrv_readdir(
3886	struct nfsrv_descript *nd,
3887	struct nfsrv_sock *slp,
3888	vfs_context_t ctx,
3889	mbuf_t *mrepp)
3890{
3891	struct direntry *dp;
3892	char *cpos, *cend, *rbuf;
3893	vnode_t vp;
3894	struct vnode_attr attr;
3895	struct nfs_filehandle nfh;
3896	struct nfs_export *nx;
3897	struct nfs_export_options *nxo;
3898	uio_t auio = NULL;
3899	char uio_buf[ UIO_SIZEOF(1) ];
3900	int len, nlen, rem, xfer, error, attrerr;
3901	int siz, count, fullsiz, eofflag, nentries;
3902	u_quad_t off, toff, verf;
3903	int vnopflag;
3904	struct nfsm_chain *nmreq, nmrep;
3905
3906	error = 0;
3907	attrerr = ENOENT;
3908	count = nentries = 0;
3909	nmreq = &nd->nd_nmreq;
3910	nfsm_chain_null(&nmrep);
3911	rbuf = NULL;
3912	vp = NULL;
3913
3914	vnopflag = VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF;
3915
3916	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
3917	if (nd->nd_vers == NFS_VER3) {
3918		nfsm_chain_get_64(error, nmreq, toff);
3919		nfsm_chain_get_64(error, nmreq, verf);
3920	} else {
3921		nfsm_chain_get_32(error, nmreq, toff);
3922	}
3923	nfsm_chain_get_32(error, nmreq, count);
3924	nfsmerr_if(error);
3925
3926	off = toff;
3927	siz = ((count + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3928	xfer = NFSRV_NDMAXDATA(nd);
3929	if (siz > xfer)
3930		siz = xfer;
3931	fullsiz = siz;
3932
3933	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
3934	nfsmerr_if(error);
3935
3936	/* update export stats */
3937	NFSStatAdd64(&nx->nx_stats.ops, 1);
3938
3939	/* update active user stats */
3940	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);
3941
3942	error = nfsrv_credcheck(nd, ctx, nx, nxo);
3943	nfsmerr_if(error);
3944
3945	if (nxo->nxo_flags & NX_MANGLEDNAMES || nd->nd_vers == NFS_VER2)
3946		vnopflag |= VNODE_READDIR_NAMEMAX;
3947
3948	if ((nd->nd_vers == NFS_VER2) || (nxo->nxo_flags & NX_32BITCLIENTS))
3949		vnopflag |= VNODE_READDIR_SEEKOFF32;
3950
3951	if (nd->nd_vers == NFS_VER3) {
3952		nfsm_srv_vattr_init(&attr, NFS_VER3);
3953		error = attrerr = vnode_getattr(vp, &attr, ctx);
3954		if (!error && toff && verf && (verf != attr.va_filerev))
3955			error = NFSERR_BAD_COOKIE;
3956	}
3957	if (!error)
3958		error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LIST_DIRECTORY, ctx, nxo, 0);
3959	nfsmerr_if(error);
3960
3961	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3962	if (rbuf)
3963		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
3964				    &uio_buf[0], sizeof(uio_buf));
3965	if (!rbuf || !auio) {
3966		error = ENOMEM;
3967		goto nfsmerr;
3968	}
3969again:
3970	uio_reset(auio, off, UIO_SYSSPACE, UIO_READ);
3971	uio_addiov(auio, CAST_USER_ADDR_T(rbuf), fullsiz);
3972	eofflag = 0;
3973	error = VNOP_READDIR(vp, auio, vnopflag, &eofflag, &nentries, ctx);
3974	off = uio_offset(auio);
3975
3976	if (nd->nd_vers == NFS_VER3) {
3977		nfsm_srv_vattr_init(&attr, NFS_VER3);
3978		attrerr = vnode_getattr(vp, &attr, ctx);
3979	}
3980	nfsmerr_if(error);
3981
3982	if (uio_resid(auio) != 0) {
3983		siz -= uio_resid(auio);
3984
3985		/* If nothing read, return empty reply with eof set */
3986		if (siz == 0) {
3987			vnode_put(vp);
3988			vp = NULL;
3989			FREE(rbuf, M_TEMP);
3990			/* assemble reply */
3991			nd->nd_repstat = error;
3992			error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) +
3993					NFSX_COOKIEVERF(nd->nd_vers) + 2 * NFSX_UNSIGNED);
3994			nfsmout_if(error);
3995			*mrepp = nmrep.nmc_mhead;
3996			nfsmout_on_status(nd, error);
3997			if (nd->nd_vers == NFS_VER3) {
3998				nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
3999				nfsm_chain_add_64(error, &nmrep, attr.va_filerev);
4000			}
4001			nfsm_chain_add_32(error, &nmrep, FALSE);
4002			nfsm_chain_add_32(error, &nmrep, TRUE);
4003			nfsm_chain_build_done(error, &nmrep);
4004			return (error);
4005		}
4006	}
4007
4008	/*
4009	 * Check for degenerate cases of nothing useful read.
4010	 * If so go try again
4011	 */
4012	cpos = rbuf;
4013	cend = rbuf + siz;
4014	dp = (struct direntry *)cpos;
4015	while ((dp->d_fileno == 0) && (cpos < cend) && (nentries > 0)) {
4016		cpos += dp->d_reclen;
4017		dp = (struct direntry *)cpos;
4018		nentries--;
4019	}
4020	if ((cpos >= cend) || (nentries == 0)) {
4021		toff = off;
4022		siz = fullsiz;
4023		goto again;
4024	}
4025
4026	vnode_put(vp);
4027	vp = NULL;
4028
4029	/* assemble reply */
4030	nd->nd_repstat = error;
4031	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) +
4032			NFSX_COOKIEVERF(nd->nd_vers) + siz);
4033	nfsmout_if(error);
4034	*mrepp = nmrep.nmc_mhead;
4035	nfsmout_on_status(nd, error);
4036	nmrep.nmc_flags |= NFSM_CHAIN_FLAG_ADD_CLUSTERS;
4037
4038	len = 2 * NFSX_UNSIGNED;
4039	if (nd->nd_vers == NFS_VER3) {
4040		len += NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF;
4041		nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4042		nfsm_chain_add_64(error, &nmrep, attr.va_filerev);
4043		nfsmerr_if(error);
4044	}
4045
4046	/* Loop through the records and build reply */
4047	while ((cpos < cend) && (nentries > 0)) {
4048		if (dp->d_fileno != 0) {
4049			nlen = dp->d_namlen;
4050			if ((nd->nd_vers == NFS_VER2) && (nlen > NFS_MAXNAMLEN))
4051				nlen = NFS_MAXNAMLEN;
4052			rem = nfsm_rndup(nlen)-nlen;
4053			len += (4 * NFSX_UNSIGNED + nlen + rem);
4054			if (nd->nd_vers == NFS_VER3)
4055				len += 2 * NFSX_UNSIGNED;
4056			if (len > count) {
4057				eofflag = 0;
4058				break;
4059			}
4060			/* Build the directory record xdr from the direntry. */
4061			nfsm_chain_add_32(error, &nmrep, TRUE);
4062			if (nd->nd_vers == NFS_VER3) {
4063				nfsm_chain_add_64(error, &nmrep, dp->d_fileno);
4064			} else {
4065				nfsm_chain_add_32(error, &nmrep, dp->d_fileno);
4066			}
4067			nfsm_chain_add_string(error, &nmrep, dp->d_name, nlen);
4068			if (nd->nd_vers == NFS_VER3) {
4069				if (vnopflag & VNODE_READDIR_SEEKOFF32)
4070					dp->d_seekoff &= 0x00000000ffffffffULL;
4071				nfsm_chain_add_64(error, &nmrep, dp->d_seekoff);
4072			} else {
4073				nfsm_chain_add_32(error, &nmrep, dp->d_seekoff);
4074			}
4075			nfsmerr_if(error);
4076		}
4077		cpos += dp->d_reclen;
4078		dp = (struct direntry *)cpos;
4079		nentries--;
4080	}
4081	nfsm_chain_add_32(error, &nmrep, FALSE);
4082	nfsm_chain_add_32(error, &nmrep, eofflag ? TRUE : FALSE);
4083	FREE(rbuf, M_TEMP);
4084	goto nfsmout;
4085nfsmerr:
4086	if (rbuf)
4087		FREE(rbuf, M_TEMP);
4088	if (vp)
4089		vnode_put(vp);
4090	nd->nd_repstat = error;
4091	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers));
4092	nfsmout_if(error);
4093	*mrepp = nmrep.nmc_mhead;
4094	nfsmout_on_status(nd, error);
4095	if (nd->nd_vers == NFS_VER3)
4096		nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4097nfsmout:
4098	nfsm_chain_build_done(error, &nmrep);
4099	if (error) {
4100		nfsm_chain_cleanup(&nmrep);
4101		*mrepp = NULL;
4102	}
4103	return (error);
4104}
4105
4106int
4107nfsrv_readdirplus(
4108	struct nfsrv_descript *nd,
4109	struct nfsrv_sock *slp,
4110	vfs_context_t ctx,
4111	mbuf_t *mrepp)
4112{
4113	struct direntry *dp;
4114	char *cpos, *cend, *rbuf;
4115	vnode_t vp, nvp;
4116	struct nfs_filehandle dnfh, nfh;
4117	struct nfs_export *nx;
4118	struct nfs_export_options *nxo;
4119	uio_t auio = NULL;
4120	char uio_buf[ UIO_SIZEOF(1) ];
4121	struct vnode_attr attr, va, *vap = &va;
4122	int len, nlen, rem, xfer, error, attrerr, gotfh, gotattr;
4123	int siz, dircount, maxcount, fullsiz, eofflag, dirlen, nentries, isdotdot;
4124	u_quad_t off, toff, verf;
4125	int vnopflag;
4126	struct nfsm_chain *nmreq, nmrep;
4127
4128	error = 0;
4129	attrerr = ENOENT;
4130	nentries = 0;
4131	nmreq = &nd->nd_nmreq;
4132	nfsm_chain_null(&nmrep);
4133	rbuf = NULL;
4134	vp = NULL;
4135	dircount = maxcount = 0;
4136
4137	vnopflag = VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF;
4138
4139	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, dnfh.nfh_fhp, dnfh.nfh_len);
4140	nfsm_chain_get_64(error, nmreq, toff);
4141	nfsm_chain_get_64(error, nmreq, verf);
4142	nfsm_chain_get_32(error, nmreq, dircount);
4143	nfsm_chain_get_32(error, nmreq, maxcount);
4144	nfsmerr_if(error);
4145
4146	off = toff;
4147	xfer = NFSRV_NDMAXDATA(nd);
4148	dircount = ((dircount + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
4149	if (dircount > xfer)
4150		dircount = xfer;
4151	fullsiz = siz = dircount;
4152	maxcount = ((maxcount + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
4153	if (maxcount > xfer)
4154		maxcount = xfer;
4155
4156	error = nfsrv_fhtovp(&dnfh, nd, &vp, &nx, &nxo);
4157	nfsmerr_if(error);
4158
4159	/* update export stats */
4160	NFSStatAdd64(&nx->nx_stats.ops, 1);
4161
4162	/* update active user stats */
4163	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);
4164
4165	error = nfsrv_credcheck(nd, ctx, nx, nxo);
4166	nfsmerr_if(error);
4167
4168	if (nxo->nxo_flags & NX_32BITCLIENTS)
4169		vnopflag |= VNODE_READDIR_SEEKOFF32;
4170
4171	if (nxo->nxo_flags & NX_MANGLEDNAMES)
4172		vnopflag |= VNODE_READDIR_NAMEMAX;
4173
4174	nfsm_srv_vattr_init(&attr, NFS_VER3);
4175	error = attrerr = vnode_getattr(vp, &attr, ctx);
4176	if (!error && toff && verf && (verf != attr.va_filerev))
4177		error = NFSERR_BAD_COOKIE;
4178	if (!error)
4179		error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LIST_DIRECTORY, ctx, nxo, 0);
4180	nfsmerr_if(error);
4181
4182	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
4183	if (rbuf)
4184		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
4185				    &uio_buf[0], sizeof(uio_buf));
4186	if (!rbuf || !auio) {
4187		error = ENOMEM;
4188		goto nfsmerr;
4189	}
4190
4191again:
4192	uio_reset(auio, off, UIO_SYSSPACE, UIO_READ);
4193	uio_addiov(auio, CAST_USER_ADDR_T(rbuf), fullsiz);
4194	eofflag = 0;
4195	error = VNOP_READDIR(vp, auio, vnopflag, &eofflag, &nentries, ctx);
4196	off = uio_offset(auio);
4197	nfsm_srv_vattr_init(&attr, NFS_VER3);
4198	attrerr = vnode_getattr(vp, &attr, ctx);
4199	nfsmerr_if(error);
4200
4201	if (uio_resid(auio) != 0) {
4202		siz -= uio_resid(auio);
4203
4204		/* If nothing read, return empty reply with eof set */
4205		if (siz == 0) {
4206			vnode_put(vp);
4207			vp = NULL;
4208			FREE(rbuf, M_TEMP);
4209			/* assemble reply */
4210			nd->nd_repstat = error;
4211			error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR +
4212					NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED);
4213			nfsmout_if(error);
4214			*mrepp = nmrep.nmc_mhead;
4215			nfsmout_on_status(nd, error);
4216			nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4217			nfsm_chain_add_64(error, &nmrep, attr.va_filerev);
4218			nfsm_chain_add_32(error, &nmrep, FALSE);
4219			nfsm_chain_add_32(error, &nmrep, TRUE);
4220			nfsm_chain_build_done(error, &nmrep);
4221			return (error);
4222		}
4223	}
4224
4225	/*
4226	 * Check for degenerate cases of nothing useful read.
4227	 * If so go try again
4228	 */
4229	cpos = rbuf;
4230	cend = rbuf + siz;
4231	dp = (struct direntry *)cpos;
4232	while ((dp->d_fileno == 0) && (cpos < cend) && (nentries > 0)) {
4233		cpos += dp->d_reclen;
4234		dp = (struct direntry *)cpos;
4235		nentries--;
4236	}
4237	if ((cpos >= cend) || (nentries == 0)) {
4238		toff = off;
4239		siz = fullsiz;
4240		goto again;
4241	}
4242
4243	/*
4244	 * Probe one of the directory entries to see if the filesystem
4245	 * supports VGET.
4246	 */
4247	if ((error = VFS_VGET(vnode_mount(vp), (ino64_t)dp->d_fileno, &nvp, ctx))) {
4248		if (error == ENOTSUP) /* let others get passed back */
4249			error = NFSERR_NOTSUPP;
4250		goto nfsmerr;
4251	}
4252	vnode_put(nvp);
4253
4254	/* assemble reply */
4255	nd->nd_repstat = error;
4256	error = nfsrv_rephead(nd, slp, &nmrep, maxcount);
4257	nfsmout_if(error);
4258	*mrepp = nmrep.nmc_mhead;
4259	nfsmout_on_status(nd, error);
4260	nmrep.nmc_flags |= NFSM_CHAIN_FLAG_ADD_CLUSTERS;
4261
4262	dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED;
4263	nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4264	nfsm_chain_add_64(error, &nmrep, attr.va_filerev);
4265	nfsmerr_if(error);
4266
4267	/* Loop through the records and build reply */
4268	while ((cpos < cend) && (nentries > 0)) {
4269		if (dp->d_fileno != 0) {
4270			nlen = dp->d_namlen;
4271			rem = nfsm_rndup(nlen)-nlen;
4272			gotfh = gotattr = 1;
4273
4274			/* Got to get the vnode for lookup per entry. */
4275			if (VFS_VGET(vnode_mount(vp), (ino64_t)dp->d_fileno, &nvp, ctx)) {
4276				/* Can't get the vnode... so no fh or attrs */
4277				gotfh = gotattr = 0;
4278			} else {
4279				isdotdot = ((dp->d_namlen == 2) &&
4280					    (dp->d_name[0] == '.') && (dp->d_name[1] == '.'));
4281				if (nfsrv_vptofh(nx, 0, (isdotdot ? &dnfh : NULL), nvp, ctx, &nfh))
4282					gotfh = 0;
4283				nfsm_srv_vattr_init(vap, NFS_VER3);
4284				if (vnode_getattr(nvp, vap, ctx))
4285					gotattr = 0;
4286				vnode_put(nvp);
4287			}
4288
4289			/*
4290			 * If either the dircount or maxcount will be
4291			 * exceeded, get out now. Both of these lengths
4292			 * are calculated conservatively, including all
4293			 * XDR overheads.
4294			 */
4295			len += 8 * NFSX_UNSIGNED + nlen + rem;
4296			if (gotattr)
4297				len += NFSX_V3FATTR;
4298			if (gotfh)
4299				len += NFSX_UNSIGNED + nfsm_rndup(nfh.nfh_len);
4300			dirlen += 6 * NFSX_UNSIGNED + nlen + rem;
4301			if ((len > maxcount) || (dirlen > dircount)) {
4302				eofflag = 0;
4303				break;
4304			}
4305
4306			/* Build the directory record xdr from the direntry. */
4307			nfsm_chain_add_32(error, &nmrep, TRUE);
4308			nfsm_chain_add_64(error, &nmrep, dp->d_fileno);
4309			nfsm_chain_add_string(error, &nmrep, dp->d_name, nlen);
4310			if (vnopflag & VNODE_READDIR_SEEKOFF32)
4311				dp->d_seekoff &= 0x00000000ffffffffULL;
4312			nfsm_chain_add_64(error, &nmrep, dp->d_seekoff);
4313			nfsm_chain_add_postop_attr(error, nd, &nmrep, (gotattr ? 0 : ENOENT), vap);
4314			if (gotfh)
4315				nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len);
4316			else
4317				nfsm_chain_add_32(error, &nmrep, FALSE);
4318			nfsmerr_if(error);
4319		}
4320		cpos += dp->d_reclen;
4321		dp = (struct direntry *)cpos;
4322		nentries--;
4323	}
4324	vnode_put(vp);
4325	vp = NULL;
4326	nfsm_chain_add_32(error, &nmrep, FALSE);
4327	nfsm_chain_add_32(error, &nmrep, eofflag ? TRUE : FALSE);
4328	FREE(rbuf, M_TEMP);
4329	goto nfsmout;
4330nfsmerr:
4331	if (rbuf)
4332		FREE(rbuf, M_TEMP);
4333	nd->nd_repstat = error;
4334	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR);
4335	nfsmout_if(error);
4336	*mrepp = nmrep.nmc_mhead;
4337	nfsmout_on_status(nd, error);
4338	nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4339nfsmout:
4340	nfsm_chain_build_done(error, &nmrep);
4341	if (vp)
4342		vnode_put(vp);
4343	if (error) {
4344		nfsm_chain_cleanup(&nmrep);
4345		*mrepp = NULL;
4346	}
4347	return (error);
4348}
4349
4350/*
4351 * nfs commit service
4352 */
4353int
4354nfsrv_commit(
4355	struct nfsrv_descript *nd,
4356	struct nfsrv_sock *slp,
4357	vfs_context_t ctx,
4358	mbuf_t *mrepp)
4359{
4360	vnode_t vp;
4361	struct nfs_filehandle nfh;
4362	struct nfs_export *nx;
4363	struct nfs_export_options *nxo;
4364	int error, preattrerr, postattrerr, count;
4365	struct vnode_attr preattr, postattr;
4366	u_quad_t off;
4367	struct nfsm_chain *nmreq, nmrep;
4368
4369	error = 0;
4370	preattrerr = postattrerr = ENOENT;
4371	nmreq = &nd->nd_nmreq;
4372	nfsm_chain_null(&nmrep);
4373	vp = NULL;
4374
4375	/*
4376	 * XXX At this time VNOP_FSYNC() does not accept offset and byte
4377	 * count parameters, so those arguments are useless (someday maybe).
4378	 */
4379
4380	nfsm_chain_get_fh_ptr(error, nmreq, NFS_VER3, nfh.nfh_fhp, nfh.nfh_len);
4381	nfsm_chain_get_64(error, nmreq, off);
4382	nfsm_chain_get_32(error, nmreq, count);
4383	nfsmerr_if(error);
4384
4385	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
4386	nfsmerr_if(error);
4387
4388	/* update export stats */
4389	NFSStatAdd64(&nx->nx_stats.ops, 1);
4390
4391	/* update active user stats */
4392	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);
4393
4394	error = nfsrv_credcheck(nd, ctx, nx, nxo);
4395	nfsmerr_if(error);
4396
4397	nfsm_srv_pre_vattr_init(&preattr);
4398	preattrerr = vnode_getattr(vp, &preattr, ctx);
4399
4400	error = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4401
4402	nfsm_srv_vattr_init(&postattr, 1);
4403	postattrerr = vnode_getattr(vp, &postattr, ctx);
4404
4405nfsmerr:
4406	if (vp)
4407		vnode_put(vp);
4408
4409	/* assemble reply */
4410	nd->nd_repstat = error;
4411	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
4412	nfsmout_if(error);
4413	*mrepp = nmrep.nmc_mhead;
4414	nfsmout_on_status(nd, error);
4415	nfsm_chain_add_wcc_data(error, nd, &nmrep,
4416		preattrerr, &preattr, postattrerr, &postattr);
4417	if (!nd->nd_repstat) {
4418		nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_sec);
4419		nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_usec);
4420	}
4421nfsmout:
4422	nfsm_chain_build_done(error, &nmrep);
4423	if (error) {
4424		nfsm_chain_cleanup(&nmrep);
4425		*mrepp = NULL;
4426	}
4427	return (error);
4428}
4429
4430/*
4431 * nfs statfs service
4432 */
4433int
4434nfsrv_statfs(
4435	struct nfsrv_descript *nd,
4436	struct nfsrv_sock *slp,
4437	vfs_context_t ctx,
4438	mbuf_t *mrepp)
4439{
4440	struct vfs_attr va;
4441	int error, attrerr;
4442	vnode_t vp;
4443	struct vnode_attr attr;
4444	struct nfs_filehandle nfh;
4445	struct nfs_export *nx;
4446	struct nfs_export_options *nxo;
4447	off_t blksize;
4448	struct nfsm_chain *nmreq, nmrep;
4449
4450	error = 0;
4451	attrerr = ENOENT;
4452	nmreq = &nd->nd_nmreq;
4453	nfsm_chain_null(&nmrep);
4454	vp = NULL;
4455	blksize = 512;
4456
4457	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
4458	nfsmerr_if(error);
4459	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
4460	nfsmerr_if(error);
4461
4462	/* update export stats */
4463	NFSStatAdd64(&nx->nx_stats.ops, 1);
4464
4465	/* update active user stats */
4466	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);
4467
4468	error = nfsrv_credcheck(nd, ctx, nx, nxo);
4469	nfsmerr_if(error);
4470
4471	VFSATTR_INIT(&va);
4472	VFSATTR_WANTED(&va, f_blocks);
4473	VFSATTR_WANTED(&va, f_bavail);
4474	VFSATTR_WANTED(&va, f_files);
4475	VFSATTR_WANTED(&va, f_ffree);
4476	error = vfs_getattr(vnode_mount(vp), &va, ctx);
4477	blksize = vnode_mount(vp)->mnt_vfsstat.f_bsize;
4478
4479	if (nd->nd_vers == NFS_VER3) {
4480		nfsm_srv_vattr_init(&attr, nd->nd_vers);
4481		attrerr = vnode_getattr(vp, &attr, ctx);
4482	}
4483
4484nfsmerr:
4485	if (vp)
4486		vnode_put(vp);
4487
4488	/* assemble reply */
4489	nd->nd_repstat = error;
4490	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + NFSX_STATFS(nd->nd_vers));
4491	nfsmout_if(error);
4492	*mrepp = nmrep.nmc_mhead;
4493	nfsmout_on_status(nd, error);
4494	if (nd->nd_vers == NFS_VER3)
4495		nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4496	nfsmout_if(nd->nd_repstat);
4497
4498	if (nd->nd_vers == NFS_VER3) {
4499		nfsm_chain_add_64(error, &nmrep, va.f_blocks * blksize);
4500		nfsm_chain_add_64(error, &nmrep, va.f_bfree * blksize);
4501		nfsm_chain_add_64(error, &nmrep, va.f_bavail * blksize);
4502		nfsm_chain_add_64(error, &nmrep, va.f_files);
4503		nfsm_chain_add_64(error, &nmrep, va.f_ffree);
4504		nfsm_chain_add_64(error, &nmrep, va.f_ffree);
4505		nfsm_chain_add_32(error, &nmrep, 0); /* invarsec */
4506	} else {
4507		nfsm_chain_add_32(error, &nmrep, NFS_V2MAXDATA);
4508		nfsm_chain_add_32(error, &nmrep, blksize);
4509		nfsm_chain_add_32(error, &nmrep, va.f_blocks);
4510		nfsm_chain_add_32(error, &nmrep, va.f_bfree);
4511		nfsm_chain_add_32(error, &nmrep, va.f_bavail);
4512	}
4513nfsmout:
4514	nfsm_chain_build_done(error, &nmrep);
4515	if (error) {
4516		nfsm_chain_cleanup(&nmrep);
4517		*mrepp = NULL;
4518	}
4519	return (error);
4520}
4521
4522/*
4523 * nfs fsinfo service
4524 */
4525int
4526nfsrv_fsinfo(
4527	struct nfsrv_descript *nd,
4528	struct nfsrv_sock *slp,
4529	vfs_context_t ctx,
4530	mbuf_t *mrepp)
4531{
4532	int error, attrerr, prefsize, maxsize;
4533	vnode_t vp;
4534	struct vnode_attr attr;
4535	struct nfs_filehandle nfh;
4536	struct nfs_export *nx;
4537	struct nfs_export_options *nxo;
4538	struct nfsm_chain *nmreq, nmrep;
4539
4540	error = 0;
4541	attrerr = ENOENT;
4542	nmreq = &nd->nd_nmreq;
4543	nfsm_chain_null(&nmrep);
4544	vp = NULL;
4545
4546	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
4547	nfsmerr_if(error);
4548	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
4549	nfsmerr_if(error);
4550
4551	/* update export stats */
4552	NFSStatAdd64(&nx->nx_stats.ops, 1);
4553
4554	/* update active user stats */
4555	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);
4556
4557	error = nfsrv_credcheck(nd, ctx, nx, nxo);
4558	nfsmerr_if(error);
4559
4560	nfsm_srv_vattr_init(&attr, NFS_VER3);
4561	attrerr = vnode_getattr(vp, &attr, ctx);
4562
4563nfsmerr:
4564	if (vp)
4565		vnode_put(vp);
4566
4567	/* assemble reply */
4568	nd->nd_repstat = error;
4569	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
4570	nfsmout_if(error);
4571	*mrepp = nmrep.nmc_mhead;
4572	nfsmout_on_status(nd, error);
4573	nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4574	nfsmout_if(nd->nd_repstat);
4575
4576	/*
4577	 * XXX There should be file system VFS OP(s) to get this information.
4578	 * For now, assume our usual NFS defaults.
4579	 */
4580	if (slp->ns_sotype == SOCK_DGRAM) {
4581		maxsize = NFS_MAXDGRAMDATA;
4582		prefsize = NFS_PREFDGRAMDATA;
4583	} else
4584		maxsize = prefsize = NFSRV_MAXDATA;
4585
4586	nfsm_chain_add_32(error, &nmrep, maxsize);
4587	nfsm_chain_add_32(error, &nmrep, prefsize);
4588	nfsm_chain_add_32(error, &nmrep, NFS_FABLKSIZE);
4589	nfsm_chain_add_32(error, &nmrep, maxsize);
4590	nfsm_chain_add_32(error, &nmrep, prefsize);
4591	nfsm_chain_add_32(error, &nmrep, NFS_FABLKSIZE);
4592	nfsm_chain_add_32(error, &nmrep, prefsize);
4593	nfsm_chain_add_64(error, &nmrep, 0xffffffffffffffffULL);
4594	nfsm_chain_add_32(error, &nmrep, 0);
4595	nfsm_chain_add_32(error, &nmrep, 1);
4596	/* XXX link/symlink support should be taken from volume capabilities */
4597	nfsm_chain_add_32(error, &nmrep,
4598		NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK |
4599		NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME);
4600
4601nfsmout:
4602	nfsm_chain_build_done(error, &nmrep);
4603	if (error) {
4604		nfsm_chain_cleanup(&nmrep);
4605		*mrepp = NULL;
4606	}
4607	return (error);
4608}
4609
4610/*
4611 * nfs pathconf service
4612 */
4613int
4614nfsrv_pathconf(
4615	struct nfsrv_descript *nd,
4616	struct nfsrv_sock *slp,
4617	vfs_context_t ctx,
4618	mbuf_t *mrepp)
4619{
4620	int error, attrerr, linkmax, namemax;
4621	int chownres, notrunc, case_sensitive, case_preserving;
4622	vnode_t vp;
4623	struct vnode_attr attr;
4624	struct nfs_filehandle nfh;
4625	struct nfs_export *nx;
4626	struct nfs_export_options *nxo;
4627	struct nfsm_chain *nmreq, nmrep;
4628
4629	error = 0;
4630	attrerr = ENOENT;
4631	nmreq = &nd->nd_nmreq;
4632	nfsm_chain_null(&nmrep);
4633	vp = NULL;
4634
4635	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
4636	nfsmerr_if(error);
4637	error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo);
4638	nfsmerr_if(error);
4639
4640	/* update export stats */
4641	NFSStatAdd64(&nx->nx_stats.ops, 1);
4642
4643	/* update active user stats */
4644	nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0);
4645
4646	error = nfsrv_credcheck(nd, ctx, nx, nxo);
4647	nfsmerr_if(error);
4648
4649	error = VNOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax, ctx);
4650	if (!error)
4651		error = VNOP_PATHCONF(vp, _PC_NAME_MAX, &namemax, ctx);
4652	if (!error)
4653		error = VNOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres, ctx);
4654	if (!error)
4655		error = VNOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc, ctx);
4656	if (!error)
4657		error = VNOP_PATHCONF(vp, _PC_CASE_SENSITIVE, &case_sensitive, ctx);
4658	if (!error)
4659		error = VNOP_PATHCONF(vp, _PC_CASE_PRESERVING, &case_preserving, ctx);
4660
4661	nfsm_srv_vattr_init(&attr, NFS_VER3);
4662	attrerr = vnode_getattr(vp, &attr, ctx);
4663
4664nfsmerr:
4665	if (vp)
4666		vnode_put(vp);
4667
4668	/* assemble reply */
4669	nd->nd_repstat = error;
4670	error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
4671	nfsmout_if(error);
4672	*mrepp = nmrep.nmc_mhead;
4673	nfsmout_on_status(nd, error);
4674	nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr);
4675	nfsmout_if(nd->nd_repstat);
4676
4677	nfsm_chain_add_32(error, &nmrep, linkmax);
4678	nfsm_chain_add_32(error, &nmrep, namemax);
4679	nfsm_chain_add_32(error, &nmrep, notrunc);
4680	nfsm_chain_add_32(error, &nmrep, chownres);
4681	nfsm_chain_add_32(error, &nmrep, !case_sensitive);
4682	nfsm_chain_add_32(error, &nmrep, case_preserving);
4683
4684nfsmout:
4685	nfsm_chain_build_done(error, &nmrep);
4686	if (error) {
4687		nfsm_chain_cleanup(&nmrep);
4688		*mrepp = NULL;
4689	}
4690	return (error);
4691}
4692
4693/*
4694 * Null operation, used by clients to ping server
4695 */
4696/* ARGSUSED */
4697int
4698nfsrv_null(
4699	struct nfsrv_descript *nd,
4700	struct nfsrv_sock *slp,
4701	__unused vfs_context_t ctx,
4702	mbuf_t *mrepp)
4703{
4704	int error = NFSERR_RETVOID;
4705	struct nfsm_chain nmrep;
4706
4707	/*
4708	 * RPCSEC_GSS context setup ?
4709	 */
4710	if (nd->nd_gss_context)
4711		return(nfs_gss_svc_ctx_init(nd, slp, mrepp));
4712
4713	nfsm_chain_null(&nmrep);
4714
4715	/* assemble reply */
4716	nd->nd_repstat = error;
4717	error = nfsrv_rephead(nd, slp, &nmrep, 0);
4718	nfsmout_if(error);
4719	*mrepp = nmrep.nmc_mhead;
4720nfsmout:
4721	nfsm_chain_build_done(error, &nmrep);
4722	if (error) {
4723		nfsm_chain_cleanup(&nmrep);
4724		*mrepp = NULL;
4725	}
4726	return (error);
4727}
4728
4729/*
4730 * No operation, used for obsolete procedures
4731 */
4732/* ARGSUSED */
4733int
4734nfsrv_noop(
4735	struct nfsrv_descript *nd,
4736	struct nfsrv_sock *slp,
4737	__unused vfs_context_t ctx,
4738	mbuf_t *mrepp)
4739{
4740	int error;
4741	struct nfsm_chain nmrep;
4742
4743	nfsm_chain_null(&nmrep);
4744
4745	if (nd->nd_repstat)
4746		error = nd->nd_repstat;
4747	else
4748		error = EPROCUNAVAIL;
4749
4750	/* assemble reply */
4751	nd->nd_repstat = error;
4752	error = nfsrv_rephead(nd, slp, &nmrep, 0);
4753	nfsmout_if(error);
4754	*mrepp = nmrep.nmc_mhead;
4755nfsmout:
4756	nfsm_chain_build_done(error, &nmrep);
4757	if (error) {
4758		nfsm_chain_cleanup(&nmrep);
4759		*mrepp = NULL;
4760	}
4761	return (error);
4762}
4763
4764int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
4765				    struct nfsrv_sock *slp,
4766				    vfs_context_t ctx,
4767				    mbuf_t *mrepp) = {
4768	nfsrv_null,
4769	nfsrv_getattr,
4770	nfsrv_setattr,
4771	nfsrv_lookup,
4772	nfsrv_access,
4773	nfsrv_readlink,
4774	nfsrv_read,
4775	nfsrv_write,
4776	nfsrv_create,
4777	nfsrv_mkdir,
4778	nfsrv_symlink,
4779	nfsrv_mknod,
4780	nfsrv_remove,
4781	nfsrv_rmdir,
4782	nfsrv_rename,
4783	nfsrv_link,
4784	nfsrv_readdir,
4785	nfsrv_readdirplus,
4786	nfsrv_statfs,
4787	nfsrv_fsinfo,
4788	nfsrv_pathconf,
4789	nfsrv_commit,
4790	nfsrv_noop
4791};
4792
4793/*
4794 * Perform access checking for vnodes obtained from file handles that would
4795 * refer to files already opened by a Unix client. You cannot just use
4796 * vnode_authorize() for two reasons.
4797 * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
4798 * 2 - The owner is to be given access irrespective of mode bits so that
4799 *     processes that chmod after opening a file don't break. I don't like
4800 *     this because it opens a security hole, but since the nfs server opens
4801 *     a security hole the size of a barn door anyhow, what the heck.
4802 *
 * The exception to rule 2 is EPERM. If a file is IMMUTABLE, vnode_authorize()
 * will return EPERM instead of EACCES. EPERM is always an error.
4805 */
4806
4807int
4808nfsrv_authorize(
4809	vnode_t vp,
4810	vnode_t dvp,
4811	kauth_action_t action,
4812	vfs_context_t ctx,
4813	struct nfs_export_options *nxo,
4814	int override)
4815{
4816	struct vnode_attr vattr;
4817	int error;
4818
4819	if (action & KAUTH_VNODE_WRITE_RIGHTS) {
4820		/*
4821		 * Disallow write attempts on read-only exports;
4822		 * unless the file is a socket or a block or character
4823		 * device resident on the file system.
4824		 */
4825		if (nxo->nxo_flags & NX_READONLY) {
4826			switch (vnode_vtype(vp)) {
4827			case VREG: case VDIR: case VLNK: case VCPLX:
4828				return (EROFS);
4829			default:
4830				break;
4831			}
4832		}
4833	}
4834	error = vnode_authorize(vp, dvp, action, ctx);
4835	/*
4836	 * Allow certain operations for the owner (reads and writes
4837	 * on files that are already open). Picking up from FreeBSD.
4838	 */
4839	if (override && (error == EACCES)) {
4840		VATTR_INIT(&vattr);
4841		VATTR_WANTED(&vattr, va_uid);
4842		if ((vnode_getattr(vp, &vattr, ctx) == 0) &&
4843		    (kauth_cred_getuid(vfs_context_ucred(ctx)) == vattr.va_uid))
4844			error = 0;
4845	}
4846	return error;
4847}
4848
4849#endif /* NFSSERVER */
4850
4851