1/*
2 * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * vnode op calls for NFS version 4
31 */
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/systm.h>
35#include <sys/resourcevar.h>
36#include <sys/proc_internal.h>
37#include <sys/kauth.h>
38#include <sys/mount_internal.h>
39#include <sys/malloc.h>
40#include <sys/kpi_mbuf.h>
41#include <sys/conf.h>
42#include <sys/vnode_internal.h>
43#include <sys/dirent.h>
44#include <sys/fcntl.h>
45#include <sys/lockf.h>
46#include <sys/ubc_internal.h>
47#include <sys/attr.h>
48#include <sys/signalvar.h>
49#include <sys/uio_internal.h>
50#include <sys/xattr.h>
51#include <sys/paths.h>
52
53#include <vfs/vfs_support.h>
54
55#include <sys/vm.h>
56
57#include <sys/time.h>
58#include <kern/clock.h>
59#include <libkern/OSAtomic.h>
60
61#include <miscfs/fifofs/fifo.h>
62#include <miscfs/specfs/specdev.h>
63
64#include <nfs/rpcv2.h>
65#include <nfs/nfsproto.h>
66#include <nfs/nfs.h>
67#include <nfs/nfsnode.h>
68#include <nfs/nfs_gss.h>
69#include <nfs/nfsmount.h>
70#include <nfs/nfs_lock.h>
71#include <nfs/xdr_subs.h>
72#include <nfs/nfsm_subs.h>
73
74#include <net/if.h>
75#include <netinet/in.h>
76#include <netinet/in_var.h>
77#include <vm/vm_kern.h>
78
79#include <kern/task.h>
80#include <kern/sched_prim.h>
81
82int
83nfs4_access_rpc(nfsnode_t np, u_int32_t *access, int rpcflags, vfs_context_t ctx)
84{
85	int error = 0, lockerror = ENOENT, status, numops, slot;
86	u_int64_t xid;
87	struct nfsm_chain nmreq, nmrep;
88	struct timeval now;
89	uint32_t access_result = 0, supported = 0, missing;
90	struct nfsmount *nmp = NFSTONMP(np);
91	int nfsvers = nmp->nm_vers;
92	uid_t uid;
93	struct nfsreq_secinfo_args si;
94
95	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
96		return (0);
97
98	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
99	nfsm_chain_null(&nmreq);
100	nfsm_chain_null(&nmrep);
101
102	// PUTFH, ACCESS, GETATTR
103	numops = 3;
104	nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED);
105	nfsm_chain_add_compound_header(error, &nmreq, "access", numops);
106	numops--;
107	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
108	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
109	numops--;
110	nfsm_chain_add_32(error, &nmreq, NFS_OP_ACCESS);
111	nfsm_chain_add_32(error, &nmreq, *access);
112	numops--;
113	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
114	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
115	nfsm_chain_build_done(error, &nmreq);
116	nfsm_assert(error, (numops == 0), EPROTO);
117	nfsmout_if(error);
118	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
119		vfs_context_thread(ctx), vfs_context_ucred(ctx),
120		&si, rpcflags, &nmrep, &xid, &status);
121
122	if ((lockerror = nfs_node_lock(np)))
123		error = lockerror;
124	nfsm_chain_skip_tag(error, &nmrep);
125	nfsm_chain_get_32(error, &nmrep, numops);
126	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
127	nfsm_chain_op_check(error, &nmrep, NFS_OP_ACCESS);
128	nfsm_chain_get_32(error, &nmrep, supported);
129	nfsm_chain_get_32(error, &nmrep, access_result);
130	nfsmout_if(error);
131	if ((missing = (*access & ~supported))) {
132		/* missing support for something(s) we wanted */
133		if (missing & NFS_ACCESS_DELETE) {
134			/*
135			 * If the server doesn't report DELETE (possible
136			 * on UNIX systems), we'll assume that it is OK
137			 * and just let any subsequent delete action fail
138			 * if it really isn't deletable.
139			 */
140			access_result |= NFS_ACCESS_DELETE;
141		}
142	}
143	/* ".zfs" subdirectories may erroneously give a denied answer for modify/delete */
144	if (nfs_access_dotzfs) {
145		vnode_t dvp = NULLVP;
146		if (np->n_flag & NISDOTZFSCHILD) /* may be able to create/delete snapshot dirs */
147			access_result |= (NFS_ACCESS_MODIFY|NFS_ACCESS_EXTEND|NFS_ACCESS_DELETE);
148		else if (((dvp = vnode_getparent(NFSTOV(np))) != NULLVP) && (VTONFS(dvp)->n_flag & NISDOTZFSCHILD))
149			access_result |= NFS_ACCESS_DELETE; /* may be able to delete snapshot dirs */
150		if (dvp != NULLVP)
151			vnode_put(dvp);
152	}
153	/* Some servers report DELETE support but erroneously give a denied answer. */
154	if (nfs_access_delete && (*access & NFS_ACCESS_DELETE) && !(access_result & NFS_ACCESS_DELETE))
155		access_result |= NFS_ACCESS_DELETE;
156	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
157	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
158	nfsmout_if(error);
159
160	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
161	slot = nfs_node_access_slot(np, uid, 1);
162	np->n_accessuid[slot] = uid;
163	microuptime(&now);
164	np->n_accessstamp[slot] = now.tv_sec;
165	np->n_access[slot] = access_result;
166
167	/* pass back the access returned with this request */
168	*access = np->n_access[slot];
169nfsmout:
170	if (!lockerror)
171		nfs_node_unlock(np);
172	nfsm_chain_cleanup(&nmreq);
173	nfsm_chain_cleanup(&nmrep);
174	return (error);
175}
176
/*
 * Perform an NFSv4 GETATTR RPC (compound: PUTFH, GETATTR) for the given
 * filehandle and parse the returned attributes into *nvap.
 *
 * Either np or mp identifies the mount.  flags (NGA_*) select RPC behavior:
 * NGA_MONITOR marks the request recoverable, NGA_SOFT makes it time out
 * instead of hanging, and NGA_ACL additionally requests the ACL (only if
 * the server advertises ACL support).  *xidp returns the transaction id
 * so callers can order attribute updates.
 */
int
nfs4_getattr_rpc(
	nfsnode_t np,
	mount_t mp,
	u_char *fhp,
	size_t fhsize,
	int flags,
	vfs_context_t ctx,
	struct nfs_vattr *nvap,
	u_int64_t *xidp)
{
	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
	int error = 0, status, nfsvers, numops, rpcflags = 0, acls;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* only ask for an ACL if the server claims ACL support */
	acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL);

	if (np && (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)) {
		/* referral trigger: fabricate default attributes locally, no RPC */
		nfs4_default_attrs_for_referral_trigger(VTONFS(np->n_parent), NULL, 0, nvap, NULL);
		return (0);
	}

	if (flags & NGA_MONITOR) /* vnode monitor requests should be soft */
		rpcflags = R_RECOVER;

	if (flags & NGA_SOFT) /* Return ETIMEDOUT if server not responding */
		rpcflags |= R_SOFT;

	/* NOTE(review): si is initialized here but nfs_request2 below is passed
	 * NULL for the secinfo args -- presumably intentional; confirm. */
	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR
	numops = 2;
	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "getattr", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, fhp, fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	if ((flags & NGA_ACL) && acls)
		NFS_BITMAP_SET(bitmap, NFS_FATTR_ACL);
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, mp, &nmreq, NFSPROC4_COMPOUND,
			vfs_context_thread(ctx), vfs_context_ucred(ctx),
			NULL, rpcflags, &nmrep, xidp, &status);

	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsmout_if(error);
	error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
	nfsmout_if(error);
	if ((flags & NGA_ACL) && acls && !NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_ACL)) {
		/* we asked for the ACL but didn't get one... assume there isn't one */
		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_ACL);
		nvap->nva_acl = NULL;
	}
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
251
/*
 * Perform an NFSv4 READLINK RPC (compound: PUTFH, GETATTR, READLINK).
 *
 * Reads the symlink target for np into buf.  On entry *buflenp is the
 * buffer size; on success it is updated to the number of bytes stored
 * (the link text is truncated to fit, leaving room for a terminator).
 */
int
nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error = 0, lockerror = ENOENT, status, numops;
	uint32_t len = 0;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	/* referral trigger nodes aren't real symlinks on this server */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, READLINK
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_READLINK);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	/* node lock protects the attribute load from the GETATTR reply */
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_READLINK);
	nfsm_chain_get_32(error, &nmrep, len);
	nfsmout_if(error);
	/*
	 * Clamp the returned length to the caller's buffer: prefer the
	 * cached file size if it fits, otherwise fill all but one byte.
	 */
	if (len >= *buflenp) {
		if (np->n_size && (np->n_size < *buflenp))
			len = np->n_size;
		else
			len = *buflenp - 1;
	}
	nfsm_chain_get_opaque(error, &nmrep, len, buf);
	if (!error)
		*buflenp = len;
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
314
/*
 * Build and fire off an asynchronous NFSv4 READ RPC
 * (compound: PUTFH, READ, GETATTR).
 *
 * The request is dispatched via nfs_request_async(); *reqp receives the
 * in-flight request handle, which the caller completes with
 * nfs4_read_rpc_async_finish().  cb supplies the async callback info.
 */
int
nfs4_read_rpc_async(
	nfsnode_t np,
	off_t offset,
	size_t len,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsmount *nmp;
	int error = 0, nfsvers, numops;
	nfs_stateid stateid;
	struct nfsm_chain nmreq;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* can't read from a referral trigger node */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);

	// PUTFH, READ, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "read", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_READ);
	/* pick the stateid for this open/lock owner to read under */
	nfs_get_stateid(np, thd, cred, &stateid);
	nfsm_chain_add_stateid(error, &nmreq, &stateid);
	nfsm_chain_add_64(error, &nmreq, offset);
	nfsm_chain_add_32(error, &nmreq, len);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, cb, reqp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	return (error);
}
365
/*
 * Complete an asynchronous NFSv4 READ RPC started by nfs4_read_rpc_async().
 *
 * Parses the reply, copies up to *lenp bytes of read data into uio
 * (updating *lenp to the amount actually returned), loads the piggybacked
 * GETATTR attributes, and reports EOF through *eofp if non-NULL.
 * Returns EINPROGRESS if the async request was restarted.
 */
int
nfs4_read_rpc_async_finish(
	nfsnode_t np,
	struct nfsreq *req,
	uio_t uio,
	size_t *lenp,
	int *eofp)
{
	struct nfsmount *nmp;
	int error = 0, lockerror, nfsvers, numops, status, eof = 0;
	size_t retlen = 0;
	u_int64_t xid;
	struct nfsm_chain nmrep;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		/* mount is gone: cancel the outstanding request and bail */
		nfs_request_async_cancel(req);
		return (ENXIO);
	}
	nfsvers = nmp->nm_vers;

	nfsm_chain_null(&nmrep);

	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
	if (error == EINPROGRESS) /* async request restarted */
		return (error);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_READ);
	nfsm_chain_get_32(error, &nmrep, eof);
	nfsm_chain_get_32(error, &nmrep, retlen);
	if (!error) {
		/* server may return less than requested; copy what we got */
		*lenp = MIN(retlen, *lenp);
		error = nfsm_chain_get_uio(&nmrep, *lenp, uio);
	}
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (!lockerror)
		nfs_node_unlock(np);
	if (eofp) {
		/* a short read of zero bytes with no EOF flag still means EOF */
		if (!eof && !retlen)
			eof = 1;
		*eofp = eof;
	}
	nfsm_chain_cleanup(&nmrep);
	/* track last I/O time for named-attribute files */
	if (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)
		microuptime(&np->n_lastio);
	return (error);
}
419
/*
 * Build and fire off an asynchronous NFSv4 WRITE RPC
 * (compound: PUTFH, WRITE, GETATTR).
 *
 * iomode is the requested stability (UNSTABLE/DATASYNC/FILESYNC); for
 * async-mounted filesystems a stable write is downgraded to UNSTABLE.
 * len bytes are consumed from uio into the request.  *reqp receives the
 * in-flight request handle for nfs4_write_rpc_async_finish().
 */
int
nfs4_write_rpc_async(
	nfsnode_t np,
	uio_t uio,
	size_t len,
	thread_t thd,
	kauth_cred_t cred,
	int iomode,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsmount *nmp;
	mount_t mp;
	int error = 0, nfsvers, numops;
	nfs_stateid stateid;
	struct nfsm_chain nmreq;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* can't write to a referral trigger node */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/* for async mounts, don't bother sending sync write requests */
	if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
		iomode = NFS_WRITE_UNSTABLE;

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);

	// PUTFH, WRITE, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED + len);
	nfsm_chain_add_compound_header(error, &nmreq, "write", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_WRITE);
	/* pick the stateid for this open/lock owner to write under */
	nfs_get_stateid(np, thd, cred, &stateid);
	nfsm_chain_add_stateid(error, &nmreq, &stateid);
	nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
	nfsm_chain_add_32(error, &nmreq, iomode);
	nfsm_chain_add_32(error, &nmreq, len);
	if (!error)
		error = nfsm_chain_add_uio(&nmreq, uio, len);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, cb, reqp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	return (error);
}
481
/*
 * Complete an asynchronous NFSv4 WRITE RPC started by nfs4_write_rpc_async().
 *
 * Parses the reply: *rlenp receives the number of bytes the server wrote,
 * *iomodep the committed stability level, and *wverfp (if non-NULL) the
 * server's write verifier.  The mount's cached write verifier is updated
 * so a server reboot (verifier change) can be detected for COMMIT.
 * Returns EINPROGRESS if the async request was restarted.
 */
int
nfs4_write_rpc_async_finish(
	nfsnode_t np,
	struct nfsreq *req,
	int *iomodep,
	size_t *rlenp,
	uint64_t *wverfp)
{
	struct nfsmount *nmp;
	int error = 0, lockerror = ENOENT, nfsvers, numops, status;
	int committed = NFS_WRITE_FILESYNC;
	size_t rlen = 0;
	u_int64_t xid, wverf;
	mount_t mp;
	struct nfsm_chain nmrep;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		/* mount is gone: cancel the outstanding request and bail */
		nfs_request_async_cancel(req);
		return (ENXIO);
	}
	nfsvers = nmp->nm_vers;

	nfsm_chain_null(&nmrep);

	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
	if (error == EINPROGRESS) /* async request restarted */
		return (error);
	/* re-check the mount: it may have died while we were waiting */
	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		error = ENXIO;
	if (!error && (lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_WRITE);
	nfsm_chain_get_32(error, &nmrep, rlen);
	nfsmout_if(error);
	*rlenp = rlen;
	/* NOTE(review): rlen is size_t (unsigned), so this only catches 0 */
	if (rlen <= 0)
		error = NFSERR_IO;
	nfsm_chain_get_32(error, &nmrep, committed);
	nfsm_chain_get_64(error, &nmrep, wverf);
	nfsmout_if(error);
	if (wverfp)
		*wverfp = wverf;
	/* record/refresh the mount's write verifier under the mount lock */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
		nmp->nm_verf = wverf;
		nmp->nm_state |= NFSSTA_HASWRITEVERF;
	} else if (nmp->nm_verf != wverf) {
		nmp->nm_verf = wverf;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmrep);
	/* on an async mount, pretend unstable writes were committed */
	if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
		committed = NFS_WRITE_FILESYNC;
	*iomodep = committed;
	/* track last I/O time for named-attribute files */
	if (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)
		microuptime(&np->n_lastio);
	return (error);
}
551
/*
 * Perform an NFSv4 REMOVE RPC (compound: PUTFH, REMOVE, GETATTR) to delete
 * the named entry from directory dnp.
 *
 * Returns the result of the REMOVE operation itself (remove_error), not of
 * the trailing attribute refresh.  If the server is in its grace period
 * (NFSERR_GRACE), the whole request is retried after a short sleep.
 */
int
nfs4_remove_rpc(
	nfsnode_t dnp,
	char *name,
	int namelen,
	thread_t thd,
	kauth_cred_t cred)
{
	int error = 0, lockerror = ENOENT, remove_error = 0, status;
	struct nfsmount *nmp;
	int nfsvers, numops;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(dnp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* can't remove entries from a referral trigger directory */
	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
restart:
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, REMOVE, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED + namelen);
	nfsm_chain_add_compound_header(error, &nmreq, "remove", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_REMOVE);
	nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(dnp)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_REMOVE);
	/* capture REMOVE's result before parsing the rest of the reply */
	remove_error = error;
	nfsm_chain_check_change_info(error, &nmrep, dnp);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
	/* couldn't refresh the directory's attributes; force a re-fetch later */
	if (error && !lockerror)
		NATTRINVALIDATE(dnp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);

	if (!lockerror) {
		dnp->n_flag |= NMODIFIED;
		nfs_node_unlock(dnp);
	}
	if (error == NFSERR_GRACE) {
		/* server in grace period: wait a couple seconds and retry */
		tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
		goto restart;
	}

	return (remove_error);
}
624
/*
 * Perform an NFSv4 RENAME RPC
 * (compound: PUTFH(from), SAVEFH, PUTFH(to), RENAME, GETATTR(to),
 *  RESTOREFH, GETATTR(from)).
 *
 * Renames fnameptr in directory fdnp to tnameptr in directory tdnp.
 * Both directories' attributes are refreshed from the reply; if either
 * refresh fails, that directory's cached attributes are invalidated so
 * they will be re-fetched.
 */
int
nfs4_rename_rpc(
	nfsnode_t fdnp,
	char *fnameptr,
	int fnamelen,
	nfsnode_t tdnp,
	char *tnameptr,
	int tnamelen,
	vfs_context_t ctx)
{
	int error = 0, lockerror = ENOENT, status, nfsvers, numops;
	struct nfsmount *nmp;
	u_int64_t xid, savedxid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(fdnp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* neither directory may be a referral trigger */
	if (fdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	if (tdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	NFSREQ_SECINFO_SET(&si, fdnp, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH(FROM), SAVEFH, PUTFH(TO), RENAME, GETATTR(TO), RESTOREFH, GETATTR(FROM)
	numops = 7;
	nfsm_chain_build_alloc_init(error, &nmreq, 30 * NFSX_UNSIGNED + fnamelen + tnamelen);
	nfsm_chain_add_compound_header(error, &nmreq, "rename", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_RENAME);
	nfsm_chain_add_name(error, &nmreq, fnameptr, fnamelen, nmp);
	nfsm_chain_add_name(error, &nmreq, tnameptr, tnamelen, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, tdnp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, fdnp);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request(fdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	/* lock both directories for the attribute loads below */
	if ((lockerror = nfs_node_lock2(fdnp, tdnp)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_RENAME);
	nfsm_chain_check_change_info(error, &nmrep, fdnp);
	nfsm_chain_check_change_info(error, &nmrep, tdnp);
	/* directory attributes: if we don't get them, make sure to invalidate */
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	/* both loadattr calls must see the same xid; save and restore it */
	savedxid = xid;
	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, &xid);
	if (error && !lockerror)
		NATTRINVALIDATE(tdnp);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	xid = savedxid;
	nfsm_chain_loadattr(error, &nmrep, fdnp, nfsvers, &xid);
	if (error && !lockerror)
		NATTRINVALIDATE(fdnp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	if (!lockerror) {
		fdnp->n_flag |= NMODIFIED;
		tdnp->n_flag |= NMODIFIED;
		nfs_node_unlock2(fdnp, tdnp);
	}
	return (error);
}
716
717/*
718 * NFS V4 readdir RPC.
719 */
720int
721nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
722{
723	struct nfsmount *nmp;
724	int error = 0, lockerror, nfsvers, namedattr, rdirplus, bigcookies, numops;
725	int i, status, more_entries = 1, eof, bp_dropped = 0;
726	uint32_t nmreaddirsize, nmrsize;
727	uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed;
728	uint64_t cookie, lastcookie, xid, savedxid;
729	struct nfsm_chain nmreq, nmrep, nmrepsave;
730	fhandle_t fh;
731	struct nfs_vattr nvattr, *nvattrp;
732	struct nfs_dir_buf_header *ndbhp;
733	struct direntry *dp;
734	char *padstart, padlen;
735	const char *tag;
736	uint32_t entry_attrs[NFS_ATTR_BITMAP_LEN];
737	struct timeval now;
738	struct nfsreq_secinfo_args si;
739
740	nmp = NFSTONMP(dnp);
741	if (nfs_mount_gone(nmp))
742		return (ENXIO);
743	nfsvers = nmp->nm_vers;
744	nmreaddirsize = nmp->nm_readdirsize;
745	nmrsize = nmp->nm_rsize;
746	bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
747	namedattr = (dnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) ? 1 : 0;
748	rdirplus = (NMFLAG(nmp, RDIRPLUS) || namedattr) ? 1 : 0;
749	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
750		return (EINVAL);
751	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
752
753	/*
754	 * Set up attribute request for entries.
755	 * For READDIRPLUS functionality, get everything.
756	 * Otherwise, just get what we need for struct direntry.
757	 */
758	if (rdirplus) {
759		tag = "readdirplus";
760		NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, entry_attrs);
761		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEHANDLE);
762	} else {
763		tag = "readdir";
764		NFS_CLEAR_ATTRIBUTES(entry_attrs);
765		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_TYPE);
766		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEID);
767		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_MOUNTED_ON_FILEID);
768	}
769	NFS_BITMAP_SET(entry_attrs, NFS_FATTR_RDATTR_ERROR);
770
771	/* lock to protect access to cookie verifier */
772	if ((lockerror = nfs_node_lock(dnp)))
773		return (lockerror);
774
775	/* determine cookie to use, and move dp to the right offset */
776	ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
777	dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
778	if (ndbhp->ndbh_count) {
779		for (i=0; i < ndbhp->ndbh_count-1; i++)
780			dp = NFS_DIRENTRY_NEXT(dp);
781		cookie = dp->d_seekoff;
782		dp = NFS_DIRENTRY_NEXT(dp);
783	} else {
784		cookie = bp->nb_lblkno;
785		/* increment with every buffer read */
786		OSAddAtomic64(1, &nfsstats.readdir_bios);
787	}
788	lastcookie = cookie;
789
790	/*
791	 * The NFS client is responsible for the "." and ".." entries in the
792	 * directory.  So, we put them at the start of the first buffer.
793	 * Don't bother for attribute directories.
794	 */
795	if (((bp->nb_lblkno == 0) && (ndbhp->ndbh_count == 0)) &&
796	    !(dnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)) {
797		fh.fh_len = 0;
798		fhlen = rdirplus ? fh.fh_len + 1 : 0;
799		xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
800		/* "." */
801		namlen = 1;
802		reclen = NFS_DIRENTRY_LEN(namlen + xlen);
803		if (xlen)
804			bzero(&dp->d_name[namlen+1], xlen);
805		dp->d_namlen = namlen;
806		strlcpy(dp->d_name, ".", namlen+1);
807		dp->d_fileno = dnp->n_vattr.nva_fileid;
808		dp->d_type = DT_DIR;
809		dp->d_reclen = reclen;
810		dp->d_seekoff = 1;
811		padstart = dp->d_name + dp->d_namlen + 1 + xlen;
812		dp = NFS_DIRENTRY_NEXT(dp);
813		padlen = (char*)dp - padstart;
814		if (padlen > 0)
815			bzero(padstart, padlen);
816		if (rdirplus) /* zero out attributes */
817			bzero(NFS_DIR_BUF_NVATTR(bp, 0), sizeof(struct nfs_vattr));
818
819		/* ".." */
820		namlen = 2;
821		reclen = NFS_DIRENTRY_LEN(namlen + xlen);
822		if (xlen)
823			bzero(&dp->d_name[namlen+1], xlen);
824		dp->d_namlen = namlen;
825		strlcpy(dp->d_name, "..", namlen+1);
826		if (dnp->n_parent)
827			dp->d_fileno = VTONFS(dnp->n_parent)->n_vattr.nva_fileid;
828		else
829			dp->d_fileno = dnp->n_vattr.nva_fileid;
830		dp->d_type = DT_DIR;
831		dp->d_reclen = reclen;
832		dp->d_seekoff = 2;
833		padstart = dp->d_name + dp->d_namlen + 1 + xlen;
834		dp = NFS_DIRENTRY_NEXT(dp);
835		padlen = (char*)dp - padstart;
836		if (padlen > 0)
837			bzero(padstart, padlen);
838		if (rdirplus) /* zero out attributes */
839			bzero(NFS_DIR_BUF_NVATTR(bp, 1), sizeof(struct nfs_vattr));
840
841		ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
842		ndbhp->ndbh_count = 2;
843	}
844
845	/*
846	 * Loop around doing readdir(plus) RPCs of size nm_readdirsize until
847	 * the buffer is full (or we hit EOF).  Then put the remainder of the
848	 * results in the next buffer(s).
849	 */
850	nfsm_chain_null(&nmreq);
851	nfsm_chain_null(&nmrep);
852	while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) {
853
854		// PUTFH, GETATTR, READDIR
855		numops = 3;
856		nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
857		nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
858		numops--;
859		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
860		nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
861		numops--;
862		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
863		nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
864		numops--;
865		nfsm_chain_add_32(error, &nmreq, NFS_OP_READDIR);
866		nfsm_chain_add_64(error, &nmreq, (cookie <= 2) ? 0 : cookie);
867		nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf);
868		nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
869		nfsm_chain_add_32(error, &nmreq, nmrsize);
870		nfsm_chain_add_bitmap_supported(error, &nmreq, entry_attrs, nmp, dnp);
871		nfsm_chain_build_done(error, &nmreq);
872		nfsm_assert(error, (numops == 0), EPROTO);
873		nfs_node_unlock(dnp);
874		nfsmout_if(error);
875		error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
876
877		if ((lockerror = nfs_node_lock(dnp)))
878			error = lockerror;
879
880		savedxid = xid;
881		nfsm_chain_skip_tag(error, &nmrep);
882		nfsm_chain_get_32(error, &nmrep, numops);
883		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
884		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
885		nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
886		nfsm_chain_op_check(error, &nmrep, NFS_OP_READDIR);
887		nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf);
888		nfsm_chain_get_32(error, &nmrep, more_entries);
889
890		if (!lockerror) {
891			nfs_node_unlock(dnp);
892			lockerror = ENOENT;
893		}
894		nfsmout_if(error);
895
896		if (rdirplus)
897			microuptime(&now);
898
899		/* loop through the entries packing them into the buffer */
900		while (more_entries) {
901			/* Entry: COOKIE, NAME, FATTR */
902			nfsm_chain_get_64(error, &nmrep, cookie);
903			nfsm_chain_get_32(error, &nmrep, namlen);
904			nfsmout_if(error);
905			if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) {
906				/* we've got a big cookie, make sure flag is set */
907				lck_mtx_lock(&nmp->nm_lock);
908				nmp->nm_state |= NFSSTA_BIGCOOKIES;
909				lck_mtx_unlock(&nmp->nm_lock);
910				bigcookies = 1;
911			}
912			/* just truncate names that don't fit in direntry.d_name */
913			if (namlen <= 0) {
914				error = EBADRPC;
915				goto nfsmout;
916			}
917			if (namlen > (sizeof(dp->d_name)-1)) {
918				skiplen = namlen - sizeof(dp->d_name) + 1;
919				namlen = sizeof(dp->d_name) - 1;
920			} else {
921				skiplen = 0;
922			}
923			/* guess that fh size will be same as parent */
924			fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0;
925			xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
926			attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0;
927			reclen = NFS_DIRENTRY_LEN(namlen + xlen);
928			space_needed = reclen + attrlen;
929			space_free = nfs_dir_buf_freespace(bp, rdirplus);
930			if (space_needed > space_free) {
931				/*
932				 * We still have entries to pack, but we've
933				 * run out of room in the current buffer.
934				 * So we need to move to the next buffer.
935				 * The block# for the next buffer is the
936				 * last cookie in the current buffer.
937				 */
938nextbuffer:
939				ndbhp->ndbh_flags |= NDB_FULL;
940				nfs_buf_release(bp, 0);
941				bp_dropped = 1;
942				bp = NULL;
943				error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
944				nfsmout_if(error);
945				/* initialize buffer */
946				ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
947				ndbhp->ndbh_flags = 0;
948				ndbhp->ndbh_count = 0;
949				ndbhp->ndbh_entry_end = sizeof(*ndbhp);
950				ndbhp->ndbh_ncgen = dnp->n_ncgen;
951				space_free = nfs_dir_buf_freespace(bp, rdirplus);
952				dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
953				/* increment with every buffer read */
954				OSAddAtomic64(1, &nfsstats.readdir_bios);
955			}
956			nmrepsave = nmrep;
957			dp->d_fileno = cookie; /* placeholder */
958			dp->d_seekoff = cookie;
959			dp->d_namlen = namlen;
960			dp->d_reclen = reclen;
961			dp->d_type = DT_UNKNOWN;
962			nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name);
963			nfsmout_if(error);
964			dp->d_name[namlen] = '\0';
965			if (skiplen)
966				nfsm_chain_adv(error, &nmrep,
967					nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen));
968			nfsmout_if(error);
969			nvattrp = rdirplus ? NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count) : &nvattr;
970			error = nfs4_parsefattr(&nmrep, NULL, nvattrp, &fh, NULL, NULL);
971			if (!error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_ACL)) {
972				/* we do NOT want ACLs returned to us here */
973				NFS_BITMAP_CLR(nvattrp->nva_bitmap, NFS_FATTR_ACL);
974				if (nvattrp->nva_acl) {
975					kauth_acl_free(nvattrp->nva_acl);
976					nvattrp->nva_acl = NULL;
977				}
978			}
979			if (error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_RDATTR_ERROR)) {
980				/* OK, we may not have gotten all of the attributes but we will use what we can. */
981				if ((error == NFSERR_MOVED) || (error == NFSERR_INVAL)) {
982					/* set this up to look like a referral trigger */
983					nfs4_default_attrs_for_referral_trigger(dnp, dp->d_name, namlen, nvattrp, &fh);
984				}
985				error = 0;
986			}
987			/* check for more entries after this one */
988			nfsm_chain_get_32(error, &nmrep, more_entries);
989			nfsmout_if(error);
990
991			/* Skip any "." and ".." entries returned from server. */
992			/* Also skip any bothersome named attribute entries. */
993			if (((dp->d_name[0] == '.') && ((namlen == 1) || ((namlen == 2) && (dp->d_name[1] == '.')))) ||
994			    (namedattr && (namlen == 11) && (!strcmp(dp->d_name, "SUNWattr_ro") || !strcmp(dp->d_name, "SUNWattr_rw")))) {
995				lastcookie = cookie;
996				continue;
997			}
998
999			if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_TYPE))
1000				dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type));
1001			if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEID))
1002				dp->d_fileno = nvattrp->nva_fileid;
1003			if (rdirplus) {
1004				/* fileid is already in d_fileno, so stash xid in attrs */
1005				nvattrp->nva_fileid = savedxid;
1006				if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEHANDLE)) {
1007					fhlen = fh.fh_len + 1;
1008					xlen = fhlen + sizeof(time_t);
1009					reclen = NFS_DIRENTRY_LEN(namlen + xlen);
1010					space_needed = reclen + attrlen;
1011					if (space_needed > space_free) {
1012						/* didn't actually have the room... move on to next buffer */
1013						nmrep = nmrepsave;
1014						goto nextbuffer;
1015					}
1016					/* pack the file handle into the record */
1017					dp->d_name[dp->d_namlen+1] = fh.fh_len;
1018					bcopy(fh.fh_data, &dp->d_name[dp->d_namlen+2], fh.fh_len);
1019				} else {
1020					/* mark the file handle invalid */
1021					fh.fh_len = 0;
1022					fhlen = fh.fh_len + 1;
1023					xlen = fhlen + sizeof(time_t);
1024					reclen = NFS_DIRENTRY_LEN(namlen + xlen);
1025					bzero(&dp->d_name[dp->d_namlen+1], fhlen);
1026				}
1027				*(time_t*)(&dp->d_name[dp->d_namlen+1+fhlen]) = now.tv_sec;
1028				dp->d_reclen = reclen;
1029			}
1030			padstart = dp->d_name + dp->d_namlen + 1 + xlen;
1031			ndbhp->ndbh_count++;
1032			lastcookie = cookie;
1033
1034			/* advance to next direntry in buffer */
1035			dp = NFS_DIRENTRY_NEXT(dp);
1036			ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
1037			/* zero out the pad bytes */
1038			padlen = (char*)dp - padstart;
1039			if (padlen > 0)
1040				bzero(padstart, padlen);
1041		}
1042		/* Finally, get the eof boolean */
1043		nfsm_chain_get_32(error, &nmrep, eof);
1044		nfsmout_if(error);
1045		if (eof) {
1046			ndbhp->ndbh_flags |= (NDB_FULL|NDB_EOF);
1047			nfs_node_lock_force(dnp);
1048			dnp->n_eofcookie = lastcookie;
1049			nfs_node_unlock(dnp);
1050		} else {
1051			more_entries = 1;
1052		}
1053		if (bp_dropped) {
1054			nfs_buf_release(bp, 0);
1055			bp = NULL;
1056			break;
1057		}
1058		if ((lockerror = nfs_node_lock(dnp)))
1059			error = lockerror;
1060		nfsmout_if(error);
1061		nfsm_chain_cleanup(&nmrep);
1062		nfsm_chain_null(&nmreq);
1063	}
1064nfsmout:
1065	if (bp_dropped && bp)
1066		nfs_buf_release(bp, 0);
1067	if (!lockerror)
1068		nfs_node_unlock(dnp);
1069	nfsm_chain_cleanup(&nmreq);
1070	nfsm_chain_cleanup(&nmrep);
1071	return (bp_dropped ? NFSERR_DIRBUFDROPPED : error);
1072}
1073
/*
 * Build and send an asynchronous NFSv4 LOOKUP compound request for "name"
 * in directory "dnp" (LOOKUPP is used instead when the name is "..").
 * The in-flight request is returned via *reqp; the caller completes it
 * with nfs4_lookup_rpc_async_finish().
 * Returns 0 on success, ENXIO if the mount is gone, EINVAL for referral
 * trigger nodes, or an error from building/sending the request.
 */
int
nfs4_lookup_rpc_async(
	nfsnode_t dnp,
	char *name,
	int namelen,
	vfs_context_t ctx,
	struct nfsreq **reqp)
{
	int error = 0, isdotdot = 0, nfsvers, numops;
	struct nfsm_chain nmreq;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	struct nfsmount *nmp;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(dnp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* referral trigger nodes have no usable file handle to look up against */
	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	if ((name[0] == '.') && (name[1] == '.') && (namelen == 2)) {
		isdotdot = 1;
		NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
	} else {
		NFSREQ_SECINFO_SET(&si, dnp, dnp->n_fhp, dnp->n_fhsize, name, namelen);
	}

	nfsm_chain_null(&nmreq);

	// PUTFH, GETATTR, LOOKUP(P), GETFH, GETATTR (FH)
	numops = 5;
	nfsm_chain_build_alloc_init(error, &nmreq, 20 * NFSX_UNSIGNED + namelen);
	nfsm_chain_add_compound_header(error, &nmreq, "lookup", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
	numops--;
	if (isdotdot) {
		/* LOOKUPP takes no name; it looks up the parent of the current FH */
		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUPP);
	} else {
		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
		nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
	}
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	/* some ".zfs" directories can't handle being asked for some attributes */
	if ((dnp->n_flag & NISDOTZFS) && !isdotdot)
		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
	if ((dnp->n_flag & NISDOTZFSCHILD) && isdotdot)
		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
	if (((namelen == 4) && (name[0] == '.') && (name[1] == 'z') && (name[2] == 'f') && (name[3] == 's')))
		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND,
			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, reqp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	return (error);
}
1143
1144
1145int
1146nfs4_lookup_rpc_async_finish(
1147	nfsnode_t dnp,
1148	char *name,
1149	int namelen,
1150	vfs_context_t ctx,
1151	struct nfsreq *req,
1152	u_int64_t *xidp,
1153	fhandle_t *fhp,
1154	struct nfs_vattr *nvap)
1155{
1156	int error = 0, lockerror = ENOENT, status, nfsvers, numops, isdotdot = 0;
1157	uint32_t op = NFS_OP_LOOKUP;
1158	u_int64_t xid;
1159	struct nfsmount *nmp;
1160	struct nfsm_chain nmrep;
1161
1162	nmp = NFSTONMP(dnp);
1163	nfsvers = nmp->nm_vers;
1164	if ((name[0] == '.') && (name[1] == '.') && (namelen == 2))
1165		isdotdot = 1;
1166
1167	nfsm_chain_null(&nmrep);
1168
1169	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
1170
1171	if ((lockerror = nfs_node_lock(dnp)))
1172		error = lockerror;
1173	nfsm_chain_skip_tag(error, &nmrep);
1174	nfsm_chain_get_32(error, &nmrep, numops);
1175	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
1176	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
1177	if (xidp)
1178		*xidp = xid;
1179	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
1180
1181	nfsm_chain_op_check(error, &nmrep, (isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP));
1182	nfsmout_if(error || !fhp || !nvap);
1183	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETFH);
1184	nfsm_chain_get_32(error, &nmrep, fhp->fh_len);
1185	nfsm_chain_get_opaque(error, &nmrep, fhp->fh_len, fhp->fh_data);
1186	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
1187	if ((error == NFSERR_MOVED) || (error == NFSERR_INVAL)) {
1188		/* set this up to look like a referral trigger */
1189		nfs4_default_attrs_for_referral_trigger(dnp, name, namelen, nvap, fhp);
1190		error = 0;
1191	} else {
1192		nfsmout_if(error);
1193		error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
1194	}
1195nfsmout:
1196	if (!lockerror)
1197		nfs_node_unlock(dnp);
1198	nfsm_chain_cleanup(&nmrep);
1199	if (!error && (op == NFS_OP_LOOKUP) && (nmp->nm_state & NFSSTA_NEEDSECINFO)) {
1200		/* We still need to get SECINFO to set default for mount. */
1201		/* Do so for the first LOOKUP that returns successfully. */
1202		struct nfs_sec sec;
1203
1204		sec.count = NX_MAX_SEC_FLAVORS;
1205		error = nfs4_secinfo_rpc(nmp, &req->r_secinfo, vfs_context_ucred(ctx), sec.flavors, &sec.count);
1206		/* [sigh] some implementations return "illegal" error for unsupported ops */
1207		if (error == NFSERR_OP_ILLEGAL)
1208			error = 0;
1209		if (!error) {
1210			/* set our default security flavor to the first in the list */
1211			lck_mtx_lock(&nmp->nm_lock);
1212			if (sec.count)
1213				nmp->nm_auth = sec.flavors[0];
1214			nmp->nm_state &= ~NFSSTA_NEEDSECINFO;
1215			lck_mtx_unlock(&nmp->nm_lock);
1216		}
1217	}
1218	return (error);
1219}
1220
/*
 * NFSv4 COMMIT RPC: ask the server to commit previously-written data in
 * the range [offset, offset+count) to stable storage.  The write verifier
 * returned by the server is compared against the caller's "wverf"; a
 * mismatch means the server likely rebooted and the caller gets
 * NFSERR_STALEWRITEVERF so it knows to rewrite the data.
 */
int
nfs4_commit_rpc(
	nfsnode_t np,
	uint64_t offset,
	uint64_t count,
	kauth_cred_t cred,
	uint64_t wverf)
{
	struct nfsmount *nmp;
	int error = 0, lockerror, status, nfsvers, numops;
	u_int64_t xid, newwverf;
	uint32_t count32;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	/* nothing to commit if we haven't seen a write verifier yet */
	if (!(nmp->nm_state & NFSSTA_HASWRITEVERF))
		return (0);
	nfsvers = nmp->nm_vers;

	/*
	 * COMMIT's count is 32 bits; a count of 0 means commit everything
	 * from offset onward, so use 0 for ranges too big to express.
	 */
	if (count > UINT32_MAX)
		count32 = 0;
	else
		count32 = count;

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, COMMIT, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 19 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "commit", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_COMMIT);
	nfsm_chain_add_64(error, &nmreq, offset);
	nfsm_chain_add_32(error, &nmreq, count32);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
			current_thread(), cred, &si, 0, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_COMMIT);
	nfsm_chain_get_64(error, &nmrep, newwverf);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (!lockerror)
		nfs_node_unlock(np);
	nfsmout_if(error);
	/* remember the latest verifier; flag staleness if it changed */
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_verf != newwverf)
		nmp->nm_verf = newwverf;
	if (wverf != newwverf)
		error = NFSERR_STALEWRITEVERF;
	lck_mtx_unlock(&nmp->nm_lock);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
1298
/*
 * Fetch "pathconf" style file system attributes (max link count, max name
 * length, no-trunc, chown-restricted, case sensitivity) for node "np" via
 * an NFSv4 GETATTR, filling in *nfsap and refreshing the node's cached
 * attributes as a side effect.
 */
int
nfs4_pathconf_rpc(
	nfsnode_t np,
	struct nfs_fsattr *nfsap,
	vfs_context_t ctx)
{
	u_int64_t xid;
	int error = 0, lockerror, status, nfsvers, numops;
	struct nfsm_chain nmreq, nmrep;
	struct nfsmount *nmp = NFSTONMP(np);
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	struct nfs_vattr nvattr;
	struct nfsreq_secinfo_args si;

	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	NVATTR_INIT(&nvattr);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	/* NFSv4: fetch "pathconf" info for this node */
	// PUTFH, GETATTR
	numops = 2;
	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "pathconf", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	/* ask for the usual attributes plus the pathconf-specific ones */
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_MAXLINK);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_MAXNAME);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_NO_TRUNC);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_CHOWN_RESTRICTED);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_CASE_INSENSITIVE);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_CASE_PRESERVING);
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsmout_if(error);
	error = nfs4_parsefattr(&nmrep, nfsap, &nvattr, NULL, NULL, NULL);
	nfsmout_if(error);
	/* opportunistically refresh the node's attribute cache */
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	if (!error)
		nfs_loadattrcache(np, &nvattr, &xid, 0);
	if (!lockerror)
		nfs_node_unlock(np);
nfsmout:
	NVATTR_CLEANUP(&nvattr);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
1366
/*
 * NFSv4 getattr vnode op: fetch the node's (possibly cached) attributes —
 * optionally including the ACL — and translate them into the VFS
 * vnode_attr structure.  Only attributes the server actually supplied
 * (per nva_bitmap) are returned to the caller.
 */
int
nfs4_vnop_getattr(
	struct vnop_getattr_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		struct vnode_attr *a_vap;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode_attr *vap = ap->a_vap;
	struct nfsmount *nmp;
	struct nfs_vattr nva;
	int error, acls, ngaflags;

	nmp = VTONMP(ap->a_vp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	/* does this mount support ACLs at all? */
	acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL);

	ngaflags = NGA_CACHED;
	/* only fetch the ACL if the caller asked for it and the server has them */
	if (VATTR_IS_ACTIVE(vap, va_acl) && acls)
		ngaflags |= NGA_ACL;
	error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, ngaflags);
	if (error)
		return (error);

	/* copy what we have in nva to *a_vap */
	if (VATTR_IS_ACTIVE(vap, va_rdev) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_RAWDEV)) {
		dev_t rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
		VATTR_RETURN(vap, va_rdev, rdev);
	}
	if (VATTR_IS_ACTIVE(vap, va_nlink) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_NUMLINKS))
		VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
	if (VATTR_IS_ACTIVE(vap, va_data_size) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SIZE))
		VATTR_RETURN(vap, va_data_size, nva.nva_size);
	// VATTR_RETURN(vap, va_data_alloc, ???);
	// VATTR_RETURN(vap, va_total_size, ???);
	if (VATTR_IS_ACTIVE(vap, va_total_alloc) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SPACE_USED))
		VATTR_RETURN(vap, va_total_alloc, nva.nva_bytes);
	if (VATTR_IS_ACTIVE(vap, va_uid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER))
		VATTR_RETURN(vap, va_uid, nva.nva_uid);
	if (VATTR_IS_ACTIVE(vap, va_uuuid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER))
		VATTR_RETURN(vap, va_uuuid, nva.nva_uuuid);
	if (VATTR_IS_ACTIVE(vap, va_gid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER_GROUP))
		VATTR_RETURN(vap, va_gid, nva.nva_gid);
	if (VATTR_IS_ACTIVE(vap, va_guuid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER_GROUP))
		VATTR_RETURN(vap, va_guuid, nva.nva_guuid);
	if (VATTR_IS_ACTIVE(vap, va_mode)) {
		/* ACL-only mounts (or no mode from server): fake a permissive mode */
		if (NMFLAG(nmp, ACLONLY) || !NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_MODE))
			VATTR_RETURN(vap, va_mode, 0777);
		else
			VATTR_RETURN(vap, va_mode, nva.nva_mode);
	}
	/* map the NFS archive/hidden flags to BSD file flags */
	if (VATTR_IS_ACTIVE(vap, va_flags) &&
	    (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE) ||
	     NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN) ||
	     (nva.nva_flags & NFS_FFLAG_TRIGGER))) {
		uint32_t flags = 0;
		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE) &&
		    (nva.nva_flags & NFS_FFLAG_ARCHIVED))
			flags |= SF_ARCHIVED;
		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN) &&
		    (nva.nva_flags & NFS_FFLAG_HIDDEN))
			flags |= UF_HIDDEN;
		VATTR_RETURN(vap, va_flags, flags);
	}
	if (VATTR_IS_ACTIVE(vap, va_create_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_CREATE)) {
		vap->va_create_time.tv_sec = nva.nva_timesec[NFSTIME_CREATE];
		vap->va_create_time.tv_nsec = nva.nva_timensec[NFSTIME_CREATE];
		VATTR_SET_SUPPORTED(vap, va_create_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_access_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_ACCESS)) {
		vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
		vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
		VATTR_SET_SUPPORTED(vap, va_access_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_modify_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_MODIFY)) {
		vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
		vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
		VATTR_SET_SUPPORTED(vap, va_modify_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_change_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_METADATA)) {
		vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
		vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
		VATTR_SET_SUPPORTED(vap, va_change_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_backup_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_BACKUP)) {
		vap->va_backup_time.tv_sec = nva.nva_timesec[NFSTIME_BACKUP];
		vap->va_backup_time.tv_nsec = nva.nva_timensec[NFSTIME_BACKUP];
		VATTR_SET_SUPPORTED(vap, va_backup_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_fileid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_FILEID))
		VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
	if (VATTR_IS_ACTIVE(vap, va_type) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TYPE))
		VATTR_RETURN(vap, va_type, nva.nva_type);
	if (VATTR_IS_ACTIVE(vap, va_filerev) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_CHANGE))
		VATTR_RETURN(vap, va_filerev, nva.nva_change);

	if (VATTR_IS_ACTIVE(vap, va_acl) && acls) {
		/* hand the ACL off to the caller; NULL it so NVATTR_CLEANUP won't free it */
		VATTR_RETURN(vap, va_acl, nva.nva_acl);
		nva.nva_acl = NULL;
	}

	// other attrs we might support someday:
	// VATTR_RETURN(vap, va_encoding, ??? /* potentially unnormalized UTF-8? */);

	NVATTR_CLEANUP(&nva);
	return (error);
}
1476
/*
 * NFSv4 SETATTR RPC.  Sends a PUTFH+SETATTR+GETATTR compound, records
 * which attributes the server reports as actually set, and — if the server
 * rejects an ACL+mode combination with EINVAL — retries the request
 * without va_mode.
 */
int
nfs4_setattr_rpc(
	nfsnode_t np,
	struct vnode_attr *vap,
	vfs_context_t ctx)
{
	struct nfsmount *nmp = NFSTONMP(np);
	int error = 0, setattr_error = 0, lockerror = ENOENT, status, nfsvers, numops;
	u_int64_t xid, nextxid;
	struct nfsm_chain nmreq, nmrep;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
	uint32_t getbitmap[NFS_ATTR_BITMAP_LEN];
	uint32_t setbitmap[NFS_ATTR_BITMAP_LEN];
	nfs_stateid stateid;
	struct nfsreq_secinfo_args si;

	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	if (VATTR_IS_ACTIVE(vap, va_flags) && (vap->va_flags & ~(SF_ARCHIVED|UF_HIDDEN))) {
		/* we don't support setting unsupported flags (duh!) */
		if (vap->va_active & ~VNODE_ATTR_va_flags)
			return (EINVAL);	/* return EINVAL if other attributes also set */
		else
			return (ENOTSUP);	/* return ENOTSUP for chflags(2) */
	}

	/* don't bother requesting some changes if they don't look like they are changing */
	if (VATTR_IS_ACTIVE(vap, va_uid) && (vap->va_uid == np->n_vattr.nva_uid))
		VATTR_CLEAR_ACTIVE(vap, va_uid);
	if (VATTR_IS_ACTIVE(vap, va_gid) && (vap->va_gid == np->n_vattr.nva_gid))
		VATTR_CLEAR_ACTIVE(vap, va_gid);
	if (VATTR_IS_ACTIVE(vap, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &np->n_vattr.nva_uuuid))
		VATTR_CLEAR_ACTIVE(vap, va_uuuid);
	if (VATTR_IS_ACTIVE(vap, va_guuid) && kauth_guid_equal(&vap->va_guuid, &np->n_vattr.nva_guuid))
		VATTR_CLEAR_ACTIVE(vap, va_guuid);

tryagain:
	/* do nothing if no attributes will be sent */
	nfs_vattr_set_bitmap(nmp, bitmap, vap);
	if (!bitmap[0] && !bitmap[1])
		return (0);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	/*
	 * Prepare GETATTR bitmap: if we are setting the ACL or mode, we
	 * need to invalidate any cached ACL.  And if we had an ACL cached,
	 * we might as well also fetch the new value.
	 */
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, getbitmap);
	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL) ||
	    NFS_BITMAP_ISSET(bitmap, NFS_FATTR_MODE)) {
		if (NACLVALID(np))
			NFS_BITMAP_SET(getbitmap, NFS_FATTR_ACL);
		NACLINVALIDATE(np);
	}

	// PUTFH, SETATTR, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 40 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "setattr", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_SETATTR);
	/* truncation (va_data_size) uses an open/lock stateid; other attrs use the all-zero stateid */
	if (VATTR_IS_ACTIVE(vap, va_data_size))
		nfs_get_stateid(np, vfs_context_thread(ctx), vfs_context_ucred(ctx), &stateid);
	else
		stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0;
	nfsm_chain_add_stateid(error, &nmreq, &stateid);
	nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, getbitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_SETATTR);
	nfsmout_if(error == EBADRPC);
	/*
	 * SETATTR reports the bitmap of attributes it actually set even on
	 * failure; stash the error and parse the bitmap before surfacing it.
	 */
	setattr_error = error;
	error = 0;
	bmlen = NFS_ATTR_BITMAP_LEN;
	nfsm_chain_get_bitmap(error, &nmrep, setbitmap, bmlen);
	if (!error) {
		if (VATTR_IS_ACTIVE(vap, va_data_size) && (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
			microuptime(&np->n_lastio);
		nfs_vattr_set_supported(setbitmap, vap);
		error = setattr_error;
	}
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (error)
		NATTRINVALIDATE(np);
	/*
	 * We just changed the attributes and we want to make sure that we
	 * see the latest attributes.  Get the next XID.  If it's not the
	 * next XID after the SETATTR XID, then it's possible that another
	 * RPC was in flight at the same time and it might put stale attributes
	 * in the cache.  In that case, we invalidate the attributes and set
	 * the attribute cache XID to guarantee that newer attributes will
	 * get loaded next.
	 */
	nextxid = 0;
	nfs_get_xid(&nextxid);
	if (nextxid != (xid + 1)) {
		np->n_xid = nextxid;
		NATTRINVALIDATE(np);
	}
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	if ((setattr_error == EINVAL) && VATTR_IS_ACTIVE(vap, va_acl) && VATTR_IS_ACTIVE(vap, va_mode) && !NMFLAG(nmp, ACLONLY)) {
		/*
		 * Some server's may not like ACL/mode combos that get sent.
		 * If it looks like that's what the server choked on, try setting
		 * just the ACL and not the mode (unless it looks like everything
		 * but mode was already successfully set).
		 */
		/*
		 * NOTE(review): NFS_FATTR_MODE is an attribute *number*, not a
		 * word-1 bit mask, so OR-ing it directly into setbitmap[1] looks
		 * suspicious — confirm against the NFS_BITMAP_* macros.
		 */
		if (((bitmap[0] & setbitmap[0]) != bitmap[0]) ||
		    ((bitmap[1] & (setbitmap[1]|NFS_FATTR_MODE)) != bitmap[1])) {
			VATTR_CLEAR_ACTIVE(vap, va_mode);
			error = 0;
			goto tryagain;
		}
	}
	return (error);
}
1621
1622/*
1623 * Wait for any pending recovery to complete.
1624 */
1625int
1626nfs_mount_state_wait_for_recovery(struct nfsmount *nmp)
1627{
1628	struct timespec ts = { 1, 0 };
1629	int error = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
1630
1631	lck_mtx_lock(&nmp->nm_lock);
1632	while (nmp->nm_state & NFSSTA_RECOVER) {
1633		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
1634			break;
1635		nfs_mount_sock_thread_wake(nmp);
1636		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
1637		slpflag = 0;
1638	}
1639	lck_mtx_unlock(&nmp->nm_lock);
1640
1641	return (error);
1642}
1643
1644/*
1645 * We're about to use/manipulate NFS mount's open/lock state.
1646 * Wait for any pending state recovery to complete, then
1647 * mark the state as being in use (which will hold off
1648 * the recovery thread until we're done).
1649 */
1650int
1651nfs_mount_state_in_use_start(struct nfsmount *nmp, thread_t thd)
1652{
1653	struct timespec ts = { 1, 0 };
1654	int error = 0, slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
1655
1656	if (nfs_mount_gone(nmp))
1657		return (ENXIO);
1658	lck_mtx_lock(&nmp->nm_lock);
1659	if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) {
1660		lck_mtx_unlock(&nmp->nm_lock);
1661		return (ENXIO);
1662	}
1663	while (nmp->nm_state & NFSSTA_RECOVER) {
1664		if ((error = nfs_sigintr(nmp, NULL, thd, 1)))
1665			break;
1666		nfs_mount_sock_thread_wake(nmp);
1667		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
1668		slpflag = 0;
1669	}
1670	if (!error)
1671		nmp->nm_stateinuse++;
1672	lck_mtx_unlock(&nmp->nm_lock);
1673
1674	return (error);
1675}
1676
1677/*
1678 * We're done using/manipulating the NFS mount's open/lock
1679 * state.  If the given error indicates that recovery should
1680 * be performed, we'll initiate recovery.
1681 */
1682int
1683nfs_mount_state_in_use_end(struct nfsmount *nmp, int error)
1684{
1685	int restart = nfs_mount_state_error_should_restart(error);
1686
1687	if (nfs_mount_gone(nmp))
1688		return (restart);
1689	lck_mtx_lock(&nmp->nm_lock);
1690	if (restart && (error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE)) {
1691		printf("nfs_mount_state_in_use_end: error %d, initiating recovery for %s, 0x%x\n",
1692			error, vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
1693		nfs_need_recover(nmp, error);
1694	}
1695	if (nmp->nm_stateinuse > 0)
1696		nmp->nm_stateinuse--;
1697	else
1698		panic("NFS mount state in use count underrun");
1699	if (!nmp->nm_stateinuse && (nmp->nm_state & NFSSTA_RECOVER))
1700		wakeup(&nmp->nm_stateinuse);
1701	lck_mtx_unlock(&nmp->nm_lock);
1702	if (error == NFSERR_GRACE)
1703		tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
1704
1705	return (restart);
1706}
1707
1708/*
1709 * Does the error mean we should restart/redo a state-related operation?
1710 */
1711int
1712nfs_mount_state_error_should_restart(int error)
1713{
1714	switch (error) {
1715	case NFSERR_STALE_STATEID:
1716	case NFSERR_STALE_CLIENTID:
1717	case NFSERR_ADMIN_REVOKED:
1718	case NFSERR_EXPIRED:
1719	case NFSERR_OLD_STATEID:
1720	case NFSERR_BAD_STATEID:
1721	case NFSERR_GRACE:
1722		return (1);
1723	}
1724	return (0);
1725}
1726
1727/*
1728 * In some cases we may want to limit how many times we restart a
1729 * state-related operation - e.g. we're repeatedly getting NFSERR_GRACE.
1730 * Base the limit on the lease (as long as it's not too short).
1731 */
1732uint
1733nfs_mount_state_max_restarts(struct nfsmount *nmp)
1734{
1735	return (MAX(nmp->nm_fsattr.nfsa_lease, 60));
1736}
1737
1738/*
1739 * Does the error mean we probably lost a delegation?
1740 */
1741int
1742nfs_mount_state_error_delegation_lost(int error)
1743{
1744	switch (error) {
1745	case NFSERR_STALE_STATEID:
1746	case NFSERR_ADMIN_REVOKED:
1747	case NFSERR_EXPIRED:
1748	case NFSERR_OLD_STATEID:
1749	case NFSERR_BAD_STATEID:
1750	case NFSERR_GRACE: /* ugh! (stupid) RFC 3530 specifically disallows CLAIM_DELEGATE_CUR during grace period? */
1751		return (1);
1752	}
1753	return (0);
1754}
1755
1756
1757/*
1758 * Mark an NFS node's open state as busy.
1759 */
1760int
1761nfs_open_state_set_busy(nfsnode_t np, thread_t thd)
1762{
1763	struct nfsmount *nmp;
1764	struct timespec ts = {2, 0};
1765	int error = 0, slpflag;
1766
1767	nmp = NFSTONMP(np);
1768	if (nfs_mount_gone(nmp))
1769		return (ENXIO);
1770	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
1771
1772	lck_mtx_lock(&np->n_openlock);
1773	while (np->n_openflags & N_OPENBUSY) {
1774		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
1775			break;
1776		np->n_openflags |= N_OPENWANT;
1777		msleep(&np->n_openflags, &np->n_openlock, slpflag, "nfs_open_state_set_busy", &ts);
1778		slpflag = 0;
1779	}
1780	if (!error)
1781		np->n_openflags |= N_OPENBUSY;
1782	lck_mtx_unlock(&np->n_openlock);
1783
1784	return (error);
1785}
1786
1787/*
1788 * Clear an NFS node's open state busy flag and wake up
1789 * anyone wanting it.
1790 */
1791void
1792nfs_open_state_clear_busy(nfsnode_t np)
1793{
1794	int wanted;
1795
1796	lck_mtx_lock(&np->n_openlock);
1797	if (!(np->n_openflags & N_OPENBUSY))
1798		panic("nfs_open_state_clear_busy");
1799	wanted = (np->n_openflags & N_OPENWANT);
1800	np->n_openflags &= ~(N_OPENBUSY|N_OPENWANT);
1801	lck_mtx_unlock(&np->n_openlock);
1802	if (wanted)
1803		wakeup(&np->n_openflags);
1804}
1805
1806/*
1807 * Search a mount's open owner list for the owner for this credential.
1808 * If not found and "alloc" is set, then allocate a new one.
1809 */
1810struct nfs_open_owner *
1811nfs_open_owner_find(struct nfsmount *nmp, kauth_cred_t cred, int alloc)
1812{
1813	uid_t uid = kauth_cred_getuid(cred);
1814	struct nfs_open_owner *noop, *newnoop = NULL;
1815
1816tryagain:
1817	lck_mtx_lock(&nmp->nm_lock);
1818	TAILQ_FOREACH(noop, &nmp->nm_open_owners, noo_link) {
1819		if (kauth_cred_getuid(noop->noo_cred) == uid)
1820			break;
1821	}
1822
1823	if (!noop && !newnoop && alloc) {
1824		lck_mtx_unlock(&nmp->nm_lock);
1825		MALLOC(newnoop, struct nfs_open_owner *, sizeof(struct nfs_open_owner), M_TEMP, M_WAITOK);
1826		if (!newnoop)
1827			return (NULL);
1828		bzero(newnoop, sizeof(*newnoop));
1829		lck_mtx_init(&newnoop->noo_lock, nfs_open_grp, LCK_ATTR_NULL);
1830		newnoop->noo_mount = nmp;
1831		kauth_cred_ref(cred);
1832		newnoop->noo_cred = cred;
1833		newnoop->noo_name = OSAddAtomic(1, &nfs_open_owner_seqnum);
1834		TAILQ_INIT(&newnoop->noo_opens);
1835		goto tryagain;
1836	}
1837	if (!noop && newnoop) {
1838		newnoop->noo_flags |= NFS_OPEN_OWNER_LINK;
1839		TAILQ_INSERT_HEAD(&nmp->nm_open_owners, newnoop, noo_link);
1840		noop = newnoop;
1841	}
1842	lck_mtx_unlock(&nmp->nm_lock);
1843
1844	if (newnoop && (noop != newnoop))
1845		nfs_open_owner_destroy(newnoop);
1846
1847	if (noop)
1848		nfs_open_owner_ref(noop);
1849
1850	return (noop);
1851}
1852
1853/*
1854 * destroy an open owner that's no longer needed
1855 */
1856void
1857nfs_open_owner_destroy(struct nfs_open_owner *noop)
1858{
1859	if (noop->noo_cred)
1860		kauth_cred_unref(&noop->noo_cred);
1861	lck_mtx_destroy(&noop->noo_lock, nfs_open_grp);
1862	FREE(noop, M_TEMP);
1863}
1864
1865/*
1866 * acquire a reference count on an open owner
1867 */
1868void
1869nfs_open_owner_ref(struct nfs_open_owner *noop)
1870{
1871	lck_mtx_lock(&noop->noo_lock);
1872	noop->noo_refcnt++;
1873	lck_mtx_unlock(&noop->noo_lock);
1874}
1875
1876/*
1877 * drop a reference count on an open owner and destroy it if
1878 * it is no longer referenced and no longer on the mount's list.
1879 */
1880void
1881nfs_open_owner_rele(struct nfs_open_owner *noop)
1882{
1883	lck_mtx_lock(&noop->noo_lock);
1884	if (noop->noo_refcnt < 1)
1885		panic("nfs_open_owner_rele: no refcnt");
1886	noop->noo_refcnt--;
1887	if (!noop->noo_refcnt && (noop->noo_flags & NFS_OPEN_OWNER_BUSY))
1888		panic("nfs_open_owner_rele: busy");
1889	/* XXX we may potentially want to clean up idle/unused open owner structures */
1890	if (noop->noo_refcnt || (noop->noo_flags & NFS_OPEN_OWNER_LINK)) {
1891		lck_mtx_unlock(&noop->noo_lock);
1892		return;
1893	}
1894	/* owner is no longer referenced or linked to mount, so destroy it */
1895	lck_mtx_unlock(&noop->noo_lock);
1896	nfs_open_owner_destroy(noop);
1897}
1898
1899/*
1900 * Mark an open owner as busy because we are about to
1901 * start an operation that uses and updates open owner state.
1902 */
1903int
1904nfs_open_owner_set_busy(struct nfs_open_owner *noop, thread_t thd)
1905{
1906	struct nfsmount *nmp;
1907	struct timespec ts = {2, 0};
1908	int error = 0, slpflag;
1909
1910	nmp = noop->noo_mount;
1911	if (nfs_mount_gone(nmp))
1912		return (ENXIO);
1913	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
1914
1915	lck_mtx_lock(&noop->noo_lock);
1916	while (noop->noo_flags & NFS_OPEN_OWNER_BUSY) {
1917		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
1918			break;
1919		noop->noo_flags |= NFS_OPEN_OWNER_WANT;
1920		msleep(noop, &noop->noo_lock, slpflag, "nfs_open_owner_set_busy", &ts);
1921		slpflag = 0;
1922	}
1923	if (!error)
1924		noop->noo_flags |= NFS_OPEN_OWNER_BUSY;
1925	lck_mtx_unlock(&noop->noo_lock);
1926
1927	return (error);
1928}
1929
1930/*
1931 * Clear the busy flag on an open owner and wake up anyone waiting
1932 * to mark it busy.
1933 */
1934void
1935nfs_open_owner_clear_busy(struct nfs_open_owner *noop)
1936{
1937	int wanted;
1938
1939	lck_mtx_lock(&noop->noo_lock);
1940	if (!(noop->noo_flags & NFS_OPEN_OWNER_BUSY))
1941		panic("nfs_open_owner_clear_busy");
1942	wanted = (noop->noo_flags & NFS_OPEN_OWNER_WANT);
1943	noop->noo_flags &= ~(NFS_OPEN_OWNER_BUSY|NFS_OPEN_OWNER_WANT);
1944	lck_mtx_unlock(&noop->noo_lock);
1945	if (wanted)
1946		wakeup(noop);
1947}
1948
1949/*
1950 * Given an open/lock owner and an error code, increment the
1951 * sequence ID if appropriate.
1952 */
1953void
1954nfs_owner_seqid_increment(struct nfs_open_owner *noop, struct nfs_lock_owner *nlop, int error)
1955{
1956	switch (error) {
1957	case NFSERR_STALE_CLIENTID:
1958	case NFSERR_STALE_STATEID:
1959	case NFSERR_OLD_STATEID:
1960	case NFSERR_BAD_STATEID:
1961	case NFSERR_BAD_SEQID:
1962	case NFSERR_BADXDR:
1963	case NFSERR_RESOURCE:
1964	case NFSERR_NOFILEHANDLE:
1965		/* do not increment the open seqid on these errors */
1966		return;
1967	}
1968	if (noop)
1969		noop->noo_seqid++;
1970	if (nlop)
1971		nlop->nlo_seqid++;
1972}
1973
1974/*
1975 * Search a node's open file list for any conflicts with this request.
1976 * Also find this open owner's open file structure.
1977 * If not found and "alloc" is set, then allocate one.
1978 */
1979int
1980nfs_open_file_find(
1981	nfsnode_t np,
1982	struct nfs_open_owner *noop,
1983	struct nfs_open_file **nofpp,
1984	uint32_t accessMode,
1985	uint32_t denyMode,
1986	int alloc)
1987{
1988	*nofpp = NULL;
1989	return nfs_open_file_find_internal(np, noop, nofpp, accessMode, denyMode, alloc);
1990}
1991
/*
 * Internally, allow using a provisional nodeless nofp (passed in via *nofpp)
 * if an existing one is not found.  This is used in "create" scenarios to
 * officially add the provisional nofp to the node once the node is created.
 *
 * Returns 0 with *nofpp set on success, EACCES if the requested
 * access/deny modes conflict with an existing open on this client,
 * ENOMEM if an allocation was needed but failed, or ESRCH if no open
 * file was found and none was created.
 */
int
nfs_open_file_find_internal(
	nfsnode_t np,
	struct nfs_open_owner *noop,
	struct nfs_open_file **nofpp,
	uint32_t accessMode,
	uint32_t denyMode,
	int alloc)
{
	struct nfs_open_file *nofp = NULL, *nofp2, *newnofp = NULL;

	/* no node means no list to search: go straight to allocation */
	if (!np)
		goto alloc;
tryagain:
	lck_mtx_lock(&np->n_openlock);
	TAILQ_FOREACH(nofp2, &np->n_opens, nof_link) {
		if (nofp2->nof_owner == noop) {
			nofp = nofp2;
			/* with no accessMode to check, first owner match wins */
			if (!accessMode)
				break;
		}
		if ((accessMode & nofp2->nof_deny) || (denyMode & nofp2->nof_access)) {
			/* This request conflicts with an existing open on this client. */
			lck_mtx_unlock(&np->n_openlock);
			return (EACCES);
		}
	}

	/*
	 * If this open owner doesn't have an open
	 * file structure yet, we create one for it.
	 */
	if (!nofp && !*nofpp && !newnofp && alloc) {
		lck_mtx_unlock(&np->n_openlock);
alloc:
		MALLOC(newnofp, struct nfs_open_file *, sizeof(struct nfs_open_file), M_TEMP, M_WAITOK);
		if (!newnofp)
			return (ENOMEM);
		bzero(newnofp, sizeof(*newnofp));
		lck_mtx_init(&newnofp->nof_lock, nfs_open_grp, LCK_ATTR_NULL);
		newnofp->nof_owner = noop;
		nfs_open_owner_ref(noop);
		newnofp->nof_np = np;
		lck_mtx_lock(&noop->noo_lock);
		TAILQ_INSERT_HEAD(&noop->noo_opens, newnofp, nof_oolink);
		lck_mtx_unlock(&noop->noo_lock);
		/* rescan under the lock in case another thread raced one in */
		if (np)
			goto tryagain;
	}
	if (!nofp) {
		if (*nofpp) {
			/* adopt the caller's provisional nofp for this node */
			(*nofpp)->nof_np = np;
			nofp = *nofpp;
		} else {
			nofp = newnofp;
		}
		if (nofp && np)
			TAILQ_INSERT_HEAD(&np->n_opens, nofp, nof_link);
	}
	if (np)
		lck_mtx_unlock(&np->n_openlock);

	/* our freshly allocated nofp wasn't used: destroy it */
	if (alloc && newnofp && (nofp != newnofp))
		nfs_open_file_destroy(newnofp);

	*nofpp = nofp;
	return (nofp ? 0 : ESRCH);
}
2065
2066/*
2067 * Destroy an open file structure.
2068 */
2069void
2070nfs_open_file_destroy(struct nfs_open_file *nofp)
2071{
2072	lck_mtx_lock(&nofp->nof_owner->noo_lock);
2073	TAILQ_REMOVE(&nofp->nof_owner->noo_opens, nofp, nof_oolink);
2074	lck_mtx_unlock(&nofp->nof_owner->noo_lock);
2075	nfs_open_owner_rele(nofp->nof_owner);
2076	lck_mtx_destroy(&nofp->nof_lock, nfs_open_grp);
2077	FREE(nofp, M_TEMP);
2078}
2079
2080/*
2081 * Mark an open file as busy because we are about to
2082 * start an operation that uses and updates open file state.
2083 */
2084int
2085nfs_open_file_set_busy(struct nfs_open_file *nofp, thread_t thd)
2086{
2087	struct nfsmount *nmp;
2088	struct timespec ts = {2, 0};
2089	int error = 0, slpflag;
2090
2091	nmp = nofp->nof_owner->noo_mount;
2092	if (nfs_mount_gone(nmp))
2093		return (ENXIO);
2094	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
2095
2096	lck_mtx_lock(&nofp->nof_lock);
2097	while (nofp->nof_flags & NFS_OPEN_FILE_BUSY) {
2098		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
2099			break;
2100		nofp->nof_flags |= NFS_OPEN_FILE_WANT;
2101		msleep(nofp, &nofp->nof_lock, slpflag, "nfs_open_file_set_busy", &ts);
2102		slpflag = 0;
2103	}
2104	if (!error)
2105		nofp->nof_flags |= NFS_OPEN_FILE_BUSY;
2106	lck_mtx_unlock(&nofp->nof_lock);
2107
2108	return (error);
2109}
2110
2111/*
2112 * Clear the busy flag on an open file and wake up anyone waiting
2113 * to mark it busy.
2114 */
2115void
2116nfs_open_file_clear_busy(struct nfs_open_file *nofp)
2117{
2118	int wanted;
2119
2120	lck_mtx_lock(&nofp->nof_lock);
2121	if (!(nofp->nof_flags & NFS_OPEN_FILE_BUSY))
2122		panic("nfs_open_file_clear_busy");
2123	wanted = (nofp->nof_flags & NFS_OPEN_FILE_WANT);
2124	nofp->nof_flags &= ~(NFS_OPEN_FILE_BUSY|NFS_OPEN_FILE_WANT);
2125	lck_mtx_unlock(&nofp->nof_lock);
2126	if (wanted)
2127		wakeup(nofp);
2128}
2129
2130/*
2131 * Add the open state for the given access/deny modes to this open file.
2132 */
2133void
2134nfs_open_file_add_open(struct nfs_open_file *nofp, uint32_t accessMode, uint32_t denyMode, int delegated)
2135{
2136	lck_mtx_lock(&nofp->nof_lock);
2137	nofp->nof_access |= accessMode;
2138	nofp->nof_deny |= denyMode;
2139
2140	if (delegated) {
2141		if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
2142			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2143				nofp->nof_d_r++;
2144			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2145				nofp->nof_d_w++;
2146			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2147				nofp->nof_d_rw++;
2148		} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
2149			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2150				nofp->nof_d_r_dw++;
2151			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2152				nofp->nof_d_w_dw++;
2153			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2154				nofp->nof_d_rw_dw++;
2155		} else { /* NFS_OPEN_SHARE_DENY_BOTH */
2156			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2157				nofp->nof_d_r_drw++;
2158			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2159				nofp->nof_d_w_drw++;
2160			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2161				nofp->nof_d_rw_drw++;
2162		}
2163	} else {
2164		if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
2165			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2166				nofp->nof_r++;
2167			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2168				nofp->nof_w++;
2169			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2170				nofp->nof_rw++;
2171		} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
2172			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2173				nofp->nof_r_dw++;
2174			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2175				nofp->nof_w_dw++;
2176			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2177				nofp->nof_rw_dw++;
2178		} else { /* NFS_OPEN_SHARE_DENY_BOTH */
2179			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2180				nofp->nof_r_drw++;
2181			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2182				nofp->nof_w_drw++;
2183			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2184				nofp->nof_rw_drw++;
2185		}
2186	}
2187
2188	nofp->nof_opencnt++;
2189	lck_mtx_unlock(&nofp->nof_lock);
2190}
2191
2192/*
2193 * Find which particular open combo will be closed and report what
2194 * the new modes will be and whether the open was delegated.
2195 */
2196void
2197nfs_open_file_remove_open_find(
2198	struct nfs_open_file *nofp,
2199	uint32_t accessMode,
2200	uint32_t denyMode,
2201	uint32_t *newAccessMode,
2202	uint32_t *newDenyMode,
2203	int *delegated)
2204{
2205	/*
2206	 * Calculate new modes: a mode bit gets removed when there's only
2207	 * one count in all the corresponding counts
2208	 */
2209	*newAccessMode = nofp->nof_access;
2210	*newDenyMode = nofp->nof_deny;
2211
2212	if ((accessMode & NFS_OPEN_SHARE_ACCESS_READ) &&
2213	    (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ) &&
2214	    ((nofp->nof_r + nofp->nof_d_r +
2215	      nofp->nof_rw + nofp->nof_d_rw +
2216	      nofp->nof_r_dw + nofp->nof_d_r_dw +
2217	      nofp->nof_rw_dw + nofp->nof_d_rw_dw +
2218	      nofp->nof_r_drw + nofp->nof_d_r_drw +
2219	      nofp->nof_rw_dw + nofp->nof_d_rw_dw) == 1))
2220		*newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
2221	if ((accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) &&
2222	    (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE) &&
2223	    ((nofp->nof_w + nofp->nof_d_w +
2224	      nofp->nof_rw + nofp->nof_d_rw +
2225	      nofp->nof_w_dw + nofp->nof_d_w_dw +
2226	      nofp->nof_rw_dw + nofp->nof_d_rw_dw +
2227	      nofp->nof_w_drw + nofp->nof_d_w_drw +
2228	      nofp->nof_rw_dw + nofp->nof_d_rw_dw) == 1))
2229		*newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_WRITE;
2230	if ((denyMode & NFS_OPEN_SHARE_DENY_READ) &&
2231	    (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) &&
2232	    ((nofp->nof_r_drw + nofp->nof_d_r_drw +
2233	      nofp->nof_w_drw + nofp->nof_d_w_drw +
2234	      nofp->nof_rw_drw + nofp->nof_d_rw_drw) == 1))
2235		*newDenyMode &= ~NFS_OPEN_SHARE_DENY_READ;
2236	if ((denyMode & NFS_OPEN_SHARE_DENY_WRITE) &&
2237	    (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE) &&
2238	    ((nofp->nof_r_drw + nofp->nof_d_r_drw +
2239	      nofp->nof_w_drw + nofp->nof_d_w_drw +
2240	      nofp->nof_rw_drw + nofp->nof_d_rw_drw +
2241	      nofp->nof_r_dw + nofp->nof_d_r_dw +
2242	      nofp->nof_w_dw + nofp->nof_d_w_dw +
2243	      nofp->nof_rw_dw + nofp->nof_d_rw_dw) == 1))
2244		*newDenyMode &= ~NFS_OPEN_SHARE_DENY_WRITE;
2245
2246	/* Find the corresponding open access/deny mode counter. */
2247	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
2248		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2249			*delegated = (nofp->nof_d_r != 0);
2250		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2251			*delegated = (nofp->nof_d_w != 0);
2252		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2253			*delegated = (nofp->nof_d_rw != 0);
2254		else
2255			*delegated = 0;
2256	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
2257		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2258			*delegated = (nofp->nof_d_r_dw != 0);
2259		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2260			*delegated = (nofp->nof_d_w_dw != 0);
2261		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2262			*delegated = (nofp->nof_d_rw_dw != 0);
2263		else
2264			*delegated = 0;
2265	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
2266		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2267			*delegated = (nofp->nof_d_r_drw != 0);
2268		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2269			*delegated = (nofp->nof_d_w_drw != 0);
2270		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2271			*delegated = (nofp->nof_d_rw_drw != 0);
2272		else
2273			*delegated = 0;
2274	}
2275}
2276
/*
 * Remove the open state for the given access/deny modes from this open file.
 *
 * Computes the resulting access/deny modes (and whether the open being
 * dropped was delegated) via nfs_open_file_remove_open_find(), then
 * decrements the matching (access, deny, delegated) counter.  A counter
 * already at zero indicates an accounting bug: it is logged via NP()
 * rather than being allowed to underrun.
 */
void
nfs_open_file_remove_open(struct nfs_open_file *nofp, uint32_t accessMode, uint32_t denyMode)
{
	uint32_t newAccessMode, newDenyMode;
	int delegated = 0;

	lck_mtx_lock(&nofp->nof_lock);
	nfs_open_file_remove_open_find(nofp, accessMode, denyMode, &newAccessMode, &newDenyMode, &delegated);

	/* Decrement the corresponding open access/deny mode counter. */
	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
			if (delegated) {
				if (nofp->nof_d_r == 0)
					NP(nofp->nof_np, "nfs: open(R) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_r--;
			} else {
				if (nofp->nof_r == 0)
					NP(nofp->nof_np, "nfs: open(R) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_r--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
			if (delegated) {
				if (nofp->nof_d_w == 0)
					NP(nofp->nof_np, "nfs: open(W) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_w--;
			} else {
				if (nofp->nof_w == 0)
					NP(nofp->nof_np, "nfs: open(W) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_w--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
			if (delegated) {
				if (nofp->nof_d_rw == 0)
					NP(nofp->nof_np, "nfs: open(RW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_rw--;
			} else {
				if (nofp->nof_rw == 0)
					NP(nofp->nof_np, "nfs: open(RW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_rw--;
			}
		}
	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
			if (delegated) {
				if (nofp->nof_d_r_dw == 0)
					NP(nofp->nof_np, "nfs: open(R,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_r_dw--;
			} else {
				if (nofp->nof_r_dw == 0)
					NP(nofp->nof_np, "nfs: open(R,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_r_dw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
			if (delegated) {
				if (nofp->nof_d_w_dw == 0)
					NP(nofp->nof_np, "nfs: open(W,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_w_dw--;
			} else {
				if (nofp->nof_w_dw == 0)
					NP(nofp->nof_np, "nfs: open(W,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_w_dw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
			if (delegated) {
				if (nofp->nof_d_rw_dw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_rw_dw--;
			} else {
				if (nofp->nof_rw_dw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_rw_dw--;
			}
		}
	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
			if (delegated) {
				if (nofp->nof_d_r_drw == 0)
					NP(nofp->nof_np, "nfs: open(R,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_r_drw--;
			} else {
				if (nofp->nof_r_drw == 0)
					NP(nofp->nof_np, "nfs: open(R,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_r_drw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
			if (delegated) {
				if (nofp->nof_d_w_drw == 0)
					NP(nofp->nof_np, "nfs: open(W,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_w_drw--;
			} else {
				if (nofp->nof_w_drw == 0)
					NP(nofp->nof_np, "nfs: open(W,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_w_drw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
			if (delegated) {
				if (nofp->nof_d_rw_drw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_rw_drw--;
			} else {
				if (nofp->nof_rw_drw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_rw_drw--;
			}
		}
	}

	/* update the modes */
	nofp->nof_access = newAccessMode;
	nofp->nof_deny = newDenyMode;
	nofp->nof_opencnt--;
	lck_mtx_unlock(&nofp->nof_lock);
}
2412
2413
/*
 * Get the current (delegation, lock, open, default) stateid for this node.
 * If node has a delegation, use that stateid.
 * If pid has a lock, use the lockowner's stateid.
 * Or use the open file's stateid.
 * If no open file, use a default stateid of all ones.
 *
 * The chosen stateid is copied into *sid; any lock owner / open owner
 * references taken during the search are released before returning.
 */
void
nfs_get_stateid(nfsnode_t np, thread_t thd, kauth_cred_t cred, nfs_stateid *sid)
{
	struct nfsmount *nmp = NFSTONMP(np);
	proc_t p = thd ? get_bsdthreadtask_info(thd) : current_proc();  // XXX async I/O requests don't have a thread
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;
	struct nfs_lock_owner *nlop = NULL;
	nfs_stateid *s = NULL;

	if (np->n_openflags & N_DELEG_MASK) {
		/* delegation stateid takes precedence over everything else */
		s = &np->n_dstateid;
	} else {
		if (p)
			nlop = nfs_lock_owner_find(np, p, 0);
		if (nlop && !TAILQ_EMPTY(&nlop->nlo_locks)) {
			/* we hold locks, use lock stateid */
			s = &nlop->nlo_stateid;
		} else if (((noop = nfs_open_owner_find(nmp, cred, 0))) &&
			 (nfs_open_file_find(np, noop, &nofp, 0, 0, 0) == 0) &&
			 !(nofp->nof_flags & NFS_OPEN_FILE_LOST) &&
			 nofp->nof_access) {
			/* we (should) have the file open, use open stateid */
			if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)
				nfs4_reopen(nofp, thd);
			/* recheck LOST: the reopen attempt may have lost the open */
			if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST))
				s = &nofp->nof_stateid;
		}
	}

	if (s) {
		sid->seqid = s->seqid;
		sid->other[0] = s->other[0];
		sid->other[1] = s->other[1];
		sid->other[2] = s->other[2];
	} else {
		/* named attributes may not have a stateid for reads, so don't complain for them */
		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
			NP(np, "nfs_get_stateid: no stateid");
		/* fall back to the special all-ones stateid */
		sid->seqid = sid->other[0] = sid->other[1] = sid->other[2] = 0xffffffff;
	}
	if (nlop)
		nfs_lock_owner_rele(nlop);
	if (noop)
		nfs_open_owner_rele(noop);
}
2467
2468
/*
 * When we have a delegation, we may be able to perform the OPEN locally.
 * Perform the OPEN by checking the delegation ACE and/or checking via ACCESS.
 *
 * On success the open is recorded as delegated via
 * nfs_open_file_add_open().  Returns 0 if authorized, EACCES (or the
 * underlying error) otherwise.
 */
int
nfs4_open_delegated(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	uint32_t accessMode,
	uint32_t denyMode,
	vfs_context_t ctx)
{
	int error = 0, ismember, readtoo = 0, authorized = 0;
	uint32_t action;
	struct kauth_acl_eval eval;
	kauth_cred_t cred = vfs_context_ucred(ctx);

	if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) {
		/*
		 * Try to open it for read access too,
		 * so the buffer cache can read data.
		 */
		readtoo = 1;
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}

tryagain:
	/* translate the requested open access into kauth action bits */
	action = 0;
	if (accessMode & NFS_OPEN_SHARE_ACCESS_READ)
		action |= KAUTH_VNODE_READ_DATA;
	if (accessMode & NFS_OPEN_SHARE_ACCESS_WRITE)
		action |= KAUTH_VNODE_WRITE_DATA;

	/* evaluate ACE (if we have one) */
	if (np->n_dace.ace_flags) {
		eval.ae_requested = action;
		eval.ae_acl = &np->n_dace;
		eval.ae_count = 1;
		eval.ae_options = 0;
		if (np->n_vattr.nva_uid == kauth_cred_getuid(cred))
			eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
		error = kauth_cred_ismember_gid(cred, np->n_vattr.nva_gid, &ismember);
		if (!error && ismember)
			eval.ae_options |= KAUTH_AEVAL_IN_GROUP;

		eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
		eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
		eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
		eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;

		error = kauth_acl_evaluate(cred, &eval);

		if (!error && (eval.ae_result == KAUTH_RESULT_ALLOW))
			authorized = 1;
	}

	if (!authorized) {
		/* need to ask the server via ACCESS */
		struct vnop_access_args naa;
		naa.a_desc = &vnop_access_desc;
		naa.a_vp = NFSTOV(np);
		naa.a_action = action;
		naa.a_context = ctx;
		if (!(error = nfs_vnop_access(&naa)))
			authorized = 1;
	}

	if (!authorized) {
		if (readtoo) {
			/* try again without the extra read access */
			accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
			readtoo = 0;
			goto tryagain;
		}
		return (error ? error : EACCES);
	}

	/* authorized: account for the open as a delegated open */
	nfs_open_file_add_open(nofp, accessMode, denyMode, 1);

	return (0);
}
2550
2551
/*
 * Open a file with the given access/deny modes.
 *
 * If we have a delegation, we may be able to handle the open locally.
 * Otherwise, we will always send the open RPC even if this open's mode is
 * a subset of all the existing opens.  This makes sure that we will always
 * be able to do a downgrade to any of the open modes.
 *
 * Note: local conflicts should have already been checked in nfs_open_file_find().
 */
int
nfs4_open(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	uint32_t accessMode,
	uint32_t denyMode,
	vfs_context_t ctx)
{
	vnode_t vp = NFSTOV(np);
	vnode_t dvp = NULL;
	struct componentname cn;
	const char *vname = NULL;
	size_t namelen;
	char smallname[128];	/* stack buffer for the common short-name case */
	char *filename = NULL;
	int error = 0, readtoo = 0;

	/*
	 * We can handle the OPEN ourselves if we have a delegation,
	 * unless it's a read delegation and the open is asking for
	 * either write access or deny read.  We also don't bother to
	 * use the delegation if it's being returned.
	 */
	if (np->n_openflags & N_DELEG_MASK) {
		if ((error = nfs_open_state_set_busy(np, vfs_context_thread(ctx))))
			return (error);
		/* recheck the delegation state now that open state is busied */
		if ((np->n_openflags & N_DELEG_MASK) && !(np->n_openflags & N_DELEG_RETURN) &&
		    (((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) ||
		     (!(accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) && !(denyMode & NFS_OPEN_SHARE_DENY_READ)))) {
			error = nfs4_open_delegated(np, nofp, accessMode, denyMode, ctx);
			nfs_open_state_clear_busy(np);
			return (error);
		}
		nfs_open_state_clear_busy(np);
	}

	/*
	 * [sigh] We can't trust VFS to get the parent right for named
	 * attribute nodes.  (It likes to reparent the nodes after we've
	 * created them.)  Luckily we can probably get the right parent
	 * from the n_parent we have stashed away.
	 */
	if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
	    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
		dvp = NULL;
	if (!dvp)
		dvp = vnode_getparent(vp);
	vname = vnode_getname(vp);
	if (!dvp || !vname) {
		/* need both the parent vnode and the name to do the OPEN RPC */
		if (!error)
			error = EIO;
		goto out;
	}
	/* copy the name, spilling to a heap buffer if it won't fit on the stack */
	filename = &smallname[0];
	namelen = snprintf(filename, sizeof(smallname), "%s", vname);
	if (namelen >= sizeof(smallname)) {
		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
		if (!filename) {
			error = ENOMEM;
			goto out;
		}
		snprintf(filename, namelen+1, "%s", vname);
	}
	bzero(&cn, sizeof(cn));
	cn.cn_nameptr = filename;
	cn.cn_namelen = namelen;

	if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) {
		/*
		 * Try to open it for read access too,
		 * so the buffer cache can read data.
		 */
		readtoo = 1;
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}
tryagain:
	error = nfs4_open_rpc(nofp, ctx, &cn, NULL, dvp, &vp, NFS_OPEN_NOCREATE, accessMode, denyMode);
	if (error) {
		if (!nfs_mount_state_error_should_restart(error) &&
		    (error != EINTR) && (error != ERESTART) && readtoo) {
			/* try again without the extra read access */
			accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
			readtoo = 0;
			goto tryagain;
		}
		goto out;
	}
	/* success: account for the new (non-delegated) open */
	nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
out:
	if (filename && (filename != &smallname[0]))
		FREE(filename, M_TEMP);
	if (vname)
		vnode_putname(vname);
	if (dvp != NULLVP)
		vnode_put(dvp);
	return (error);
}
2659
2660int
2661nfs_vnop_mmap(
2662	struct vnop_mmap_args /* {
2663		struct vnodeop_desc *a_desc;
2664		vnode_t a_vp;
2665		int a_fflags;
2666		vfs_context_t a_context;
2667	} */ *ap)
2668{
2669	vfs_context_t ctx = ap->a_context;
2670	vnode_t vp = ap->a_vp;
2671	nfsnode_t np = VTONFS(vp);
2672	int error = 0, accessMode, denyMode, delegated;
2673	struct nfsmount *nmp;
2674	struct nfs_open_owner *noop = NULL;
2675	struct nfs_open_file *nofp = NULL;
2676
2677	nmp = VTONMP(vp);
2678	if (nfs_mount_gone(nmp))
2679		return (ENXIO);
2680
2681	if (!vnode_isreg(vp) || !(ap->a_fflags & (PROT_READ|PROT_WRITE)))
2682		return (EINVAL);
2683	if (np->n_flag & NREVOKE)
2684		return (EIO);
2685
2686	/*
2687	 * fflags contains some combination of: PROT_READ, PROT_WRITE
2688	 * Since it's not possible to mmap() without having the file open for reading,
2689	 * read access is always there (regardless if PROT_READ is not set).
2690	 */
2691	accessMode = NFS_OPEN_SHARE_ACCESS_READ;
2692	if (ap->a_fflags & PROT_WRITE)
2693		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
2694	denyMode = NFS_OPEN_SHARE_DENY_NONE;
2695
2696	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
2697	if (!noop)
2698		return (ENOMEM);
2699
2700restart:
2701	error = nfs_mount_state_in_use_start(nmp, NULL);
2702	if (error) {
2703		nfs_open_owner_rele(noop);
2704		return (error);
2705	}
2706	if (np->n_flag & NREVOKE) {
2707		error = EIO;
2708		nfs_mount_state_in_use_end(nmp, 0);
2709		nfs_open_owner_rele(noop);
2710		return (error);
2711	}
2712
2713	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
2714	if (error || (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))) {
2715		NP(np, "nfs_vnop_mmap: no open file for owner, error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
2716		error = EPERM;
2717	}
2718	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
2719		nfs_mount_state_in_use_end(nmp, 0);
2720		error = nfs4_reopen(nofp, NULL);
2721		nofp = NULL;
2722		if (!error)
2723			goto restart;
2724	}
2725	if (!error)
2726		error = nfs_open_file_set_busy(nofp, NULL);
2727	if (error) {
2728		nofp = NULL;
2729		goto out;
2730	}
2731
2732	/*
2733	 * The open reference for mmap must mirror an existing open because
2734	 * we may need to reclaim it after the file is closed.
2735	 * So grab another open count matching the accessMode passed in.
2736	 * If we already had an mmap open, prefer read/write without deny mode.
2737	 * This means we may have to drop the current mmap open first.
2738	 */
2739
2740	if (!nofp->nof_access) {
2741		if (accessMode != NFS_OPEN_SHARE_ACCESS_READ) {
2742			/* not asking for just read access -> fail */
2743			error = EPERM;
2744			goto out;
2745		}
2746		/* we don't have the file open, so open it for read access */
2747		if (nmp->nm_vers < NFS_VER4) {
2748			/* NFS v2/v3 opens are always allowed - so just add it. */
2749			nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
2750			error = 0;
2751		} else {
2752			error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
2753		}
2754		if (!error)
2755			nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
2756		if (error)
2757			goto out;
2758	}
2759
2760	/* determine deny mode for open */
2761	if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
2762		if (nofp->nof_d_rw || nofp->nof_d_rw_dw || nofp->nof_d_rw_drw) {
2763			delegated = 1;
2764			if (nofp->nof_d_rw)
2765				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2766			else if (nofp->nof_d_rw_dw)
2767				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2768			else if (nofp->nof_d_rw_drw)
2769				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2770		} else if (nofp->nof_rw || nofp->nof_rw_dw || nofp->nof_rw_drw) {
2771			delegated = 0;
2772			if (nofp->nof_rw)
2773				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2774			else if (nofp->nof_rw_dw)
2775				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2776			else if (nofp->nof_rw_drw)
2777				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2778		} else {
2779			error = EPERM;
2780		}
2781	} else { /* NFS_OPEN_SHARE_ACCESS_READ */
2782		if (nofp->nof_d_r || nofp->nof_d_r_dw || nofp->nof_d_r_drw) {
2783			delegated = 1;
2784			if (nofp->nof_d_r)
2785				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2786			else if (nofp->nof_d_r_dw)
2787				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2788			else if (nofp->nof_d_r_drw)
2789				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2790		} else if (nofp->nof_r || nofp->nof_r_dw || nofp->nof_r_drw) {
2791			delegated = 0;
2792			if (nofp->nof_r)
2793				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2794			else if (nofp->nof_r_dw)
2795				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2796			else if (nofp->nof_r_drw)
2797				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2798		} else {
2799			error = EPERM;
2800		}
2801	}
2802	if (error) /* mmap mode without proper open mode */
2803		goto out;
2804
2805	/*
2806	 * If the existing mmap access is more than the new access OR the
2807	 * existing access is the same and the existing deny mode is less,
2808	 * then we'll stick with the existing mmap open mode.
2809	 */
2810	if ((nofp->nof_mmap_access > accessMode) ||
2811	    ((nofp->nof_mmap_access == accessMode) && (nofp->nof_mmap_deny <= denyMode)))
2812		goto out;
2813
2814	/* update mmap open mode */
2815	if (nofp->nof_mmap_access) {
2816		error = nfs_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
2817		if (error) {
2818			if (!nfs_mount_state_error_should_restart(error))
2819				NP(np, "nfs_vnop_mmap: close of previous mmap mode failed: %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
2820			NP(np, "nfs_vnop_mmap: update, close error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
2821			goto out;
2822		}
2823		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
2824	}
2825
2826	nfs_open_file_add_open(nofp, accessMode, denyMode, delegated);
2827	nofp->nof_mmap_access = accessMode;
2828	nofp->nof_mmap_deny = denyMode;
2829
2830out:
2831	if (nofp)
2832		nfs_open_file_clear_busy(nofp);
2833	if (nfs_mount_state_in_use_end(nmp, error)) {
2834		nofp = NULL;
2835		goto restart;
2836	}
2837	if (noop)
2838		nfs_open_owner_rele(noop);
2839
2840	if (!error) {
2841		int ismapped = 0;
2842		nfs_node_lock_force(np);
2843		if ((np->n_flag & NISMAPPED) == 0) {
2844			np->n_flag |= NISMAPPED;
2845			ismapped = 1;
2846		}
2847		nfs_node_unlock(np);
2848		if (ismapped) {
2849			lck_mtx_lock(&nmp->nm_lock);
2850			nmp->nm_state &= ~NFSSTA_SQUISHY;
2851			nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
2852			if (nmp->nm_curdeadtimeout <= 0)
2853				nmp->nm_deadto_start = 0;
2854			nmp->nm_mappers++;
2855			lck_mtx_unlock(&nmp->nm_lock);
2856		}
2857	}
2858
2859	return (error);
2860}
2861
2862
/*
 * NFS vnode op: mnomap - called when the last mmap reference on the vnode
 * goes away.
 *
 * Clear the node's NISMAPPED flag and drop the mount's count of mmapped
 * files, flush dirty buffers/UBC pages, then walk the node's open files
 * and close out each open that carries mmap access counts.
 */
int
nfs_vnop_mnomap(
	struct vnop_mnomap_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	struct nfs_open_file *nofp = NULL;
	off_t size;
	int error;
	int is_mapped_flag = 0;

	nmp = VTONMP(vp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	/* Clear the node's "is mapped" flag under the node lock. */
	nfs_node_lock_force(np);
	if (np->n_flag & NISMAPPED) {
		is_mapped_flag = 1;
		np->n_flag &= ~NISMAPPED;
	}
	nfs_node_unlock(np);
	/* If it was mapped, drop the mount's mmapped-file count. */
	if (is_mapped_flag) {
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_mappers)
			nmp->nm_mappers--;
		else
			NP(np, "nfs_vnop_mnomap: removing mmap reference from mount, but mount has no files mmapped");
		lck_mtx_unlock(&nmp->nm_lock);
	}

	/* flush buffers/ubc before we drop the open (in case it's our last open) */
	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
	if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp)))
		ubc_msync(vp, 0, size, NULL, UBC_PUSHALL | UBC_SYNC);

	/* walk all open files and close all mmap opens */
loop:
	error = nfs_mount_state_in_use_start(nmp, NULL);
	if (error)
		return (error);
	lck_mtx_lock(&np->n_openlock);
	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
		if (!nofp->nof_mmap_access)
			continue;
		/* Found an mmap open; drop the list lock while working on it. */
		lck_mtx_unlock(&np->n_openlock);
		if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
			/* The open needs to be reclaimed first; then rescan from the top. */
			nfs_mount_state_in_use_end(nmp, 0);
			error = nfs4_reopen(nofp, NULL);
			if (!error)
				goto loop;
		}
		if (!error)
			error = nfs_open_file_set_busy(nofp, NULL);
		if (error) {
			lck_mtx_lock(&np->n_openlock);
			break;
		}
		/* Re-check under busy, then close out the mmap open counts. */
		if (nofp->nof_mmap_access) {
			error = nfs_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
			if (!nfs_mount_state_error_should_restart(error)) {
				if (error) /* not a state-operation-restarting error, so just clear the access */
					NP(np, "nfs_vnop_mnomap: close of mmap mode failed: %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
				nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
			}
			if (error)
				NP(np, "nfs_vnop_mnomap: error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
		}
		nfs_open_file_clear_busy(nofp);
		/* The open list may have changed while unlocked; restart the scan. */
		nfs_mount_state_in_use_end(nmp, error);
		goto loop;
	}
	lck_mtx_unlock(&np->n_openlock);
	nfs_mount_state_in_use_end(nmp, error);
	return (error);
}
2944
2945/*
2946 * Search a node's lock owner list for the owner for this process.
2947 * If not found and "alloc" is set, then allocate a new one.
2948 */
struct nfs_lock_owner *
nfs_lock_owner_find(nfsnode_t np, proc_t p, int alloc)
{
	pid_t pid = proc_pid(p);
	struct nfs_lock_owner *nlop, *newnlop = NULL;

tryagain:
	lck_mtx_lock(&np->n_openlock);
	TAILQ_FOREACH(nlop, &np->n_lock_owners, nlo_link) {
		if (nlop->nlo_pid != pid)
			continue;
		/* PID matches; matching process start time means same process. */
		if (timevalcmp(&nlop->nlo_pid_start, &p->p_start, ==))
			break;
		/* stale lock owner... reuse it if we can */
		if (nlop->nlo_refcnt) {
			/* Still referenced: unlink it from the node and rescan. */
			TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link);
			nlop->nlo_flags &= ~NFS_LOCK_OWNER_LINK;
			lck_mtx_unlock(&np->n_openlock);
			goto tryagain;
		}
		/* Unreferenced: recycle it for the new incarnation of the pid. */
		nlop->nlo_pid_start = p->p_start;
		nlop->nlo_seqid = 0;
		nlop->nlo_stategenid = 0;
		break;
	}

	if (!nlop && !newnlop && alloc) {
		/* Not found: allocate one (can't hold the mutex across MALLOC). */
		lck_mtx_unlock(&np->n_openlock);
		MALLOC(newnlop, struct nfs_lock_owner *, sizeof(struct nfs_lock_owner), M_TEMP, M_WAITOK);
		if (!newnlop)
			return (NULL);
		bzero(newnlop, sizeof(*newnlop));
		lck_mtx_init(&newnlop->nlo_lock, nfs_open_grp, LCK_ATTR_NULL);
		newnlop->nlo_pid = pid;
		newnlop->nlo_pid_start = p->p_start;
		newnlop->nlo_name = OSAddAtomic(1, &nfs_lock_owner_seqnum);
		TAILQ_INIT(&newnlop->nlo_locks);
		/* Rescan in case one was added while we were allocating. */
		goto tryagain;
	}
	if (!nlop && newnlop) {
		/* Use the newly allocated owner: link it onto the node's list. */
		newnlop->nlo_flags |= NFS_LOCK_OWNER_LINK;
		TAILQ_INSERT_HEAD(&np->n_lock_owners, newnlop, nlo_link);
		nlop = newnlop;
	}
	lck_mtx_unlock(&np->n_openlock);

	/* If we allocated one but didn't end up using it, destroy it. */
	if (newnlop && (nlop != newnlop))
		nfs_lock_owner_destroy(newnlop);

	/* Return the lock owner with a reference held for the caller. */
	if (nlop)
		nfs_lock_owner_ref(nlop);

	return (nlop);
}
3003
3004/*
3005 * destroy a lock owner that's no longer needed
3006 */
3007void
3008nfs_lock_owner_destroy(struct nfs_lock_owner *nlop)
3009{
3010	if (nlop->nlo_open_owner) {
3011		nfs_open_owner_rele(nlop->nlo_open_owner);
3012		nlop->nlo_open_owner = NULL;
3013	}
3014	lck_mtx_destroy(&nlop->nlo_lock, nfs_open_grp);
3015	FREE(nlop, M_TEMP);
3016}
3017
3018/*
3019 * acquire a reference count on a lock owner
3020 */
3021void
3022nfs_lock_owner_ref(struct nfs_lock_owner *nlop)
3023{
3024	lck_mtx_lock(&nlop->nlo_lock);
3025	nlop->nlo_refcnt++;
3026	lck_mtx_unlock(&nlop->nlo_lock);
3027}
3028
3029/*
3030 * drop a reference count on a lock owner and destroy it if
3031 * it is no longer referenced and no longer on the mount's list.
3032 */
3033void
3034nfs_lock_owner_rele(struct nfs_lock_owner *nlop)
3035{
3036	lck_mtx_lock(&nlop->nlo_lock);
3037	if (nlop->nlo_refcnt < 1)
3038		panic("nfs_lock_owner_rele: no refcnt");
3039	nlop->nlo_refcnt--;
3040	if (!nlop->nlo_refcnt && (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY))
3041		panic("nfs_lock_owner_rele: busy");
3042	/* XXX we may potentially want to clean up idle/unused lock owner structures */
3043	if (nlop->nlo_refcnt || (nlop->nlo_flags & NFS_LOCK_OWNER_LINK)) {
3044		lck_mtx_unlock(&nlop->nlo_lock);
3045		return;
3046	}
3047	/* owner is no longer referenced or linked to mount, so destroy it */
3048	lck_mtx_unlock(&nlop->nlo_lock);
3049	nfs_lock_owner_destroy(nlop);
3050}
3051
3052/*
3053 * Mark a lock owner as busy because we are about to
3054 * start an operation that uses and updates lock owner state.
3055 */
int
nfs_lock_owner_set_busy(struct nfs_lock_owner *nlop, thread_t thd)
{
	struct nfsmount *nmp;
	struct timespec ts = {2, 0};	/* re-check for signals every 2 seconds */
	int error = 0, slpflag;

	nmp = nlop->nlo_open_owner->noo_mount;
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	/* Interruptible sleep only on an interruptible mount with a known thread. */
	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;

	lck_mtx_lock(&nlop->nlo_lock);
	/* Wait for whoever currently holds the busy flag to clear it. */
	while (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY) {
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			break;
		nlop->nlo_flags |= NFS_LOCK_OWNER_WANT;
		msleep(nlop, &nlop->nlo_lock, slpflag, "nfs_lock_owner_set_busy", &ts);
		slpflag = 0;	/* allow signal interruption only on the first sleep */
	}
	if (!error)
		nlop->nlo_flags |= NFS_LOCK_OWNER_BUSY;
	lck_mtx_unlock(&nlop->nlo_lock);

	return (error);
}
3082
3083/*
3084 * Clear the busy flag on a lock owner and wake up anyone waiting
3085 * to mark it busy.
3086 */
3087void
3088nfs_lock_owner_clear_busy(struct nfs_lock_owner *nlop)
3089{
3090	int wanted;
3091
3092	lck_mtx_lock(&nlop->nlo_lock);
3093	if (!(nlop->nlo_flags & NFS_LOCK_OWNER_BUSY))
3094		panic("nfs_lock_owner_clear_busy");
3095	wanted = (nlop->nlo_flags & NFS_LOCK_OWNER_WANT);
3096	nlop->nlo_flags &= ~(NFS_LOCK_OWNER_BUSY|NFS_LOCK_OWNER_WANT);
3097	lck_mtx_unlock(&nlop->nlo_lock);
3098	if (wanted)
3099		wakeup(nlop);
3100}
3101
3102/*
3103 * Insert a held lock into a lock owner's sorted list.
3104 * (flock locks are always inserted at the head the list)
3105 */
3106void
3107nfs_lock_owner_insert_held_lock(struct nfs_lock_owner *nlop, struct nfs_file_lock *newnflp)
3108{
3109	struct nfs_file_lock *nflp;
3110
3111	/* insert new lock in lock owner's held lock list */
3112	lck_mtx_lock(&nlop->nlo_lock);
3113	if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) {
3114		TAILQ_INSERT_HEAD(&nlop->nlo_locks, newnflp, nfl_lolink);
3115	} else {
3116		TAILQ_FOREACH(nflp, &nlop->nlo_locks, nfl_lolink) {
3117			if (newnflp->nfl_start < nflp->nfl_start)
3118				break;
3119		}
3120		if (nflp)
3121			TAILQ_INSERT_BEFORE(nflp, newnflp, nfl_lolink);
3122		else
3123			TAILQ_INSERT_TAIL(&nlop->nlo_locks, newnflp, nfl_lolink);
3124	}
3125	lck_mtx_unlock(&nlop->nlo_lock);
3126}
3127
3128/*
3129 * Get a file lock structure for this lock owner.
3130 */
3131struct nfs_file_lock *
3132nfs_file_lock_alloc(struct nfs_lock_owner *nlop)
3133{
3134	struct nfs_file_lock *nflp = NULL;
3135
3136	lck_mtx_lock(&nlop->nlo_lock);
3137	if (!nlop->nlo_alock.nfl_owner) {
3138		nflp = &nlop->nlo_alock;
3139		nflp->nfl_owner = nlop;
3140	}
3141	lck_mtx_unlock(&nlop->nlo_lock);
3142	if (!nflp) {
3143		MALLOC(nflp, struct nfs_file_lock *, sizeof(struct nfs_file_lock), M_TEMP, M_WAITOK);
3144		if (!nflp)
3145			return (NULL);
3146		bzero(nflp, sizeof(*nflp));
3147		nflp->nfl_flags |= NFS_FILE_LOCK_ALLOC;
3148		nflp->nfl_owner = nlop;
3149	}
3150	nfs_lock_owner_ref(nlop);
3151	return (nflp);
3152}
3153
3154/*
3155 * destroy the given NFS file lock structure
3156 */
3157void
3158nfs_file_lock_destroy(struct nfs_file_lock *nflp)
3159{
3160	struct nfs_lock_owner *nlop = nflp->nfl_owner;
3161
3162	if (nflp->nfl_flags & NFS_FILE_LOCK_ALLOC) {
3163		nflp->nfl_owner = NULL;
3164		FREE(nflp, M_TEMP);
3165	} else {
3166		lck_mtx_lock(&nlop->nlo_lock);
3167		bzero(nflp, sizeof(nflp));
3168		lck_mtx_unlock(&nlop->nlo_lock);
3169	}
3170	nfs_lock_owner_rele(nlop);
3171}
3172
3173/*
3174 * Check if one file lock conflicts with another.
3175 * (nflp1 is the new lock.  nflp2 is the existing lock.)
3176 */
3177int
3178nfs_file_lock_conflict(struct nfs_file_lock *nflp1, struct nfs_file_lock *nflp2, int *willsplit)
3179{
3180	/* no conflict if lock is dead */
3181	if ((nflp1->nfl_flags & NFS_FILE_LOCK_DEAD) || (nflp2->nfl_flags & NFS_FILE_LOCK_DEAD))
3182		return (0);
3183	/* no conflict if it's ours - unless the lock style doesn't match */
3184	if ((nflp1->nfl_owner == nflp2->nfl_owner) &&
3185	    ((nflp1->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == (nflp2->nfl_flags & NFS_FILE_LOCK_STYLE_MASK))) {
3186		if (willsplit && (nflp1->nfl_type != nflp2->nfl_type) &&
3187		    (nflp1->nfl_start > nflp2->nfl_start) &&
3188		    (nflp1->nfl_end < nflp2->nfl_end))
3189			*willsplit = 1;
3190		return (0);
3191	}
3192	/* no conflict if ranges don't overlap */
3193	if ((nflp1->nfl_start > nflp2->nfl_end) || (nflp1->nfl_end < nflp2->nfl_start))
3194		return (0);
3195	/* no conflict if neither lock is exclusive */
3196	if ((nflp1->nfl_type != F_WRLCK) && (nflp2->nfl_type != F_WRLCK))
3197		return (0);
3198	/* conflict */
3199	return (1);
3200}
3201
3202/*
3203 * Send an NFSv4 LOCK RPC to the server.
3204 */
int
nfs4_setlock_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	struct nfs_file_lock *nflp,
	int reclaim,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	struct nfsmount *nmp;
	struct nfsm_chain nmreq, nmrep;
	uint64_t xid;
	uint32_t locktype;
	int error = 0, lockerror = ENOENT, newlocker, numops, status;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/* A stategenid mismatch means the server has no lock state for this owner yet. */
	newlocker = (nlop->nlo_stategenid != nmp->nm_stategenid);
	/* Map the lock type + wait flag to the NFSv4 lock type value. */
	locktype = (nflp->nfl_flags & NFS_FILE_LOCK_WAIT) ?
			((nflp->nfl_type == F_WRLCK) ?
				NFS_LOCK_TYPE_WRITEW :
				NFS_LOCK_TYPE_READW) :
			((nflp->nfl_type == F_WRLCK) ?
				NFS_LOCK_TYPE_WRITE :
				NFS_LOCK_TYPE_READ);
	if (newlocker) {
		/* A new locker's LOCK uses open state: mark open file/owner busy first. */
		error = nfs_open_file_set_busy(nofp, thd);
		if (error)
			return (error);
		error = nfs_open_owner_set_busy(nofp->nof_owner, thd);
		if (error) {
			nfs_open_file_clear_busy(nofp);
			return (error);
		}
		/* Make sure the lock owner holds a reference to its open owner. */
		if (!nlop->nlo_open_owner) {
			nfs_open_owner_ref(nofp->nof_owner);
			nlop->nlo_open_owner = nofp->nof_owner;
		}
	}
	error = nfs_lock_owner_set_busy(nlop, thd);
	if (error) {
		/* Undo the busy state acquired above before bailing. */
		if (newlocker) {
			nfs_open_owner_clear_busy(nofp->nof_owner);
			nfs_open_file_clear_busy(nofp);
		}
		return (error);
	}

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, LOCK
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 33 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "lock", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCK);
	nfsm_chain_add_32(error, &nmreq, locktype);
	nfsm_chain_add_32(error, &nmreq, reclaim);
	nfsm_chain_add_64(error, &nmreq, nflp->nfl_start);
	nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(nflp->nfl_start, nflp->nfl_end));
	nfsm_chain_add_32(error, &nmreq, newlocker);
	if (newlocker) {
		/* new lock owner: identify via the open stateid + open owner seqid */
		nfsm_chain_add_32(error, &nmreq, nofp->nof_owner->noo_seqid);
		nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid);
		nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid);
		nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop);
	} else {
		/* existing lock owner: use its lock stateid + seqid */
		nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid);
		nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid);
	}
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCK);
	nfs_owner_seqid_increment(newlocker ? nofp->nof_owner : NULL, nlop, error);
	nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid);

	/* Update the lock owner's stategenid once it appears the server has state for it. */
	/* We determine this by noting the request was successful (we got a stateid). */
	if (newlocker && !error)
		nlop->nlo_stategenid = nmp->nm_stategenid;
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_lock_owner_clear_busy(nlop);
	if (newlocker) {
		nfs_open_owner_clear_busy(nofp->nof_owner);
		nfs_open_file_clear_busy(nofp);
	}
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
3325
3326/*
3327 * Send an NFSv4 LOCKU RPC to the server.
3328 */
int
nfs4_unlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	int type,
	uint64_t start,
	uint64_t end,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfsmount *nmp;
	struct nfsm_chain nmreq, nmrep;
	uint64_t xid;
	int error = 0, lockerror = ENOENT, numops, status;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/* Serialize use of this lock owner's seqid/stateid for the RPC. */
	error = nfs_lock_owner_set_busy(nlop, NULL);
	if (error)
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, LOCKU
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "unlock", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKU);
	nfsm_chain_add_32(error, &nmreq, (type == F_WRLCK) ? NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ);
	nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid);
	nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid);
	nfsm_chain_add_64(error, &nmreq, start);
	nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end));
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKU);
	/* Advance the lock owner's seqid and pick up the new lock stateid. */
	nfs_owner_seqid_increment(NULL, nlop, error);
	nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_lock_owner_clear_busy(nlop);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
3403
3404/*
3405 * Send an NFSv4 LOCKT RPC to the server.
3406 */
int
nfs4_getlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	struct nfsm_chain nmreq, nmrep;
	uint64_t xid, val64 = 0;
	uint32_t val = 0;
	int error = 0, lockerror, numops, status;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	lockerror = ENOENT;
	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, LOCKT
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "locktest", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKT);
	nfsm_chain_add_32(error, &nmreq, (fl->l_type == F_WRLCK) ? NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ);
	nfsm_chain_add_64(error, &nmreq, start);
	nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end));
	nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKT);
	if (error == NFSERR_DENIED) {
		/* Conflicting lock found: decode its range/type into *fl. */
		error = 0;
		nfsm_chain_get_64(error, &nmrep, fl->l_start);
		nfsm_chain_get_64(error, &nmrep, val64);
		/* A length of UINT64_MAX means "to end of file" -> l_len 0 */
		fl->l_len = (val64 == UINT64_MAX) ? 0 : val64;
		nfsm_chain_get_32(error, &nmrep, val);
		fl->l_type = (val == NFS_LOCK_TYPE_WRITE) ? F_WRLCK : F_RDLCK;
		fl->l_pid = 0;
		fl->l_whence = SEEK_SET;
	} else if (!error) {
		/* LOCKT succeeded: no conflicting lock on the server. */
		fl->l_type = F_UNLCK;
	}
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
3485
3486
3487/*
3488 * Check for any conflicts with the given lock.
3489 *
3490 * Checking for a lock doesn't require the file to be opened.
3491 * So we skip all the open owner, open file, lock owner work
3492 * and just check for a conflicting lock.
3493 */
int
nfs_advlock_getlock(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	struct nfs_file_lock *nflp;
	int error = 0, answered = 0;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

restart:
	if ((error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx))))
		return (error);

	lck_mtx_lock(&np->n_openlock);
	/* scan currently held locks for conflict */
	TAILQ_FOREACH(nflp, &np->n_locks, nfl_link) {
		if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
			continue;
		/* conflict: ranges overlap and at least one side is exclusive */
		if ((start <= nflp->nfl_end) && (end >= nflp->nfl_start) &&
		    ((fl->l_type == F_WRLCK) || (nflp->nfl_type == F_WRLCK)))
			break;
	}
	if (nflp) {
		/* found a conflicting lock */
		fl->l_type = nflp->nfl_type;
		fl->l_pid = (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_FLOCK) ? -1 : nflp->nfl_owner->nlo_pid;
		fl->l_start = nflp->nfl_start;
		fl->l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
		fl->l_whence = SEEK_SET;
		answered = 1;
	} else if ((np->n_openflags & N_DELEG_WRITE) && !(np->n_openflags & N_DELEG_RETURN)) {
		/*
		 * If we have a write delegation, we know there can't be other
		 * locks on the server.  So the answer is no conflicting lock found.
		 */
		fl->l_type = F_UNLCK;
		answered = 1;
	}
	lck_mtx_unlock(&np->n_openlock);
	if (answered) {
		/* Answered locally; no server round trip needed. */
		nfs_mount_state_in_use_end(nmp, 0);
		return (0);
	}

	/* no conflict found locally, so ask the server */
	error = nmp->nm_funcs->nf_getlock_rpc(np, nlop, fl, start, end, ctx);

	/* A state error (e.g. recovery) means the RPC must be retried. */
	if (nfs_mount_state_in_use_end(nmp, error))
		goto restart;
	return (error);
}
3553
3554/*
3555 * Acquire a file lock for the given range.
3556 *
3557 * Add the lock (request) to the lock queue.
3558 * Scan the lock queue for any conflicting locks.
3559 * If a conflict is found, block or return an error.
3560 * Once end of queue is reached, send request to the server.
3561 * If the server grants the lock, scan the lock queue and
3562 * update any existing locks.  Then (optionally) scan the
3563 * queue again to coalesce any locks adjacent to the new one.
3564 */
3565int
3566nfs_advlock_setlock(
3567	nfsnode_t np,
3568	struct nfs_open_file *nofp,
3569	struct nfs_lock_owner *nlop,
3570	int op,
3571	uint64_t start,
3572	uint64_t end,
3573	int style,
3574	short type,
3575	vfs_context_t ctx)
3576{
3577	struct nfsmount *nmp;
3578	struct nfs_file_lock *newnflp, *nflp, *nflp2 = NULL, *nextnflp, *flocknflp = NULL;
3579	struct nfs_file_lock *coalnflp;
3580	int error = 0, error2, willsplit = 0, delay, slpflag, busy = 0, inuse = 0, restart, inqueue = 0;
3581	struct timespec ts = {1, 0};
3582
3583	nmp = NFSTONMP(np);
3584	if (nfs_mount_gone(nmp))
3585		return (ENXIO);
3586	slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
3587
3588	if ((type != F_RDLCK) && (type != F_WRLCK))
3589		return (EINVAL);
3590
3591	/* allocate a new lock */
3592	newnflp = nfs_file_lock_alloc(nlop);
3593	if (!newnflp)
3594		return (ENOLCK);
3595	newnflp->nfl_start = start;
3596	newnflp->nfl_end = end;
3597	newnflp->nfl_type = type;
3598	if (op == F_SETLKW)
3599		newnflp->nfl_flags |= NFS_FILE_LOCK_WAIT;
3600	newnflp->nfl_flags |= style;
3601	newnflp->nfl_flags |= NFS_FILE_LOCK_BLOCKED;
3602
3603	if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && (type == F_WRLCK)) {
3604		/*
3605		 * For exclusive flock-style locks, if we block waiting for the
3606		 * lock, we need to first release any currently held shared
3607		 * flock-style lock.  So, the first thing we do is check if we
3608		 * have a shared flock-style lock.
3609		 */
3610		nflp = TAILQ_FIRST(&nlop->nlo_locks);
3611		if (nflp && ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_FLOCK))
3612			nflp = NULL;
3613		if (nflp && (nflp->nfl_type != F_RDLCK))
3614			nflp = NULL;
3615		flocknflp = nflp;
3616	}
3617
3618restart:
3619	restart = 0;
3620	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
3621	if (error)
3622		goto error_out;
3623	inuse = 1;
3624	if (np->n_flag & NREVOKE) {
3625		error = EIO;
3626		nfs_mount_state_in_use_end(nmp, 0);
3627		inuse = 0;
3628		goto error_out;
3629	}
3630	if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
3631		nfs_mount_state_in_use_end(nmp, 0);
3632		inuse = 0;
3633		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
3634		if (error)
3635			goto error_out;
3636		goto restart;
3637	}
3638
3639	lck_mtx_lock(&np->n_openlock);
3640	if (!inqueue) {
3641		/* insert new lock at beginning of list */
3642		TAILQ_INSERT_HEAD(&np->n_locks, newnflp, nfl_link);
3643		inqueue = 1;
3644	}
3645
3646	/* scan current list of locks (held and pending) for conflicts */
3647	for (nflp = TAILQ_NEXT(newnflp, nfl_link); nflp; nflp = nextnflp) {
3648		nextnflp = TAILQ_NEXT(nflp, nfl_link);
3649		if (!nfs_file_lock_conflict(newnflp, nflp, &willsplit))
3650			continue;
3651		/* Conflict */
3652		if (!(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) {
3653			error = EAGAIN;
3654			break;
3655		}
3656		/* Block until this lock is no longer held. */
3657		if (nflp->nfl_blockcnt == UINT_MAX) {
3658			error = ENOLCK;
3659			break;
3660		}
3661		nflp->nfl_blockcnt++;
3662		do {
3663			if (flocknflp) {
3664				/* release any currently held shared lock before sleeping */
3665				lck_mtx_unlock(&np->n_openlock);
3666				nfs_mount_state_in_use_end(nmp, 0);
3667				inuse = 0;
3668				error = nfs_advlock_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
3669				flocknflp = NULL;
3670				if (!error)
3671					error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
3672				if (error) {
3673					lck_mtx_lock(&np->n_openlock);
3674					break;
3675				}
3676				inuse = 1;
3677				lck_mtx_lock(&np->n_openlock);
3678				/* no need to block/sleep if the conflict is gone */
3679				if (!nfs_file_lock_conflict(newnflp, nflp, NULL))
3680					break;
3681			}
3682			msleep(nflp, &np->n_openlock, slpflag, "nfs_advlock_setlock_blocked", &ts);
3683			slpflag = 0;
3684			error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0);
3685			if (!error && (nmp->nm_state & NFSSTA_RECOVER)) {
3686				/* looks like we have a recover pending... restart */
3687				restart = 1;
3688				lck_mtx_unlock(&np->n_openlock);
3689				nfs_mount_state_in_use_end(nmp, 0);
3690				inuse = 0;
3691				lck_mtx_lock(&np->n_openlock);
3692				break;
3693			}
3694			if (!error && (np->n_flag & NREVOKE))
3695				error = EIO;
3696		} while (!error && nfs_file_lock_conflict(newnflp, nflp, NULL));
3697		nflp->nfl_blockcnt--;
3698		if ((nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !nflp->nfl_blockcnt) {
3699			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
3700			nfs_file_lock_destroy(nflp);
3701		}
3702		if (error || restart)
3703			break;
3704		/* We have released n_openlock and we can't trust that nextnflp is still valid. */
3705		/* So, start this lock-scanning loop over from where it started. */
3706		nextnflp = TAILQ_NEXT(newnflp, nfl_link);
3707	}
3708	lck_mtx_unlock(&np->n_openlock);
3709	if (restart)
3710		goto restart;
3711	if (error)
3712		goto error_out;
3713
3714	if (willsplit) {
3715		/*
3716		 * It looks like this operation is splitting a lock.
3717		 * We allocate a new lock now so we don't have to worry
3718		 * about the allocation failing after we've updated some state.
3719		 */
3720		nflp2 = nfs_file_lock_alloc(nlop);
3721		if (!nflp2) {
3722			error = ENOLCK;
3723			goto error_out;
3724		}
3725	}
3726
3727	/* once scan for local conflicts is clear, send request to server */
3728	if ((error = nfs_open_state_set_busy(np, vfs_context_thread(ctx))))
3729		goto error_out;
3730	busy = 1;
3731	delay = 0;
3732	do {
3733		/* do we have a delegation? (that we're not returning?) */
3734		if ((np->n_openflags & N_DELEG_MASK) && !(np->n_openflags & N_DELEG_RETURN)) {
3735			if (np->n_openflags & N_DELEG_WRITE) {
3736				/* with a write delegation, just take the lock delegated */
3737				newnflp->nfl_flags |= NFS_FILE_LOCK_DELEGATED;
3738				error = 0;
3739				/* make sure the lock owner knows its open owner */
3740				if (!nlop->nlo_open_owner) {
3741					nfs_open_owner_ref(nofp->nof_owner);
3742					nlop->nlo_open_owner = nofp->nof_owner;
3743				}
3744				break;
3745			} else {
3746				/*
3747				 * If we don't have any non-delegated opens but we do have
3748				 * delegated opens, then we need to first claim the delegated
3749				 * opens so that the lock request on the server can be associated
3750				 * with an open it knows about.
3751				 */
3752				if ((!nofp->nof_rw_drw && !nofp->nof_w_drw && !nofp->nof_r_drw &&
3753				     !nofp->nof_rw_dw && !nofp->nof_w_dw && !nofp->nof_r_dw &&
3754				     !nofp->nof_rw && !nofp->nof_w && !nofp->nof_r) &&
3755				    (nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
3756				     nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
3757				     nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r)) {
3758					error = nfs4_claim_delegated_state_for_open_file(nofp, 0);
3759					if (error)
3760						break;
3761				}
3762			}
3763		}
3764		if (np->n_flag & NREVOKE)
3765			error = EIO;
3766		if (!error)
3767			error = nmp->nm_funcs->nf_setlock_rpc(np, nofp, newnflp, 0, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
3768		if (!error || ((error != NFSERR_DENIED) && (error != NFSERR_GRACE)))
3769			break;
3770		/* request was denied due to either conflict or grace period */
3771		if ((error == NFSERR_DENIED) && !(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) {
3772			error = EAGAIN;
3773			break;
3774		}
3775		if (flocknflp) {
3776			/* release any currently held shared lock before sleeping */
3777			nfs_open_state_clear_busy(np);
3778			busy = 0;
3779			nfs_mount_state_in_use_end(nmp, 0);
3780			inuse = 0;
3781			error2 = nfs_advlock_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
3782			flocknflp = NULL;
3783			if (!error2)
3784				error2 = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
3785			if (!error2) {
3786				inuse = 1;
3787				error2 = nfs_open_state_set_busy(np, vfs_context_thread(ctx));
3788			}
3789			if (error2) {
3790				error = error2;
3791				break;
3792			}
3793			busy = 1;
3794		}
3795		/*
3796		 * Wait a little bit and send the request again.
3797		 * Except for retries of blocked v2/v3 request where we've already waited a bit.
3798		 */
3799		if ((nmp->nm_vers >= NFS_VER4) || (error == NFSERR_GRACE)) {
3800			if (error == NFSERR_GRACE)
3801				delay = 4;
3802			if (delay < 4)
3803				delay++;
3804			tsleep(newnflp, slpflag, "nfs_advlock_setlock_delay", delay * (hz/2));
3805			slpflag = 0;
3806		}
3807		error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0);
3808		if (!error && (nmp->nm_state & NFSSTA_RECOVER)) {
3809			/* looks like we have a recover pending... restart */
3810			nfs_open_state_clear_busy(np);
3811			busy = 0;
3812			nfs_mount_state_in_use_end(nmp, 0);
3813			inuse = 0;
3814			goto restart;
3815		}
3816		if (!error && (np->n_flag & NREVOKE))
3817			error = EIO;
3818	} while (!error);
3819
3820error_out:
3821	if (nfs_mount_state_error_should_restart(error)) {
3822		/* looks like we need to restart this operation */
3823		if (busy) {
3824			nfs_open_state_clear_busy(np);
3825			busy = 0;
3826		}
3827		if (inuse) {
3828			nfs_mount_state_in_use_end(nmp, error);
3829			inuse = 0;
3830		}
3831		goto restart;
3832	}
3833	lck_mtx_lock(&np->n_openlock);
3834	newnflp->nfl_flags &= ~NFS_FILE_LOCK_BLOCKED;
3835	if (error) {
3836		newnflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3837		if (newnflp->nfl_blockcnt) {
3838			/* wake up anyone blocked on this lock */
3839			wakeup(newnflp);
3840		} else {
3841			/* remove newnflp from lock list and destroy */
3842			if (inqueue)
3843				TAILQ_REMOVE(&np->n_locks, newnflp, nfl_link);
3844			nfs_file_lock_destroy(newnflp);
3845		}
3846		lck_mtx_unlock(&np->n_openlock);
3847		if (busy)
3848			nfs_open_state_clear_busy(np);
3849		if (inuse)
3850			nfs_mount_state_in_use_end(nmp, error);
3851		if (nflp2)
3852			nfs_file_lock_destroy(nflp2);
3853		return (error);
3854	}
3855
3856	/* server granted the lock */
3857
3858	/*
3859	 * Scan for locks to update.
3860	 *
3861	 * Locks completely covered are killed.
3862	 * At most two locks may need to be clipped.
3863	 * It's possible that a single lock may need to be split.
3864	 */
3865	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
3866		if (nflp == newnflp)
3867			continue;
3868		if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
3869			continue;
3870		if (nflp->nfl_owner != nlop)
3871			continue;
3872		if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK))
3873			continue;
3874		if ((newnflp->nfl_start > nflp->nfl_end) || (newnflp->nfl_end < nflp->nfl_start))
3875			continue;
3876		/* here's one to update */
3877		if ((newnflp->nfl_start <= nflp->nfl_start) && (newnflp->nfl_end >= nflp->nfl_end)) {
3878			/* The entire lock is being replaced. */
3879			nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3880			lck_mtx_lock(&nlop->nlo_lock);
3881			TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
3882			lck_mtx_unlock(&nlop->nlo_lock);
3883			/* lock will be destroyed below, if no waiters */
3884		} else if ((newnflp->nfl_start > nflp->nfl_start) && (newnflp->nfl_end < nflp->nfl_end)) {
3885			/* We're replacing a range in the middle of a lock. */
3886			/* The current lock will be split into two locks. */
3887			/* Update locks and insert new lock after current lock. */
3888			nflp2->nfl_flags |= (nflp->nfl_flags & (NFS_FILE_LOCK_STYLE_MASK|NFS_FILE_LOCK_DELEGATED));
3889			nflp2->nfl_type = nflp->nfl_type;
3890			nflp2->nfl_start = newnflp->nfl_end + 1;
3891			nflp2->nfl_end = nflp->nfl_end;
3892			nflp->nfl_end = newnflp->nfl_start - 1;
3893			TAILQ_INSERT_AFTER(&np->n_locks, nflp, nflp2, nfl_link);
3894			nfs_lock_owner_insert_held_lock(nlop, nflp2);
3895			nextnflp = nflp2;
3896			nflp2 = NULL;
3897		} else if (newnflp->nfl_start > nflp->nfl_start) {
3898			/* We're replacing the end of a lock. */
3899			nflp->nfl_end = newnflp->nfl_start - 1;
3900		} else if (newnflp->nfl_end < nflp->nfl_end) {
3901			/* We're replacing the start of a lock. */
3902			nflp->nfl_start = newnflp->nfl_end + 1;
3903		}
3904		if (nflp->nfl_blockcnt) {
3905			/* wake up anyone blocked on this lock */
3906			wakeup(nflp);
3907		} else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) {
3908			/* remove nflp from lock list and destroy */
3909			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
3910			nfs_file_lock_destroy(nflp);
3911		}
3912	}
3913
3914	nfs_lock_owner_insert_held_lock(nlop, newnflp);
3915
3916	/*
3917	 * POSIX locks should be coalesced when possible.
3918	 */
3919	if ((style == NFS_FILE_LOCK_STYLE_POSIX) && (nofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)) {
3920		/*
3921		 * Walk through the lock queue and check each of our held locks with
3922		 * the previous and next locks in the lock owner's "held lock list".
3923		 * If the two locks can be coalesced, we merge the current lock into
3924		 * the other (previous or next) lock.  Merging this way makes sure that
3925		 * lock ranges are always merged forward in the lock queue.  This is
3926		 * important because anyone blocked on the lock being "merged away"
3927		 * will still need to block on that range and it will simply continue
3928		 * checking locks that are further down the list.
3929		 */
3930		TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
3931			if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
3932				continue;
3933			if (nflp->nfl_owner != nlop)
3934				continue;
3935			if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_POSIX)
3936				continue;
3937			if (((coalnflp = TAILQ_PREV(nflp, nfs_file_lock_queue, nfl_lolink))) &&
3938			    ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) &&
3939			    (coalnflp->nfl_type == nflp->nfl_type) &&
3940			    (coalnflp->nfl_end == (nflp->nfl_start - 1))) {
3941				coalnflp->nfl_end = nflp->nfl_end;
3942				nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3943				lck_mtx_lock(&nlop->nlo_lock);
3944				TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
3945				lck_mtx_unlock(&nlop->nlo_lock);
3946			} else if (((coalnflp = TAILQ_NEXT(nflp, nfl_lolink))) &&
3947			    ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) &&
3948			    (coalnflp->nfl_type == nflp->nfl_type) &&
3949			    (coalnflp->nfl_start == (nflp->nfl_end + 1))) {
3950				coalnflp->nfl_start = nflp->nfl_start;
3951				nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3952				lck_mtx_lock(&nlop->nlo_lock);
3953				TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
3954				lck_mtx_unlock(&nlop->nlo_lock);
3955			}
3956			if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD))
3957				continue;
3958			if (nflp->nfl_blockcnt) {
3959				/* wake up anyone blocked on this lock */
3960				wakeup(nflp);
3961			} else {
3962				/* remove nflp from lock list and destroy */
3963				TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
3964				nfs_file_lock_destroy(nflp);
3965			}
3966		}
3967	}
3968
3969	lck_mtx_unlock(&np->n_openlock);
3970	nfs_open_state_clear_busy(np);
3971	nfs_mount_state_in_use_end(nmp, error);
3972
3973	if (nflp2)
3974		nfs_file_lock_destroy(nflp2);
3975	return (error);
3976}
3977
3978/*
3979 * Release all (same style) locks within the given range.
3980 */
3981int
3982nfs_advlock_unlock(
3983	nfsnode_t np,
3984	struct nfs_open_file *nofp,
3985	struct nfs_lock_owner *nlop,
3986	uint64_t start,
3987	uint64_t end,
3988	int style,
3989	vfs_context_t ctx)
3990{
3991	struct nfsmount *nmp;
3992	struct nfs_file_lock *nflp, *nextnflp, *newnflp = NULL;
3993	int error = 0, willsplit = 0, send_unlock_rpcs = 1;
3994
3995	nmp = NFSTONMP(np);
3996	if (nfs_mount_gone(nmp))
3997		return (ENXIO);
3998
3999restart:
4000	if ((error = nfs_mount_state_in_use_start(nmp, NULL)))
4001		return (error);
4002	if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
4003		nfs_mount_state_in_use_end(nmp, 0);
4004		error = nfs4_reopen(nofp, NULL);
4005		if (error)
4006			return (error);
4007		goto restart;
4008	}
4009	if ((error = nfs_open_state_set_busy(np, NULL))) {
4010		nfs_mount_state_in_use_end(nmp, error);
4011		return (error);
4012	}
4013
4014	lck_mtx_lock(&np->n_openlock);
4015	if ((start > 0) && (end < UINT64_MAX) && !willsplit) {
4016		/*
4017		 * We may need to allocate a new lock if an existing lock gets split.
4018		 * So, we first scan the list to check for a split, and if there's
4019		 * going to be one, we'll allocate one now.
4020		 */
4021		TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
4022			if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
4023				continue;
4024			if (nflp->nfl_owner != nlop)
4025				continue;
4026			if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style)
4027				continue;
4028			if ((start > nflp->nfl_end) || (end < nflp->nfl_start))
4029				continue;
4030			if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) {
4031				willsplit = 1;
4032				break;
4033			}
4034		}
4035		if (willsplit) {
4036			lck_mtx_unlock(&np->n_openlock);
4037			nfs_open_state_clear_busy(np);
4038			nfs_mount_state_in_use_end(nmp, 0);
4039			newnflp = nfs_file_lock_alloc(nlop);
4040			if (!newnflp)
4041				return (ENOMEM);
4042			goto restart;
4043		}
4044	}
4045
4046	/*
4047	 * Free all of our locks in the given range.
4048	 *
4049	 * Note that this process requires sending requests to the server.
4050	 * Because of this, we will release the n_openlock while performing
4051	 * the unlock RPCs.  The N_OPENBUSY state keeps the state of *held*
4052	 * locks from changing underneath us.  However, other entries in the
4053	 * list may be removed.  So we need to be careful walking the list.
4054	 */
4055
4056	/*
4057	 * Don't unlock ranges that are held by other-style locks.
4058	 * If style is posix, don't send any unlock rpcs if flock is held.
4059	 * If we unlock an flock, don't send unlock rpcs for any posix-style
4060	 * ranges held - instead send unlocks for the ranges not held.
4061	 */
4062	if ((style == NFS_FILE_LOCK_STYLE_POSIX) &&
4063	    ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) &&
4064	    ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK))
4065		send_unlock_rpcs = 0;
4066	if ((style == NFS_FILE_LOCK_STYLE_FLOCK) &&
4067	    ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) &&
4068	    ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) &&
4069	    ((nflp = TAILQ_NEXT(nflp, nfl_lolink))) &&
4070	    ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX)) {
4071		uint64_t s = 0;
4072		int type = TAILQ_FIRST(&nlop->nlo_locks)->nfl_type;
4073		int delegated = (TAILQ_FIRST(&nlop->nlo_locks)->nfl_flags & NFS_FILE_LOCK_DELEGATED);
4074		while (!delegated && nflp) {
4075			if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) {
4076				/* unlock the range preceding this lock */
4077				lck_mtx_unlock(&np->n_openlock);
4078				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, type, s, nflp->nfl_start-1, 0,
4079						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4080				if (nfs_mount_state_error_should_restart(error)) {
4081					nfs_open_state_clear_busy(np);
4082					nfs_mount_state_in_use_end(nmp, error);
4083					goto restart;
4084				}
4085				lck_mtx_lock(&np->n_openlock);
4086				if (error)
4087					goto out;
4088				s = nflp->nfl_end+1;
4089			}
4090			nflp = TAILQ_NEXT(nflp, nfl_lolink);
4091		}
4092		if (!delegated) {
4093			lck_mtx_unlock(&np->n_openlock);
4094			error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, type, s, end, 0,
4095					vfs_context_thread(ctx), vfs_context_ucred(ctx));
4096			if (nfs_mount_state_error_should_restart(error)) {
4097				nfs_open_state_clear_busy(np);
4098				nfs_mount_state_in_use_end(nmp, error);
4099				goto restart;
4100			}
4101			lck_mtx_lock(&np->n_openlock);
4102			if (error)
4103				goto out;
4104		}
4105		send_unlock_rpcs = 0;
4106	}
4107
4108	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
4109		if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
4110			continue;
4111		if (nflp->nfl_owner != nlop)
4112			continue;
4113		if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style)
4114			continue;
4115		if ((start > nflp->nfl_end) || (end < nflp->nfl_start))
4116			continue;
4117		/* here's one to unlock */
4118		if ((start <= nflp->nfl_start) && (end >= nflp->nfl_end)) {
4119			/* The entire lock is being unlocked. */
4120			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4121				lck_mtx_unlock(&np->n_openlock);
4122				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, nflp->nfl_end, 0,
4123						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4124				if (nfs_mount_state_error_should_restart(error)) {
4125					nfs_open_state_clear_busy(np);
4126					nfs_mount_state_in_use_end(nmp, error);
4127					goto restart;
4128				}
4129				lck_mtx_lock(&np->n_openlock);
4130			}
4131			nextnflp = TAILQ_NEXT(nflp, nfl_link);
4132			if (error)
4133				break;
4134			nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
4135			lck_mtx_lock(&nlop->nlo_lock);
4136			TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
4137			lck_mtx_unlock(&nlop->nlo_lock);
4138			/* lock will be destroyed below, if no waiters */
4139		} else if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) {
4140			/* We're unlocking a range in the middle of a lock. */
4141			/* The current lock will be split into two locks. */
4142			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4143				lck_mtx_unlock(&np->n_openlock);
4144				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, start, end, 0,
4145						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4146				if (nfs_mount_state_error_should_restart(error)) {
4147					nfs_open_state_clear_busy(np);
4148					nfs_mount_state_in_use_end(nmp, error);
4149					goto restart;
4150				}
4151				lck_mtx_lock(&np->n_openlock);
4152			}
4153			if (error)
4154				break;
4155			/* update locks and insert new lock after current lock */
4156			newnflp->nfl_flags |= (nflp->nfl_flags & (NFS_FILE_LOCK_STYLE_MASK|NFS_FILE_LOCK_DELEGATED));
4157			newnflp->nfl_type = nflp->nfl_type;
4158			newnflp->nfl_start = end + 1;
4159			newnflp->nfl_end = nflp->nfl_end;
4160			nflp->nfl_end = start - 1;
4161			TAILQ_INSERT_AFTER(&np->n_locks, nflp, newnflp, nfl_link);
4162			nfs_lock_owner_insert_held_lock(nlop, newnflp);
4163			nextnflp = newnflp;
4164			newnflp = NULL;
4165		} else if (start > nflp->nfl_start) {
4166			/* We're unlocking the end of a lock. */
4167			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4168				lck_mtx_unlock(&np->n_openlock);
4169				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, start, nflp->nfl_end, 0,
4170						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4171				if (nfs_mount_state_error_should_restart(error)) {
4172					nfs_open_state_clear_busy(np);
4173					nfs_mount_state_in_use_end(nmp, error);
4174					goto restart;
4175				}
4176				lck_mtx_lock(&np->n_openlock);
4177			}
4178			nextnflp = TAILQ_NEXT(nflp, nfl_link);
4179			if (error)
4180				break;
4181			nflp->nfl_end = start - 1;
4182		} else if (end < nflp->nfl_end) {
4183			/* We're unlocking the start of a lock. */
4184			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4185				lck_mtx_unlock(&np->n_openlock);
4186				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, end, 0,
4187						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4188				if (nfs_mount_state_error_should_restart(error)) {
4189					nfs_open_state_clear_busy(np);
4190					nfs_mount_state_in_use_end(nmp, error);
4191					goto restart;
4192				}
4193				lck_mtx_lock(&np->n_openlock);
4194			}
4195			nextnflp = TAILQ_NEXT(nflp, nfl_link);
4196			if (error)
4197				break;
4198			nflp->nfl_start = end + 1;
4199		}
4200		if (nflp->nfl_blockcnt) {
4201			/* wake up anyone blocked on this lock */
4202			wakeup(nflp);
4203		} else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) {
4204			/* remove nflp from lock list and destroy */
4205			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
4206			nfs_file_lock_destroy(nflp);
4207		}
4208	}
4209out:
4210	lck_mtx_unlock(&np->n_openlock);
4211	nfs_open_state_clear_busy(np);
4212	nfs_mount_state_in_use_end(nmp, 0);
4213
4214	if (newnflp)
4215		nfs_file_lock_destroy(newnflp);
4216	return (error);
4217}
4218
4219/*
4220 * NFSv4 advisory file locking
4221 */
4222int
4223nfs_vnop_advlock(
4224	struct vnop_advlock_args /* {
4225		struct vnodeop_desc *a_desc;
4226		vnode_t a_vp;
4227		caddr_t a_id;
4228		int a_op;
4229		struct flock *a_fl;
4230		int a_flags;
4231		vfs_context_t a_context;
4232	} */ *ap)
4233{
4234	vnode_t vp = ap->a_vp;
4235	nfsnode_t np = VTONFS(ap->a_vp);
4236	struct flock *fl = ap->a_fl;
4237	int op = ap->a_op;
4238	int flags = ap->a_flags;
4239	vfs_context_t ctx = ap->a_context;
4240	struct nfsmount *nmp;
4241	struct nfs_open_owner *noop = NULL;
4242	struct nfs_open_file *nofp = NULL;
4243	struct nfs_lock_owner *nlop = NULL;
4244	off_t lstart;
4245	uint64_t start, end;
4246	int error = 0, modified, style;
4247	enum vtype vtype;
4248#define OFF_MAX QUAD_MAX
4249
4250	nmp = VTONMP(ap->a_vp);
4251	if (nfs_mount_gone(nmp))
4252		return (ENXIO);
4253	lck_mtx_lock(&nmp->nm_lock);
4254	if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED)) {
4255		lck_mtx_unlock(&nmp->nm_lock);
4256		return (ENOTSUP);
4257	}
4258	lck_mtx_unlock(&nmp->nm_lock);
4259
4260	if (np->n_flag & NREVOKE)
4261		return (EIO);
4262	vtype = vnode_vtype(ap->a_vp);
4263	if (vtype == VDIR) /* ignore lock requests on directories */
4264		return (0);
4265	if (vtype != VREG) /* anything other than regular files is invalid */
4266		return (EINVAL);
4267
4268	/* Convert the flock structure into a start and end. */
4269	switch (fl->l_whence) {
4270	case SEEK_SET:
4271	case SEEK_CUR:
4272		/*
4273		 * Caller is responsible for adding any necessary offset
4274		 * to fl->l_start when SEEK_CUR is used.
4275		 */
4276		lstart = fl->l_start;
4277		break;
4278	case SEEK_END:
4279		/* need to flush, and refetch attributes to make */
4280		/* sure we have the correct end of file offset   */
4281		if ((error = nfs_node_lock(np)))
4282			return (error);
4283		modified = (np->n_flag & NMODIFIED);
4284		nfs_node_unlock(np);
4285		if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1))))
4286			return (error);
4287		if ((error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED)))
4288			return (error);
4289		nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
4290		if ((np->n_size > OFF_MAX) ||
4291		    ((fl->l_start > 0) && (np->n_size > (u_quad_t)(OFF_MAX - fl->l_start))))
4292			error = EOVERFLOW;
4293		lstart = np->n_size + fl->l_start;
4294		nfs_data_unlock(np);
4295		if (error)
4296			return (error);
4297		break;
4298	default:
4299		return (EINVAL);
4300	}
4301	if (lstart < 0)
4302		return (EINVAL);
4303	start = lstart;
4304	if (fl->l_len == 0) {
4305		end = UINT64_MAX;
4306	} else if (fl->l_len > 0) {
4307		if ((fl->l_len - 1) > (OFF_MAX - lstart))
4308			return (EOVERFLOW);
4309		end = start - 1 + fl->l_len;
4310	} else { /* l_len is negative */
4311		if ((lstart + fl->l_len) < 0)
4312			return (EINVAL);
4313		end = start - 1;
4314		start += fl->l_len;
4315	}
4316	if ((nmp->nm_vers == NFS_VER2) && ((start > INT32_MAX) || (fl->l_len && (end > INT32_MAX))))
4317		return (EINVAL);
4318
4319	style = (flags & F_FLOCK) ? NFS_FILE_LOCK_STYLE_FLOCK : NFS_FILE_LOCK_STYLE_POSIX;
4320	if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && ((start != 0) || (end != UINT64_MAX)))
4321		return (EINVAL);
4322
4323	/* find the lock owner, alloc if not unlock */
4324	nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), (op != F_UNLCK));
4325	if (!nlop) {
4326		error = (op == F_UNLCK) ? 0 : ENOMEM;
4327		if (error)
4328			NP(np, "nfs_vnop_advlock: no lock owner, error %d", error);
4329		goto out;
4330	}
4331
4332	if (op == F_GETLK) {
4333		error = nfs_advlock_getlock(np, nlop, fl, start, end, ctx);
4334	} else {
4335		/* find the open owner */
4336		noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
4337		if (!noop) {
4338			NP(np, "nfs_vnop_advlock: no open owner %d", kauth_cred_getuid(vfs_context_ucred(ctx)));
4339			error = EPERM;
4340			goto out;
4341		}
4342		/* find the open file */
4343restart:
4344		error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0);
4345		if (error)
4346			error = EBADF;
4347		if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
4348			NP(np, "nfs_vnop_advlock: LOST %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
4349			error = EIO;
4350		}
4351		if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
4352			error = nfs4_reopen(nofp, ((op == F_UNLCK) ? NULL : vfs_context_thread(ctx)));
4353			nofp = NULL;
4354			if (!error)
4355				goto restart;
4356		}
4357		if (error) {
4358			NP(np, "nfs_vnop_advlock: no open file %d, %d", error, kauth_cred_getuid(noop->noo_cred));
4359			goto out;
4360		}
4361		if (op == F_UNLCK) {
4362			error = nfs_advlock_unlock(np, nofp, nlop, start, end, style, ctx);
4363		} else if ((op == F_SETLK) || (op == F_SETLKW)) {
4364			if ((op == F_SETLK) && (flags & F_WAIT))
4365				op = F_SETLKW;
4366			error = nfs_advlock_setlock(np, nofp, nlop, op, start, end, style, fl->l_type, ctx);
4367		} else {
4368			/* not getlk, unlock or lock? */
4369			error = EINVAL;
4370		}
4371	}
4372
4373out:
4374	if (nlop)
4375		nfs_lock_owner_rele(nlop);
4376	if (noop)
4377		nfs_open_owner_rele(noop);
4378	return (error);
4379}
4380
4381/*
4382 * Check if an open owner holds any locks on a file.
4383 */
4384int
4385nfs_check_for_locks(struct nfs_open_owner *noop, struct nfs_open_file *nofp)
4386{
4387	struct nfs_lock_owner *nlop;
4388
4389	TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) {
4390		if (nlop->nlo_open_owner != noop)
4391			continue;
4392		if (!TAILQ_EMPTY(&nlop->nlo_locks))
4393			break;
4394	}
4395	return (nlop ? 1 : 0);
4396}
4397
4398/*
4399 * Reopen simple (no deny, no locks) open state that was lost.
4400 */
4401int
4402nfs4_reopen(struct nfs_open_file *nofp, thread_t thd)
4403{
4404	struct nfs_open_owner *noop = nofp->nof_owner;
4405	struct nfsmount *nmp = NFSTONMP(nofp->nof_np);
4406	nfsnode_t np = nofp->nof_np;
4407	vnode_t vp = NFSTOV(np);
4408	vnode_t dvp = NULL;
4409	struct componentname cn;
4410	const char *vname = NULL;
4411	const char *name = NULL;
4412	size_t namelen;
4413	char smallname[128];
4414	char *filename = NULL;
4415	int error = 0, done = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
4416	struct timespec ts = { 1, 0 };
4417
4418	lck_mtx_lock(&nofp->nof_lock);
4419	while (nofp->nof_flags & NFS_OPEN_FILE_REOPENING) {
4420		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
4421			break;
4422		msleep(&nofp->nof_flags, &nofp->nof_lock, slpflag|(PZERO-1), "nfsreopenwait", &ts);
4423		slpflag = 0;
4424	}
4425	if (error || !(nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
4426		lck_mtx_unlock(&nofp->nof_lock);
4427		return (error);
4428	}
4429	nofp->nof_flags |= NFS_OPEN_FILE_REOPENING;
4430	lck_mtx_unlock(&nofp->nof_lock);
4431
4432	nfs_node_lock_force(np);
4433	if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) {
4434		/*
4435		 * The node's been sillyrenamed, so we need to use
4436		 * the sillyrename directory/name to do the open.
4437		 */
4438		struct nfs_sillyrename *nsp = np->n_sillyrename;
4439		dvp = NFSTOV(nsp->nsr_dnp);
4440		if ((error = vnode_get(dvp))) {
4441			nfs_node_unlock(np);
4442			goto out;
4443		}
4444		name = nsp->nsr_name;
4445	} else {
4446		/*
4447		 * [sigh] We can't trust VFS to get the parent right for named
4448		 * attribute nodes.  (It likes to reparent the nodes after we've
4449		 * created them.)  Luckily we can probably get the right parent
4450		 * from the n_parent we have stashed away.
4451		 */
4452		if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
4453		    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
4454			dvp = NULL;
4455		if (!dvp)
4456			dvp = vnode_getparent(vp);
4457		vname = vnode_getname(vp);
4458		if (!dvp || !vname) {
4459			if (!error)
4460				error = EIO;
4461			nfs_node_unlock(np);
4462			goto out;
4463		}
4464		name = vname;
4465	}
4466	filename = &smallname[0];
4467	namelen = snprintf(filename, sizeof(smallname), "%s", name);
4468	if (namelen >= sizeof(smallname)) {
4469		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
4470		if (!filename) {
4471			error = ENOMEM;
4472			goto out;
4473		}
4474		snprintf(filename, namelen+1, "%s", name);
4475	}
4476	nfs_node_unlock(np);
4477	bzero(&cn, sizeof(cn));
4478	cn.cn_nameptr = filename;
4479	cn.cn_namelen = namelen;
4480
4481restart:
4482	done = 0;
4483	if ((error = nfs_mount_state_in_use_start(nmp, thd)))
4484		goto out;
4485
4486	if (nofp->nof_rw)
4487		error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE);
4488	if (!error && nofp->nof_w)
4489		error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE);
4490	if (!error && nofp->nof_r)
4491		error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE);
4492
4493	if (nfs_mount_state_in_use_end(nmp, error)) {
4494		if (error == NFSERR_GRACE)
4495			goto restart;
4496		printf("nfs4_reopen: RPC failed, error %d, lost %d, %s\n", error,
4497			(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0, name ? name : "???");
4498		error = 0;
4499		goto out;
4500	}
4501	done = 1;
4502out:
4503	if (error && (error != EINTR) && (error != ERESTART))
4504		nfs_revoke_open_state_for_node(np);
4505	lck_mtx_lock(&nofp->nof_lock);
4506	nofp->nof_flags &= ~NFS_OPEN_FILE_REOPENING;
4507	if (done)
4508		nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
4509	else if (error)
4510		printf("nfs4_reopen: failed, error %d, lost %d, %s\n", error,
4511			(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0, name ? name : "???");
4512	lck_mtx_unlock(&nofp->nof_lock);
4513	if (filename && (filename != &smallname[0]))
4514		FREE(filename, M_TEMP);
4515	if (vname)
4516		vnode_putname(vname);
4517	if (dvp != NULLVP)
4518		vnode_put(dvp);
4519	return (error);
4520}
4521
4522/*
4523 * Send a normal OPEN RPC to open/create a file.
4524 */
4525int
4526nfs4_open_rpc(
4527	struct nfs_open_file *nofp,
4528	vfs_context_t ctx,
4529	struct componentname *cnp,
4530	struct vnode_attr *vap,
4531	vnode_t dvp,
4532	vnode_t *vpp,
4533	int create,
4534	int share_access,
4535	int share_deny)
4536{
4537	return (nfs4_open_rpc_internal(nofp, ctx, vfs_context_thread(ctx), vfs_context_ucred(ctx),
4538					cnp, vap, dvp, vpp, create, share_access, share_deny));
4539}
4540
4541/*
4542 * Send an OPEN RPC to reopen a file.
4543 */
4544int
4545nfs4_open_reopen_rpc(
4546	struct nfs_open_file *nofp,
4547	thread_t thd,
4548	kauth_cred_t cred,
4549	struct componentname *cnp,
4550	vnode_t dvp,
4551	vnode_t *vpp,
4552	int share_access,
4553	int share_deny)
4554{
4555	return (nfs4_open_rpc_internal(nofp, NULL, thd, cred, cnp, NULL, dvp, vpp, NFS_OPEN_NOCREATE, share_access, share_deny));
4556}
4557
4558/*
4559 * Send an OPEN_CONFIRM RPC to confirm an OPEN.
4560 */
4561int
4562nfs4_open_confirm_rpc(
4563	struct nfsmount *nmp,
4564	nfsnode_t dnp,
4565	u_char *fhp,
4566	int fhlen,
4567	struct nfs_open_owner *noop,
4568	nfs_stateid *sid,
4569	thread_t thd,
4570	kauth_cred_t cred,
4571	struct nfs_vattr *nvap,
4572	uint64_t *xidp)
4573{
4574	struct nfsm_chain nmreq, nmrep;
4575	int error = 0, status, numops;
4576	struct nfsreq_secinfo_args si;
4577
4578	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
4579	nfsm_chain_null(&nmreq);
4580	nfsm_chain_null(&nmrep);
4581
4582	// PUTFH, OPEN_CONFIRM, GETATTR
4583	numops = 3;
4584	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
4585	nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops);
4586	numops--;
4587	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
4588	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
4589	numops--;
4590	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM);
4591	nfsm_chain_add_stateid(error, &nmreq, sid);
4592	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
4593	numops--;
4594	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4595	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
4596	nfsm_chain_build_done(error, &nmreq);
4597	nfsm_assert(error, (numops == 0), EPROTO);
4598	nfsmout_if(error);
4599	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, R_NOINTR, &nmrep, xidp, &status);
4600
4601	nfsm_chain_skip_tag(error, &nmrep);
4602	nfsm_chain_get_32(error, &nmrep, numops);
4603	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
4604	nfsmout_if(error);
4605	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM);
4606	nfs_owner_seqid_increment(noop, NULL, error);
4607	nfsm_chain_get_stateid(error, &nmrep, sid);
4608	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4609	nfsmout_if(error);
4610	error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
4611nfsmout:
4612	nfsm_chain_cleanup(&nmreq);
4613	nfsm_chain_cleanup(&nmrep);
4614	return (error);
4615}
4616
4617/*
4618 * common OPEN RPC code
4619 *
4620 * If create is set, ctx must be passed in.
4621 * Returns a node on success if no node passed in.
4622 */
4623int
4624nfs4_open_rpc_internal(
4625	struct nfs_open_file *nofp,
4626	vfs_context_t ctx,
4627	thread_t thd,
4628	kauth_cred_t cred,
4629	struct componentname *cnp,
4630	struct vnode_attr *vap,
4631	vnode_t dvp,
4632	vnode_t *vpp,
4633	int create,
4634	int share_access,
4635	int share_deny)
4636{
4637	struct nfsmount *nmp;
4638	struct nfs_open_owner *noop = nofp->nof_owner;
4639	struct nfs_vattr nvattr;
4640	int error = 0, open_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status;
4641	int nfsvers, namedattrs, numops, exclusive = 0, gotuid, gotgid;
4642	u_int64_t xid, savedxid = 0;
4643	nfsnode_t dnp = VTONFS(dvp);
4644	nfsnode_t np, newnp = NULL;
4645	vnode_t newvp = NULL;
4646	struct nfsm_chain nmreq, nmrep;
4647	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
4648	uint32_t rflags, delegation, recall;
4649	struct nfs_stateid stateid, dstateid, *sid;
4650	fhandle_t fh;
4651	struct nfsreq rq, *req = &rq;
4652	struct nfs_dulookup dul;
4653	char sbuf[64], *s;
4654	uint32_t ace_type, ace_flags, ace_mask, len, slen;
4655	struct kauth_ace ace;
4656	struct nfsreq_secinfo_args si;
4657
4658	if (create && !ctx)
4659		return (EINVAL);
4660
4661	nmp = VTONMP(dvp);
4662	if (nfs_mount_gone(nmp))
4663		return (ENXIO);
4664	nfsvers = nmp->nm_vers;
4665	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4666	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
4667		return (EINVAL);
4668
4669	np = *vpp ? VTONFS(*vpp) : NULL;
4670	if (create && vap) {
4671		exclusive = (vap->va_vaflags & VA_EXCLUSIVE);
4672		nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4673		gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4674		gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4675		if (exclusive && (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time)))
4676			vap->va_vaflags |= VA_UTIMES_NULL;
4677	} else {
4678		exclusive = gotuid = gotgid = 0;
4679	}
4680	if (nofp) {
4681		sid = &nofp->nof_stateid;
4682	} else {
4683		stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0;
4684		sid = &stateid;
4685	}
4686
4687	if ((error = nfs_open_owner_set_busy(noop, thd)))
4688		return (error);
4689again:
4690	rflags = delegation = recall = 0;
4691	ace.ace_flags = 0;
4692	s = sbuf;
4693	slen = sizeof(sbuf);
4694	NVATTR_INIT(&nvattr);
4695	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, cnp->cn_nameptr, cnp->cn_namelen);
4696
4697	nfsm_chain_null(&nmreq);
4698	nfsm_chain_null(&nmrep);
4699
4700	// PUTFH, SAVEFH, OPEN(CREATE?), GETATTR(FH), RESTOREFH, GETATTR
4701	numops = 6;
4702	nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen);
4703	nfsm_chain_add_compound_header(error, &nmreq, create ? "create" : "open", numops);
4704	numops--;
4705	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
4706	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4707	numops--;
4708	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
4709	numops--;
4710	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
4711	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
4712	nfsm_chain_add_32(error, &nmreq, share_access);
4713	nfsm_chain_add_32(error, &nmreq, share_deny);
4714	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
4715	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
4716	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred));
4717	nfsm_chain_add_32(error, &nmreq, create);
4718	if (create) {
4719		if (exclusive) {
4720			static uint32_t create_verf; // XXX need a better verifier
4721			create_verf++;
4722			nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
4723			/* insert 64 bit verifier */
4724			nfsm_chain_add_32(error, &nmreq, create_verf);
4725			nfsm_chain_add_32(error, &nmreq, create_verf);
4726		} else {
4727			nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
4728			nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
4729		}
4730	}
4731	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL);
4732	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4733	numops--;
4734	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4735	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
4736	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
4737	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
4738	numops--;
4739	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
4740	numops--;
4741	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4742	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
4743	nfsm_chain_build_done(error, &nmreq);
4744	nfsm_assert(error, (numops == 0), EPROTO);
4745	if (!error)
4746		error = busyerror = nfs_node_set_busy(dnp, thd);
4747	nfsmout_if(error);
4748
4749	if (create && !namedattrs)
4750		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4751
4752	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, R_NOINTR, NULL, &req);
4753	if (!error) {
4754		if (create && !namedattrs)
4755			nfs_dulookup_start(&dul, dnp, ctx);
4756		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4757		savedxid = xid;
4758	}
4759
4760	if (create && !namedattrs)
4761		nfs_dulookup_finish(&dul, dnp, ctx);
4762
4763	if ((lockerror = nfs_node_lock(dnp)))
4764		error = lockerror;
4765	nfsm_chain_skip_tag(error, &nmrep);
4766	nfsm_chain_get_32(error, &nmrep, numops);
4767	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
4768	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
4769	nfsmout_if(error);
4770	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
4771	nfs_owner_seqid_increment(noop, NULL, error);
4772	nfsm_chain_get_stateid(error, &nmrep, sid);
4773	nfsm_chain_check_change_info(error, &nmrep, dnp);
4774	nfsm_chain_get_32(error, &nmrep, rflags);
4775	bmlen = NFS_ATTR_BITMAP_LEN;
4776	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
4777	nfsm_chain_get_32(error, &nmrep, delegation);
4778	if (!error)
4779		switch (delegation) {
4780		case NFS_OPEN_DELEGATE_NONE:
4781			break;
4782		case NFS_OPEN_DELEGATE_READ:
4783		case NFS_OPEN_DELEGATE_WRITE:
4784			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
4785			nfsm_chain_get_32(error, &nmrep, recall);
4786			if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
4787				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
4788			/* if we have any trouble accepting the ACE, just invalidate it */
4789			ace_type = ace_flags = ace_mask = len = 0;
4790			nfsm_chain_get_32(error, &nmrep, ace_type);
4791			nfsm_chain_get_32(error, &nmrep, ace_flags);
4792			nfsm_chain_get_32(error, &nmrep, ace_mask);
4793			nfsm_chain_get_32(error, &nmrep, len);
4794			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
4795			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
4796			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
4797			if (!error && (len >= slen)) {
4798				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
4799				if (s)
4800					slen = len+1;
4801				else
4802					ace.ace_flags = 0;
4803			}
4804			if (s)
4805				nfsm_chain_get_opaque(error, &nmrep, len, s);
4806			else
4807				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
4808			if (!error && s) {
4809				s[len] = '\0';
4810				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
4811					ace.ace_flags = 0;
4812			}
4813			if (error || !s)
4814				ace.ace_flags = 0;
4815			if (s && (s != sbuf))
4816				FREE(s, M_TEMP);
4817			break;
4818		default:
4819			error = EBADRPC;
4820			break;
4821		}
4822	/* At this point if we have no error, the object was created/opened. */
4823	open_error = error;
4824	nfsmout_if(error);
4825	if (create && vap && !exclusive)
4826		nfs_vattr_set_supported(bitmap, vap);
4827	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4828	nfsmout_if(error);
4829	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
4830	nfsmout_if(error);
4831	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
4832		printf("nfs: open/create didn't return filehandle? %s\n", cnp->cn_nameptr);
4833		error = EBADRPC;
4834		goto nfsmout;
4835	}
4836	if (!create && np && !NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
4837		// XXX for the open case, what if fh doesn't match the vnode we think we're opening?
4838		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
4839		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
4840			NP(np, "nfs4_open_rpc: warning: file handle mismatch");
4841	}
4842	/* directory attributes: if we don't get them, make sure to invalidate */
4843	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
4844	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4845	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
4846	if (error)
4847		NATTRINVALIDATE(dnp);
4848	nfsmout_if(error);
4849
4850	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
4851		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
4852
4853	if (rflags & NFS_OPEN_RESULT_CONFIRM) {
4854		nfs_node_unlock(dnp);
4855		lockerror = ENOENT;
4856		NVATTR_CLEANUP(&nvattr);
4857		error = nfs4_open_confirm_rpc(nmp, dnp, fh.fh_data, fh.fh_len, noop, sid, thd, cred, &nvattr, &xid);
4858		nfsmout_if(error);
4859		savedxid = xid;
4860		if ((lockerror = nfs_node_lock(dnp)))
4861			error = lockerror;
4862	}
4863
4864nfsmout:
4865	nfsm_chain_cleanup(&nmreq);
4866	nfsm_chain_cleanup(&nmrep);
4867
4868	if (!lockerror && create) {
4869		if (!open_error && (dnp->n_flag & NNEGNCENTRIES)) {
4870			dnp->n_flag &= ~NNEGNCENTRIES;
4871			cache_purge_negatives(dvp);
4872		}
4873		dnp->n_flag |= NMODIFIED;
4874		nfs_node_unlock(dnp);
4875		lockerror = ENOENT;
4876		nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
4877	}
4878	if (!lockerror)
4879		nfs_node_unlock(dnp);
4880	if (!error && !np && fh.fh_len) {
4881		/* create the vnode with the filehandle and attributes */
4882		xid = savedxid;
4883		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &newnp);
4884		if (!error)
4885			newvp = NFSTOV(newnp);
4886	}
4887	NVATTR_CLEANUP(&nvattr);
4888	if (!busyerror)
4889		nfs_node_clear_busy(dnp);
4890	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
4891		if (!np)
4892			np = newnp;
4893		if (!error && np && !recall) {
4894			/* stuff the delegation state in the node */
4895			lck_mtx_lock(&np->n_openlock);
4896			np->n_openflags &= ~N_DELEG_MASK;
4897			np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
4898			np->n_dstateid = dstateid;
4899			np->n_dace = ace;
4900			if (np->n_dlink.tqe_next == NFSNOLIST) {
4901				lck_mtx_lock(&nmp->nm_lock);
4902				if (np->n_dlink.tqe_next == NFSNOLIST)
4903					TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
4904				lck_mtx_unlock(&nmp->nm_lock);
4905			}
4906			lck_mtx_unlock(&np->n_openlock);
4907		} else {
4908			/* give the delegation back */
4909			if (np) {
4910				if (NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
4911					/* update delegation state and return it */
4912					lck_mtx_lock(&np->n_openlock);
4913					np->n_openflags &= ~N_DELEG_MASK;
4914					np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
4915					np->n_dstateid = dstateid;
4916					np->n_dace = ace;
4917					if (np->n_dlink.tqe_next == NFSNOLIST) {
4918						lck_mtx_lock(&nmp->nm_lock);
4919						if (np->n_dlink.tqe_next == NFSNOLIST)
4920							TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
4921						lck_mtx_unlock(&nmp->nm_lock);
4922					}
4923					lck_mtx_unlock(&np->n_openlock);
4924					/* don't need to send a separate delegreturn for fh */
4925					fh.fh_len = 0;
4926				}
4927				/* return np's current delegation */
4928				nfs4_delegation_return(np, 0, thd, cred);
4929			}
4930			if (fh.fh_len) /* return fh's delegation if it wasn't for np */
4931				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, 0, thd, cred);
4932		}
4933	}
4934	if (error) {
4935		if (exclusive && (error == NFSERR_NOTSUPP)) {
4936			exclusive = 0;
4937			goto again;
4938		}
4939		if (newvp) {
4940			nfs_node_unlock(newnp);
4941			vnode_put(newvp);
4942		}
4943	} else if (create) {
4944		nfs_node_unlock(newnp);
4945		if (exclusive) {
4946			error = nfs4_setattr_rpc(newnp, vap, ctx);
4947			if (error && (gotuid || gotgid)) {
4948				/* it's possible the server didn't like our attempt to set IDs. */
4949				/* so, let's try it again without those */
4950				VATTR_CLEAR_ACTIVE(vap, va_uid);
4951				VATTR_CLEAR_ACTIVE(vap, va_gid);
4952				error = nfs4_setattr_rpc(newnp, vap, ctx);
4953			}
4954		}
4955		if (error)
4956			vnode_put(newvp);
4957		else
4958			*vpp = newvp;
4959	}
4960	nfs_open_owner_clear_busy(noop);
4961	return (error);
4962}
4963
4964
4965/*
4966 * Send an OPEN RPC to claim a delegated open for a file
4967 */
4968int
4969nfs4_claim_delegated_open_rpc(
4970	struct nfs_open_file *nofp,
4971	int share_access,
4972	int share_deny,
4973	int flags)
4974{
4975	struct nfsmount *nmp;
4976	struct nfs_open_owner *noop = nofp->nof_owner;
4977	struct nfs_vattr nvattr;
4978	int error = 0, lockerror = ENOENT, status;
4979	int nfsvers, numops;
4980	u_int64_t xid;
4981	nfsnode_t np = nofp->nof_np;
4982	struct nfsm_chain nmreq, nmrep;
4983	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
4984	uint32_t rflags = 0, delegation, recall = 0;
4985	fhandle_t fh;
4986	struct nfs_stateid dstateid;
4987	char sbuf[64], *s = sbuf;
4988	uint32_t ace_type, ace_flags, ace_mask, len, slen = sizeof(sbuf);
4989	struct kauth_ace ace;
4990	vnode_t dvp = NULL;
4991	const char *vname = NULL;
4992	const char *name = NULL;
4993	size_t namelen;
4994	char smallname[128];
4995	char *filename = NULL;
4996	struct nfsreq_secinfo_args si;
4997
4998	nmp = NFSTONMP(np);
4999	if (nfs_mount_gone(nmp))
5000		return (ENXIO);
5001	nfsvers = nmp->nm_vers;
5002
5003	nfs_node_lock_force(np);
5004	if ((vnode_vtype(NFSTOV(np)) != VDIR) && np->n_sillyrename) {
5005		/*
5006		 * The node's been sillyrenamed, so we need to use
5007		 * the sillyrename directory/name to do the open.
5008		 */
5009		struct nfs_sillyrename *nsp = np->n_sillyrename;
5010		dvp = NFSTOV(nsp->nsr_dnp);
5011		if ((error = vnode_get(dvp))) {
5012			nfs_node_unlock(np);
5013			goto out;
5014		}
5015		name = nsp->nsr_name;
5016	} else {
5017		/*
5018		 * [sigh] We can't trust VFS to get the parent right for named
5019		 * attribute nodes.  (It likes to reparent the nodes after we've
5020		 * created them.)  Luckily we can probably get the right parent
5021		 * from the n_parent we have stashed away.
5022		 */
5023		if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
5024		    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
5025			dvp = NULL;
5026		if (!dvp)
5027			dvp = vnode_getparent(NFSTOV(np));
5028		vname = vnode_getname(NFSTOV(np));
5029		if (!dvp || !vname) {
5030			if (!error)
5031				error = EIO;
5032			nfs_node_unlock(np);
5033			goto out;
5034		}
5035		name = vname;
5036	}
5037	filename = &smallname[0];
5038	namelen = snprintf(filename, sizeof(smallname), "%s", name);
5039	if (namelen >= sizeof(smallname)) {
5040		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
5041		if (!filename) {
5042			error = ENOMEM;
5043			goto out;
5044		}
5045		snprintf(filename, namelen+1, "%s", name);
5046	}
5047	nfs_node_unlock(np);
5048
5049	if ((error = nfs_open_owner_set_busy(noop, NULL)))
5050		return (error);
5051
5052	NVATTR_INIT(&nvattr);
5053	delegation = NFS_OPEN_DELEGATE_NONE;
5054	dstateid = np->n_dstateid;
5055	NFSREQ_SECINFO_SET(&si, VTONFS(dvp), NULL, 0, filename, namelen);
5056
5057	nfsm_chain_null(&nmreq);
5058	nfsm_chain_null(&nmrep);
5059
5060	// PUTFH, OPEN, GETATTR(FH)
5061	numops = 3;
5062	nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED);
5063	nfsm_chain_add_compound_header(error, &nmreq, "open_claim_d", numops);
5064	numops--;
5065	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
5066	nfsm_chain_add_fh(error, &nmreq, nfsvers, VTONFS(dvp)->n_fhp, VTONFS(dvp)->n_fhsize);
5067	numops--;
5068	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
5069	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
5070	nfsm_chain_add_32(error, &nmreq, share_access);
5071	nfsm_chain_add_32(error, &nmreq, share_deny);
5072	// open owner: clientid + uid
5073	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid
5074	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
5075	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner
5076	// openflag4
5077	nfsm_chain_add_32(error, &nmreq, NFS_OPEN_NOCREATE);
5078	// open_claim4
5079	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_DELEGATE_CUR);
5080	nfsm_chain_add_stateid(error, &nmreq, &np->n_dstateid);
5081	nfsm_chain_add_name(error, &nmreq, filename, namelen, nmp);
5082	numops--;
5083	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
5084	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
5085	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
5086	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
5087	nfsm_chain_build_done(error, &nmreq);
5088	nfsm_assert(error, (numops == 0), EPROTO);
5089	nfsmout_if(error);
5090
5091	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(),
5092			noop->noo_cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);
5093
5094	if ((lockerror = nfs_node_lock(np)))
5095		error = lockerror;
5096	nfsm_chain_skip_tag(error, &nmrep);
5097	nfsm_chain_get_32(error, &nmrep, numops);
5098	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
5099	nfsmout_if(error);
5100	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
5101	nfs_owner_seqid_increment(noop, NULL, error);
5102	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
5103	nfsm_chain_check_change_info(error, &nmrep, np);
5104	nfsm_chain_get_32(error, &nmrep, rflags);
5105	bmlen = NFS_ATTR_BITMAP_LEN;
5106	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
5107	nfsm_chain_get_32(error, &nmrep, delegation);
5108	if (!error)
5109		switch (delegation) {
5110		case NFS_OPEN_DELEGATE_NONE:
5111			// if (!(np->n_openflags & N_DELEG_RETURN)) /* don't warn if delegation is being returned */
5112			// 	printf("nfs: open delegated claim didn't return a delegation %s\n", filename ? filename : "???");
5113			break;
5114		case NFS_OPEN_DELEGATE_READ:
5115		case NFS_OPEN_DELEGATE_WRITE:
5116			if ((((np->n_openflags & N_DELEG_MASK) == N_DELEG_READ) &&
5117			     (delegation == NFS_OPEN_DELEGATE_WRITE)) ||
5118			    (((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) &&
5119			     (delegation == NFS_OPEN_DELEGATE_READ)))
5120				printf("nfs: open delegated claim returned a different delegation type! have %s got %s %s\n",
5121				     ((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) ? "W" : "R",
5122				     (delegation == NFS_OPEN_DELEGATE_WRITE) ? "W" : "R", filename ? filename : "???");
5123			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
5124			nfsm_chain_get_32(error, &nmrep, recall);
5125			if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
5126				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
5127			/* if we have any trouble accepting the ACE, just invalidate it */
5128			ace_type = ace_flags = ace_mask = len = 0;
5129			nfsm_chain_get_32(error, &nmrep, ace_type);
5130			nfsm_chain_get_32(error, &nmrep, ace_flags);
5131			nfsm_chain_get_32(error, &nmrep, ace_mask);
5132			nfsm_chain_get_32(error, &nmrep, len);
5133			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
5134			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
5135			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
5136			if (!error && (len >= slen)) {
5137				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
5138				if (s)
5139					slen = len+1;
5140				else
5141					ace.ace_flags = 0;
5142			}
5143			if (s)
5144				nfsm_chain_get_opaque(error, &nmrep, len, s);
5145			else
5146				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
5147			if (!error && s) {
5148				s[len] = '\0';
5149				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
5150					ace.ace_flags = 0;
5151			}
5152			if (error || !s)
5153				ace.ace_flags = 0;
5154			if (s && (s != sbuf))
5155				FREE(s, M_TEMP);
5156			if (!error) {
5157				/* stuff the latest delegation state in the node */
5158				lck_mtx_lock(&np->n_openlock);
5159				np->n_openflags &= ~N_DELEG_MASK;
5160				np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
5161				np->n_dstateid = dstateid;
5162				np->n_dace = ace;
5163				if (np->n_dlink.tqe_next == NFSNOLIST) {
5164					lck_mtx_lock(&nmp->nm_lock);
5165					if (np->n_dlink.tqe_next == NFSNOLIST)
5166						TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
5167					lck_mtx_unlock(&nmp->nm_lock);
5168				}
5169				lck_mtx_unlock(&np->n_openlock);
5170			}
5171			break;
5172		default:
5173			error = EBADRPC;
5174			break;
5175		}
5176	nfsmout_if(error);
5177	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
5178	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
5179	nfsmout_if(error);
5180	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
5181		printf("nfs: open reclaim didn't return filehandle? %s\n", filename ? filename : "???");
5182		error = EBADRPC;
5183		goto nfsmout;
5184	}
5185	if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
5186		// XXX what if fh doesn't match the vnode we think we're re-opening?
5187		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
5188		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
5189			printf("nfs4_claim_delegated_open_rpc: warning: file handle mismatch %s\n", filename ? filename : "???");
5190	}
5191	error = nfs_loadattrcache(np, &nvattr, &xid, 1);
5192	nfsmout_if(error);
5193	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
5194		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
5195nfsmout:
5196	NVATTR_CLEANUP(&nvattr);
5197	nfsm_chain_cleanup(&nmreq);
5198	nfsm_chain_cleanup(&nmrep);
5199	if (!lockerror)
5200		nfs_node_unlock(np);
5201	nfs_open_owner_clear_busy(noop);
5202	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
5203		if (recall) {
5204			/*
5205			 * We're making a delegated claim.
5206			 * Don't return the delegation here in case we have more to claim.
5207			 * Just make sure it's queued up to be returned.
5208			 */
5209			nfs4_delegation_return_enqueue(np);
5210		}
5211	}
5212out:
5213	// if (!error)
5214	// 	printf("nfs: open claim delegated (%d, %d) succeeded for %s\n", share_access, share_deny, filename ? filename : "???");
5215	if (filename && (filename != &smallname[0]))
5216		FREE(filename, M_TEMP);
5217	if (vname)
5218		vnode_putname(vname);
5219	if (dvp != NULLVP)
5220		vnode_put(dvp);
5221	return (error);
5222}
5223
5224/*
5225 * Send an OPEN RPC to reclaim an open file.
5226 */
5227int
5228nfs4_open_reclaim_rpc(
5229	struct nfs_open_file *nofp,
5230	int share_access,
5231	int share_deny)
5232{
5233	struct nfsmount *nmp;
5234	struct nfs_open_owner *noop = nofp->nof_owner;
5235	struct nfs_vattr nvattr;
5236	int error = 0, lockerror = ENOENT, status;
5237	int nfsvers, numops;
5238	u_int64_t xid;
5239	nfsnode_t np = nofp->nof_np;
5240	struct nfsm_chain nmreq, nmrep;
5241	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
5242	uint32_t rflags = 0, delegation, recall = 0;
5243	fhandle_t fh;
5244	struct nfs_stateid dstateid;
5245	char sbuf[64], *s = sbuf;
5246	uint32_t ace_type, ace_flags, ace_mask, len, slen = sizeof(sbuf);
5247	struct kauth_ace ace;
5248	struct nfsreq_secinfo_args si;
5249
5250	nmp = NFSTONMP(np);
5251	if (nfs_mount_gone(nmp))
5252		return (ENXIO);
5253	nfsvers = nmp->nm_vers;
5254
5255	if ((error = nfs_open_owner_set_busy(noop, NULL)))
5256		return (error);
5257
5258	NVATTR_INIT(&nvattr);
5259	delegation = NFS_OPEN_DELEGATE_NONE;
5260	dstateid = np->n_dstateid;
5261	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
5262
5263	nfsm_chain_null(&nmreq);
5264	nfsm_chain_null(&nmrep);
5265
5266	// PUTFH, OPEN, GETATTR(FH)
5267	numops = 3;
5268	nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED);
5269	nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", numops);
5270	numops--;
5271	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
5272	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
5273	numops--;
5274	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
5275	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
5276	nfsm_chain_add_32(error, &nmreq, share_access);
5277	nfsm_chain_add_32(error, &nmreq, share_deny);
5278	// open owner: clientid + uid
5279	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid
5280	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
5281	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner
5282	// openflag4
5283	nfsm_chain_add_32(error, &nmreq, NFS_OPEN_NOCREATE);
5284	// open_claim4
5285	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_PREVIOUS);
5286	delegation = (np->n_openflags & N_DELEG_READ) ? NFS_OPEN_DELEGATE_READ :
5287			(np->n_openflags & N_DELEG_WRITE) ? NFS_OPEN_DELEGATE_WRITE :
5288			NFS_OPEN_DELEGATE_NONE;
5289	nfsm_chain_add_32(error, &nmreq, delegation);
5290	delegation = NFS_OPEN_DELEGATE_NONE;
5291	numops--;
5292	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
5293	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
5294	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
5295	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
5296	nfsm_chain_build_done(error, &nmreq);
5297	nfsm_assert(error, (numops == 0), EPROTO);
5298	nfsmout_if(error);
5299
5300	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(),
5301			noop->noo_cred, &si, R_RECOVER|R_NOINTR, &nmrep, &xid, &status);
5302
5303	if ((lockerror = nfs_node_lock(np)))
5304		error = lockerror;
5305	nfsm_chain_skip_tag(error, &nmrep);
5306	nfsm_chain_get_32(error, &nmrep, numops);
5307	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
5308	nfsmout_if(error);
5309	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
5310	nfs_owner_seqid_increment(noop, NULL, error);
5311	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
5312	nfsm_chain_check_change_info(error, &nmrep, np);
5313	nfsm_chain_get_32(error, &nmrep, rflags);
5314	bmlen = NFS_ATTR_BITMAP_LEN;
5315	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
5316	nfsm_chain_get_32(error, &nmrep, delegation);
5317	if (!error)
5318		switch (delegation) {
5319		case NFS_OPEN_DELEGATE_NONE:
5320			if (np->n_openflags & N_DELEG_MASK) {
5321				/*
5322				 * Hey!  We were supposed to get our delegation back even
5323				 * if it was getting immediately recalled.  Bad server!
5324				 *
5325				 * Just try to return the existing delegation.
5326				 */
5327				// NP(np, "nfs: open reclaim didn't return delegation?");
5328				delegation = (np->n_openflags & N_DELEG_WRITE) ? NFS_OPEN_DELEGATE_WRITE : NFS_OPEN_DELEGATE_READ;
5329				recall = 1;
5330			}
5331			break;
5332		case NFS_OPEN_DELEGATE_READ:
5333		case NFS_OPEN_DELEGATE_WRITE:
5334			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
5335			nfsm_chain_get_32(error, &nmrep, recall);
5336			if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
5337				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
5338			/* if we have any trouble accepting the ACE, just invalidate it */
5339			ace_type = ace_flags = ace_mask = len = 0;
5340			nfsm_chain_get_32(error, &nmrep, ace_type);
5341			nfsm_chain_get_32(error, &nmrep, ace_flags);
5342			nfsm_chain_get_32(error, &nmrep, ace_mask);
5343			nfsm_chain_get_32(error, &nmrep, len);
5344			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
5345			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
5346			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
5347			if (!error && (len >= slen)) {
5348				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
5349				if (s)
5350					slen = len+1;
5351				else
5352					ace.ace_flags = 0;
5353			}
5354			if (s)
5355				nfsm_chain_get_opaque(error, &nmrep, len, s);
5356			else
5357				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
5358			if (!error && s) {
5359				s[len] = '\0';
5360				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
5361					ace.ace_flags = 0;
5362			}
5363			if (error || !s)
5364				ace.ace_flags = 0;
5365			if (s && (s != sbuf))
5366				FREE(s, M_TEMP);
5367			if (!error) {
5368				/* stuff the delegation state in the node */
5369				lck_mtx_lock(&np->n_openlock);
5370				np->n_openflags &= ~N_DELEG_MASK;
5371				np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
5372				np->n_dstateid = dstateid;
5373				np->n_dace = ace;
5374				if (np->n_dlink.tqe_next == NFSNOLIST) {
5375					lck_mtx_lock(&nmp->nm_lock);
5376					if (np->n_dlink.tqe_next == NFSNOLIST)
5377						TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
5378					lck_mtx_unlock(&nmp->nm_lock);
5379				}
5380				lck_mtx_unlock(&np->n_openlock);
5381			}
5382			break;
5383		default:
5384			error = EBADRPC;
5385			break;
5386		}
5387	nfsmout_if(error);
5388	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
5389	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
5390	nfsmout_if(error);
5391	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
5392		NP(np, "nfs: open reclaim didn't return filehandle?");
5393		error = EBADRPC;
5394		goto nfsmout;
5395	}
5396	if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
5397		// XXX what if fh doesn't match the vnode we think we're re-opening?
5398		// That should be pretty hard in this case, given that we are doing
5399		// the open reclaim using the file handle (and not a dir/name pair).
5400		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
5401		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
5402			NP(np, "nfs4_open_reclaim_rpc: warning: file handle mismatch");
5403	}
5404	error = nfs_loadattrcache(np, &nvattr, &xid, 1);
5405	nfsmout_if(error);
5406	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
5407		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
5408nfsmout:
5409	// if (!error)
5410	// 	NP(np, "nfs: open reclaim (%d, %d) succeeded", share_access, share_deny);
5411	NVATTR_CLEANUP(&nvattr);
5412	nfsm_chain_cleanup(&nmreq);
5413	nfsm_chain_cleanup(&nmrep);
5414	if (!lockerror)
5415		nfs_node_unlock(np);
5416	nfs_open_owner_clear_busy(noop);
5417	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
5418		if (recall)
5419			nfs4_delegation_return_enqueue(np);
5420	}
5421	return (error);
5422}
5423
5424int
5425nfs4_open_downgrade_rpc(
5426	nfsnode_t np,
5427	struct nfs_open_file *nofp,
5428	vfs_context_t ctx)
5429{
5430	struct nfs_open_owner *noop = nofp->nof_owner;
5431	struct nfsmount *nmp;
5432	int error, lockerror = ENOENT, status, nfsvers, numops;
5433	struct nfsm_chain nmreq, nmrep;
5434	u_int64_t xid;
5435	struct nfsreq_secinfo_args si;
5436
5437	nmp = NFSTONMP(np);
5438	if (nfs_mount_gone(nmp))
5439		return (ENXIO);
5440	nfsvers = nmp->nm_vers;
5441
5442	if ((error = nfs_open_owner_set_busy(noop, NULL)))
5443		return (error);
5444
5445	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
5446	nfsm_chain_null(&nmreq);
5447	nfsm_chain_null(&nmrep);
5448
5449	// PUTFH, OPEN_DOWNGRADE, GETATTR
5450	numops = 3;
5451	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
5452	nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", numops);
5453	numops--;
5454	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
5455	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
5456	numops--;
5457	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_DOWNGRADE);
5458	nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid);
5459	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
5460	nfsm_chain_add_32(error, &nmreq, nofp->nof_access);
5461	nfsm_chain_add_32(error, &nmreq, nofp->nof_deny);
5462	numops--;
5463	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
5464	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
5465	nfsm_chain_build_done(error, &nmreq);
5466	nfsm_assert(error, (numops == 0), EPROTO);
5467	nfsmout_if(error);
5468	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
5469			vfs_context_thread(ctx), vfs_context_ucred(ctx),
5470			&si, R_NOINTR, &nmrep, &xid, &status);
5471
5472	if ((lockerror = nfs_node_lock(np)))
5473		error = lockerror;
5474	nfsm_chain_skip_tag(error, &nmrep);
5475	nfsm_chain_get_32(error, &nmrep, numops);
5476	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
5477	nfsmout_if(error);
5478	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_DOWNGRADE);
5479	nfs_owner_seqid_increment(noop, NULL, error);
5480	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
5481	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
5482	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
5483nfsmout:
5484	if (!lockerror)
5485		nfs_node_unlock(np);
5486	nfs_open_owner_clear_busy(noop);
5487	nfsm_chain_cleanup(&nmreq);
5488	nfsm_chain_cleanup(&nmrep);
5489	return (error);
5490}
5491
/*
 * Send an NFSv4 CLOSE RPC to release this open file's state on the server.
 *
 * Issues a PUTFH+CLOSE+GETATTR compound using the given thread/cred and
 * request flags (R_NOINTR is always added so the close can't be interrupted
 * part-way).  On reply, the open owner's sequence id is advanced based on
 * the result and the stateid returned by CLOSE replaces the open file's
 * current stateid.
 *
 * Returns 0 on success, ENXIO if the mount is gone, or an NFS/errno error.
 */
int
nfs4_close_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	thread_t thd,
	kauth_cred_t cred,
	int flags)
{
	struct nfs_open_owner *noop = nofp->nof_owner;
	struct nfsmount *nmp;
	int error, lockerror = ENOENT, status, nfsvers, numops;
	struct nfsm_chain nmreq, nmrep;
	u_int64_t xid;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;

	/* serialize use of the open owner's sequence id */
	if ((error = nfs_open_owner_set_busy(noop, NULL)))
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, CLOSE, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "close", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_CLOSE);
	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
	nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_CLOSE);
	/* advance the open owner's seqid based on the RPC result */
	nfs_owner_seqid_increment(noop, NULL, error);
	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_open_owner_clear_busy(noop);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
5557
5558
5559/*
5560 * Claim the delegated open combinations this open file holds.
5561 */
5562int
5563nfs4_claim_delegated_state_for_open_file(struct nfs_open_file *nofp, int flags)
5564{
5565	struct nfs_open_owner *noop = nofp->nof_owner;
5566	struct nfs_lock_owner *nlop;
5567	struct nfs_file_lock *nflp, *nextnflp;
5568	struct nfsmount *nmp;
5569	int error = 0, reopen = 0;
5570
5571	if (nofp->nof_d_rw_drw) {
5572		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_BOTH, flags);
5573		if (!error) {
5574			lck_mtx_lock(&nofp->nof_lock);
5575			nofp->nof_rw_drw += nofp->nof_d_rw_drw;
5576			nofp->nof_d_rw_drw = 0;
5577			lck_mtx_unlock(&nofp->nof_lock);
5578		}
5579	}
5580	if (!error && nofp->nof_d_w_drw) {
5581		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_BOTH, flags);
5582		if (!error) {
5583			lck_mtx_lock(&nofp->nof_lock);
5584			nofp->nof_w_drw += nofp->nof_d_w_drw;
5585			nofp->nof_d_w_drw = 0;
5586			lck_mtx_unlock(&nofp->nof_lock);
5587		}
5588	}
5589	if (!error && nofp->nof_d_r_drw) {
5590		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_BOTH, flags);
5591		if (!error) {
5592			lck_mtx_lock(&nofp->nof_lock);
5593			nofp->nof_r_drw += nofp->nof_d_r_drw;
5594			nofp->nof_d_r_drw = 0;
5595			lck_mtx_unlock(&nofp->nof_lock);
5596		}
5597	}
5598	if (!error && nofp->nof_d_rw_dw) {
5599		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_WRITE, flags);
5600		if (!error) {
5601			lck_mtx_lock(&nofp->nof_lock);
5602			nofp->nof_rw_dw += nofp->nof_d_rw_dw;
5603			nofp->nof_d_rw_dw = 0;
5604			lck_mtx_unlock(&nofp->nof_lock);
5605		}
5606	}
5607	if (!error && nofp->nof_d_w_dw) {
5608		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_WRITE, flags);
5609		if (!error) {
5610			lck_mtx_lock(&nofp->nof_lock);
5611			nofp->nof_w_dw += nofp->nof_d_w_dw;
5612			nofp->nof_d_w_dw = 0;
5613			lck_mtx_unlock(&nofp->nof_lock);
5614		}
5615	}
5616	if (!error && nofp->nof_d_r_dw) {
5617		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_WRITE, flags);
5618		if (!error) {
5619			lck_mtx_lock(&nofp->nof_lock);
5620			nofp->nof_r_dw += nofp->nof_d_r_dw;
5621			nofp->nof_d_r_dw = 0;
5622			lck_mtx_unlock(&nofp->nof_lock);
5623		}
5624	}
5625	/* non-deny-mode opens may be reopened if no locks are held */
5626	if (!error && nofp->nof_d_rw) {
5627		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, flags);
5628		/* for some errors, we should just try reopening the file */
5629		if (nfs_mount_state_error_delegation_lost(error))
5630			reopen = error;
5631		if (!error || reopen) {
5632			lck_mtx_lock(&nofp->nof_lock);
5633			nofp->nof_rw += nofp->nof_d_rw;
5634			nofp->nof_d_rw = 0;
5635			lck_mtx_unlock(&nofp->nof_lock);
5636		}
5637	}
5638	/* if we've already set reopen, we should move these other two opens from delegated to not delegated */
5639	if ((!error || reopen) && nofp->nof_d_w) {
5640		if (!error) {
5641			error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, flags);
5642			/* for some errors, we should just try reopening the file */
5643			if (nfs_mount_state_error_delegation_lost(error))
5644				reopen = error;
5645		}
5646		if (!error || reopen) {
5647			lck_mtx_lock(&nofp->nof_lock);
5648			nofp->nof_w += nofp->nof_d_w;
5649			nofp->nof_d_w = 0;
5650			lck_mtx_unlock(&nofp->nof_lock);
5651		}
5652	}
5653	if ((!error || reopen) && nofp->nof_d_r) {
5654		if (!error) {
5655			error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, flags);
5656			/* for some errors, we should just try reopening the file */
5657			if (nfs_mount_state_error_delegation_lost(error))
5658				reopen = error;
5659		}
5660		if (!error || reopen) {
5661			lck_mtx_lock(&nofp->nof_lock);
5662			nofp->nof_r += nofp->nof_d_r;
5663			nofp->nof_d_r = 0;
5664			lck_mtx_unlock(&nofp->nof_lock);
5665		}
5666	}
5667
5668	if (reopen) {
5669		/*
5670		 * Any problems with the delegation probably indicates that we
5671		 * should review/return all of our current delegation state.
5672		 */
5673		if ((nmp = NFSTONMP(nofp->nof_np))) {
5674			nfs4_delegation_return_enqueue(nofp->nof_np);
5675			lck_mtx_lock(&nmp->nm_lock);
5676			nfs_need_recover(nmp, NFSERR_EXPIRED);
5677			lck_mtx_unlock(&nmp->nm_lock);
5678		}
5679		if (reopen && (nfs_check_for_locks(noop, nofp) == 0)) {
5680			/* just reopen the file on next access */
5681			NP(nofp->nof_np, "nfs4_claim_delegated_state_for_open_file: %d, need reopen, %d",
5682				reopen, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5683			lck_mtx_lock(&nofp->nof_lock);
5684			nofp->nof_flags |= NFS_OPEN_FILE_REOPEN;
5685			lck_mtx_unlock(&nofp->nof_lock);
5686			return (0);
5687		}
5688		if (reopen)
5689			NP(nofp->nof_np, "nfs4_claim_delegated_state_for_open_file: %d, locks prevent reopen, %d",
5690				reopen, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5691	}
5692
5693	if (!error && ((nmp = NFSTONMP(nofp->nof_np)))) {
5694		/* claim delegated locks */
5695		TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) {
5696			if (nlop->nlo_open_owner != noop)
5697				continue;
5698			TAILQ_FOREACH_SAFE(nflp, &nlop->nlo_locks, nfl_lolink, nextnflp) {
5699				/* skip dead & blocked lock requests (shouldn't be any in the held lock list) */
5700				if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED))
5701					continue;
5702				/* skip non-delegated locks */
5703				if (!(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED))
5704					continue;
5705				error = nmp->nm_funcs->nf_setlock_rpc(nofp->nof_np, nofp, nflp, 0, flags, current_thread(), noop->noo_cred);
5706				if (error) {
5707					NP(nofp->nof_np, "nfs: delegated lock claim (0x%llx, 0x%llx) failed %d, %d",
5708						nflp->nfl_start, nflp->nfl_end, error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5709					break;
5710				}
5711				// else {
5712				// 	NP(nofp->nof_np, "nfs: delegated lock claim (0x%llx, 0x%llx) succeeded, %d",
5713				// 		nflp->nfl_start, nflp->nfl_end, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5714				// }
5715			}
5716			if (error)
5717				break;
5718		}
5719	}
5720
5721	if (!error)  /* all state claimed successfully! */
5722		return (0);
5723
5724	/* restart if it looks like a problem more than just losing the delegation */
5725	if (!nfs_mount_state_error_delegation_lost(error) &&
5726	    ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error))) {
5727		NP(nofp->nof_np, "nfs delegated lock claim error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5728		if ((error == ETIMEDOUT) && ((nmp = NFSTONMP(nofp->nof_np))))
5729			nfs_need_reconnect(nmp);
5730		return (error);
5731	}
5732
5733	/* delegated state lost (once held but now not claimable) */
5734	NP(nofp->nof_np, "nfs delegated state claim error %d, state lost, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5735
5736	/*
5737	 * Any problems with the delegation probably indicates that we
5738	 * should review/return all of our current delegation state.
5739	 */
5740	if ((nmp = NFSTONMP(nofp->nof_np))) {
5741		nfs4_delegation_return_enqueue(nofp->nof_np);
5742		lck_mtx_lock(&nmp->nm_lock);
5743		nfs_need_recover(nmp, NFSERR_EXPIRED);
5744		lck_mtx_unlock(&nmp->nm_lock);
5745	}
5746
5747	/* revoke all open file state */
5748	nfs_revoke_open_state_for_node(nofp->nof_np);
5749
5750	return (error);
5751}
5752
5753/*
5754 * Release all open state for the given node.
5755 */
5756void
5757nfs_release_open_state_for_node(nfsnode_t np, int force)
5758{
5759	struct nfsmount *nmp = NFSTONMP(np);
5760	struct nfs_open_file *nofp;
5761	struct nfs_file_lock *nflp, *nextnflp;
5762
5763	/* drop held locks */
5764	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
5765		/* skip dead & blocked lock requests */
5766		if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED))
5767			continue;
5768		/* send an unlock if not a delegated lock */
5769		if (!force && nmp && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED))
5770			nmp->nm_funcs->nf_unlock_rpc(np, nflp->nfl_owner, F_WRLCK, nflp->nfl_start, nflp->nfl_end, R_RECOVER,
5771				NULL, nflp->nfl_owner->nlo_open_owner->noo_cred);
5772		/* kill/remove the lock */
5773		lck_mtx_lock(&np->n_openlock);
5774		nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
5775		lck_mtx_lock(&nflp->nfl_owner->nlo_lock);
5776		TAILQ_REMOVE(&nflp->nfl_owner->nlo_locks, nflp, nfl_lolink);
5777		lck_mtx_unlock(&nflp->nfl_owner->nlo_lock);
5778		if (nflp->nfl_blockcnt) {
5779			/* wake up anyone blocked on this lock */
5780			wakeup(nflp);
5781		} else {
5782			/* remove nflp from lock list and destroy */
5783			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
5784			nfs_file_lock_destroy(nflp);
5785		}
5786		lck_mtx_unlock(&np->n_openlock);
5787	}
5788
5789	lck_mtx_lock(&np->n_openlock);
5790
5791	/* drop all opens */
5792	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
5793		if (nofp->nof_flags & NFS_OPEN_FILE_LOST)
5794			continue;
5795		/* mark open state as lost */
5796		lck_mtx_lock(&nofp->nof_lock);
5797		nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
5798		nofp->nof_flags |= NFS_OPEN_FILE_LOST;
5799
5800		lck_mtx_unlock(&nofp->nof_lock);
5801		if (!force && nmp && (nmp->nm_vers >= NFS_VER4))
5802			nfs4_close_rpc(np, nofp, NULL, nofp->nof_owner->noo_cred, R_RECOVER);
5803	}
5804
5805	lck_mtx_unlock(&np->n_openlock);
5806}
5807
5808/*
5809 * State for a node has been lost, drop it, and revoke the node.
5810 * Attempt to return any state if possible in case the server
5811 * might somehow think we hold it.
5812 */
5813void
5814nfs_revoke_open_state_for_node(nfsnode_t np)
5815{
5816	struct nfsmount *nmp;
5817
5818	/* mark node as needing to be revoked */
5819	nfs_node_lock_force(np);
5820	if (np->n_flag & NREVOKE)  /* already revoked? */
5821	{
5822		NP(np, "nfs_revoke_open_state_for_node(): already revoked");
5823		nfs_node_unlock(np);
5824		return;
5825	}
5826	np->n_flag |= NREVOKE;
5827	nfs_node_unlock(np);
5828
5829	nfs_release_open_state_for_node(np, 0);
5830	NP(np, "nfs: state lost for %p 0x%x", np, np->n_flag);
5831
5832	/* mark mount as needing a revoke scan and have the socket thread do it. */
5833	if ((nmp = NFSTONMP(np))) {
5834		lck_mtx_lock(&nmp->nm_lock);
5835		nmp->nm_state |= NFSSTA_REVOKE;
5836		nfs_mount_sock_thread_wake(nmp);
5837		lck_mtx_unlock(&nmp->nm_lock);
5838	}
5839}
5840
5841/*
5842 * Claim the delegated open combinations that each of this node's open files hold.
5843 */
5844int
5845nfs4_claim_delegated_state_for_node(nfsnode_t np, int flags)
5846{
5847	struct nfs_open_file *nofp;
5848	int error = 0;
5849
5850	lck_mtx_lock(&np->n_openlock);
5851
5852	/* walk the open file list looking for opens with delegated state to claim */
5853restart:
5854	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
5855		if (!nofp->nof_d_rw_drw && !nofp->nof_d_w_drw && !nofp->nof_d_r_drw &&
5856		    !nofp->nof_d_rw_dw && !nofp->nof_d_w_dw && !nofp->nof_d_r_dw &&
5857		    !nofp->nof_d_rw && !nofp->nof_d_w && !nofp->nof_d_r)
5858			continue;
5859		lck_mtx_unlock(&np->n_openlock);
5860		error = nfs4_claim_delegated_state_for_open_file(nofp, flags);
5861		lck_mtx_lock(&np->n_openlock);
5862		if (error)
5863			break;
5864		goto restart;
5865	}
5866
5867	lck_mtx_unlock(&np->n_openlock);
5868
5869	return (error);
5870}
5871
5872/*
5873 * Mark a node as needed to have its delegation returned.
5874 * Queue it up on the delegation return queue.
5875 * Make sure the thread is running.
5876 */
5877void
5878nfs4_delegation_return_enqueue(nfsnode_t np)
5879{
5880	struct nfsmount *nmp;
5881
5882	nmp = NFSTONMP(np);
5883	if (nfs_mount_gone(nmp))
5884		return;
5885
5886	lck_mtx_lock(&np->n_openlock);
5887	np->n_openflags |= N_DELEG_RETURN;
5888	lck_mtx_unlock(&np->n_openlock);
5889
5890	lck_mtx_lock(&nmp->nm_lock);
5891	if (np->n_dreturn.tqe_next == NFSNOLIST)
5892		TAILQ_INSERT_TAIL(&nmp->nm_dreturnq, np, n_dreturn);
5893	nfs_mount_sock_thread_wake(nmp);
5894	lck_mtx_unlock(&nmp->nm_lock);
5895}
5896
5897/*
5898 * return any delegation we may have for the given node
5899 */
5900int
5901nfs4_delegation_return(nfsnode_t np, int flags, thread_t thd, kauth_cred_t cred)
5902{
5903	struct nfsmount *nmp;
5904	fhandle_t fh;
5905	nfs_stateid dstateid;
5906	int error;
5907
5908	nmp = NFSTONMP(np);
5909	if (nfs_mount_gone(nmp))
5910		return (ENXIO);
5911
5912	/* first, make sure the node's marked for delegation return */
5913	lck_mtx_lock(&np->n_openlock);
5914	np->n_openflags |= (N_DELEG_RETURN|N_DELEG_RETURNING);
5915	lck_mtx_unlock(&np->n_openlock);
5916
5917	/* make sure nobody else is using the delegation state */
5918	if ((error = nfs_open_state_set_busy(np, NULL)))
5919		goto out;
5920
5921	/* claim any delegated state */
5922	if ((error = nfs4_claim_delegated_state_for_node(np, flags)))
5923		goto out;
5924
5925	/* return the delegation */
5926	lck_mtx_lock(&np->n_openlock);
5927	dstateid = np->n_dstateid;
5928	fh.fh_len = np->n_fhsize;
5929	bcopy(np->n_fhp, &fh.fh_data, fh.fh_len);
5930	lck_mtx_unlock(&np->n_openlock);
5931	error = nfs4_delegreturn_rpc(NFSTONMP(np), fh.fh_data, fh.fh_len, &dstateid, flags, thd, cred);
5932	/* assume delegation is gone for all errors except ETIMEDOUT, NFSERR_*MOVED */
5933	if ((error != ETIMEDOUT) && (error != NFSERR_MOVED) && (error != NFSERR_LEASE_MOVED)) {
5934		lck_mtx_lock(&np->n_openlock);
5935		np->n_openflags &= ~N_DELEG_MASK;
5936		lck_mtx_lock(&nmp->nm_lock);
5937		if (np->n_dlink.tqe_next != NFSNOLIST) {
5938			TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
5939			np->n_dlink.tqe_next = NFSNOLIST;
5940		}
5941		lck_mtx_unlock(&nmp->nm_lock);
5942		lck_mtx_unlock(&np->n_openlock);
5943	}
5944
5945out:
5946	/* make sure it's no longer on the return queue and clear the return flags */
5947	lck_mtx_lock(&nmp->nm_lock);
5948	if (np->n_dreturn.tqe_next != NFSNOLIST) {
5949		TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
5950		np->n_dreturn.tqe_next = NFSNOLIST;
5951	}
5952	lck_mtx_unlock(&nmp->nm_lock);
5953	lck_mtx_lock(&np->n_openlock);
5954	np->n_openflags &= ~(N_DELEG_RETURN|N_DELEG_RETURNING);
5955	lck_mtx_unlock(&np->n_openlock);
5956
5957	if (error) {
5958		NP(np, "nfs4_delegation_return, error %d", error);
5959		if (error == ETIMEDOUT)
5960			nfs_need_reconnect(nmp);
5961		if (nfs_mount_state_error_should_restart(error)) {
5962			/* make sure recovery happens */
5963			lck_mtx_lock(&nmp->nm_lock);
5964			nfs_need_recover(nmp, nfs_mount_state_error_delegation_lost(error) ? NFSERR_EXPIRED : 0);
5965			lck_mtx_unlock(&nmp->nm_lock);
5966		}
5967	}
5968
5969	nfs_open_state_clear_busy(np);
5970
5971	return (error);
5972}
5973
5974/*
5975 * RPC to return a delegation for a file handle
5976 */
5977int
5978nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_stateid *sid, int flags, thread_t thd, kauth_cred_t cred)
5979{
5980	int error = 0, status, numops;
5981	uint64_t xid;
5982	struct nfsm_chain nmreq, nmrep;
5983	struct nfsreq_secinfo_args si;
5984
5985	NFSREQ_SECINFO_SET(&si, NULL, fhp, fhlen, NULL, 0);
5986	nfsm_chain_null(&nmreq);
5987	nfsm_chain_null(&nmrep);
5988
5989	// PUTFH, DELEGRETURN
5990	numops = 2;
5991	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
5992	nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops);
5993	numops--;
5994	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
5995	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
5996	numops--;
5997	nfsm_chain_add_32(error, &nmreq, NFS_OP_DELEGRETURN);
5998	nfsm_chain_add_stateid(error, &nmreq, sid);
5999	nfsm_chain_build_done(error, &nmreq);
6000	nfsm_assert(error, (numops == 0), EPROTO);
6001	nfsmout_if(error);
6002	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags, &nmrep, &xid, &status);
6003	nfsm_chain_skip_tag(error, &nmrep);
6004	nfsm_chain_get_32(error, &nmrep, numops);
6005	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
6006	nfsm_chain_op_check(error, &nmrep, NFS_OP_DELEGRETURN);
6007nfsmout:
6008	nfsm_chain_cleanup(&nmreq);
6009	nfsm_chain_cleanup(&nmrep);
6010	return (error);
6011}
6012
6013
6014/*
6015 * NFS read call.
6016 * Just call nfs_bioread() to do the work.
6017 *
6018 * Note: the exec code paths have a tendency to call VNOP_READ (and VNOP_MMAP)
6019 * without first calling VNOP_OPEN, so we make sure the file is open here.
6020 */
6021int
6022nfs_vnop_read(
6023	struct vnop_read_args /* {
6024		struct vnodeop_desc *a_desc;
6025		vnode_t a_vp;
6026		struct uio *a_uio;
6027		int a_ioflag;
6028		vfs_context_t a_context;
6029	} */ *ap)
6030{
6031	vnode_t vp = ap->a_vp;
6032	vfs_context_t ctx = ap->a_context;
6033	nfsnode_t np;
6034	struct nfsmount *nmp;
6035	struct nfs_open_owner *noop;
6036	struct nfs_open_file *nofp;
6037	int error;
6038
6039	if (vnode_vtype(ap->a_vp) != VREG)
6040		return (vnode_vtype(vp) == VDIR) ? EISDIR : EPERM;
6041
6042	np = VTONFS(vp);
6043	nmp = NFSTONMP(np);
6044	if (nfs_mount_gone(nmp))
6045		return (ENXIO);
6046	if (np->n_flag & NREVOKE)
6047		return (EIO);
6048
6049	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
6050	if (!noop)
6051		return (ENOMEM);
6052restart:
6053	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
6054	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
6055		NP(np, "nfs_vnop_read: LOST %d", kauth_cred_getuid(noop->noo_cred));
6056		error = EIO;
6057	}
6058	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
6059		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
6060		nofp = NULL;
6061		if (!error)
6062			goto restart;
6063	}
6064	if (error) {
6065		nfs_open_owner_rele(noop);
6066		return (error);
6067	}
6068	if (!nofp->nof_access) {
6069		/* we don't have the file open, so open it for read access */
6070		error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
6071		if (error) {
6072			nfs_open_owner_rele(noop);
6073			return (error);
6074		}
6075		if (np->n_flag & NREVOKE) {
6076			error = EIO;
6077			nfs_mount_state_in_use_end(nmp, 0);
6078			nfs_open_owner_rele(noop);
6079			return (error);
6080		}
6081		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
6082		if (error)
6083			nofp = NULL;
6084		if (!error) {
6085			if (nmp->nm_vers < NFS_VER4) {
6086				/* NFS v2/v3 opens are always allowed - so just add it. */
6087				nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
6088			} else {
6089				error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
6090			}
6091		}
6092		if (!error)
6093			nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
6094		if (nofp)
6095			nfs_open_file_clear_busy(nofp);
6096		if (nfs_mount_state_in_use_end(nmp, error)) {
6097			nofp = NULL;
6098			goto restart;
6099		}
6100	}
6101	nfs_open_owner_rele(noop);
6102	if (error)
6103		return (error);
6104	return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context));
6105}
6106
6107/*
6108 * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
6109 * Files are created using the NFSv4 OPEN RPC.  So we must open the
6110 * file to create it and then close it.
6111 */
6112int
6113nfs4_vnop_create(
6114	struct vnop_create_args /* {
6115		struct vnodeop_desc *a_desc;
6116		vnode_t a_dvp;
6117		vnode_t *a_vpp;
6118		struct componentname *a_cnp;
6119		struct vnode_attr *a_vap;
6120		vfs_context_t a_context;
6121	} */ *ap)
6122{
6123	vfs_context_t ctx = ap->a_context;
6124	struct componentname *cnp = ap->a_cnp;
6125	struct vnode_attr *vap = ap->a_vap;
6126	vnode_t dvp = ap->a_dvp;
6127	vnode_t *vpp = ap->a_vpp;
6128	struct nfsmount *nmp;
6129	nfsnode_t np;
6130	int error = 0, busyerror = 0, accessMode, denyMode;
6131	struct nfs_open_owner *noop = NULL;
6132	struct nfs_open_file *newnofp = NULL, *nofp = NULL;
6133
6134	nmp = VTONMP(dvp);
6135	if (nfs_mount_gone(nmp))
6136		return (ENXIO);
6137
6138	if (vap)
6139		nfs_avoid_needless_id_setting_on_create(VTONFS(dvp), vap, ctx);
6140
6141	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
6142	if (!noop)
6143		return (ENOMEM);
6144
6145restart:
6146	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
6147	if (error) {
6148		nfs_open_owner_rele(noop);
6149		return (error);
6150	}
6151
6152	/* grab a provisional, nodeless open file */
6153	error = nfs_open_file_find(NULL, noop, &newnofp, 0, 0, 1);
6154	if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_LOST)) {
6155		printf("nfs_vnop_create: LOST\n");
6156		error = EIO;
6157	}
6158	if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
6159		/* This shouldn't happen given that this is a new, nodeless nofp */
6160		nfs_mount_state_in_use_end(nmp, 0);
6161		error = nfs4_reopen(newnofp, vfs_context_thread(ctx));
6162		nfs_open_file_destroy(newnofp);
6163		newnofp = NULL;
6164		if (!error)
6165			goto restart;
6166	}
6167	if (!error)
6168		error = nfs_open_file_set_busy(newnofp, vfs_context_thread(ctx));
6169	if (error) {
6170		if (newnofp)
6171			nfs_open_file_destroy(newnofp);
6172		newnofp = NULL;
6173		goto out;
6174	}
6175
6176	/*
6177	 * We're just trying to create the file.
6178	 * We'll create/open it RW, and set NFS_OPEN_FILE_CREATE.
6179	 */
6180	accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
6181	denyMode = NFS_OPEN_SHARE_DENY_NONE;
6182
6183	/* Do the open/create */
6184	error = nfs4_open_rpc(newnofp, ctx, cnp, vap, dvp, vpp, NFS_OPEN_CREATE, accessMode, denyMode);
6185	if ((error == EACCES) && vap && !(vap->va_vaflags & VA_EXCLUSIVE) &&
6186	    VATTR_IS_ACTIVE(vap, va_mode) && !(vap->va_mode & S_IWUSR)) {
6187		/*
6188		 * Hmm... it looks like we may have a situation where the request was
6189		 * retransmitted because we didn't get the first response which successfully
6190		 * created/opened the file and then the second time we were denied the open
6191		 * because the mode the file was created with doesn't allow write access.
6192		 *
6193		 * We'll try to work around this by temporarily updating the mode and
6194		 * retrying the open.
6195		 */
6196		struct vnode_attr vattr;
6197
6198		/* first make sure it's there */
6199		int error2 = nfs_lookitup(VTONFS(dvp), cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
6200		if (!error2 && np) {
6201			nfs_node_unlock(np);
6202			*vpp = NFSTOV(np);
6203			if (vnode_vtype(NFSTOV(np)) == VREG) {
6204				VATTR_INIT(&vattr);
6205				VATTR_SET(&vattr, va_mode, (vap->va_mode | S_IWUSR));
6206				if (!nfs4_setattr_rpc(np, &vattr, ctx)) {
6207					error2 = nfs4_open_rpc(newnofp, ctx, cnp, NULL, dvp, vpp, NFS_OPEN_NOCREATE, accessMode, denyMode);
6208					VATTR_INIT(&vattr);
6209					VATTR_SET(&vattr, va_mode, vap->va_mode);
6210					nfs4_setattr_rpc(np, &vattr, ctx);
6211					if (!error2)
6212						error = 0;
6213				}
6214			}
6215			if (error) {
6216				vnode_put(*vpp);
6217				*vpp = NULL;
6218			}
6219		}
6220	}
6221	if (!error && !*vpp) {
6222		printf("nfs4_open_rpc returned without a node?\n");
6223		/* Hmmm... with no node, we have no filehandle and can't close it */
6224		error = EIO;
6225	}
6226	if (error) {
6227		/* need to cleanup our temporary nofp */
6228		nfs_open_file_clear_busy(newnofp);
6229		nfs_open_file_destroy(newnofp);
6230		newnofp = NULL;
6231		goto out;
6232	}
6233	/* After we have a node, add our open file struct to the node */
6234	np = VTONFS(*vpp);
6235	nfs_open_file_add_open(newnofp, accessMode, denyMode, 0);
6236	nofp = newnofp;
6237	error = nfs_open_file_find_internal(np, noop, &nofp, 0, 0, 0);
6238	if (error) {
6239		/* This shouldn't happen, because we passed in a new nofp to use. */
6240		printf("nfs_open_file_find_internal failed! %d\n", error);
6241		goto out;
6242	} else if (nofp != newnofp) {
6243		/*
6244		 * Hmm... an open file struct already exists.
6245		 * Mark the existing one busy and merge our open into it.
6246		 * Then destroy the one we created.
6247		 * Note: there's no chance of an open confict because the
6248		 * open has already been granted.
6249		 */
6250		busyerror = nfs_open_file_set_busy(nofp, NULL);
6251		nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
6252		nofp->nof_stateid = newnofp->nof_stateid;
6253		if (newnofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)
6254			nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
6255		nfs_open_file_clear_busy(newnofp);
6256		nfs_open_file_destroy(newnofp);
6257	}
6258	newnofp = NULL;
6259	/* mark the node as holding a create-initiated open */
6260	nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
6261	nofp->nof_creator = current_thread();
6262out:
6263	if (nofp && !busyerror)
6264		nfs_open_file_clear_busy(nofp);
6265	if (nfs_mount_state_in_use_end(nmp, error)) {
6266		nofp = newnofp = NULL;
6267		busyerror = 0;
6268		goto restart;
6269	}
6270	if (noop)
6271		nfs_open_owner_rele(noop);
6272	return (error);
6273}
6274
6275/*
6276 * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
6277 */
6278int
6279nfs4_create_rpc(
6280	vfs_context_t ctx,
6281	nfsnode_t dnp,
6282	struct componentname *cnp,
6283	struct vnode_attr *vap,
6284	int type,
6285	char *link,
6286	nfsnode_t *npp)
6287{
6288	struct nfsmount *nmp;
6289	struct nfs_vattr nvattr;
6290	int error = 0, create_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status;
6291	int nfsvers, namedattrs, numops;
6292	u_int64_t xid, savedxid = 0;
6293	nfsnode_t np = NULL;
6294	vnode_t newvp = NULL;
6295	struct nfsm_chain nmreq, nmrep;
6296	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
6297	const char *tag;
6298	nfs_specdata sd;
6299	fhandle_t fh;
6300	struct nfsreq rq, *req = &rq;
6301	struct nfs_dulookup dul;
6302	struct nfsreq_secinfo_args si;
6303
6304	nmp = NFSTONMP(dnp);
6305	if (nfs_mount_gone(nmp))
6306		return (ENXIO);
6307	nfsvers = nmp->nm_vers;
6308	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
6309	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
6310		return (EINVAL);
6311
6312	sd.specdata1 = sd.specdata2 = 0;
6313
6314	switch (type) {
6315	case NFLNK:
6316		tag = "symlink";
6317		break;
6318	case NFBLK:
6319	case NFCHR:
6320		tag = "mknod";
6321		if (!VATTR_IS_ACTIVE(vap, va_rdev))
6322			return (EINVAL);
6323		sd.specdata1 = major(vap->va_rdev);
6324		sd.specdata2 = minor(vap->va_rdev);
6325		break;
6326	case NFSOCK:
6327	case NFFIFO:
6328		tag = "mknod";
6329		break;
6330	case NFDIR:
6331		tag = "mkdir";
6332		break;
6333	default:
6334		return (EINVAL);
6335	}
6336
6337	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
6338
6339	error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
6340	if (!namedattrs)
6341		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
6342
6343	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
6344	NVATTR_INIT(&nvattr);
6345	nfsm_chain_null(&nmreq);
6346	nfsm_chain_null(&nmrep);
6347
6348	// PUTFH, SAVEFH, CREATE, GETATTR(FH), RESTOREFH, GETATTR
6349	numops = 6;
6350	nfsm_chain_build_alloc_init(error, &nmreq, 66 * NFSX_UNSIGNED);
6351	nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
6352	numops--;
6353	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
6354	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
6355	numops--;
6356	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
6357	numops--;
6358	nfsm_chain_add_32(error, &nmreq, NFS_OP_CREATE);
6359	nfsm_chain_add_32(error, &nmreq, type);
6360	if (type == NFLNK) {
6361		nfsm_chain_add_name(error, &nmreq, link, strlen(link), nmp);
6362	} else if ((type == NFBLK) || (type == NFCHR)) {
6363		nfsm_chain_add_32(error, &nmreq, sd.specdata1);
6364		nfsm_chain_add_32(error, &nmreq, sd.specdata2);
6365	}
6366	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
6367	nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
6368	numops--;
6369	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
6370	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
6371	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
6372	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
6373	numops--;
6374	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
6375	numops--;
6376	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
6377	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
6378	nfsm_chain_build_done(error, &nmreq);
6379	nfsm_assert(error, (numops == 0), EPROTO);
6380	nfsmout_if(error);
6381
6382	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND,
6383			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
6384	if (!error) {
6385		if (!namedattrs)
6386			nfs_dulookup_start(&dul, dnp, ctx);
6387		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
6388	}
6389
6390	if ((lockerror = nfs_node_lock(dnp)))
6391		error = lockerror;
6392	nfsm_chain_skip_tag(error, &nmrep);
6393	nfsm_chain_get_32(error, &nmrep, numops);
6394	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
6395	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
6396	nfsmout_if(error);
6397	nfsm_chain_op_check(error, &nmrep, NFS_OP_CREATE);
6398	nfsm_chain_check_change_info(error, &nmrep, dnp);
6399	bmlen = NFS_ATTR_BITMAP_LEN;
6400	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
6401	/* At this point if we have no error, the object was created. */
6402	/* if we don't get attributes, then we should lookitup. */
6403	create_error = error;
6404	nfsmout_if(error);
6405	nfs_vattr_set_supported(bitmap, vap);
6406	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
6407	nfsmout_if(error);
6408	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
6409	nfsmout_if(error);
6410	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
6411		printf("nfs: create/%s didn't return filehandle? %s\n", tag, cnp->cn_nameptr);
6412		error = EBADRPC;
6413		goto nfsmout;
6414	}
6415	/* directory attributes: if we don't get them, make sure to invalidate */
6416	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
6417	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
6418	savedxid = xid;
6419	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
6420	if (error)
6421		NATTRINVALIDATE(dnp);
6422
6423nfsmout:
6424	nfsm_chain_cleanup(&nmreq);
6425	nfsm_chain_cleanup(&nmrep);
6426
6427	if (!lockerror) {
6428		if (!create_error && (dnp->n_flag & NNEGNCENTRIES)) {
6429			dnp->n_flag &= ~NNEGNCENTRIES;
6430			cache_purge_negatives(NFSTOV(dnp));
6431		}
6432		dnp->n_flag |= NMODIFIED;
6433		nfs_node_unlock(dnp);
6434		/* nfs_getattr() will check changed and purge caches */
6435		nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
6436	}
6437
6438	if (!error && fh.fh_len) {
6439		/* create the vnode with the filehandle and attributes */
6440		xid = savedxid;
6441		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
6442		if (!error)
6443			newvp = NFSTOV(np);
6444	}
6445	NVATTR_CLEANUP(&nvattr);
6446
6447	if (!namedattrs)
6448		nfs_dulookup_finish(&dul, dnp, ctx);
6449
6450	/*
6451	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
6452	 * if we can succeed in looking up the object.
6453	 */
6454	if ((create_error == EEXIST) || (!create_error && !newvp)) {
6455		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
6456		if (!error) {
6457			newvp = NFSTOV(np);
6458			if (vnode_vtype(newvp) != nfstov_type(type, nfsvers))
6459				error = EEXIST;
6460		}
6461	}
6462	if (!busyerror)
6463		nfs_node_clear_busy(dnp);
6464	if (error) {
6465		if (newvp) {
6466			nfs_node_unlock(np);
6467			vnode_put(newvp);
6468		}
6469	} else {
6470		nfs_node_unlock(np);
6471		*npp = np;
6472	}
6473	return (error);
6474}
6475
6476int
6477nfs4_vnop_mknod(
6478	struct vnop_mknod_args /* {
6479		struct vnodeop_desc *a_desc;
6480		vnode_t a_dvp;
6481		vnode_t *a_vpp;
6482		struct componentname *a_cnp;
6483		struct vnode_attr *a_vap;
6484		vfs_context_t a_context;
6485	} */ *ap)
6486{
6487	nfsnode_t np = NULL;
6488	struct nfsmount *nmp;
6489	int error;
6490
6491	nmp = VTONMP(ap->a_dvp);
6492	if (nfs_mount_gone(nmp))
6493		return (ENXIO);
6494
6495	if (!VATTR_IS_ACTIVE(ap->a_vap, va_type))
6496		return (EINVAL);
6497	switch (ap->a_vap->va_type) {
6498	case VBLK:
6499	case VCHR:
6500	case VFIFO:
6501	case VSOCK:
6502		break;
6503	default:
6504		return (ENOTSUP);
6505	}
6506
6507	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
6508			vtonfs_type(ap->a_vap->va_type, nmp->nm_vers), NULL, &np);
6509	if (!error)
6510		*ap->a_vpp = NFSTOV(np);
6511	return (error);
6512}
6513
6514int
6515nfs4_vnop_mkdir(
6516	struct vnop_mkdir_args /* {
6517		struct vnodeop_desc *a_desc;
6518		vnode_t a_dvp;
6519		vnode_t *a_vpp;
6520		struct componentname *a_cnp;
6521		struct vnode_attr *a_vap;
6522		vfs_context_t a_context;
6523	} */ *ap)
6524{
6525	nfsnode_t np = NULL;
6526	int error;
6527
6528	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
6529			NFDIR, NULL, &np);
6530	if (!error)
6531		*ap->a_vpp = NFSTOV(np);
6532	return (error);
6533}
6534
6535int
6536nfs4_vnop_symlink(
6537	struct vnop_symlink_args /* {
6538		struct vnodeop_desc *a_desc;
6539		vnode_t a_dvp;
6540		vnode_t *a_vpp;
6541		struct componentname *a_cnp;
6542		struct vnode_attr *a_vap;
6543		char *a_target;
6544		vfs_context_t a_context;
6545	} */ *ap)
6546{
6547	nfsnode_t np = NULL;
6548	int error;
6549
6550	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
6551			NFLNK, ap->a_target, &np);
6552	if (!error)
6553		*ap->a_vpp = NFSTOV(np);
6554	return (error);
6555}
6556
/*
 * NFSv4 hard link.
 * Does everything in one COMPOUND:
 *   PUTFH(file), SAVEFH, PUTFH(dir), LINK, GETATTR(dir), RESTOREFH, GETATTR(file)
 * so both the target directory's and the linked file's attributes are
 * refreshed in the same round trip.
 */
int
nfs4_vnop_link(
	struct vnop_link_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		vnode_t a_tdvp;
		struct componentname *a_cnp;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	vnode_t tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	int error = 0, lockerror = ENOENT, status;
	struct nfsmount *nmp;
	nfsnode_t np = VTONFS(vp);
	nfsnode_t tdnp = VTONFS(tdvp);
	int nfsvers, numops;
	u_int64_t xid, savedxid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	/* hard links can't cross mounts */
	if (vnode_mount(vp) != vnode_mount(tdvp))
		return (EXDEV);

	nmp = VTONMP(vp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* refuse to link to or into referral trigger (placeholder) nodes */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	if (tdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);

	if ((error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx))))
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH(SOURCE), SAVEFH, PUTFH(DIR), LINK, GETATTR(DIR), RESTOREFH, GETATTR
	numops = 7;
	nfsm_chain_build_alloc_init(error, &nmreq, 29 * NFSX_UNSIGNED + cnp->cn_namelen);
	nfsm_chain_add_compound_header(error, &nmreq, "link", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LINK);
	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, tdnp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(tdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	/* lock both nodes before updating their cached attributes from the reply */
	if ((lockerror = nfs_node_lock2(tdnp, np))) {
		error = lockerror;
		goto nfsmout;
	}
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LINK);
	nfsm_chain_check_change_info(error, &nmrep, tdnp);
	/* directory attributes: if we don't get them, make sure to invalidate */
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	savedxid = xid;
	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, &xid);
	if (error)
		NATTRINVALIDATE(tdnp);
	/* link attributes: if we don't get them, make sure to invalidate */
	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	xid = savedxid;
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (error)
		NATTRINVALIDATE(np);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	/* the directory's contents changed */
	if (!lockerror)
		tdnp->n_flag |= NMODIFIED;
	/* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */
	if (error == EEXIST)
		error = 0;
	/* the name now exists, so toss any negative name cache entries */
	if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
		tdnp->n_flag &= ~NNEGNCENTRIES;
		cache_purge_negatives(tdvp);
	}
	if (!lockerror)
		nfs_node_unlock2(tdnp, np);
	nfs_node_clear_busy2(tdnp, np);
	return (error);
}
6675
/*
 * NFSv4 remove directory.
 * Sends the REMOVE RPC for the name, purges the name cache, and on
 * success takes the nfsnode out of the node hash so a new object that
 * reuses the same file handle can't be confused with this one before
 * the vnode is reclaimed.
 */
int
nfs4_vnop_rmdir(
	struct vnop_rmdir_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_dvp;
		vnode_t a_vp;
		struct componentname *a_cnp;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	vnode_t dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsmount *nmp;
	int error = 0, namedattrs;
	nfsnode_t np = VTONFS(vp);
	nfsnode_t dnp = VTONFS(dvp);
	struct nfs_dulookup dul;

	if (vnode_vtype(vp) != VDIR)
		return (EINVAL);

	nmp = NFSTONMP(dnp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);
	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);

	/* keep both nodes busy while we're working on them */
	if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx))))
		return (error);

	/* start the parallel dulookup (only when named attrs aren't supported) */
	if (!namedattrs) {
		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
		nfs_dulookup_start(&dul, dnp, ctx);
	}

	error = nfs4_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
			vfs_context_thread(ctx), vfs_context_ucred(ctx));

	nfs_name_cache_purge(dnp, np, cnp, ctx);
	/* nfs_getattr() will check changed and purge caches */
	nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
	if (!namedattrs)
		nfs_dulookup_finish(&dul, dnp, ctx);
	nfs_node_clear_busy2(dnp, np);

	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	if (!error) {
		/*
		 * remove nfsnode from hash now so we can't accidentally find it
		 * again if another object gets created with the same filehandle
		 * before this vnode gets reclaimed
		 */
		lck_mtx_lock(nfs_node_hash_mutex);
		if (np->n_hflag & NHHASHED) {
			LIST_REMOVE(np, n_hash);
			np->n_hflag &= ~NHHASHED;
			FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
		}
		lck_mtx_unlock(nfs_node_hash_mutex);
	}
	return (error);
}
6743
6744/*
6745 * NFSv4 Named Attributes
6746 *
6747 * Both the extended attributes interface and the named streams interface
6748 * are backed by NFSv4 named attributes.  The implementations for both use
6749 * a common set of routines in an attempt to reduce code duplication, to
6750 * increase efficiency, to increase caching of both names and data, and to
6751 * confine the complexity.
6752 *
6753 * Each NFS node caches its named attribute directory's file handle.
6754 * The directory nodes for the named attribute directories are handled
6755 * exactly like regular directories (with a couple minor exceptions).
6756 * Named attribute nodes are also treated as much like regular files as
6757 * possible.
6758 *
6759 * Most of the heavy lifting is done by nfs4_named_attr_get().
6760 */
6761
6762/*
6763 * Get the given node's attribute directory node.
6764 * If !fetch, then only return a cached node.
6765 * Otherwise, we will attempt to fetch the node from the server.
6766 * (Note: the node should be marked busy.)
6767 */
nfsnode_t
nfs4_named_attr_dir_get(nfsnode_t np, int fetch, vfs_context_t ctx)
{
	nfsnode_t adnp = NULL;
	struct nfsmount *nmp;
	int error = 0, status, numops;
	struct nfsm_chain nmreq, nmrep;
	u_int64_t xid;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	fhandle_t fh;
	struct nfs_vattr nvattr;
	struct componentname cn;
	struct nfsreq rq, *req = &rq;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (NULL);
	/* referral trigger (placeholder) nodes don't get an attrdir */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (NULL);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	NVATTR_INIT(&nvattr);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	/* name the attrdir node using the special fork-specifier path */
	bzero(&cn, sizeof(cn));
	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(_PATH_FORKSPECIFIER, const, char *); /* "/..namedfork/" */
	cn.cn_namelen = strlen(_PATH_FORKSPECIFIER);
	cn.cn_nameiop = LOOKUP;

	/* First, see if we can get a node from the attrdir fh cached in np. */
	if (np->n_attrdirfh) {
		// XXX can't set parent correctly (to np) yet
		error = nfs_nget(nmp->nm_mountp, NULL, &cn, np->n_attrdirfh+1, *np->n_attrdirfh,
				NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &adnp);
		if (adnp)
			goto nfsmout;
	}
	/* Caller only wanted a cached node?  Then we're done. */
	if (!fetch) {
		error = ENOENT;
		goto nfsmout;
	}

	// PUTFH, OPENATTR, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "openattr", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
	nfsm_chain_add_32(error, &nmreq, 0); /* don't request creation of the attrdir */
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	/* ask for the usual attributes plus the attrdir's file handle */
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND,
			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
	if (!error)
		error = nfs_request_async_finish(req, &nmrep, &xid, &status);

	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsmout_if(error);
	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
	nfsmout_if(error);
	/* without the attrdir's file handle, there's nothing we can do */
	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) || !fh.fh_len) {
		error = ENOENT;
		goto nfsmout;
	}
	if (!np->n_attrdirfh || (*np->n_attrdirfh != fh.fh_len)) {
		/* (re)allocate attrdir fh buffer */
		if (np->n_attrdirfh)
			FREE(np->n_attrdirfh, M_TEMP);
		MALLOC(np->n_attrdirfh, u_char*, fh.fh_len+1, M_TEMP, M_WAITOK);
	}
	if (!np->n_attrdirfh) {
		error = ENOMEM;
		goto nfsmout;
	}
	/* cache the attrdir fh in the node */
	/* (buffer layout: first byte is the fh length, fh data follows) */
	*np->n_attrdirfh = fh.fh_len;
	bcopy(fh.fh_data, np->n_attrdirfh+1, fh.fh_len);
	/* create node for attrdir */
	// XXX can't set parent correctly (to np) yet
	error = nfs_nget(NFSTOMP(np), NULL, &cn, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, 0, &adnp);
nfsmout:
	NVATTR_CLEANUP(&nvattr);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);

	if (adnp) {
		/* sanity check that this node is an attribute directory */
		if (adnp->n_vattr.nva_type != VDIR)
			error = EINVAL;
		if (!(adnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
			error = EINVAL;
		nfs_node_unlock(adnp);
		if (error)
			vnode_put(NFSTOV(adnp));
	}
	/* return the attrdir node, or NULL on any error */
	return (error ? NULL : adnp);
}
6880
6881/*
6882 * Get the given node's named attribute node for the name given.
6883 *
6884 * In an effort to increase the performance of named attribute access, we try
6885 * to reduce server requests by doing the following:
6886 *
6887 * - cache the node's named attribute directory file handle in the node
6888 * - maintain a directory vnode for the attribute directory
6889 * - use name cache entries (positive and negative) to speed up lookups
6890 * - optionally open the named attribute (with the given accessMode) in the same RPC
6891 * - combine attribute directory retrieval with the lookup/open RPC
6892 * - optionally prefetch the named attribute's first block of data in the same RPC
6893 *
6894 * Also, in an attempt to reduce the number of copies/variations of this code,
6895 * parts of the RPC building/processing code are conditionalized on what is
6896 * needed for any particular request (openattr, lookup vs. open, read).
6897 *
6898 * Note that because we may not have the attribute directory node when we start
6899 * the lookup/open, we lock both the node and the attribute directory node.
6900 */
6901
/* flags for nfs4_named_attr_get() */
#define NFS_GET_NAMED_ATTR_CREATE		0x1	/* create the attribute if it doesn't exist */
#define NFS_GET_NAMED_ATTR_CREATE_GUARDED	0x2	/* guarded create: fail if it already exists */
#define NFS_GET_NAMED_ATTR_TRUNCATE		0x4	/* truncate the attribute's data to zero on create */
#define NFS_GET_NAMED_ATTR_PREFETCH		0x8	/* prefetch the attribute's first block of data */
6906
6907int
6908nfs4_named_attr_get(
6909	nfsnode_t np,
6910	struct componentname *cnp,
6911	uint32_t accessMode,
6912	int flags,
6913	vfs_context_t ctx,
6914	nfsnode_t *anpp,
6915	struct nfs_open_file **nofpp)
6916{
6917	struct nfsmount *nmp;
6918	int error = 0, open_error = EIO;
6919	int inuse = 0, adlockerror = ENOENT, busyerror = ENOENT, adbusyerror = ENOENT, nofpbusyerror = ENOENT;
6920	int create, guarded, prefetch, truncate, noopbusy = 0;
6921	int open, status, numops, hadattrdir, negnamecache;
6922	struct nfs_vattr nvattr;
6923	struct vnode_attr vattr;
6924	nfsnode_t adnp = NULL, anp = NULL;
6925	vnode_t avp = NULL;
6926	u_int64_t xid, savedxid = 0;
6927	struct nfsm_chain nmreq, nmrep;
6928	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
6929	uint32_t denyMode, rflags, delegation, recall, eof, rlen, retlen;
6930	nfs_stateid stateid, dstateid;
6931	fhandle_t fh;
6932	struct nfs_open_owner *noop = NULL;
6933	struct nfs_open_file *newnofp = NULL, *nofp = NULL;
6934	struct vnop_access_args naa;
6935	thread_t thd;
6936	kauth_cred_t cred;
6937	struct timeval now;
6938	char sbuf[64], *s;
6939	uint32_t ace_type, ace_flags, ace_mask, len, slen;
6940	struct kauth_ace ace;
6941	struct nfsreq rq, *req = &rq;
6942	struct nfsreq_secinfo_args si;
6943
6944	*anpp = NULL;
6945	fh.fh_len = 0;
6946	rflags = delegation = recall = eof = rlen = retlen = 0;
6947	ace.ace_flags = 0;
6948	s = sbuf;
6949	slen = sizeof(sbuf);
6950
6951	nmp = NFSTONMP(np);
6952	if (nfs_mount_gone(nmp))
6953		return (ENXIO);
6954	NVATTR_INIT(&nvattr);
6955	negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
6956	thd = vfs_context_thread(ctx);
6957	cred = vfs_context_ucred(ctx);
6958	create = (flags & NFS_GET_NAMED_ATTR_CREATE) ? NFS_OPEN_CREATE : NFS_OPEN_NOCREATE;
6959	guarded = (flags & NFS_GET_NAMED_ATTR_CREATE_GUARDED) ? NFS_CREATE_GUARDED : NFS_CREATE_UNCHECKED;
6960	truncate = (flags & NFS_GET_NAMED_ATTR_TRUNCATE);
6961	prefetch = (flags & NFS_GET_NAMED_ATTR_PREFETCH);
6962
6963	if (!create) {
6964		error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED);
6965		if (error)
6966			return (error);
6967		if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
6968		    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
6969			return (ENOATTR);
6970	} else if (accessMode == NFS_OPEN_SHARE_ACCESS_NONE) {
6971		/* shouldn't happen... but just be safe */
6972		printf("nfs4_named_attr_get: create with no access %s\n", cnp->cn_nameptr);
6973		accessMode = NFS_OPEN_SHARE_ACCESS_READ;
6974	}
6975	open = (accessMode != NFS_OPEN_SHARE_ACCESS_NONE);
6976	if (open) {
6977		/*
6978		 * We're trying to open the file.
6979		 * We'll create/open it with the given access mode,
6980		 * and set NFS_OPEN_FILE_CREATE.
6981		 */
6982		denyMode = NFS_OPEN_SHARE_DENY_NONE;
6983		if (prefetch && guarded)
6984			prefetch = 0;  /* no sense prefetching data that can't be there */
6985
6986		noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
6987		if (!noop)
6988			return (ENOMEM);
6989	}
6990
6991	if ((error = busyerror = nfs_node_set_busy(np, vfs_context_thread(ctx))))
6992		return (error);
6993
6994	adnp = nfs4_named_attr_dir_get(np, 0, ctx);
6995	hadattrdir = (adnp != NULL);
6996	if (prefetch) {
6997		microuptime(&now);
6998		/* use the special state ID because we don't have a real one to send */
6999		stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0;
7000		rlen = MIN(nmp->nm_rsize, nmp->nm_biosize);
7001	}
7002	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
7003	nfsm_chain_null(&nmreq);
7004	nfsm_chain_null(&nmrep);
7005
7006	if (hadattrdir) {
7007		if ((error = adbusyerror = nfs_node_set_busy(adnp, vfs_context_thread(ctx))))
7008			goto nfsmout;
7009		/* nfs_getattr() will check changed and purge caches */
7010		error = nfs_getattr(adnp, NULL, ctx, NGA_CACHED);
7011		nfsmout_if(error);
7012		error = cache_lookup(NFSTOV(adnp), &avp, cnp);
7013		switch (error) {
7014		case ENOENT:
7015			/* negative cache entry */
7016			goto nfsmout;
7017		case 0:
7018			/* cache miss */
7019			/* try dir buf cache lookup */
7020			error = nfs_dir_buf_cache_lookup(adnp, &anp, cnp, ctx, 0);
7021			if (!error && anp) {
7022				/* dir buf cache hit */
7023				*anpp = anp;
7024				error = -1;
7025			}
7026			if (error != -1) /* cache miss */
7027				break;
7028			/* FALLTHROUGH */
7029		case -1:
7030			/* cache hit, not really an error */
7031			OSAddAtomic64(1, &nfsstats.lookupcache_hits);
7032			if (!anp && avp)
7033				*anpp = anp = VTONFS(avp);
7034
7035			nfs_node_clear_busy(adnp);
7036			adbusyerror = ENOENT;
7037
7038			/* check for directory access */
7039			naa.a_desc = &vnop_access_desc;
7040			naa.a_vp = NFSTOV(adnp);
7041			naa.a_action = KAUTH_VNODE_SEARCH;
7042			naa.a_context = ctx;
7043
7044			/* compute actual success/failure based on accessibility */
7045			error = nfs_vnop_access(&naa);
7046			/* FALLTHROUGH */
7047		default:
7048			/* we either found it, or hit an error */
7049			if (!error && guarded) {
7050				/* found cached entry but told not to use it */
7051				error = EEXIST;
7052				vnode_put(NFSTOV(anp));
7053				*anpp = anp = NULL;
7054			}
7055			/* we're done if error or we don't need to open */
7056			if (error || !open)
7057				goto nfsmout;
7058			/* no error and we need to open... */
7059		}
7060	}
7061
7062	if (open) {
7063restart:
7064		error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
7065		if (error) {
7066			nfs_open_owner_rele(noop);
7067			noop = NULL;
7068			goto nfsmout;
7069		}
7070		inuse = 1;
7071
7072		/* grab an open file - possibly provisional/nodeless if cache_lookup() failed */
7073		error = nfs_open_file_find(anp, noop, &newnofp, 0, 0, 1);
7074		if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_LOST)) {
7075			printf("nfs4_named_attr_get: LOST %d %s\n", kauth_cred_getuid(noop->noo_cred), cnp->cn_nameptr);
7076			error = EIO;
7077		}
7078		if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
7079			nfs_mount_state_in_use_end(nmp, 0);
7080			error = nfs4_reopen(newnofp, vfs_context_thread(ctx));
7081			nfs_open_file_destroy(newnofp);
7082			newnofp = NULL;
7083			if (!error)
7084				goto restart;
7085		}
7086		if (!error)
7087			error = nfs_open_file_set_busy(newnofp, vfs_context_thread(ctx));
7088		if (error) {
7089			if (newnofp)
7090				nfs_open_file_destroy(newnofp);
7091			newnofp = NULL;
7092			goto nfsmout;
7093		}
7094		if (anp) {
7095			/*
7096			 * We already have the node.  So we just need to open
7097			 * it - which we may be able to do with a delegation.
7098			 */
7099			open_error = error = nfs4_open(anp, newnofp, accessMode, denyMode, ctx);
7100			if (!error) {
7101				/* open succeeded, so our open file is no longer temporary */
7102				nofp = newnofp;
7103				nofpbusyerror = 0;
7104				newnofp = NULL;
7105				if (nofpp)
7106					*nofpp = nofp;
7107			}
7108			goto nfsmout;
7109		}
7110	}
7111
7112	/*
7113	 * We either don't have the attrdir or we didn't find the attribute
7114	 * in the name cache, so we need to talk to the server.
7115	 *
7116	 * If we don't have the attrdir, we'll need to ask the server for that too.
7117	 * If the caller is requesting that the attribute be created, we need to
7118	 * make sure the attrdir is created.
7119	 * The caller may also request that the first block of an existing attribute
7120	 * be retrieved at the same time.
7121	 */
7122
7123	if (open) {
7124		/* need to mark the open owner busy during the RPC */
7125		if ((error = nfs_open_owner_set_busy(noop, thd)))
7126			goto nfsmout;
7127		noopbusy = 1;
7128	}
7129
7130	/*
7131	 * We'd like to get updated post-open/lookup attributes for the
7132	 * directory and we may also want to prefetch some data via READ.
7133	 * We'd like the READ results to be last so that we can leave the
7134	 * data in the mbufs until the end.
7135	 *
7136	 * At a minimum we're sending: PUTFH, LOOKUP/OPEN, GETATTR, PUTFH, GETATTR
7137	 */
7138	numops = 5;
7139	if (!hadattrdir)
7140		numops += 3;	// also sending: OPENATTR, GETATTR, OPENATTR
7141	if (prefetch)
7142		numops += 4;	// also sending: SAVEFH, RESTOREFH, NVERIFY, READ
7143	nfsm_chain_build_alloc_init(error, &nmreq, 64 * NFSX_UNSIGNED + cnp->cn_namelen);
7144	nfsm_chain_add_compound_header(error, &nmreq, "getnamedattr", numops);
7145	if (hadattrdir) {
7146		numops--;
7147		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7148		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, adnp->n_fhp, adnp->n_fhsize);
7149	} else {
7150		numops--;
7151		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7152		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
7153		numops--;
7154		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
7155		nfsm_chain_add_32(error, &nmreq, create ? 1 : 0);
7156		numops--;
7157		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
7158		NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
7159		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
7160		nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
7161			NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
7162	}
7163	if (open) {
7164		numops--;
7165		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
7166		nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
7167		nfsm_chain_add_32(error, &nmreq, accessMode);
7168		nfsm_chain_add_32(error, &nmreq, denyMode);
7169		nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
7170		nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
7171		nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred));
7172		nfsm_chain_add_32(error, &nmreq, create);
7173		if (create) {
7174			nfsm_chain_add_32(error, &nmreq, guarded);
7175			VATTR_INIT(&vattr);
7176			if (truncate)
7177				VATTR_SET(&vattr, va_data_size, 0);
7178			nfsm_chain_add_fattr4(error, &nmreq, &vattr, nmp);
7179		}
7180		nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL);
7181		nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
7182	} else {
7183		numops--;
7184		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
7185		nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
7186	}
7187	numops--;
7188	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
7189	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
7190	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
7191	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
7192		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
7193	if (prefetch) {
7194		numops--;
7195		nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
7196	}
7197	if (hadattrdir) {
7198		numops--;
7199		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7200		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, adnp->n_fhp, adnp->n_fhsize);
7201	} else {
7202		numops--;
7203		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7204		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
7205		numops--;
7206		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
7207		nfsm_chain_add_32(error, &nmreq, 0);
7208	}
7209	numops--;
7210	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
7211	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
7212		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
7213	if (prefetch) {
7214		numops--;
7215		nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
7216		numops--;
7217		nfsm_chain_add_32(error, &nmreq, NFS_OP_NVERIFY);
7218		VATTR_INIT(&vattr);
7219		VATTR_SET(&vattr, va_data_size, 0);
7220		nfsm_chain_add_fattr4(error, &nmreq, &vattr, nmp);
7221		numops--;
7222		nfsm_chain_add_32(error, &nmreq, NFS_OP_READ);
7223		nfsm_chain_add_stateid(error, &nmreq, &stateid);
7224		nfsm_chain_add_64(error, &nmreq, 0);
7225		nfsm_chain_add_32(error, &nmreq, rlen);
7226	}
7227	nfsm_chain_build_done(error, &nmreq);
7228	nfsm_assert(error, (numops == 0), EPROTO);
7229	nfsmout_if(error);
7230	error = nfs_request_async(hadattrdir ? adnp : np, NULL, &nmreq, NFSPROC4_COMPOUND,
7231			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, open ? R_NOINTR: 0, NULL, &req);
7232	if (!error)
7233		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
7234
7235	if (hadattrdir && ((adlockerror = nfs_node_lock(adnp))))
7236		error = adlockerror;
7237	savedxid = xid;
7238	nfsm_chain_skip_tag(error, &nmrep);
7239	nfsm_chain_get_32(error, &nmrep, numops);
7240	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
7241	if (!hadattrdir) {
7242		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
7243		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
7244		nfsmout_if(error);
7245		error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
7246		nfsmout_if(error);
7247		if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) && fh.fh_len) {
7248			if (!np->n_attrdirfh || (*np->n_attrdirfh != fh.fh_len)) {
7249				/* (re)allocate attrdir fh buffer */
7250				if (np->n_attrdirfh)
7251					FREE(np->n_attrdirfh, M_TEMP);
7252				MALLOC(np->n_attrdirfh, u_char*, fh.fh_len+1, M_TEMP, M_WAITOK);
7253			}
7254			if (np->n_attrdirfh) {
7255				/* remember the attrdir fh in the node */
7256				*np->n_attrdirfh = fh.fh_len;
7257				bcopy(fh.fh_data, np->n_attrdirfh+1, fh.fh_len);
7258				/* create busied node for attrdir */
7259				struct componentname cn;
7260				bzero(&cn, sizeof(cn));
7261				cn.cn_nameptr = __CAST_AWAY_QUALIFIER(_PATH_FORKSPECIFIER, const, char *); /* "/..namedfork/" */
7262				cn.cn_namelen = strlen(_PATH_FORKSPECIFIER);
7263				cn.cn_nameiop = LOOKUP;
7264				// XXX can't set parent correctly (to np) yet
7265				error = nfs_nget(NFSTOMP(np), NULL, &cn, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, 0, &adnp);
7266				if (!error) {
7267					adlockerror = 0;
7268					/* set the node busy */
7269					SET(adnp->n_flag, NBUSY);
7270					adbusyerror = 0;
7271				}
7272				/* if no adnp, oh well... */
7273				error = 0;
7274			}
7275		}
7276		NVATTR_CLEANUP(&nvattr);
7277		fh.fh_len = 0;
7278	}
7279	if (open) {
7280		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
7281		nfs_owner_seqid_increment(noop, NULL, error);
7282		nfsm_chain_get_stateid(error, &nmrep, &newnofp->nof_stateid);
7283		nfsm_chain_check_change_info(error, &nmrep, adnp);
7284		nfsm_chain_get_32(error, &nmrep, rflags);
7285		bmlen = NFS_ATTR_BITMAP_LEN;
7286		nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
7287		nfsm_chain_get_32(error, &nmrep, delegation);
7288		if (!error)
7289			switch (delegation) {
7290			case NFS_OPEN_DELEGATE_NONE:
7291				break;
7292			case NFS_OPEN_DELEGATE_READ:
7293			case NFS_OPEN_DELEGATE_WRITE:
7294				nfsm_chain_get_stateid(error, &nmrep, &dstateid);
7295				nfsm_chain_get_32(error, &nmrep, recall);
7296				if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
7297					nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
7298				/* if we have any trouble accepting the ACE, just invalidate it */
7299				ace_type = ace_flags = ace_mask = len = 0;
7300				nfsm_chain_get_32(error, &nmrep, ace_type);
7301				nfsm_chain_get_32(error, &nmrep, ace_flags);
7302				nfsm_chain_get_32(error, &nmrep, ace_mask);
7303				nfsm_chain_get_32(error, &nmrep, len);
7304				ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
7305				ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
7306				ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
7307				if (!error && (len >= slen)) {
7308					MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
7309					if (s)
7310						slen = len+1;
7311					else
7312						ace.ace_flags = 0;
7313				}
7314				if (s)
7315					nfsm_chain_get_opaque(error, &nmrep, len, s);
7316				else
7317					nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
7318				if (!error && s) {
7319					s[len] = '\0';
7320					if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
7321						ace.ace_flags = 0;
7322				}
7323				if (error || !s)
7324					ace.ace_flags = 0;
7325				if (s && (s != sbuf))
7326					FREE(s, M_TEMP);
7327				break;
7328			default:
7329				error = EBADRPC;
7330				break;
7331			}
7332		/* At this point if we have no error, the object was created/opened. */
7333		open_error = error;
7334	} else {
7335		nfsm_chain_op_check(error, &nmrep, NFS_OP_LOOKUP);
7336	}
7337	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
7338	nfsmout_if(error);
7339	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
7340	nfsmout_if(error);
7341	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) || !fh.fh_len) {
7342		error = EIO;
7343		goto nfsmout;
7344	}
7345	if (prefetch)
7346		nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
7347	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
7348	if (!hadattrdir)
7349		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
7350	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
7351	nfsmout_if(error);
7352	xid = savedxid;
7353	nfsm_chain_loadattr(error, &nmrep, adnp, nmp->nm_vers, &xid);
7354	nfsmout_if(error);
7355
7356	if (open) {
7357		if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
7358			newnofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
7359		if (rflags & NFS_OPEN_RESULT_CONFIRM) {
7360			if (adnp) {
7361				nfs_node_unlock(adnp);
7362				adlockerror = ENOENT;
7363			}
7364			NVATTR_CLEANUP(&nvattr);
7365			error = nfs4_open_confirm_rpc(nmp, adnp ? adnp : np, fh.fh_data, fh.fh_len, noop, &newnofp->nof_stateid, thd, cred, &nvattr, &xid);
7366			nfsmout_if(error);
7367			savedxid = xid;
7368			if ((adlockerror = nfs_node_lock(adnp)))
7369				error = adlockerror;
7370		}
7371	}
7372
7373nfsmout:
7374	if (open && adnp && !adlockerror) {
7375		if (!open_error && (adnp->n_flag & NNEGNCENTRIES)) {
7376			adnp->n_flag &= ~NNEGNCENTRIES;
7377			cache_purge_negatives(NFSTOV(adnp));
7378		}
7379		adnp->n_flag |= NMODIFIED;
7380		nfs_node_unlock(adnp);
7381		adlockerror = ENOENT;
7382		nfs_getattr(adnp, NULL, ctx, NGA_CACHED);
7383	}
7384	if (adnp && !adlockerror && (error == ENOENT) &&
7385	    (cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != CREATE) && negnamecache) {
7386		/* add a negative entry in the name cache */
7387		cache_enter(NFSTOV(adnp), NULL, cnp);
7388		adnp->n_flag |= NNEGNCENTRIES;
7389	}
7390	if (adnp && !adlockerror) {
7391		nfs_node_unlock(adnp);
7392		adlockerror = ENOENT;
7393	}
7394	if (!error && !anp && fh.fh_len) {
7395		/* create the vnode with the filehandle and attributes */
7396		xid = savedxid;
7397		error = nfs_nget(NFSTOMP(np), adnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &anp);
7398		if (!error) {
7399			*anpp = anp;
7400			nfs_node_unlock(anp);
7401		}
7402		if (!error && open) {
7403			nfs_open_file_add_open(newnofp, accessMode, denyMode, 0);
7404			/* After we have a node, add our open file struct to the node */
7405			nofp = newnofp;
7406			error = nfs_open_file_find_internal(anp, noop, &nofp, 0, 0, 0);
7407			if (error) {
7408				/* This shouldn't happen, because we passed in a new nofp to use. */
7409				printf("nfs_open_file_find_internal failed! %d\n", error);
7410				nofp = NULL;
7411			} else if (nofp != newnofp) {
7412				/*
7413				 * Hmm... an open file struct already exists.
7414				 * Mark the existing one busy and merge our open into it.
7415				 * Then destroy the one we created.
7416				 * Note: there's no chance of an open confict because the
7417				 * open has already been granted.
7418				 */
7419				nofpbusyerror = nfs_open_file_set_busy(nofp, NULL);
7420				nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
7421				nofp->nof_stateid = newnofp->nof_stateid;
7422				if (newnofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)
7423					nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
7424				nfs_open_file_clear_busy(newnofp);
7425				nfs_open_file_destroy(newnofp);
7426				newnofp = NULL;
7427			}
7428			if (!error) {
7429				newnofp = NULL;
7430				nofpbusyerror = 0;
7431				/* mark the node as holding a create-initiated open */
7432				nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
7433				nofp->nof_creator = current_thread();
7434				if (nofpp)
7435					*nofpp = nofp;
7436			}
7437		}
7438	}
7439	NVATTR_CLEANUP(&nvattr);
7440	if (open && ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE))) {
7441		if (!error && anp && !recall) {
7442			/* stuff the delegation state in the node */
7443			lck_mtx_lock(&anp->n_openlock);
7444			anp->n_openflags &= ~N_DELEG_MASK;
7445			anp->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
7446			anp->n_dstateid = dstateid;
7447			anp->n_dace = ace;
7448			if (anp->n_dlink.tqe_next == NFSNOLIST) {
7449				lck_mtx_lock(&nmp->nm_lock);
7450				if (anp->n_dlink.tqe_next == NFSNOLIST)
7451					TAILQ_INSERT_TAIL(&nmp->nm_delegations, anp, n_dlink);
7452				lck_mtx_unlock(&nmp->nm_lock);
7453			}
7454			lck_mtx_unlock(&anp->n_openlock);
7455		} else {
7456			/* give the delegation back */
7457			if (anp) {
7458				if (NFS_CMPFH(anp, fh.fh_data, fh.fh_len)) {
7459					/* update delegation state and return it */
7460					lck_mtx_lock(&anp->n_openlock);
7461					anp->n_openflags &= ~N_DELEG_MASK;
7462					anp->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
7463					anp->n_dstateid = dstateid;
7464					anp->n_dace = ace;
7465					if (anp->n_dlink.tqe_next == NFSNOLIST) {
7466						lck_mtx_lock(&nmp->nm_lock);
7467						if (anp->n_dlink.tqe_next == NFSNOLIST)
7468							TAILQ_INSERT_TAIL(&nmp->nm_delegations, anp, n_dlink);
7469						lck_mtx_unlock(&nmp->nm_lock);
7470					}
7471					lck_mtx_unlock(&anp->n_openlock);
7472					/* don't need to send a separate delegreturn for fh */
7473					fh.fh_len = 0;
7474				}
7475				/* return anp's current delegation */
7476				nfs4_delegation_return(anp, 0, thd, cred);
7477			}
7478			if (fh.fh_len) /* return fh's delegation if it wasn't for anp */
7479				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, 0, thd, cred);
7480		}
7481	}
7482	if (open) {
7483		if (newnofp) {
7484			/* need to cleanup our temporary nofp */
7485			nfs_open_file_clear_busy(newnofp);
7486			nfs_open_file_destroy(newnofp);
7487			newnofp = NULL;
7488		} else if (nofp && !nofpbusyerror) {
7489			nfs_open_file_clear_busy(nofp);
7490			nofpbusyerror = ENOENT;
7491		}
7492		if (inuse && nfs_mount_state_in_use_end(nmp, error)) {
7493			inuse = 0;
7494			nofp = newnofp = NULL;
7495			rflags = delegation = recall = eof = rlen = retlen = 0;
7496			ace.ace_flags = 0;
7497			s = sbuf;
7498			slen = sizeof(sbuf);
7499			nfsm_chain_cleanup(&nmreq);
7500			nfsm_chain_cleanup(&nmrep);
7501			if (anp) {
7502				vnode_put(NFSTOV(anp));
7503				*anpp = anp = NULL;
7504			}
7505			hadattrdir = (adnp != NULL);
7506			if (noopbusy) {
7507				nfs_open_owner_clear_busy(noop);
7508				noopbusy = 0;
7509			}
7510			goto restart;
7511		}
7512		if (noop) {
7513			if (noopbusy) {
7514				nfs_open_owner_clear_busy(noop);
7515				noopbusy = 0;
7516			}
7517			nfs_open_owner_rele(noop);
7518		}
7519	}
7520	if (!error && prefetch && nmrep.nmc_mhead) {
7521		nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
7522		nfsm_chain_op_check(error, &nmrep, NFS_OP_NVERIFY);
7523		nfsm_chain_op_check(error, &nmrep, NFS_OP_READ);
7524		nfsm_chain_get_32(error, &nmrep, eof);
7525		nfsm_chain_get_32(error, &nmrep, retlen);
7526		if (!error && anp) {
7527			/*
7528			 * There can be one problem with doing the prefetch.
7529			 * Because we don't have the node before we start the RPC, we
7530			 * can't have the buffer busy while the READ is performed.
7531			 * So there is a chance that other I/O occured on the same
7532			 * range of data while we were performing this RPC.  If that
7533			 * happens, then it's possible the data we have in the READ
7534			 * response is no longer up to date.
7535			 * Once we have the node and the buffer, we need to make sure
7536			 * that there's no chance we could be putting stale data in
7537			 * the buffer.
7538			 * So, we check if the range read is dirty or if any I/O may
7539			 * have occured on it while we were performing our RPC.
7540			 */
7541			struct nfsbuf *bp = NULL;
7542			int lastpg;
7543			uint32_t pagemask;
7544
7545			retlen = MIN(retlen, rlen);
7546
7547			/* check if node needs size update or invalidation */
7548			if (ISSET(anp->n_flag, NUPDATESIZE))
7549				nfs_data_update_size(anp, 0);
7550			if (!(error = nfs_node_lock(anp))) {
7551				if (anp->n_flag & NNEEDINVALIDATE) {
7552					anp->n_flag &= ~NNEEDINVALIDATE;
7553					nfs_node_unlock(anp);
7554					error = nfs_vinvalbuf(NFSTOV(anp), V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
7555					if (!error) /* lets play it safe and just drop the data */
7556						error = EIO;
7557				} else {
7558					nfs_node_unlock(anp);
7559				}
7560			}
7561
7562			/* calculate page mask for the range of data read */
7563			lastpg = (trunc_page_32(retlen) - 1) / PAGE_SIZE;
7564			pagemask = ((1 << (lastpg + 1)) - 1);
7565
7566			if (!error)
7567				error = nfs_buf_get(anp, 0, nmp->nm_biosize, thd, NBLK_READ|NBLK_NOWAIT, &bp);
7568			/* don't save the data if dirty or potential I/O conflict */
7569			if (!error && bp && !bp->nb_dirtyoff && !(bp->nb_dirty & pagemask) &&
7570			    timevalcmp(&anp->n_lastio, &now, <)) {
7571				OSAddAtomic64(1, &nfsstats.read_bios);
7572				CLR(bp->nb_flags, (NB_DONE|NB_ASYNC));
7573				SET(bp->nb_flags, NB_READ);
7574				NFS_BUF_MAP(bp);
7575				nfsm_chain_get_opaque(error, &nmrep, retlen, bp->nb_data);
7576				if (error) {
7577					bp->nb_error = error;
7578					SET(bp->nb_flags, NB_ERROR);
7579				} else {
7580					bp->nb_offio = 0;
7581					bp->nb_endio = rlen;
7582					if ((retlen > 0) && (bp->nb_endio < (int)retlen))
7583						bp->nb_endio = retlen;
7584					if (eof || (retlen == 0)) {
7585						/* zero out the remaining data (up to EOF) */
7586						off_t rpcrem, eofrem, rem;
7587						rpcrem = (rlen - retlen);
7588						eofrem = anp->n_size - (NBOFF(bp) + retlen);
7589						rem = (rpcrem < eofrem) ? rpcrem : eofrem;
7590						if (rem > 0)
7591							bzero(bp->nb_data + retlen, rem);
7592					} else if ((retlen < rlen) && !ISSET(bp->nb_flags, NB_ERROR)) {
7593						/* ugh... short read ... just invalidate for now... */
7594						SET(bp->nb_flags, NB_INVAL);
7595					}
7596				}
7597				nfs_buf_read_finish(bp);
7598				microuptime(&anp->n_lastio);
7599			}
7600			if (bp)
7601				nfs_buf_release(bp, 1);
7602		}
7603		error = 0; /* ignore any transient error in processing the prefetch */
7604	}
7605	if (adnp && !adbusyerror) {
7606		nfs_node_clear_busy(adnp);
7607		adbusyerror = ENOENT;
7608	}
7609	if (!busyerror) {
7610		nfs_node_clear_busy(np);
7611		busyerror = ENOENT;
7612	}
7613	if (adnp)
7614		vnode_put(NFSTOV(adnp));
7615	if (error && *anpp) {
7616		vnode_put(NFSTOV(*anpp));
7617		*anpp = NULL;
7618	}
7619	nfsm_chain_cleanup(&nmreq);
7620	nfsm_chain_cleanup(&nmrep);
7621	return (error);
7622}
7623
7624/*
7625 * Remove a named attribute.
7626 */
7627int
7628nfs4_named_attr_remove(nfsnode_t np, nfsnode_t anp, const char *name, vfs_context_t ctx)
7629{
7630	nfsnode_t adnp = NULL;
7631	struct nfsmount *nmp;
7632	struct componentname cn;
7633	struct vnop_remove_args vra;
7634	int error, putanp = 0;
7635
7636	nmp = NFSTONMP(np);
7637	if (nfs_mount_gone(nmp))
7638		return (ENXIO);
7639
7640	bzero(&cn, sizeof(cn));
7641	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(name, const, char *);
7642	cn.cn_namelen = strlen(name);
7643	cn.cn_nameiop = DELETE;
7644	cn.cn_flags = 0;
7645
7646	if (!anp) {
7647		error = nfs4_named_attr_get(np, &cn, NFS_OPEN_SHARE_ACCESS_NONE,
7648				0, ctx, &anp, NULL);
7649		if ((!error && !anp) || (error == ENOATTR))
7650			error = ENOENT;
7651		if (error) {
7652			if (anp) {
7653				vnode_put(NFSTOV(anp));
7654				anp = NULL;
7655			}
7656			goto out;
7657		}
7658		putanp = 1;
7659	}
7660
7661	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
7662		goto out;
7663	adnp = nfs4_named_attr_dir_get(np, 1, ctx);
7664	nfs_node_clear_busy(np);
7665	if (!adnp) {
7666		error = ENOENT;
7667		goto out;
7668	}
7669
7670	vra.a_desc = &vnop_remove_desc;
7671	vra.a_dvp = NFSTOV(adnp);
7672	vra.a_vp = NFSTOV(anp);
7673	vra.a_cnp = &cn;
7674	vra.a_flags = 0;
7675	vra.a_context = ctx;
7676	error = nfs_vnop_remove(&vra);
7677out:
7678	if (adnp)
7679		vnode_put(NFSTOV(adnp));
7680	if (putanp)
7681		vnode_put(NFSTOV(anp));
7682	return (error);
7683}
7684
7685int
7686nfs4_vnop_getxattr(
7687	struct vnop_getxattr_args /* {
7688		struct vnodeop_desc *a_desc;
7689		vnode_t a_vp;
7690		const char * a_name;
7691		uio_t a_uio;
7692		size_t *a_size;
7693		int a_options;
7694		vfs_context_t a_context;
7695	} */ *ap)
7696{
7697	vfs_context_t ctx = ap->a_context;
7698	struct nfsmount *nmp;
7699	struct nfs_vattr nvattr;
7700	struct componentname cn;
7701	nfsnode_t anp;
7702	int error = 0, isrsrcfork;
7703
7704	nmp = VTONMP(ap->a_vp);
7705	if (nfs_mount_gone(nmp))
7706		return (ENXIO);
7707
7708	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
7709		return (ENOTSUP);
7710	error = nfs_getattr(VTONFS(ap->a_vp), &nvattr, ctx, NGA_CACHED);
7711	if (error)
7712		return (error);
7713	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
7714	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
7715		return (ENOATTR);
7716
7717	bzero(&cn, sizeof(cn));
7718	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
7719	cn.cn_namelen = strlen(ap->a_name);
7720	cn.cn_nameiop = LOOKUP;
7721	cn.cn_flags = MAKEENTRY;
7722
7723	/* we'll normally try to prefetch data for xattrs... the resource fork is really a stream */
7724	isrsrcfork = (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0);
7725
7726	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_NONE,
7727			!isrsrcfork ? NFS_GET_NAMED_ATTR_PREFETCH : 0, ctx, &anp, NULL);
7728	if ((!error && !anp) || (error == ENOENT))
7729		error = ENOATTR;
7730	if (!error) {
7731		if (ap->a_uio)
7732			error = nfs_bioread(anp, ap->a_uio, 0, ctx);
7733		else
7734			*ap->a_size = anp->n_size;
7735	}
7736	if (anp)
7737		vnode_put(NFSTOV(anp));
7738	return (error);
7739}
7740
/*
 * Set a named attribute (xattr) on an NFSv4 node.
 *
 * The attribute is created/opened as a named attribute file, the data is
 * written out and flushed, and the attribute file is then closed.
 * FinderInfo is special-cased: it must be exactly 32 bytes, and setting an
 * all-zeroes FinderInfo means removing the attribute entirely.
 */
int
nfs4_vnop_setxattr(
	struct vnop_setxattr_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		const char * a_name;
		uio_t a_uio;
		int a_options;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	int options = ap->a_options;
	uio_t uio = ap->a_uio;
	const char *name = ap->a_name;
	struct nfsmount *nmp;
	struct componentname cn;
	nfsnode_t anp = NULL;
	int error = 0, closeerror = 0, flags, isrsrcfork, isfinderinfo, empty = 0, i;
#define FINDERINFOSIZE 32
	uint8_t finfo[FINDERINFOSIZE];	/* local copy of the FinderInfo data */
	uint32_t *finfop;
	struct nfs_open_file *nofp = NULL;
	char uio_buf [ UIO_SIZEOF(1) ];
	uio_t auio;
	struct vnop_write_args vwa;

	nmp = VTONMP(ap->a_vp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
		return (ENOTSUP);

	/* XATTR_CREATE and XATTR_REPLACE are mutually exclusive */
	if ((options & XATTR_CREATE) && (options & XATTR_REPLACE))
		return (EINVAL);

	/* XXX limitation based on need to back up uio on short write */
	if (uio_iovcnt(uio) > 1) {
		printf("nfs4_vnop_setxattr: iovcnt > 1\n");
		return (EINVAL);
	}

	/* set up a CREATE componentname for the attribute's name */
	bzero(&cn, sizeof(cn));
	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(name, const, char *);
	cn.cn_namelen = strlen(name);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = MAKEENTRY;

	/* check for the two specially-handled attribute names */
	isfinderinfo = (bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0);
	isrsrcfork = isfinderinfo ? 0 : (bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0);
	/* all xattrs except the resource fork are written from offset 0 */
	if (!isrsrcfork)
		uio_setoffset(uio, 0);
	if (isfinderinfo) {
		/* FinderInfo is fixed-size (32 bytes); reject anything else */
		if (uio_resid(uio) != sizeof(finfo))
			return (ERANGE);
		error = uiomove((char*)&finfo, sizeof(finfo), uio);
		if (error)
			return (error);
		/* setting a FinderInfo of all zeroes means remove the FinderInfo */
		empty = 1;
		for (i=0, finfop=(uint32_t*)&finfo; i < (int)(sizeof(finfo)/sizeof(uint32_t)); i++)
			if (finfop[i]) {
				empty = 0;
				break;
			}
		if (empty && !(options & (XATTR_CREATE|XATTR_REPLACE))) {
			/* empty + unconstrained: just remove it, treating "not there" as success */
			error = nfs4_named_attr_remove(VTONFS(ap->a_vp), anp, name, ctx);
			if (error == ENOENT)
				error = 0;
			return (error);
		}
		/* first, let's see if we get a create/replace error */
	}

	/*
	 * create/open the xattr
	 *
	 * We need to make sure not to create it if XATTR_REPLACE.
	 * For all xattrs except the resource fork, we also want to
	 * truncate the xattr to remove any current data.  We'll do
	 * that by setting the size to 0 on create/open.
	 */
	flags = 0;
	if (!(options & XATTR_REPLACE))
		flags |= NFS_GET_NAMED_ATTR_CREATE;
	if (options & XATTR_CREATE)
		flags |= NFS_GET_NAMED_ATTR_CREATE_GUARDED;
	if (!isrsrcfork)
		flags |= NFS_GET_NAMED_ATTR_TRUNCATE;

	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_BOTH,
			flags, ctx, &anp, &nofp);
	if (!error && !anp)
		error = ENOATTR;
	if (error)
		goto out;
	/* grab the open state from the get/create/open */
	if (nofp && !(error = nfs_open_file_set_busy(nofp, NULL))) {
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		nofp->nof_creator = NULL;
		nfs_open_file_clear_busy(nofp);
	}

	/* Setting an empty FinderInfo really means remove it, skip to the close/remove */
	if (isfinderinfo && empty)
		goto doclose;

	/*
	 * Write the data out and flush.
	 *
	 * For FinderInfo, we've already copied the data to finfo, so do I/O from there.
	 */
	vwa.a_desc = &vnop_write_desc;
	vwa.a_vp = NFSTOV(anp);
	vwa.a_uio = NULL;
	vwa.a_ioflag = 0;
	vwa.a_context = ctx;
	if (isfinderinfo) {
		/* write the 32 FinderInfo bytes from our local copy */
		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE, &uio_buf, sizeof(uio_buf));
		uio_addiov(auio, (uintptr_t)&finfo, sizeof(finfo));
		vwa.a_uio = auio;
	} else if (uio_resid(uio) > 0) {
		vwa.a_uio = uio;
	}
	if (vwa.a_uio) {
		error = nfs_vnop_write(&vwa);
		if (!error)
			error = nfs_flush(anp, MNT_WAIT, vfs_context_thread(ctx), 0);
	}
doclose:
	/* Close the xattr. */
	if (nofp) {
		int busyerror = nfs_open_file_set_busy(nofp, NULL);
		closeerror = nfs_close(anp, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
		if (!busyerror)
			nfs_open_file_clear_busy(nofp);
	}
	if (!error && isfinderinfo && empty) { /* Setting an empty FinderInfo really means remove it */
		error = nfs4_named_attr_remove(VTONFS(ap->a_vp), anp, name, ctx);
		if (error == ENOENT)
			error = 0;
	}
	if (!error)
		error = closeerror;
out:
	if (anp)
		vnode_put(NFSTOV(anp));
	/* map "not found" to the xattr-specific error */
	if (error == ENOENT)
		error = ENOATTR;
	return (error);
}
7893
7894int
7895nfs4_vnop_removexattr(
7896	struct vnop_removexattr_args /* {
7897		struct vnodeop_desc *a_desc;
7898		vnode_t a_vp;
7899		const char * a_name;
7900		int a_options;
7901		vfs_context_t a_context;
7902	} */ *ap)
7903{
7904	struct nfsmount *nmp = VTONMP(ap->a_vp);
7905	int error;
7906
7907	if (nfs_mount_gone(nmp))
7908		return (ENXIO);
7909	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
7910		return (ENOTSUP);
7911
7912	error = nfs4_named_attr_remove(VTONFS(ap->a_vp), NULL, ap->a_name, ap->a_context);
7913	if (error == ENOENT)
7914		error = ENOATTR;
7915	return (error);
7916}
7917
/*
 * List the named attributes (xattrs) of an NFSv4 node by reading the
 * entries of its named attribute directory.  With a NULL uio, only the
 * total size of the name list is accumulated into *a_size.
 */
int
nfs4_vnop_listxattr(
	struct vnop_listxattr_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		uio_t a_uio;
		size_t *a_size;
		int a_options;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	nfsnode_t np = VTONFS(ap->a_vp);
	uio_t uio = ap->a_uio;
	nfsnode_t adnp = NULL;
	struct nfsmount *nmp;
	int error, done, i;
	struct nfs_vattr nvattr;
	uint64_t cookie, nextcookie, lbn = 0;
	struct nfsbuf *bp = NULL;
	struct nfs_dir_buf_header *ndbhp;
	struct direntry *dp;

	nmp = VTONMP(ap->a_vp);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
		return (ENOTSUP);

	/* if cached attributes say there are no named attributes, the list is empty */
	error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED);
	if (error)
		return (error);
	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
		return (0);

	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
		return (error);
	adnp = nfs4_named_attr_dir_get(np, 1, ctx);
	nfs_node_clear_busy(np);
	/* no attribute directory means nothing to list */
	if (!adnp)
		goto out;

	if ((error = nfs_node_lock(adnp)))
		goto out;

	/* toss stale cached directory data if the node was flagged for invalidation */
	if (adnp->n_flag & NNEEDINVALIDATE) {
		adnp->n_flag &= ~NNEEDINVALIDATE;
		nfs_invaldir(adnp);
		nfs_node_unlock(adnp);
		error = nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1);
		if (!error)
			error = nfs_node_lock(adnp);
		if (error)
			goto out;
	}

	/*
	 * check for need to invalidate when (re)starting at beginning
	 */
	if (adnp->n_flag & NMODIFIED) {
		nfs_invaldir(adnp);
		nfs_node_unlock(adnp);
		if ((error = nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1)))
			goto out;
	} else {
		nfs_node_unlock(adnp);
	}
	/* nfs_getattr() will check changed and purge caches */
	if ((error = nfs_getattr(adnp, &nvattr, ctx, NGA_UNCACHED)))
		goto out;

	/* nothing to do if the caller's buffer has no room left */
	if (uio && (uio_resid(uio) == 0))
		goto out;

	done = 0;
	nextcookie = lbn = 0;

	/* walk the attribute directory buffer by buffer until EOF */
	while (!error && !done) {
		OSAddAtomic64(1, &nfsstats.biocache_readdirs);
		cookie = nextcookie;
getbuffer:
		error = nfs_buf_get(adnp, lbn, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
		if (error)
			goto out;
		ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
		if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) {
			if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */
				ndbhp->ndbh_flags = 0;
				ndbhp->ndbh_count = 0;
				ndbhp->ndbh_entry_end = sizeof(*ndbhp);
				ndbhp->ndbh_ncgen = adnp->n_ncgen;
			}
			error = nfs_buf_readdir(bp, ctx);
			/* the buffer may have been dropped out from under us; retry with a fresh one */
			if (error == NFSERR_DIRBUFDROPPED)
				goto getbuffer;
			if (error)
				nfs_buf_release(bp, 1);
			if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) {
				/* hard readdir failure: toss the cached directory data */
				if (!nfs_node_lock(adnp)) {
					nfs_invaldir(adnp);
					nfs_node_unlock(adnp);
				}
				nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1);
				if (error == NFSERR_BAD_COOKIE)
					error = ENOENT;
			}
			if (error)
				goto out;
		}

		/* go through all the entries copying/counting */
		dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
		for (i=0; i < ndbhp->ndbh_count; i++) {
			if (!xattr_protected(dp->d_name)) {
				if (uio == NULL) {
					/* sizing pass: count the name plus its terminating NUL */
					*ap->a_size += dp->d_namlen + 1;
				} else if (uio_resid(uio) < (dp->d_namlen + 1)) {
					error = ERANGE;
				} else {
					error = uiomove(dp->d_name, dp->d_namlen+1, uio);
					if (error && (error != EFAULT))
						error = ERANGE;
				}
			}
			nextcookie = dp->d_seekoff;
			dp = NFS_DIRENTRY_NEXT(dp);
		}

		if (i == ndbhp->ndbh_count) {
			/* hit end of buffer, move to next buffer */
			lbn = nextcookie;
			/* if we also hit EOF, we're done */
			if (ISSET(ndbhp->ndbh_flags, NDB_EOF))
				done = 1;
		}
		if (!error && !done && (nextcookie == cookie)) {
			/* readdir isn't making progress; fail rather than loop forever */
			printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count);
			error = EIO;
		}
		nfs_buf_release(bp, 1);
	}
out:
	if (adnp)
		vnode_put(NFSTOV(adnp));
	return (error);
}
8066
8067#if NAMEDSTREAMS
8068int
8069nfs4_vnop_getnamedstream(
8070	struct vnop_getnamedstream_args /* {
8071		struct vnodeop_desc *a_desc;
8072		vnode_t a_vp;
8073		vnode_t *a_svpp;
8074		const char *a_name;
8075		enum nsoperation a_operation;
8076		int a_flags;
8077		vfs_context_t a_context;
8078	} */ *ap)
8079{
8080	vfs_context_t ctx = ap->a_context;
8081	struct nfsmount *nmp;
8082	struct nfs_vattr nvattr;
8083	struct componentname cn;
8084	nfsnode_t anp;
8085	int error = 0;
8086
8087	nmp = VTONMP(ap->a_vp);
8088	if (nfs_mount_gone(nmp))
8089		return (ENXIO);
8090
8091	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
8092		return (ENOTSUP);
8093	error = nfs_getattr(VTONFS(ap->a_vp), &nvattr, ctx, NGA_CACHED);
8094	if (error)
8095		return (error);
8096	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
8097	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
8098		return (ENOATTR);
8099
8100	bzero(&cn, sizeof(cn));
8101	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
8102	cn.cn_namelen = strlen(ap->a_name);
8103	cn.cn_nameiop = LOOKUP;
8104	cn.cn_flags = MAKEENTRY;
8105
8106	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_NONE,
8107			0, ctx, &anp, NULL);
8108	if ((!error && !anp) || (error == ENOENT))
8109		error = ENOATTR;
8110	if (!error && anp)
8111		*ap->a_svpp = NFSTOV(anp);
8112	else if (anp)
8113		vnode_put(NFSTOV(anp));
8114	return (error);
8115}
8116
8117int
8118nfs4_vnop_makenamedstream(
8119	struct vnop_makenamedstream_args /* {
8120		struct vnodeop_desc *a_desc;
8121		vnode_t *a_svpp;
8122		vnode_t a_vp;
8123		const char *a_name;
8124		int a_flags;
8125		vfs_context_t a_context;
8126	} */ *ap)
8127{
8128	vfs_context_t ctx = ap->a_context;
8129	struct nfsmount *nmp;
8130	struct componentname cn;
8131	nfsnode_t anp;
8132	int error = 0;
8133
8134	nmp = VTONMP(ap->a_vp);
8135	if (nfs_mount_gone(nmp))
8136		return (ENXIO);
8137
8138	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
8139		return (ENOTSUP);
8140
8141	bzero(&cn, sizeof(cn));
8142	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
8143	cn.cn_namelen = strlen(ap->a_name);
8144	cn.cn_nameiop = CREATE;
8145	cn.cn_flags = MAKEENTRY;
8146
8147	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_BOTH,
8148			NFS_GET_NAMED_ATTR_CREATE, ctx, &anp, NULL);
8149	if ((!error && !anp) || (error == ENOENT))
8150		error = ENOATTR;
8151	if (!error && anp)
8152		*ap->a_svpp = NFSTOV(anp);
8153	else if (anp)
8154		vnode_put(NFSTOV(anp));
8155	return (error);
8156}
8157
8158int
8159nfs4_vnop_removenamedstream(
8160	struct vnop_removenamedstream_args /* {
8161		struct vnodeop_desc *a_desc;
8162		vnode_t a_vp;
8163		vnode_t a_svp;
8164		const char *a_name;
8165		int a_flags;
8166		vfs_context_t a_context;
8167	} */ *ap)
8168{
8169	struct nfsmount *nmp = VTONMP(ap->a_vp);
8170	nfsnode_t np = ap->a_vp ? VTONFS(ap->a_vp) : NULL;
8171	nfsnode_t anp = ap->a_svp ? VTONFS(ap->a_svp) : NULL;
8172
8173	if (nfs_mount_gone(nmp))
8174		return (ENXIO);
8175
8176	/*
8177	 * Given that a_svp is a named stream, checking for
8178	 * named attribute support is kinda pointless.
8179	 */
8180	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
8181		return (ENOTSUP);
8182
8183	return (nfs4_named_attr_remove(np, anp, ap->a_name, ap->a_context));
8184}
8185
8186#endif
8187