1/*
2 * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * vnode op calls for NFS version 4
31 */
32#include <sys/param.h>
33#include <sys/kernel.h>
34#include <sys/systm.h>
35#include <sys/resourcevar.h>
36#include <sys/proc_internal.h>
37#include <sys/kauth.h>
38#include <sys/mount_internal.h>
39#include <sys/malloc.h>
40#include <sys/kpi_mbuf.h>
41#include <sys/conf.h>
42#include <sys/vnode_internal.h>
43#include <sys/dirent.h>
44#include <sys/fcntl.h>
45#include <sys/lockf.h>
46#include <sys/ubc_internal.h>
47#include <sys/attr.h>
48#include <sys/signalvar.h>
49#include <sys/uio_internal.h>
50#include <sys/xattr.h>
51#include <sys/paths.h>
52
53#include <vfs/vfs_support.h>
54
55#include <sys/vm.h>
56
57#include <sys/time.h>
58#include <kern/clock.h>
59#include <libkern/OSAtomic.h>
60
61#include <miscfs/fifofs/fifo.h>
62#include <miscfs/specfs/specdev.h>
63
64#include <nfs/rpcv2.h>
65#include <nfs/nfsproto.h>
66#include <nfs/nfs.h>
67#include <nfs/nfsnode.h>
68#include <nfs/nfs_gss.h>
69#include <nfs/nfsmount.h>
70#include <nfs/nfs_lock.h>
71#include <nfs/xdr_subs.h>
72#include <nfs/nfsm_subs.h>
73
74#include <net/if.h>
75#include <netinet/in.h>
76#include <netinet/in_var.h>
77#include <vm/vm_kern.h>
78
79#include <kern/task.h>
80#include <kern/sched_prim.h>
81
82int
83nfs4_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx)
84{
85	int error = 0, lockerror = ENOENT, status, numops, slot;
86	u_int64_t xid;
87	struct nfsm_chain nmreq, nmrep;
88	struct timeval now;
89	uint32_t access_result = 0, supported = 0, missing;
90	struct nfsmount *nmp = NFSTONMP(np);
91	int nfsvers = nmp->nm_vers;
92	uid_t uid;
93	struct nfsreq_secinfo_args si;
94
95	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
96		return (0);
97
98	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
99	nfsm_chain_null(&nmreq);
100	nfsm_chain_null(&nmrep);
101
102	// PUTFH, ACCESS, GETATTR
103	numops = 3;
104	nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED);
105	nfsm_chain_add_compound_header(error, &nmreq, "access", numops);
106	numops--;
107	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
108	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
109	numops--;
110	nfsm_chain_add_32(error, &nmreq, NFS_OP_ACCESS);
111	nfsm_chain_add_32(error, &nmreq, *access);
112	numops--;
113	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
114	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
115	nfsm_chain_build_done(error, &nmreq);
116	nfsm_assert(error, (numops == 0), EPROTO);
117	nfsmout_if(error);
118	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
119
120	if ((lockerror = nfs_node_lock(np)))
121		error = lockerror;
122	nfsm_chain_skip_tag(error, &nmrep);
123	nfsm_chain_get_32(error, &nmrep, numops);
124	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
125	nfsm_chain_op_check(error, &nmrep, NFS_OP_ACCESS);
126	nfsm_chain_get_32(error, &nmrep, supported);
127	nfsm_chain_get_32(error, &nmrep, access_result);
128	nfsmout_if(error);
129	if ((missing = (*access & ~supported))) {
130		/* missing support for something(s) we wanted */
131		if (missing & NFS_ACCESS_DELETE) {
132			/*
133			 * If the server doesn't report DELETE (possible
134			 * on UNIX systems), we'll assume that it is OK
135			 * and just let any subsequent delete action fail
136			 * if it really isn't deletable.
137			 */
138			access_result |= NFS_ACCESS_DELETE;
139		}
140	}
141	/* ".zfs" subdirectories may erroneously give a denied answer for modify/delete */
142	if (nfs_access_dotzfs) {
143		vnode_t dvp = NULLVP;
144		if (np->n_flag & NISDOTZFSCHILD) /* may be able to create/delete snapshot dirs */
145			access_result |= (NFS_ACCESS_MODIFY|NFS_ACCESS_EXTEND|NFS_ACCESS_DELETE);
146		else if (((dvp = vnode_getparent(NFSTOV(np))) != NULLVP) && (VTONFS(dvp)->n_flag & NISDOTZFSCHILD))
147			access_result |= NFS_ACCESS_DELETE; /* may be able to delete snapshot dirs */
148		if (dvp != NULLVP)
149			vnode_put(dvp);
150	}
151	/* Some servers report DELETE support but erroneously give a denied answer. */
152	if (nfs_access_delete && (*access & NFS_ACCESS_DELETE) && !(access_result & NFS_ACCESS_DELETE))
153		access_result |= NFS_ACCESS_DELETE;
154	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
155	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
156	nfsmout_if(error);
157
158	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
159	slot = nfs_node_access_slot(np, uid, 1);
160	np->n_accessuid[slot] = uid;
161	microuptime(&now);
162	np->n_accessstamp[slot] = now.tv_sec;
163	np->n_access[slot] = access_result;
164
165	/* pass back the access returned with this request */
166	*access = np->n_access[slot];
167nfsmout:
168	if (!lockerror)
169		nfs_node_unlock(np);
170	nfsm_chain_cleanup(&nmreq);
171	nfsm_chain_cleanup(&nmrep);
172	return (error);
173}
174
/*
 * NFSv4 GETATTR RPC: fetch attributes for the given file handle
 * (fhp/fhsize) into *nvap.  Works from either a node (np) or a
 * mount (mp); at least one must be usable to find the nfsmount.
 *
 * Compound sent: PUTFH, GETATTR.
 *
 * flags:
 *   NGA_MONITOR - vnode monitor request; sent "soft" via R_RECOVER.
 *   NGA_ACL     - also request the ACL attribute (only if the server
 *                 advertises ACL support).
 *
 * *xidp receives the transaction id of the request.
 */
int
nfs4_getattr_rpc(
	nfsnode_t np,
	mount_t mp,
	u_char *fhp,
	size_t fhsize,
	int flags,
	vfs_context_t ctx,
	struct nfs_vattr *nvap,
	u_int64_t *xidp)
{
	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
	int error = 0, status, nfsvers, numops, rpcflags = 0, acls;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* does the server claim ACL support for this filesystem? */
	acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL);

	/* referral trigger: synthesize default attributes instead of an RPC */
	if (np && (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)) {
		nfs4_default_attrs_for_referral_trigger(VTONFS(np->n_parent), NULL, 0, nvap, NULL);
		return (0);
	}

	if (flags & NGA_MONITOR) /* vnode monitor requests should be soft */
		rpcflags = R_RECOVER;

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR
	numops = 2;
	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "getattr", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, fhp, fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	/* only request the ACL when asked AND the server supports ACLs */
	if ((flags & NGA_ACL) && acls)
		NFS_BITMAP_SET(bitmap, NFS_FATTR_ACL);
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, mp, &nmreq, NFSPROC4_COMPOUND,
			vfs_context_thread(ctx), vfs_context_ucred(ctx),
			NULL, rpcflags, &nmrep, xidp, &status);

	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsmout_if(error);
	error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
	nfsmout_if(error);
	if ((flags & NGA_ACL) && acls && !NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_ACL)) {
		/* we asked for the ACL but didn't get one... assume there isn't one */
		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_ACL);
		nvap->nva_acl = NULL;
	}
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
246
/*
 * NFSv4 READLINK RPC: read the symlink contents of np into buf.
 * On entry *buflenp is the buffer size; on success it is updated to
 * the number of bytes actually copied.  The returned length is clamped
 * so it always fits in the caller's buffer.
 *
 * Compound sent: PUTFH, GETATTR, READLINK.
 */
int
nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error = 0, lockerror = ENOENT, status, numops;
	uint32_t len = 0;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	/* referral triggers are not readable links */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, READLINK
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_READLINK);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_READLINK);
	nfsm_chain_get_32(error, &nmrep, len);
	nfsmout_if(error);
	/*
	 * Clamp an over-long reply: prefer the node's known size when it
	 * fits in the buffer, otherwise take as much as the buffer holds
	 * (leaving one byte of headroom).
	 */
	if (len >= *buflenp) {
		if (np->n_size && (np->n_size < *buflenp))
			len = np->n_size;
		else
			len = *buflenp - 1;
	}
	nfsm_chain_get_opaque(error, &nmrep, len, buf);
	if (!error)
		*buflenp = len;
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
309
/*
 * Initiate an asynchronous NFSv4 READ of 'len' bytes at 'offset'.
 * The request handle is returned through *reqp; completion is handled
 * by nfs4_read_rpc_async_finish() (or via the callback info in cb).
 *
 * Compound sent: PUTFH, READ, GETATTR.
 */
int
nfs4_read_rpc_async(
	nfsnode_t np,
	off_t offset,
	size_t len,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsmount *nmp;
	int error = 0, nfsvers, numops;
	nfs_stateid stateid;
	struct nfsm_chain nmreq;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* referral triggers can't be read */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);

	// PUTFH, READ, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "read", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_READ);
	/* use the open/lock stateid appropriate for this thread/cred */
	nfs_get_stateid(np, thd, cred, &stateid);
	nfsm_chain_add_stateid(error, &nmreq, &stateid);
	nfsm_chain_add_64(error, &nmreq, offset);
	nfsm_chain_add_32(error, &nmreq, len);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, cb, reqp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	return (error);
}
360
/*
 * Complete an asynchronous NFSv4 READ started by nfs4_read_rpc_async().
 * On entry *lenp is the number of bytes requested; on return it holds
 * the number of bytes actually copied into 'uio'.  *eofp (if non-NULL)
 * is set to the server's EOF indication; a zero-byte, non-EOF reply is
 * reported to the caller as EOF.
 *
 * Returns EINPROGRESS (without consuming the reply) if the async
 * request was restarted.
 */
int
nfs4_read_rpc_async_finish(
	nfsnode_t np,
	struct nfsreq *req,
	uio_t uio,
	size_t *lenp,
	int *eofp)
{
	struct nfsmount *nmp;
	int error = 0, lockerror, nfsvers, numops, status, eof = 0;
	size_t retlen = 0;
	u_int64_t xid;
	struct nfsm_chain nmrep;

	nmp = NFSTONMP(np);
	if (!nmp) {
		/* mount is gone; cancel the outstanding request */
		nfs_request_async_cancel(req);
		return (ENXIO);
	}
	nfsvers = nmp->nm_vers;

	nfsm_chain_null(&nmrep);

	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
	if (error == EINPROGRESS) /* async request restarted */
		return (error);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_READ);
	nfsm_chain_get_32(error, &nmrep, eof);
	nfsm_chain_get_32(error, &nmrep, retlen);
	if (!error) {
		/* copy at most what was asked for, even if the server sent more */
		*lenp = MIN(retlen, *lenp);
		error = nfsm_chain_get_uio(&nmrep, *lenp, uio);
	}
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (!lockerror)
		nfs_node_unlock(np);
	if (eofp) {
		/* treat a zero-length, non-EOF reply as EOF */
		if (!eof && !retlen)
			eof = 1;
		*eofp = eof;
	}
	nfsm_chain_cleanup(&nmrep);
	/* track last I/O time on named-attribute files */
	if (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)
		microuptime(&np->n_lastio);
	return (error);
}
414
/*
 * Initiate an asynchronous NFSv4 WRITE of 'len' bytes from 'uio' at
 * the uio's current offset, with stability 'iomode' (UNSTABLE/DATASYNC/
 * FILESYNC).  The request handle is returned through *reqp; completion
 * is handled by nfs4_write_rpc_async_finish().
 *
 * Compound sent: PUTFH, WRITE, GETATTR.
 */
int
nfs4_write_rpc_async(
	nfsnode_t np,
	uio_t uio,
	size_t len,
	thread_t thd,
	kauth_cred_t cred,
	int iomode,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsmount *nmp;
	mount_t mp;
	int error = 0, nfsvers, numops;
	nfs_stateid stateid;
	struct nfsm_chain nmreq;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* referral triggers can't be written */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/* for async mounts, don't bother sending sync write requests */
	if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
		iomode = NFS_WRITE_UNSTABLE;

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);

	// PUTFH, WRITE, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED + len);
	nfsm_chain_add_compound_header(error, &nmreq, "write", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_WRITE);
	/* use the open/lock stateid appropriate for this thread/cred */
	nfs_get_stateid(np, thd, cred, &stateid);
	nfsm_chain_add_stateid(error, &nmreq, &stateid);
	nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
	nfsm_chain_add_32(error, &nmreq, iomode);
	nfsm_chain_add_32(error, &nmreq, len);
	if (!error)
		error = nfsm_chain_add_uio(&nmreq, uio, len);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, cb, reqp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	return (error);
}
476
/*
 * Complete an asynchronous NFSv4 WRITE started by nfs4_write_rpc_async().
 * Returns the number of bytes the server accepted through *rlenp, the
 * commitment level through *iomodep, and the server's write verifier
 * through *wverfp (if non-NULL).  The mount's cached write verifier is
 * updated when it is first seen or when the server's verifier changes.
 *
 * Returns EINPROGRESS (without consuming the reply) if the async
 * request was restarted.  A zero-length write reply is an error
 * (NFSERR_IO).
 */
int
nfs4_write_rpc_async_finish(
	nfsnode_t np,
	struct nfsreq *req,
	int *iomodep,
	size_t *rlenp,
	uint64_t *wverfp)
{
	struct nfsmount *nmp;
	int error = 0, lockerror = ENOENT, nfsvers, numops, status;
	int committed = NFS_WRITE_FILESYNC;
	size_t rlen = 0;
	u_int64_t xid, wverf;
	mount_t mp;
	struct nfsm_chain nmrep;

	nmp = NFSTONMP(np);
	if (!nmp) {
		/* mount is gone; cancel the outstanding request */
		nfs_request_async_cancel(req);
		return (ENXIO);
	}
	nfsvers = nmp->nm_vers;

	nfsm_chain_null(&nmrep);

	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
	if (error == EINPROGRESS) /* async request restarted */
		return (error);
	/* re-check the mount; it may have gone away while we waited */
	nmp = NFSTONMP(np);
	if (!nmp)
		error = ENXIO;
	if (!error && (lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_WRITE);
	nfsm_chain_get_32(error, &nmrep, rlen);
	nfsmout_if(error);
	*rlenp = rlen;
	if (rlen <= 0)
		error = NFSERR_IO;
	nfsm_chain_get_32(error, &nmrep, committed);
	nfsm_chain_get_64(error, &nmrep, wverf);
	nfsmout_if(error);
	if (wverfp)
		*wverfp = wverf;
	/* record the server's write verifier (and note any change) */
	lck_mtx_lock(&nmp->nm_lock);
	if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
		nmp->nm_verf = wverf;
		nmp->nm_state |= NFSSTA_HASWRITEVERF;
	} else if (nmp->nm_verf != wverf) {
		nmp->nm_verf = wverf;
	}
	lck_mtx_unlock(&nmp->nm_lock);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmrep);
	/* on async mounts, report unstable writes as FILESYNC to the caller */
	if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
		committed = NFS_WRITE_FILESYNC;
	*iomodep = committed;
	/* track last I/O time on named-attribute files */
	if (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)
		microuptime(&np->n_lastio);
	return (error);
}
546
/*
 * NFSv4 REMOVE RPC: remove the entry 'name' (length 'namelen') from
 * directory dnp.
 *
 * Compound sent: PUTFH, REMOVE, GETATTR.
 *
 * If the server replies NFSERR_GRACE, the whole request is retried
 * after a short sleep.  The value returned is the status of the
 * REMOVE operation itself (remove_error); errors from the trailing
 * attribute processing only invalidate the cached attributes.
 */
int
nfs4_remove_rpc(
	nfsnode_t dnp,
	char *name,
	int namelen,
	thread_t thd,
	kauth_cred_t cred)
{
	int error = 0, lockerror = ENOENT, remove_error = 0, status;
	struct nfsmount *nmp;
	int nfsvers, numops;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(dnp);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* can't remove entries from a referral trigger */
	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
restart:
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, REMOVE, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED + namelen);
	nfsm_chain_add_compound_header(error, &nmreq, "remove", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_REMOVE);
	nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(dnp)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_REMOVE);
	/* capture the REMOVE op's status; later errors don't override it */
	remove_error = error;
	nfsm_chain_check_change_info(error, &nmrep, dnp);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
	if (error && !lockerror)
		NATTRINVALIDATE(dnp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);

	if (!lockerror) {
		dnp->n_flag |= NMODIFIED;
		nfs_node_unlock(dnp);
	}
	if (error == NFSERR_GRACE) {
		/* server is in its grace period; wait a bit and retry */
		tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
		goto restart;
	}

	return (remove_error);
}
619
/*
 * NFSv4 RENAME RPC: rename fnameptr in directory fdnp to tnameptr in
 * directory tdnp.
 *
 * Compound sent: PUTFH(FROM), SAVEFH, PUTFH(TO), RENAME, GETATTR(TO),
 * RESTOREFH, GETATTR(FROM).
 *
 * Both directories' attributes are refreshed from the reply (parsed
 * against the same request xid); if either GETATTR fails, that
 * directory's cached attributes are invalidated instead.
 */
int
nfs4_rename_rpc(
	nfsnode_t fdnp,
	char *fnameptr,
	int fnamelen,
	nfsnode_t tdnp,
	char *tnameptr,
	int tnamelen,
	vfs_context_t ctx)
{
	int error = 0, lockerror = ENOENT, status, nfsvers, numops;
	struct nfsmount *nmp;
	u_int64_t xid, savedxid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(fdnp);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* neither directory may be a referral trigger */
	if (fdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	if (tdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	NFSREQ_SECINFO_SET(&si, fdnp, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH(FROM), SAVEFH, PUTFH(TO), RENAME, GETATTR(TO), RESTOREFH, GETATTR(FROM)
	numops = 7;
	nfsm_chain_build_alloc_init(error, &nmreq, 30 * NFSX_UNSIGNED + fnamelen + tnamelen);
	nfsm_chain_add_compound_header(error, &nmreq, "rename", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_RENAME);
	nfsm_chain_add_name(error, &nmreq, fnameptr, fnamelen, nmp);
	nfsm_chain_add_name(error, &nmreq, tnameptr, tnamelen, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, tdnp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, fdnp);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request(fdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	/* lock both directories for the attribute updates below */
	if ((lockerror = nfs_node_lock2(fdnp, tdnp)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_RENAME);
	nfsm_chain_check_change_info(error, &nmrep, fdnp);
	nfsm_chain_check_change_info(error, &nmrep, tdnp);
	/* directory attributes: if we don't get them, make sure to invalidate */
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	savedxid = xid;
	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, &xid);
	if (error && !lockerror)
		NATTRINVALIDATE(tdnp);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	/* both GETATTRs are parsed against the same request xid */
	xid = savedxid;
	nfsm_chain_loadattr(error, &nmrep, fdnp, nfsvers, &xid);
	if (error && !lockerror)
		NATTRINVALIDATE(fdnp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	if (!lockerror) {
		fdnp->n_flag |= NMODIFIED;
		tdnp->n_flag |= NMODIFIED;
		nfs_node_unlock2(fdnp, tdnp);
	}
	return (error);
}
711
712/*
713 * NFS V4 readdir RPC.
714 */
715int
716nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
717{
718	struct nfsmount *nmp;
719	int error = 0, lockerror, nfsvers, namedattr, rdirplus, bigcookies, numops;
720	int i, status, more_entries = 1, eof, bp_dropped = 0;
721	uint32_t nmreaddirsize, nmrsize;
722	uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed;
723	uint64_t cookie, lastcookie, xid, savedxid;
724	struct nfsm_chain nmreq, nmrep, nmrepsave;
725	fhandle_t fh;
726	struct nfs_vattr nvattr, *nvattrp;
727	struct nfs_dir_buf_header *ndbhp;
728	struct direntry *dp;
729	char *padstart, padlen;
730	const char *tag;
731	uint32_t entry_attrs[NFS_ATTR_BITMAP_LEN];
732	struct timeval now;
733	struct nfsreq_secinfo_args si;
734
735	nmp = NFSTONMP(dnp);
736	if (!nmp)
737		return (ENXIO);
738	nfsvers = nmp->nm_vers;
739	nmreaddirsize = nmp->nm_readdirsize;
740	nmrsize = nmp->nm_rsize;
741	bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
742	namedattr = (dnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) ? 1 : 0;
743	rdirplus = (NMFLAG(nmp, RDIRPLUS) || namedattr) ? 1 : 0;
744	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
745		return (EINVAL);
746	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
747
748	/*
749	 * Set up attribute request for entries.
750	 * For READDIRPLUS functionality, get everything.
751	 * Otherwise, just get what we need for struct direntry.
752	 */
753	if (rdirplus) {
754		tag = "readdirplus";
755		NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, entry_attrs);
756		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEHANDLE);
757	} else {
758		tag = "readdir";
759		NFS_CLEAR_ATTRIBUTES(entry_attrs);
760		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_TYPE);
761		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEID);
762		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_MOUNTED_ON_FILEID);
763	}
764	NFS_BITMAP_SET(entry_attrs, NFS_FATTR_RDATTR_ERROR);
765
766	/* lock to protect access to cookie verifier */
767	if ((lockerror = nfs_node_lock(dnp)))
768		return (lockerror);
769
770	/* determine cookie to use, and move dp to the right offset */
771	ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
772	dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
773	if (ndbhp->ndbh_count) {
774		for (i=0; i < ndbhp->ndbh_count-1; i++)
775			dp = NFS_DIRENTRY_NEXT(dp);
776		cookie = dp->d_seekoff;
777		dp = NFS_DIRENTRY_NEXT(dp);
778	} else {
779		cookie = bp->nb_lblkno;
780		/* increment with every buffer read */
781		OSAddAtomic64(1, &nfsstats.readdir_bios);
782	}
783	lastcookie = cookie;
784
785	/*
786	 * The NFS client is responsible for the "." and ".." entries in the
787	 * directory.  So, we put them at the start of the first buffer.
788	 * Don't bother for attribute directories.
789	 */
790	if (((bp->nb_lblkno == 0) && (ndbhp->ndbh_count == 0)) &&
791	    !(dnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)) {
792		fh.fh_len = 0;
793		fhlen = rdirplus ? fh.fh_len + 1 : 0;
794		xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
795		/* "." */
796		namlen = 1;
797		reclen = NFS_DIRENTRY_LEN(namlen + xlen);
798		if (xlen)
799			bzero(&dp->d_name[namlen+1], xlen);
800		dp->d_namlen = namlen;
801		strlcpy(dp->d_name, ".", namlen+1);
802		dp->d_fileno = dnp->n_vattr.nva_fileid;
803		dp->d_type = DT_DIR;
804		dp->d_reclen = reclen;
805		dp->d_seekoff = 1;
806		padstart = dp->d_name + dp->d_namlen + 1 + xlen;
807		dp = NFS_DIRENTRY_NEXT(dp);
808		padlen = (char*)dp - padstart;
809		if (padlen > 0)
810			bzero(padstart, padlen);
811		if (rdirplus) /* zero out attributes */
812			bzero(NFS_DIR_BUF_NVATTR(bp, 0), sizeof(struct nfs_vattr));
813
814		/* ".." */
815		namlen = 2;
816		reclen = NFS_DIRENTRY_LEN(namlen + xlen);
817		if (xlen)
818			bzero(&dp->d_name[namlen+1], xlen);
819		dp->d_namlen = namlen;
820		strlcpy(dp->d_name, "..", namlen+1);
821		if (dnp->n_parent)
822			dp->d_fileno = VTONFS(dnp->n_parent)->n_vattr.nva_fileid;
823		else
824			dp->d_fileno = dnp->n_vattr.nva_fileid;
825		dp->d_type = DT_DIR;
826		dp->d_reclen = reclen;
827		dp->d_seekoff = 2;
828		padstart = dp->d_name + dp->d_namlen + 1 + xlen;
829		dp = NFS_DIRENTRY_NEXT(dp);
830		padlen = (char*)dp - padstart;
831		if (padlen > 0)
832			bzero(padstart, padlen);
833		if (rdirplus) /* zero out attributes */
834			bzero(NFS_DIR_BUF_NVATTR(bp, 1), sizeof(struct nfs_vattr));
835
836		ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
837		ndbhp->ndbh_count = 2;
838	}
839
840	/*
841	 * Loop around doing readdir(plus) RPCs of size nm_readdirsize until
842	 * the buffer is full (or we hit EOF).  Then put the remainder of the
843	 * results in the next buffer(s).
844	 */
845	nfsm_chain_null(&nmreq);
846	nfsm_chain_null(&nmrep);
847	while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) {
848
849		// PUTFH, GETATTR, READDIR
850		numops = 3;
851		nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
852		nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
853		numops--;
854		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
855		nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
856		numops--;
857		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
858		nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
859		numops--;
860		nfsm_chain_add_32(error, &nmreq, NFS_OP_READDIR);
861		nfsm_chain_add_64(error, &nmreq, (cookie <= 2) ? 0 : cookie);
862		nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf);
863		nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
864		nfsm_chain_add_32(error, &nmreq, nmrsize);
865		nfsm_chain_add_bitmap_supported(error, &nmreq, entry_attrs, nmp, dnp);
866		nfsm_chain_build_done(error, &nmreq);
867		nfsm_assert(error, (numops == 0), EPROTO);
868		nfs_node_unlock(dnp);
869		nfsmout_if(error);
870		error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
871
872		if ((lockerror = nfs_node_lock(dnp)))
873			error = lockerror;
874
875		savedxid = xid;
876		nfsm_chain_skip_tag(error, &nmrep);
877		nfsm_chain_get_32(error, &nmrep, numops);
878		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
879		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
880		nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
881		nfsm_chain_op_check(error, &nmrep, NFS_OP_READDIR);
882		nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf);
883		nfsm_chain_get_32(error, &nmrep, more_entries);
884
885		if (!lockerror) {
886			nfs_node_unlock(dnp);
887			lockerror = ENOENT;
888		}
889		nfsmout_if(error);
890
891		if (rdirplus)
892			microuptime(&now);
893
894		/* loop through the entries packing them into the buffer */
895		while (more_entries) {
896			/* Entry: COOKIE, NAME, FATTR */
897			nfsm_chain_get_64(error, &nmrep, cookie);
898			nfsm_chain_get_32(error, &nmrep, namlen);
899			nfsmout_if(error);
900			if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) {
901				/* we've got a big cookie, make sure flag is set */
902				lck_mtx_lock(&nmp->nm_lock);
903				nmp->nm_state |= NFSSTA_BIGCOOKIES;
904				lck_mtx_unlock(&nmp->nm_lock);
905				bigcookies = 1;
906			}
907			/* just truncate names that don't fit in direntry.d_name */
908			if (namlen <= 0) {
909				error = EBADRPC;
910				goto nfsmout;
911			}
912			if (namlen > (sizeof(dp->d_name)-1)) {
913				skiplen = namlen - sizeof(dp->d_name) + 1;
914				namlen = sizeof(dp->d_name) - 1;
915			} else {
916				skiplen = 0;
917			}
918			/* guess that fh size will be same as parent */
919			fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0;
920			xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
921			attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0;
922			reclen = NFS_DIRENTRY_LEN(namlen + xlen);
923			space_needed = reclen + attrlen;
924			space_free = nfs_dir_buf_freespace(bp, rdirplus);
925			if (space_needed > space_free) {
926				/*
927				 * We still have entries to pack, but we've
928				 * run out of room in the current buffer.
929				 * So we need to move to the next buffer.
930				 * The block# for the next buffer is the
931				 * last cookie in the current buffer.
932				 */
933nextbuffer:
934				ndbhp->ndbh_flags |= NDB_FULL;
935				nfs_buf_release(bp, 0);
936				bp_dropped = 1;
937				bp = NULL;
938				error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
939				nfsmout_if(error);
940				/* initialize buffer */
941				ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
942				ndbhp->ndbh_flags = 0;
943				ndbhp->ndbh_count = 0;
944				ndbhp->ndbh_entry_end = sizeof(*ndbhp);
945				ndbhp->ndbh_ncgen = dnp->n_ncgen;
946				space_free = nfs_dir_buf_freespace(bp, rdirplus);
947				dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
948				/* increment with every buffer read */
949				OSAddAtomic64(1, &nfsstats.readdir_bios);
950			}
951			nmrepsave = nmrep;
952			dp->d_fileno = cookie; /* placeholder */
953			dp->d_seekoff = cookie;
954			dp->d_namlen = namlen;
955			dp->d_reclen = reclen;
956			dp->d_type = DT_UNKNOWN;
957			nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name);
958			nfsmout_if(error);
959			dp->d_name[namlen] = '\0';
960			if (skiplen)
961				nfsm_chain_adv(error, &nmrep,
962					nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen));
963			nfsmout_if(error);
964			nvattrp = rdirplus ? NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count) : &nvattr;
965			error = nfs4_parsefattr(&nmrep, NULL, nvattrp, &fh, NULL, NULL);
966			if (!error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_ACL)) {
967				/* we do NOT want ACLs returned to us here */
968				NFS_BITMAP_CLR(nvattrp->nva_bitmap, NFS_FATTR_ACL);
969				if (nvattrp->nva_acl) {
970					kauth_acl_free(nvattrp->nva_acl);
971					nvattrp->nva_acl = NULL;
972				}
973			}
974			if (error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_RDATTR_ERROR)) {
975				/* OK, we may not have gotten all of the attributes but we will use what we can. */
976				if ((error == NFSERR_MOVED) || (error == NFSERR_INVAL)) {
977					/* set this up to look like a referral trigger */
978					nfs4_default_attrs_for_referral_trigger(dnp, dp->d_name, namlen, nvattrp, &fh);
979				}
980				error = 0;
981			}
982			/* check for more entries after this one */
983			nfsm_chain_get_32(error, &nmrep, more_entries);
984			nfsmout_if(error);
985
986			/* Skip any "." and ".." entries returned from server. */
987			/* Also skip any bothersome named attribute entries. */
988			if (((dp->d_name[0] == '.') && ((namlen == 1) || ((namlen == 2) && (dp->d_name[1] == '.')))) ||
989			    (namedattr && (namlen == 11) && (!strcmp(dp->d_name, "SUNWattr_ro") || !strcmp(dp->d_name, "SUNWattr_rw")))) {
990				lastcookie = cookie;
991				continue;
992			}
993
994			if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_TYPE))
995				dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type));
996			if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEID))
997				dp->d_fileno = nvattrp->nva_fileid;
998			if (rdirplus) {
999				/* fileid is already in d_fileno, so stash xid in attrs */
1000				nvattrp->nva_fileid = savedxid;
1001				if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEHANDLE)) {
1002					fhlen = fh.fh_len + 1;
1003					xlen = fhlen + sizeof(time_t);
1004					reclen = NFS_DIRENTRY_LEN(namlen + xlen);
1005					space_needed = reclen + attrlen;
1006					if (space_needed > space_free) {
1007						/* didn't actually have the room... move on to next buffer */
1008						nmrep = nmrepsave;
1009						goto nextbuffer;
1010					}
1011					/* pack the file handle into the record */
1012					dp->d_name[dp->d_namlen+1] = fh.fh_len;
1013					bcopy(fh.fh_data, &dp->d_name[dp->d_namlen+2], fh.fh_len);
1014				} else {
1015					/* mark the file handle invalid */
1016					fh.fh_len = 0;
1017					fhlen = fh.fh_len + 1;
1018					xlen = fhlen + sizeof(time_t);
1019					reclen = NFS_DIRENTRY_LEN(namlen + xlen);
1020					bzero(&dp->d_name[dp->d_namlen+1], fhlen);
1021				}
1022				*(time_t*)(&dp->d_name[dp->d_namlen+1+fhlen]) = now.tv_sec;
1023				dp->d_reclen = reclen;
1024			}
1025			padstart = dp->d_name + dp->d_namlen + 1 + xlen;
1026			ndbhp->ndbh_count++;
1027			lastcookie = cookie;
1028
1029			/* advance to next direntry in buffer */
1030			dp = NFS_DIRENTRY_NEXT(dp);
1031			ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data;
1032			/* zero out the pad bytes */
1033			padlen = (char*)dp - padstart;
1034			if (padlen > 0)
1035				bzero(padstart, padlen);
1036		}
1037		/* Finally, get the eof boolean */
1038		nfsm_chain_get_32(error, &nmrep, eof);
1039		nfsmout_if(error);
1040		if (eof) {
1041			ndbhp->ndbh_flags |= (NDB_FULL|NDB_EOF);
1042			nfs_node_lock_force(dnp);
1043			dnp->n_eofcookie = lastcookie;
1044			nfs_node_unlock(dnp);
1045		} else {
1046			more_entries = 1;
1047		}
1048		if (bp_dropped) {
1049			nfs_buf_release(bp, 0);
1050			bp = NULL;
1051			break;
1052		}
1053		if ((lockerror = nfs_node_lock(dnp)))
1054			error = lockerror;
1055		nfsmout_if(error);
1056		nfsm_chain_cleanup(&nmrep);
1057		nfsm_chain_null(&nmreq);
1058	}
1059nfsmout:
1060	if (bp_dropped && bp)
1061		nfs_buf_release(bp, 0);
1062	if (!lockerror)
1063		nfs_node_unlock(dnp);
1064	nfsm_chain_cleanup(&nmreq);
1065	nfsm_chain_cleanup(&nmrep);
1066	return (bp_dropped ? NFSERR_DIRBUFDROPPED : error);
1067}
1068
1069int
1070nfs4_lookup_rpc_async(
1071	nfsnode_t dnp,
1072	char *name,
1073	int namelen,
1074	vfs_context_t ctx,
1075	struct nfsreq **reqp)
1076{
1077	int error = 0, isdotdot = 0, nfsvers, numops;
1078	struct nfsm_chain nmreq;
1079	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
1080	struct nfsmount *nmp;
1081	struct nfsreq_secinfo_args si;
1082
1083	nmp = NFSTONMP(dnp);
1084	if (!nmp)
1085		return (ENXIO);
1086	nfsvers = nmp->nm_vers;
1087	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
1088		return (EINVAL);
1089
1090	if ((name[0] == '.') && (name[1] == '.') && (namelen == 2)) {
1091		isdotdot = 1;
1092		NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
1093	} else {
1094		NFSREQ_SECINFO_SET(&si, dnp, dnp->n_fhp, dnp->n_fhsize, name, namelen);
1095	}
1096
1097	nfsm_chain_null(&nmreq);
1098
1099	// PUTFH, GETATTR, LOOKUP(P), GETFH, GETATTR (FH)
1100	numops = 5;
1101	nfsm_chain_build_alloc_init(error, &nmreq, 20 * NFSX_UNSIGNED + namelen);
1102	nfsm_chain_add_compound_header(error, &nmreq, "lookup", numops);
1103	numops--;
1104	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
1105	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
1106	numops--;
1107	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
1108	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
1109	numops--;
1110	if (isdotdot) {
1111		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUPP);
1112	} else {
1113		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
1114		nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
1115	}
1116	numops--;
1117	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETFH);
1118	numops--;
1119	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
1120	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
1121	/* some ".zfs" directories can't handle being asked for some attributes */
1122	if ((dnp->n_flag & NISDOTZFS) && !isdotdot)
1123		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
1124	if ((dnp->n_flag & NISDOTZFSCHILD) && isdotdot)
1125		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
1126	if (((namelen == 4) && (name[0] == '.') && (name[1] == 'z') && (name[2] == 'f') && (name[3] == 's')))
1127		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
1128	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
1129	nfsm_chain_build_done(error, &nmreq);
1130	nfsm_assert(error, (numops == 0), EPROTO);
1131	nfsmout_if(error);
1132	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND,
1133			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, reqp);
1134nfsmout:
1135	nfsm_chain_cleanup(&nmreq);
1136	return (error);
1137}
1138
1139
1140int
1141nfs4_lookup_rpc_async_finish(
1142	nfsnode_t dnp,
1143	char *name,
1144	int namelen,
1145	vfs_context_t ctx,
1146	struct nfsreq *req,
1147	u_int64_t *xidp,
1148	fhandle_t *fhp,
1149	struct nfs_vattr *nvap)
1150{
1151	int error = 0, lockerror = ENOENT, status, nfsvers, numops, isdotdot = 0;
1152	uint32_t op = NFS_OP_LOOKUP;
1153	u_int64_t xid;
1154	struct nfsmount *nmp;
1155	struct nfsm_chain nmrep;
1156
1157	nmp = NFSTONMP(dnp);
1158	nfsvers = nmp->nm_vers;
1159	if ((name[0] == '.') && (name[1] == '.') && (namelen == 2))
1160		isdotdot = 1;
1161
1162	nfsm_chain_null(&nmrep);
1163
1164	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
1165
1166	if ((lockerror = nfs_node_lock(dnp)))
1167		error = lockerror;
1168	nfsm_chain_skip_tag(error, &nmrep);
1169	nfsm_chain_get_32(error, &nmrep, numops);
1170	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
1171	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
1172	if (xidp)
1173		*xidp = xid;
1174	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
1175
1176	nfsm_chain_op_check(error, &nmrep, (isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP));
1177	nfsmout_if(error || !fhp || !nvap);
1178	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETFH);
1179	nfsm_chain_get_32(error, &nmrep, fhp->fh_len);
1180	nfsm_chain_get_opaque(error, &nmrep, fhp->fh_len, fhp->fh_data);
1181	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
1182	if ((error == NFSERR_MOVED) || (error == NFSERR_INVAL)) {
1183		/* set this up to look like a referral trigger */
1184		nfs4_default_attrs_for_referral_trigger(dnp, name, namelen, nvap, fhp);
1185		error = 0;
1186	} else {
1187		nfsmout_if(error);
1188		error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
1189	}
1190nfsmout:
1191	if (!lockerror)
1192		nfs_node_unlock(dnp);
1193	nfsm_chain_cleanup(&nmrep);
1194	if (!error && (op == NFS_OP_LOOKUP) && (nmp->nm_state & NFSSTA_NEEDSECINFO)) {
1195		/* We still need to get SECINFO to set default for mount. */
1196		/* Do so for the first LOOKUP that returns successfully. */
1197		struct nfs_sec sec;
1198
1199		sec.count = NX_MAX_SEC_FLAVORS;
1200		error = nfs4_secinfo_rpc(nmp, &req->r_secinfo, vfs_context_ucred(ctx), sec.flavors, &sec.count);
1201		/* [sigh] some implementations return "illegal" error for unsupported ops */
1202		if (error == NFSERR_OP_ILLEGAL)
1203			error = 0;
1204		if (!error) {
1205			/* set our default security flavor to the first in the list */
1206			lck_mtx_lock(&nmp->nm_lock);
1207			if (sec.count)
1208				nmp->nm_auth = sec.flavors[0];
1209			nmp->nm_state &= ~NFSSTA_NEEDSECINFO;
1210			lck_mtx_unlock(&nmp->nm_lock);
1211		}
1212	}
1213	return (error);
1214}
1215
/*
 * Send an NFSv4 COMMIT RPC (PUTFH, COMMIT, GETATTR) to flush previously
 * UNSTABLE-written data to stable storage on the server.
 *
 * Returns 0 on success, NFSERR_STALEWRITEVERF if the server's write
 * verifier no longer matches the caller's (the caller must resend its
 * writes), or another NFS/errno error.
 */
int
nfs4_commit_rpc(
	nfsnode_t np,
	uint64_t offset,
	uint64_t count,
	kauth_cred_t cred,
	uint64_t wverf)
{
	struct nfsmount *nmp;
	int error = 0, lockerror, status, nfsvers, numops;
	u_int64_t xid, newwverf;
	uint32_t count32;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
	if (!nmp)
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	/* no write verifier yet means nothing unstable has been written */
	if (!(nmp->nm_state & NFSSTA_HASWRITEVERF))
		return (0);
	nfsvers = nmp->nm_vers;

	/* the COMMIT count on the wire is 32 bits; 0 presumably means
	 * "commit from offset through EOF" — per the NFSv4 protocol */
	if (count > UINT32_MAX)
		count32 = 0;
	else
		count32 = count;

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, COMMIT, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 19 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "commit", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_COMMIT);
	nfsm_chain_add_64(error, &nmreq, offset);
	nfsm_chain_add_32(error, &nmreq, count32);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
			current_thread(), cred, &si, 0, &nmrep, &xid, &status);

	/* parse the reply; node lock held while loading cached attributes */
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_COMMIT);
	nfsm_chain_get_64(error, &nmrep, newwverf);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (!lockerror)
		nfs_node_unlock(np);
	nfsmout_if(error);
	/*
	 * Record the server's current write verifier; if it differs from the
	 * one the data was written under, the writes may have been lost in a
	 * server reboot — tell the caller to rewrite them.
	 */
	lck_mtx_lock(&nmp->nm_lock);
	if (nmp->nm_verf != newwverf)
		nmp->nm_verf = newwverf;
	if (wverf != newwverf)
		error = NFSERR_STALEWRITEVERF;
	lck_mtx_unlock(&nmp->nm_lock);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
1293
/*
 * Fetch "pathconf"-style filesystem attributes for an NFSv4 node via a
 * PUTFH + GETATTR compound, requesting the maxlink/maxname/no_trunc/
 * chown_restricted/case_* attributes on top of the standard GETATTR set.
 * Parsed results are returned in *nfsap and the node's attribute cache
 * is refreshed as a side effect.
 */
int
nfs4_pathconf_rpc(
	nfsnode_t np,
	struct nfs_fsattr *nfsap,
	vfs_context_t ctx)
{
	u_int64_t xid;
	int error = 0, lockerror, status, nfsvers, numops;
	struct nfsm_chain nmreq, nmrep;
	struct nfsmount *nmp = NFSTONMP(np);
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
	struct nfs_vattr nvattr;
	struct nfsreq_secinfo_args si;

	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	NVATTR_INIT(&nvattr);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	/* NFSv4: fetch "pathconf" info for this node */
	// PUTFH, GETATTR
	numops = 2;
	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "pathconf", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	/* standard attribute set plus the pathconf-related attributes */
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_MAXLINK);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_MAXNAME);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_NO_TRUNC);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_CHOWN_RESTRICTED);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_CASE_INSENSITIVE);
	NFS_BITMAP_SET(bitmap, NFS_FATTR_CASE_PRESERVING);
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsmout_if(error);
	/* fs attrs go to *nfsap; file attrs land in nvattr for the cache */
	error = nfs4_parsefattr(&nmrep, nfsap, &nvattr, NULL, NULL, NULL);
	nfsmout_if(error);
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	if (!error)
		nfs_loadattrcache(np, &nvattr, &xid, 0);
	if (!lockerror)
		nfs_node_unlock(np);
nfsmout:
	NVATTR_CLEANUP(&nvattr);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
1361
/*
 * NFSv4 getattr vnode op: fetch (possibly cached) NFS attributes for the
 * vnode and translate each one the server supplied into the corresponding
 * vnode_attr field.  ACLs are only requested/returned when the filesystem
 * advertises ACL support.
 */
int
nfs4_vnop_getattr(
	struct vnop_getattr_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		struct vnode_attr *a_vap;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode_attr *vap = ap->a_vap;
	struct nfsmount *nmp;
	struct nfs_vattr nva;
	int error, acls, ngaflags;

	if (!(nmp = VTONMP(ap->a_vp)))
		return (ENXIO);
	/* does this mount's filesystem support ACLs? */
	acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL);

	ngaflags = NGA_CACHED;
	/* only ask for the ACL if the caller wants it and the server has them */
	if (VATTR_IS_ACTIVE(vap, va_acl) && acls)
		ngaflags |= NGA_ACL;
	error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, ngaflags);
	if (error)
		return (error);

	/* copy what we have in nva to *a_vap */
	if (VATTR_IS_ACTIVE(vap, va_rdev) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_RAWDEV)) {
		dev_t rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
		VATTR_RETURN(vap, va_rdev, rdev);
	}
	if (VATTR_IS_ACTIVE(vap, va_nlink) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_NUMLINKS))
		VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
	if (VATTR_IS_ACTIVE(vap, va_data_size) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SIZE))
		VATTR_RETURN(vap, va_data_size, nva.nva_size);
	// VATTR_RETURN(vap, va_data_alloc, ???);
	// VATTR_RETURN(vap, va_total_size, ???);
	if (VATTR_IS_ACTIVE(vap, va_total_alloc) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SPACE_USED))
		VATTR_RETURN(vap, va_total_alloc, nva.nva_bytes);
	if (VATTR_IS_ACTIVE(vap, va_uid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER))
		VATTR_RETURN(vap, va_uid, nva.nva_uid);
	if (VATTR_IS_ACTIVE(vap, va_uuuid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER))
		VATTR_RETURN(vap, va_uuuid, nva.nva_uuuid);
	if (VATTR_IS_ACTIVE(vap, va_gid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER_GROUP))
		VATTR_RETURN(vap, va_gid, nva.nva_gid);
	if (VATTR_IS_ACTIVE(vap, va_guuid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER_GROUP))
		VATTR_RETURN(vap, va_guuid, nva.nva_guuid);
	if (VATTR_IS_ACTIVE(vap, va_mode)) {
		/* ACL-only mounts (or servers without a mode) report wide-open mode */
		if (NMFLAG(nmp, ACLONLY) || !NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_MODE))
			VATTR_RETURN(vap, va_mode, 0777);
		else
			VATTR_RETURN(vap, va_mode, nva.nva_mode);
	}
	/* map the NFS archive/hidden flags onto BSD file flags */
	if (VATTR_IS_ACTIVE(vap, va_flags) &&
	    (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE) ||
	     NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN) ||
	     (nva.nva_flags & NFS_FFLAG_TRIGGER))) {
		uint32_t flags = 0;
		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE) &&
		    (nva.nva_flags & NFS_FFLAG_ARCHIVED))
			flags |= SF_ARCHIVED;
		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN) &&
		    (nva.nva_flags & NFS_FFLAG_HIDDEN))
			flags |= UF_HIDDEN;
		VATTR_RETURN(vap, va_flags, flags);
	}
	if (VATTR_IS_ACTIVE(vap, va_create_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_CREATE)) {
		vap->va_create_time.tv_sec = nva.nva_timesec[NFSTIME_CREATE];
		vap->va_create_time.tv_nsec = nva.nva_timensec[NFSTIME_CREATE];
		VATTR_SET_SUPPORTED(vap, va_create_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_access_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_ACCESS)) {
		vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
		vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
		VATTR_SET_SUPPORTED(vap, va_access_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_modify_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_MODIFY)) {
		vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
		vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
		VATTR_SET_SUPPORTED(vap, va_modify_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_change_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_METADATA)) {
		vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
		vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
		VATTR_SET_SUPPORTED(vap, va_change_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_backup_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_BACKUP)) {
		vap->va_backup_time.tv_sec = nva.nva_timesec[NFSTIME_BACKUP];
		vap->va_backup_time.tv_nsec = nva.nva_timensec[NFSTIME_BACKUP];
		VATTR_SET_SUPPORTED(vap, va_backup_time);
	}
	if (VATTR_IS_ACTIVE(vap, va_fileid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_FILEID))
		VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
	if (VATTR_IS_ACTIVE(vap, va_type) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TYPE))
		VATTR_RETURN(vap, va_type, nva.nva_type);
	if (VATTR_IS_ACTIVE(vap, va_filerev) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_CHANGE))
		VATTR_RETURN(vap, va_filerev, nva.nva_change);

	if (VATTR_IS_ACTIVE(vap, va_acl) && acls) {
		/* hand the ACL to the caller; NULL it so cleanup won't free it */
		VATTR_RETURN(vap, va_acl, nva.nva_acl);
		nva.nva_acl = NULL;
	}

	// other attrs we might support someday:
	// VATTR_RETURN(vap, va_encoding, ??? /* potentially unnormalized UTF-8? */);

	NVATTR_CLEANUP(&nva);
	return (error);
}
1470
/*
 * NFSv4 SETATTR RPC: PUTFH + SETATTR + GETATTR compound.
 *
 * Skips attributes that aren't actually changing, invalidates cached ACLs
 * when the ACL or mode is being set, and verifies afterwards (via XID
 * sequencing) that a concurrent RPC can't leave stale attributes cached.
 * If a combined ACL+mode set fails with EINVAL, retries with just the ACL.
 */
int
nfs4_setattr_rpc(
	nfsnode_t np,
	struct vnode_attr *vap,
	vfs_context_t ctx)
{
	struct nfsmount *nmp = NFSTONMP(np);
	int error = 0, setattr_error = 0, lockerror = ENOENT, status, nfsvers, numops;
	u_int64_t xid, nextxid;
	struct nfsm_chain nmreq, nmrep;
	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
	uint32_t getbitmap[NFS_ATTR_BITMAP_LEN];
	uint32_t setbitmap[NFS_ATTR_BITMAP_LEN];
	nfs_stateid stateid;
	struct nfsreq_secinfo_args si;

	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	if (VATTR_IS_ACTIVE(vap, va_flags) && (vap->va_flags & ~(SF_ARCHIVED|UF_HIDDEN))) {
		/* we don't support setting unsupported flags (duh!) */
		if (vap->va_active & ~VNODE_ATTR_va_flags)
			return (EINVAL);	/* return EINVAL if other attributes also set */
		else
			return (ENOTSUP);	/* return ENOTSUP for chflags(2) */
	}

	/* don't bother requesting some changes if they don't look like they are changing */
	if (VATTR_IS_ACTIVE(vap, va_uid) && (vap->va_uid == np->n_vattr.nva_uid))
		VATTR_CLEAR_ACTIVE(vap, va_uid);
	if (VATTR_IS_ACTIVE(vap, va_gid) && (vap->va_gid == np->n_vattr.nva_gid))
		VATTR_CLEAR_ACTIVE(vap, va_gid);
	if (VATTR_IS_ACTIVE(vap, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &np->n_vattr.nva_uuuid))
		VATTR_CLEAR_ACTIVE(vap, va_uuuid);
	if (VATTR_IS_ACTIVE(vap, va_guuid) && kauth_guid_equal(&vap->va_guuid, &np->n_vattr.nva_guuid))
		VATTR_CLEAR_ACTIVE(vap, va_guuid);

tryagain:
	/* do nothing if no attributes will be sent */
	nfs_vattr_set_bitmap(nmp, bitmap, vap);
	if (!bitmap[0] && !bitmap[1])
		return (0);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	/*
	 * Prepare GETATTR bitmap: if we are setting the ACL or mode, we
	 * need to invalidate any cached ACL.  And if we had an ACL cached,
	 * we might as well also fetch the new value.
	 */
	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, getbitmap);
	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL) ||
	    NFS_BITMAP_ISSET(bitmap, NFS_FATTR_MODE)) {
		if (NACLVALID(np))
			NFS_BITMAP_SET(getbitmap, NFS_FATTR_ACL);
		NACLINVALIDATE(np);
	}

	// PUTFH, SETATTR, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 40 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "setattr", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_SETATTR);
	/* a size change (truncate) needs an open/lock stateid; other
	 * attribute changes use the special all-zero stateid */
	if (VATTR_IS_ACTIVE(vap, va_data_size))
		nfs_get_stateid(np, vfs_context_thread(ctx), vfs_context_ucred(ctx), &stateid);
	else
		stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0;
	nfsm_chain_add_stateid(error, &nmreq, &stateid);
	nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, getbitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_SETATTR);
	nfsmout_if(error == EBADRPC);
	/* stash the SETATTR result; the reply still carries the bitmap of
	 * attributes that WERE set even when SETATTR itself failed */
	setattr_error = error;
	error = 0;
	bmlen = NFS_ATTR_BITMAP_LEN;
	nfsm_chain_get_bitmap(error, &nmrep, setbitmap, bmlen);
	if (!error) {
		if (VATTR_IS_ACTIVE(vap, va_data_size) && (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
			microuptime(&np->n_lastio);
		nfs_vattr_set_supported(setbitmap, vap);
		error = setattr_error;
	}
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (error)
		NATTRINVALIDATE(np);
	/*
	 * We just changed the attributes and we want to make sure that we
	 * see the latest attributes.  Get the next XID.  If it's not the
	 * next XID after the SETATTR XID, then it's possible that another
	 * RPC was in flight at the same time and it might put stale attributes
	 * in the cache.  In that case, we invalidate the attributes and set
	 * the attribute cache XID to guarantee that newer attributes will
	 * get loaded next.
	 */
	nextxid = 0;
	nfs_get_xid(&nextxid);
	if (nextxid != (xid + 1)) {
		np->n_xid = nextxid;
		NATTRINVALIDATE(np);
	}
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	if ((setattr_error == EINVAL) && VATTR_IS_ACTIVE(vap, va_acl) && VATTR_IS_ACTIVE(vap, va_mode) && !NMFLAG(nmp, ACLONLY)) {
		/*
		 * Some server's may not like ACL/mode combos that get sent.
		 * If it looks like that's what the server choked on, try setting
		 * just the ACL and not the mode (unless it looks like everything
		 * but mode was already successfully set).
		 */
		/*
		 * NOTE(review): NFS_FATTR_MODE here looks like an attribute
		 * *number* being OR'd in where a bit mask seems intended —
		 * confirm against the NFS_FATTR definitions.
		 */
		if (((bitmap[0] & setbitmap[0]) != bitmap[0]) ||
		    ((bitmap[1] & (setbitmap[1]|NFS_FATTR_MODE)) != bitmap[1])) {
			VATTR_CLEAR_ACTIVE(vap, va_mode);
			error = 0;
			goto tryagain;
		}
	}
	return (error);
}
1615
1616/*
1617 * Wait for any pending recovery to complete.
1618 */
1619int
1620nfs_mount_state_wait_for_recovery(struct nfsmount *nmp)
1621{
1622	struct timespec ts = { 1, 0 };
1623	int error = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
1624
1625	lck_mtx_lock(&nmp->nm_lock);
1626	while (nmp->nm_state & NFSSTA_RECOVER) {
1627		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
1628			break;
1629		nfs_mount_sock_thread_wake(nmp);
1630		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
1631		slpflag = 0;
1632	}
1633	lck_mtx_unlock(&nmp->nm_lock);
1634
1635	return (error);
1636}
1637
1638/*
1639 * We're about to use/manipulate NFS mount's open/lock state.
1640 * Wait for any pending state recovery to complete, then
1641 * mark the state as being in use (which will hold off
1642 * the recovery thread until we're done).
1643 */
1644int
1645nfs_mount_state_in_use_start(struct nfsmount *nmp, thread_t thd)
1646{
1647	struct timespec ts = { 1, 0 };
1648	int error = 0, slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
1649
1650	if (!nmp)
1651		return (ENXIO);
1652	lck_mtx_lock(&nmp->nm_lock);
1653	if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) {
1654		lck_mtx_unlock(&nmp->nm_lock);
1655		return (ENXIO);
1656	}
1657	while (nmp->nm_state & NFSSTA_RECOVER) {
1658		if ((error = nfs_sigintr(nmp, NULL, thd, 1)))
1659			break;
1660		nfs_mount_sock_thread_wake(nmp);
1661		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
1662		slpflag = 0;
1663	}
1664	if (!error)
1665		nmp->nm_stateinuse++;
1666	lck_mtx_unlock(&nmp->nm_lock);
1667
1668	return (error);
1669}
1670
1671/*
1672 * We're done using/manipulating the NFS mount's open/lock
1673 * state.  If the given error indicates that recovery should
1674 * be performed, we'll initiate recovery.
1675 */
1676int
1677nfs_mount_state_in_use_end(struct nfsmount *nmp, int error)
1678{
1679	int restart = nfs_mount_state_error_should_restart(error);
1680
1681	if (!nmp)
1682		return (restart);
1683	lck_mtx_lock(&nmp->nm_lock);
1684	if (restart && (error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE)) {
1685		printf("nfs_mount_state_in_use_end: error %d, initiating recovery for %s, 0x%x\n",
1686			error, vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
1687		nfs_need_recover(nmp, error);
1688	}
1689	if (nmp->nm_stateinuse > 0)
1690		nmp->nm_stateinuse--;
1691	else
1692		panic("NFS mount state in use count underrun");
1693	if (!nmp->nm_stateinuse && (nmp->nm_state & NFSSTA_RECOVER))
1694		wakeup(&nmp->nm_stateinuse);
1695	lck_mtx_unlock(&nmp->nm_lock);
1696	if (error == NFSERR_GRACE)
1697		tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
1698
1699	return (restart);
1700}
1701
1702/*
1703 * Does the error mean we should restart/redo a state-related operation?
1704 */
1705int
1706nfs_mount_state_error_should_restart(int error)
1707{
1708	switch (error) {
1709	case NFSERR_STALE_STATEID:
1710	case NFSERR_STALE_CLIENTID:
1711	case NFSERR_ADMIN_REVOKED:
1712	case NFSERR_EXPIRED:
1713	case NFSERR_OLD_STATEID:
1714	case NFSERR_BAD_STATEID:
1715	case NFSERR_GRACE:
1716		return (1);
1717	}
1718	return (0);
1719}
1720
1721/*
1722 * In some cases we may want to limit how many times we restart a
1723 * state-related operation - e.g. we're repeatedly getting NFSERR_GRACE.
1724 * Base the limit on the lease (as long as it's not too short).
1725 */
1726uint
1727nfs_mount_state_max_restarts(struct nfsmount *nmp)
1728{
1729	return (MAX(nmp->nm_fsattr.nfsa_lease, 60));
1730}
1731
1732/*
1733 * Does the error mean we probably lost a delegation?
1734 */
1735int
1736nfs_mount_state_error_delegation_lost(int error)
1737{
1738	switch (error) {
1739	case NFSERR_STALE_STATEID:
1740	case NFSERR_ADMIN_REVOKED:
1741	case NFSERR_EXPIRED:
1742	case NFSERR_OLD_STATEID:
1743	case NFSERR_BAD_STATEID:
1744	case NFSERR_GRACE: /* ugh! (stupid) RFC 3530 specifically disallows CLAIM_DELEGATE_CUR during grace period? */
1745		return (1);
1746	}
1747	return (0);
1748}
1749
1750
1751/*
1752 * Mark an NFS node's open state as busy.
1753 */
1754int
1755nfs_open_state_set_busy(nfsnode_t np, thread_t thd)
1756{
1757	struct nfsmount *nmp;
1758	struct timespec ts = {2, 0};
1759	int error = 0, slpflag;
1760
1761	nmp = NFSTONMP(np);
1762	if (!nmp)
1763		return (ENXIO);
1764	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
1765
1766	lck_mtx_lock(&np->n_openlock);
1767	while (np->n_openflags & N_OPENBUSY) {
1768		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
1769			break;
1770		np->n_openflags |= N_OPENWANT;
1771		msleep(&np->n_openflags, &np->n_openlock, slpflag, "nfs_open_state_set_busy", &ts);
1772		slpflag = 0;
1773	}
1774	if (!error)
1775		np->n_openflags |= N_OPENBUSY;
1776	lck_mtx_unlock(&np->n_openlock);
1777
1778	return (error);
1779}
1780
1781/*
1782 * Clear an NFS node's open state busy flag and wake up
1783 * anyone wanting it.
1784 */
1785void
1786nfs_open_state_clear_busy(nfsnode_t np)
1787{
1788	int wanted;
1789
1790	lck_mtx_lock(&np->n_openlock);
1791	if (!(np->n_openflags & N_OPENBUSY))
1792		panic("nfs_open_state_clear_busy");
1793	wanted = (np->n_openflags & N_OPENWANT);
1794	np->n_openflags &= ~(N_OPENBUSY|N_OPENWANT);
1795	lck_mtx_unlock(&np->n_openlock);
1796	if (wanted)
1797		wakeup(&np->n_openflags);
1798}
1799
1800/*
1801 * Search a mount's open owner list for the owner for this credential.
1802 * If not found and "alloc" is set, then allocate a new one.
1803 */
1804struct nfs_open_owner *
1805nfs_open_owner_find(struct nfsmount *nmp, kauth_cred_t cred, int alloc)
1806{
1807	uid_t uid = kauth_cred_getuid(cred);
1808	struct nfs_open_owner *noop, *newnoop = NULL;
1809
1810tryagain:
1811	lck_mtx_lock(&nmp->nm_lock);
1812	TAILQ_FOREACH(noop, &nmp->nm_open_owners, noo_link) {
1813		if (kauth_cred_getuid(noop->noo_cred) == uid)
1814			break;
1815	}
1816
1817	if (!noop && !newnoop && alloc) {
1818		lck_mtx_unlock(&nmp->nm_lock);
1819		MALLOC(newnoop, struct nfs_open_owner *, sizeof(struct nfs_open_owner), M_TEMP, M_WAITOK);
1820		if (!newnoop)
1821			return (NULL);
1822		bzero(newnoop, sizeof(*newnoop));
1823		lck_mtx_init(&newnoop->noo_lock, nfs_open_grp, LCK_ATTR_NULL);
1824		newnoop->noo_mount = nmp;
1825		kauth_cred_ref(cred);
1826		newnoop->noo_cred = cred;
1827		newnoop->noo_name = OSAddAtomic(1, &nfs_open_owner_seqnum);
1828		TAILQ_INIT(&newnoop->noo_opens);
1829		goto tryagain;
1830	}
1831	if (!noop && newnoop) {
1832		newnoop->noo_flags |= NFS_OPEN_OWNER_LINK;
1833		TAILQ_INSERT_HEAD(&nmp->nm_open_owners, newnoop, noo_link);
1834		noop = newnoop;
1835	}
1836	lck_mtx_unlock(&nmp->nm_lock);
1837
1838	if (newnoop && (noop != newnoop))
1839		nfs_open_owner_destroy(newnoop);
1840
1841	if (noop)
1842		nfs_open_owner_ref(noop);
1843
1844	return (noop);
1845}
1846
1847/*
1848 * destroy an open owner that's no longer needed
1849 */
1850void
1851nfs_open_owner_destroy(struct nfs_open_owner *noop)
1852{
1853	if (noop->noo_cred)
1854		kauth_cred_unref(&noop->noo_cred);
1855	lck_mtx_destroy(&noop->noo_lock, nfs_open_grp);
1856	FREE(noop, M_TEMP);
1857}
1858
1859/*
1860 * acquire a reference count on an open owner
1861 */
1862void
1863nfs_open_owner_ref(struct nfs_open_owner *noop)
1864{
1865	lck_mtx_lock(&noop->noo_lock);
1866	noop->noo_refcnt++;
1867	lck_mtx_unlock(&noop->noo_lock);
1868}
1869
1870/*
1871 * drop a reference count on an open owner and destroy it if
1872 * it is no longer referenced and no longer on the mount's list.
1873 */
void
nfs_open_owner_rele(struct nfs_open_owner *noop)
{
	lck_mtx_lock(&noop->noo_lock);
	if (noop->noo_refcnt < 1)
		panic("nfs_open_owner_rele: no refcnt");
	noop->noo_refcnt--;
	/* Dropping the last reference while the owner is marked busy indicates a bug. */
	if (!noop->noo_refcnt && (noop->noo_flags & NFS_OPEN_OWNER_BUSY))
		panic("nfs_open_owner_rele: busy");
	/* XXX we may potentially want to clean up idle/unused open owner structures */
	/* Keep the owner alive while it is referenced or still on the mount's list. */
	if (noop->noo_refcnt || (noop->noo_flags & NFS_OPEN_OWNER_LINK)) {
		lck_mtx_unlock(&noop->noo_lock);
		return;
	}
	/* owner is no longer referenced or linked to mount, so destroy it */
	lck_mtx_unlock(&noop->noo_lock);
	nfs_open_owner_destroy(noop);
}
1892
1893/*
1894 * Mark an open owner as busy because we are about to
1895 * start an operation that uses and updates open owner state.
1896 */
1897int
1898nfs_open_owner_set_busy(struct nfs_open_owner *noop, thread_t thd)
1899{
1900	struct nfsmount *nmp;
1901	struct timespec ts = {2, 0};
1902	int error = 0, slpflag;
1903
1904	nmp = noop->noo_mount;
1905	if (!nmp)
1906		return (ENXIO);
1907	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
1908
1909	lck_mtx_lock(&noop->noo_lock);
1910	while (noop->noo_flags & NFS_OPEN_OWNER_BUSY) {
1911		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
1912			break;
1913		noop->noo_flags |= NFS_OPEN_OWNER_WANT;
1914		msleep(noop, &noop->noo_lock, slpflag, "nfs_open_owner_set_busy", &ts);
1915		slpflag = 0;
1916	}
1917	if (!error)
1918		noop->noo_flags |= NFS_OPEN_OWNER_BUSY;
1919	lck_mtx_unlock(&noop->noo_lock);
1920
1921	return (error);
1922}
1923
1924/*
1925 * Clear the busy flag on an open owner and wake up anyone waiting
1926 * to mark it busy.
1927 */
1928void
1929nfs_open_owner_clear_busy(struct nfs_open_owner *noop)
1930{
1931	int wanted;
1932
1933	lck_mtx_lock(&noop->noo_lock);
1934	if (!(noop->noo_flags & NFS_OPEN_OWNER_BUSY))
1935		panic("nfs_open_owner_clear_busy");
1936	wanted = (noop->noo_flags & NFS_OPEN_OWNER_WANT);
1937	noop->noo_flags &= ~(NFS_OPEN_OWNER_BUSY|NFS_OPEN_OWNER_WANT);
1938	lck_mtx_unlock(&noop->noo_lock);
1939	if (wanted)
1940		wakeup(noop);
1941}
1942
1943/*
1944 * Given an open/lock owner and an error code, increment the
1945 * sequence ID if appropriate.
1946 */
1947void
1948nfs_owner_seqid_increment(struct nfs_open_owner *noop, struct nfs_lock_owner *nlop, int error)
1949{
1950	switch (error) {
1951	case NFSERR_STALE_CLIENTID:
1952	case NFSERR_STALE_STATEID:
1953	case NFSERR_OLD_STATEID:
1954	case NFSERR_BAD_STATEID:
1955	case NFSERR_BAD_SEQID:
1956	case NFSERR_BADXDR:
1957	case NFSERR_RESOURCE:
1958	case NFSERR_NOFILEHANDLE:
1959		/* do not increment the open seqid on these errors */
1960		return;
1961	}
1962	if (noop)
1963		noop->noo_seqid++;
1964	if (nlop)
1965		nlop->nlo_seqid++;
1966}
1967
1968/*
1969 * Search a node's open file list for any conflicts with this request.
1970 * Also find this open owner's open file structure.
1971 * If not found and "alloc" is set, then allocate one.
1972 */
1973int
1974nfs_open_file_find(
1975	nfsnode_t np,
1976	struct nfs_open_owner *noop,
1977	struct nfs_open_file **nofpp,
1978	uint32_t accessMode,
1979	uint32_t denyMode,
1980	int alloc)
1981{
1982	*nofpp = NULL;
1983	return nfs_open_file_find_internal(np, noop, nofpp, accessMode, denyMode, alloc);
1984}
1985
1986/*
1987 * Internally, allow using a provisional nodeless nofp (passed in via *nofpp)
1988 * if an existing one is not found.  This is used in "create" scenarios to
1989 * officially add the provisional nofp to the node once the node is created.
1990 */
int
nfs_open_file_find_internal(
	nfsnode_t np,
	struct nfs_open_owner *noop,
	struct nfs_open_file **nofpp,
	uint32_t accessMode,
	uint32_t denyMode,
	int alloc)
{
	struct nfs_open_file *nofp = NULL, *nofp2, *newnofp = NULL;

	/* With no node there is nothing to search; jump straight to allocation. */
	if (!np)
		goto alloc;
tryagain:
	lck_mtx_lock(&np->n_openlock);
	TAILQ_FOREACH(nofp2, &np->n_opens, nof_link) {
		if (nofp2->nof_owner == noop) {
			nofp = nofp2;
			/* With no modes to check, the first match ends the scan. */
			if (!accessMode)
				break;
		}
		if ((accessMode & nofp2->nof_deny) || (denyMode & nofp2->nof_access)) {
			/* This request conflicts with an existing open on this client. */
			lck_mtx_unlock(&np->n_openlock);
			return (EACCES);
		}
	}

	/*
	 * If this open owner doesn't have an open
	 * file structure yet, we create one for it.
	 */
	if (!nofp && !*nofpp && !newnofp && alloc) {
		lck_mtx_unlock(&np->n_openlock);
alloc:
		MALLOC(newnofp, struct nfs_open_file *, sizeof(struct nfs_open_file), M_TEMP, M_WAITOK);
		if (!newnofp)
			return (ENOMEM);
		bzero(newnofp, sizeof(*newnofp));
		lck_mtx_init(&newnofp->nof_lock, nfs_open_grp, LCK_ATTR_NULL);
		newnofp->nof_owner = noop;
		nfs_open_owner_ref(noop);	/* open file holds a reference on its owner */
		newnofp->nof_np = np;
		lck_mtx_lock(&noop->noo_lock);
		TAILQ_INSERT_HEAD(&noop->noo_opens, newnofp, nof_oolink);
		lck_mtx_unlock(&noop->noo_lock);
		/* We dropped the node lock to allocate; re-scan in case we raced. */
		if (np)
			goto tryagain;
	}
	if (!nofp) {
		if (*nofpp) {
			/* Adopt the caller's provisional nofp onto this node. */
			(*nofpp)->nof_np = np;
			nofp = *nofpp;
		} else {
			nofp = newnofp;
		}
		if (nofp && np)
			TAILQ_INSERT_HEAD(&np->n_opens, nofp, nof_link);
	}
	if (np)
		lck_mtx_unlock(&np->n_openlock);

	/* Lost the race to an existing nofp: discard our allocation. */
	if (alloc && newnofp && (nofp != newnofp))
		nfs_open_file_destroy(newnofp);

	*nofpp = nofp;
	return (nofp ? 0 : ESRCH);
}
2059
2060/*
2061 * Destroy an open file structure.
2062 */
2063void
2064nfs_open_file_destroy(struct nfs_open_file *nofp)
2065{
2066	lck_mtx_lock(&nofp->nof_owner->noo_lock);
2067	TAILQ_REMOVE(&nofp->nof_owner->noo_opens, nofp, nof_oolink);
2068	lck_mtx_unlock(&nofp->nof_owner->noo_lock);
2069	nfs_open_owner_rele(nofp->nof_owner);
2070	lck_mtx_destroy(&nofp->nof_lock, nfs_open_grp);
2071	FREE(nofp, M_TEMP);
2072}
2073
2074/*
2075 * Mark an open file as busy because we are about to
2076 * start an operation that uses and updates open file state.
2077 */
2078int
2079nfs_open_file_set_busy(struct nfs_open_file *nofp, thread_t thd)
2080{
2081	struct nfsmount *nmp;
2082	struct timespec ts = {2, 0};
2083	int error = 0, slpflag;
2084
2085	nmp = nofp->nof_owner->noo_mount;
2086	if (!nmp)
2087		return (ENXIO);
2088	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
2089
2090	lck_mtx_lock(&nofp->nof_lock);
2091	while (nofp->nof_flags & NFS_OPEN_FILE_BUSY) {
2092		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
2093			break;
2094		nofp->nof_flags |= NFS_OPEN_FILE_WANT;
2095		msleep(nofp, &nofp->nof_lock, slpflag, "nfs_open_file_set_busy", &ts);
2096		slpflag = 0;
2097	}
2098	if (!error)
2099		nofp->nof_flags |= NFS_OPEN_FILE_BUSY;
2100	lck_mtx_unlock(&nofp->nof_lock);
2101
2102	return (error);
2103}
2104
2105/*
2106 * Clear the busy flag on an open file and wake up anyone waiting
2107 * to mark it busy.
2108 */
2109void
2110nfs_open_file_clear_busy(struct nfs_open_file *nofp)
2111{
2112	int wanted;
2113
2114	lck_mtx_lock(&nofp->nof_lock);
2115	if (!(nofp->nof_flags & NFS_OPEN_FILE_BUSY))
2116		panic("nfs_open_file_clear_busy");
2117	wanted = (nofp->nof_flags & NFS_OPEN_FILE_WANT);
2118	nofp->nof_flags &= ~(NFS_OPEN_FILE_BUSY|NFS_OPEN_FILE_WANT);
2119	lck_mtx_unlock(&nofp->nof_lock);
2120	if (wanted)
2121		wakeup(nofp);
2122}
2123
2124/*
2125 * Add the open state for the given access/deny modes to this open file.
2126 */
2127void
2128nfs_open_file_add_open(struct nfs_open_file *nofp, uint32_t accessMode, uint32_t denyMode, int delegated)
2129{
2130	lck_mtx_lock(&nofp->nof_lock);
2131	nofp->nof_access |= accessMode;
2132	nofp->nof_deny |= denyMode;
2133
2134	if (delegated) {
2135		if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
2136			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2137				nofp->nof_d_r++;
2138			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2139				nofp->nof_d_w++;
2140			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2141				nofp->nof_d_rw++;
2142		} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
2143			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2144				nofp->nof_d_r_dw++;
2145			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2146				nofp->nof_d_w_dw++;
2147			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2148				nofp->nof_d_rw_dw++;
2149		} else { /* NFS_OPEN_SHARE_DENY_BOTH */
2150			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2151				nofp->nof_d_r_drw++;
2152			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2153				nofp->nof_d_w_drw++;
2154			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2155				nofp->nof_d_rw_drw++;
2156		}
2157	} else {
2158		if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
2159			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2160				nofp->nof_r++;
2161			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2162				nofp->nof_w++;
2163			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2164				nofp->nof_rw++;
2165		} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
2166			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2167				nofp->nof_r_dw++;
2168			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2169				nofp->nof_w_dw++;
2170			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2171				nofp->nof_rw_dw++;
2172		} else { /* NFS_OPEN_SHARE_DENY_BOTH */
2173			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2174				nofp->nof_r_drw++;
2175			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2176				nofp->nof_w_drw++;
2177			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2178				nofp->nof_rw_drw++;
2179		}
2180	}
2181
2182	nofp->nof_opencnt++;
2183	lck_mtx_unlock(&nofp->nof_lock);
2184}
2185
2186/*
2187 * Find which particular open combo will be closed and report what
2188 * the new modes will be and whether the open was delegated.
2189 */
2190void
2191nfs_open_file_remove_open_find(
2192	struct nfs_open_file *nofp,
2193	uint32_t accessMode,
2194	uint32_t denyMode,
2195	uint32_t *newAccessMode,
2196	uint32_t *newDenyMode,
2197	int *delegated)
2198{
2199	/*
2200	 * Calculate new modes: a mode bit gets removed when there's only
2201	 * one count in all the corresponding counts
2202	 */
2203	*newAccessMode = nofp->nof_access;
2204	*newDenyMode = nofp->nof_deny;
2205
2206	if ((accessMode & NFS_OPEN_SHARE_ACCESS_READ) &&
2207	    (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ) &&
2208	    ((nofp->nof_r + nofp->nof_d_r +
2209	      nofp->nof_rw + nofp->nof_d_rw +
2210	      nofp->nof_r_dw + nofp->nof_d_r_dw +
2211	      nofp->nof_rw_dw + nofp->nof_d_rw_dw +
2212	      nofp->nof_r_drw + nofp->nof_d_r_drw +
2213	      nofp->nof_rw_dw + nofp->nof_d_rw_dw) == 1))
2214		*newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
2215	if ((accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) &&
2216	    (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE) &&
2217	    ((nofp->nof_w + nofp->nof_d_w +
2218	      nofp->nof_rw + nofp->nof_d_rw +
2219	      nofp->nof_w_dw + nofp->nof_d_w_dw +
2220	      nofp->nof_rw_dw + nofp->nof_d_rw_dw +
2221	      nofp->nof_w_drw + nofp->nof_d_w_drw +
2222	      nofp->nof_rw_dw + nofp->nof_d_rw_dw) == 1))
2223		*newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_WRITE;
2224	if ((denyMode & NFS_OPEN_SHARE_DENY_READ) &&
2225	    (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) &&
2226	    ((nofp->nof_r_drw + nofp->nof_d_r_drw +
2227	      nofp->nof_w_drw + nofp->nof_d_w_drw +
2228	      nofp->nof_rw_drw + nofp->nof_d_rw_drw) == 1))
2229		*newDenyMode &= ~NFS_OPEN_SHARE_DENY_READ;
2230	if ((denyMode & NFS_OPEN_SHARE_DENY_WRITE) &&
2231	    (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE) &&
2232	    ((nofp->nof_r_drw + nofp->nof_d_r_drw +
2233	      nofp->nof_w_drw + nofp->nof_d_w_drw +
2234	      nofp->nof_rw_drw + nofp->nof_d_rw_drw +
2235	      nofp->nof_r_dw + nofp->nof_d_r_dw +
2236	      nofp->nof_w_dw + nofp->nof_d_w_dw +
2237	      nofp->nof_rw_dw + nofp->nof_d_rw_dw) == 1))
2238		*newDenyMode &= ~NFS_OPEN_SHARE_DENY_WRITE;
2239
2240	/* Find the corresponding open access/deny mode counter. */
2241	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
2242		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2243			*delegated = (nofp->nof_d_r != 0);
2244		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2245			*delegated = (nofp->nof_d_w != 0);
2246		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2247			*delegated = (nofp->nof_d_rw != 0);
2248		else
2249			*delegated = 0;
2250	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
2251		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2252			*delegated = (nofp->nof_d_r_dw != 0);
2253		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2254			*delegated = (nofp->nof_d_w_dw != 0);
2255		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2256			*delegated = (nofp->nof_d_rw_dw != 0);
2257		else
2258			*delegated = 0;
2259	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
2260		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
2261			*delegated = (nofp->nof_d_r_drw != 0);
2262		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
2263			*delegated = (nofp->nof_d_w_drw != 0);
2264		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
2265			*delegated = (nofp->nof_d_rw_drw != 0);
2266		else
2267			*delegated = 0;
2268	}
2269}
2270
2271/*
2272 * Remove the open state for the given access/deny modes to this open file.
2273 */
void
nfs_open_file_remove_open(struct nfs_open_file *nofp, uint32_t accessMode, uint32_t denyMode)
{
	uint32_t newAccessMode, newDenyMode;
	int delegated = 0;

	lck_mtx_lock(&nofp->nof_lock);
	/* Determine resulting modes and whether this open combo was delegated. */
	nfs_open_file_remove_open_find(nofp, accessMode, denyMode, &newAccessMode, &newDenyMode, &delegated);

	/* Decrement the corresponding open access/deny mode counter. */
	/* An already-zero counter is logged as an underrun rather than wrapped. */
	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
			if (delegated) {
				if (nofp->nof_d_r == 0)
					NP(nofp->nof_np, "nfs: open(R) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_r--;
			} else {
				if (nofp->nof_r == 0)
					NP(nofp->nof_np, "nfs: open(R) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_r--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
			if (delegated) {
				if (nofp->nof_d_w == 0)
					NP(nofp->nof_np, "nfs: open(W) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_w--;
			} else {
				if (nofp->nof_w == 0)
					NP(nofp->nof_np, "nfs: open(W) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_w--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
			if (delegated) {
				if (nofp->nof_d_rw == 0)
					NP(nofp->nof_np, "nfs: open(RW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_rw--;
			} else {
				if (nofp->nof_rw == 0)
					NP(nofp->nof_np, "nfs: open(RW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_rw--;
			}
		}
	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
			if (delegated) {
				if (nofp->nof_d_r_dw == 0)
					NP(nofp->nof_np, "nfs: open(R,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_r_dw--;
			} else {
				if (nofp->nof_r_dw == 0)
					NP(nofp->nof_np, "nfs: open(R,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_r_dw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
			if (delegated) {
				if (nofp->nof_d_w_dw == 0)
					NP(nofp->nof_np, "nfs: open(W,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_w_dw--;
			} else {
				if (nofp->nof_w_dw == 0)
					NP(nofp->nof_np, "nfs: open(W,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_w_dw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
			if (delegated) {
				if (nofp->nof_d_rw_dw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_rw_dw--;
			} else {
				if (nofp->nof_rw_dw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_rw_dw--;
			}
		}
	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
			if (delegated) {
				if (nofp->nof_d_r_drw == 0)
					NP(nofp->nof_np, "nfs: open(R,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_r_drw--;
			} else {
				if (nofp->nof_r_drw == 0)
					NP(nofp->nof_np, "nfs: open(R,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_r_drw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
			if (delegated) {
				if (nofp->nof_d_w_drw == 0)
					NP(nofp->nof_np, "nfs: open(W,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_w_drw--;
			} else {
				if (nofp->nof_w_drw == 0)
					NP(nofp->nof_np, "nfs: open(W,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_w_drw--;
			}
		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
			if (delegated) {
				if (nofp->nof_d_rw_drw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_d_rw_drw--;
			} else {
				if (nofp->nof_rw_drw == 0)
					NP(nofp->nof_np, "nfs: open(RW,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
				else
					nofp->nof_rw_drw--;
			}
		}
	}

	/* update the modes */
	nofp->nof_access = newAccessMode;
	nofp->nof_deny = newDenyMode;
	nofp->nof_opencnt--;
	lck_mtx_unlock(&nofp->nof_lock);
}
2406
2407
2408/*
2409 * Get the current (delegation, lock, open, default) stateid for this node.
2410 * If node has a delegation, use that stateid.
2411 * If pid has a lock, use the lockowner's stateid.
2412 * Or use the open file's stateid.
2413 * If no open file, use a default stateid of all ones.
2414 */
void
nfs_get_stateid(nfsnode_t np, thread_t thd, kauth_cred_t cred, nfs_stateid *sid)
{
	struct nfsmount *nmp = NFSTONMP(np);
	proc_t p = thd ? get_bsdthreadtask_info(thd) : current_proc();  // XXX async I/O requests don't have a thread
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;
	struct nfs_lock_owner *nlop = NULL;
	nfs_stateid *s = NULL;

	/* Preference order: delegation stateid, then lock stateid, then open stateid. */
	if (np->n_openflags & N_DELEG_MASK) {
		s = &np->n_dstateid;
	} else {
		if (p)
			nlop = nfs_lock_owner_find(np, p, 0);
		if (nlop && !TAILQ_EMPTY(&nlop->nlo_locks)) {
			/* we hold locks, use lock stateid */
			s = &nlop->nlo_stateid;
		} else if (((noop = nfs_open_owner_find(nmp, cred, 0))) &&
			 (nfs_open_file_find(np, noop, &nofp, 0, 0, 0) == 0) &&
			 !(nofp->nof_flags & NFS_OPEN_FILE_LOST) &&
			 nofp->nof_access) {
			/* we (should) have the file open, use open stateid */
			/* Reclaim the open first if it needs to be reestablished with the server. */
			if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)
				nfs4_reopen(nofp, thd);
			if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST))
				s = &nofp->nof_stateid;
		}
	}

	if (s) {
		/* Copy the chosen stateid out field by field. */
		sid->seqid = s->seqid;
		sid->other[0] = s->other[0];
		sid->other[1] = s->other[1];
		sid->other[2] = s->other[2];
	} else {
		/* named attributes may not have a stateid for reads, so don't complain for them */
		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
			NP(np, "nfs_get_stateid: no stateid");
		/* Fall back to the special all-ones stateid. */
		sid->seqid = sid->other[0] = sid->other[1] = sid->other[2] = 0xffffffff;
	}
	/* Drop the references taken by the find calls above. */
	if (nlop)
		nfs_lock_owner_rele(nlop);
	if (noop)
		nfs_open_owner_rele(noop);
}
2461
2462
2463/*
2464 * When we have a delegation, we may be able to perform the OPEN locally.
2465 * Perform the OPEN by checking the delegation ACE and/or checking via ACCESS.
2466 */
int
nfs4_open_delegated(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	uint32_t accessMode,
	uint32_t denyMode,
	vfs_context_t ctx)
{
	int error = 0, ismember, readtoo = 0, authorized = 0;
	uint32_t action;
	struct kauth_acl_eval eval;
	kauth_cred_t cred = vfs_context_ucred(ctx);

	if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) {
		/*
		 * Try to open it for read access too,
		 * so the buffer cache can read data.
		 */
		readtoo = 1;
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}

tryagain:
	/* Translate the share access modes into kauth action bits. */
	action = 0;
	if (accessMode & NFS_OPEN_SHARE_ACCESS_READ)
		action |= KAUTH_VNODE_READ_DATA;
	if (accessMode & NFS_OPEN_SHARE_ACCESS_WRITE)
		action |= KAUTH_VNODE_WRITE_DATA;

	/* evaluate ACE (if we have one) */
	if (np->n_dace.ace_flags) {
		eval.ae_requested = action;
		eval.ae_acl = &np->n_dace;
		eval.ae_count = 1;
		eval.ae_options = 0;
		if (np->n_vattr.nva_uid == kauth_cred_getuid(cred))
			eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
		error = kauth_cred_ismember_gid(cred, np->n_vattr.nva_gid, &ismember);
		if (!error && ismember)
			eval.ae_options |= KAUTH_AEVAL_IN_GROUP;

		eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
		eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
		eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
		eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;

		error = kauth_acl_evaluate(cred, &eval);

		if (!error && (eval.ae_result == KAUTH_RESULT_ALLOW))
			authorized = 1;
	}

	if (!authorized) {
		/* need to ask the server via ACCESS */
		struct vnop_access_args naa;
		naa.a_desc = &vnop_access_desc;
		naa.a_vp = NFSTOV(np);
		naa.a_action = action;
		naa.a_context = ctx;
		if (!(error = nfs_vnop_access(&naa)))
			authorized = 1;
	}

	if (!authorized) {
		if (readtoo) {
			/* try again without the extra read access */
			accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
			readtoo = 0;
			goto tryagain;
		}
		return (error ? error : EACCES);
	}

	/* Authorized: record the open locally as a delegated open. */
	nfs_open_file_add_open(nofp, accessMode, denyMode, 1);

	return (0);
}
2544
2545
2546/*
2547 * Open a file with the given access/deny modes.
2548 *
2549 * If we have a delegation, we may be able to handle the open locally.
2550 * Otherwise, we will always send the open RPC even if this open's mode is
2551 * a subset of all the existing opens.  This makes sure that we will always
2552 * be able to do a downgrade to any of the open modes.
2553 *
2554 * Note: local conflicts should have already been checked in nfs_open_file_find().
2555 */
int
nfs4_open(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	uint32_t accessMode,
	uint32_t denyMode,
	vfs_context_t ctx)
{
	vnode_t vp = NFSTOV(np);
	vnode_t dvp = NULL;
	struct componentname cn;
	const char *vname = NULL;
	size_t namelen;
	char smallname[128];
	char *filename = NULL;
	int error = 0, readtoo = 0;

	/*
	 * We can handle the OPEN ourselves if we have a delegation,
	 * unless it's a read delegation and the open is asking for
	 * either write access or deny read.  We also don't bother to
	 * use the delegation if it's being returned.
	 */
	if (np->n_openflags & N_DELEG_MASK) {
		if ((error = nfs_open_state_set_busy(np, vfs_context_thread(ctx))))
			return (error);
		/* Re-check the delegation flags now that we hold the busy flag. */
		if ((np->n_openflags & N_DELEG_MASK) && !(np->n_openflags & N_DELEG_RETURN) &&
		    (((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) ||
		     (!(accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) && !(denyMode & NFS_OPEN_SHARE_DENY_READ)))) {
			error = nfs4_open_delegated(np, nofp, accessMode, denyMode, ctx);
			nfs_open_state_clear_busy(np);
			return (error);
		}
		nfs_open_state_clear_busy(np);
	}

	/*
	 * [sigh] We can't trust VFS to get the parent right for named
	 * attribute nodes.  (It likes to reparent the nodes after we've
	 * created them.)  Luckily we can probably get the right parent
	 * from the n_parent we have stashed away.
	 */
	if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
	    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
		dvp = NULL;
	if (!dvp)
		dvp = vnode_getparent(vp);
	vname = vnode_getname(vp);
	if (!dvp || !vname) {
		/* Without a parent and a name we can't build the OPEN request. */
		if (!error)
			error = EIO;
		goto out;
	}
	/* Use the on-stack buffer when the name fits; otherwise allocate one. */
	filename = &smallname[0];
	namelen = snprintf(filename, sizeof(smallname), "%s", vname);
	if (namelen >= sizeof(smallname)) {
		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
		if (!filename) {
			error = ENOMEM;
			goto out;
		}
		snprintf(filename, namelen+1, "%s", vname);
	}
	bzero(&cn, sizeof(cn));
	cn.cn_nameptr = filename;
	cn.cn_namelen = namelen;

	if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) {
		/*
		 * Try to open it for read access too,
		 * so the buffer cache can read data.
		 */
		readtoo = 1;
		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
	}
tryagain:
	error = nfs4_open_rpc(nofp, ctx, &cn, NULL, dvp, &vp, NFS_OPEN_NOCREATE, accessMode, denyMode);
	if (error) {
		if (!nfs_mount_state_error_should_restart(error) &&
		    (error != EINTR) && (error != ERESTART) && readtoo) {
			/* try again without the extra read access */
			accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
			readtoo = 0;
			goto tryagain;
		}
		goto out;
	}
	/* OPEN succeeded: record it locally as a non-delegated open. */
	nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
out:
	/* Release the name, parent vnode, and any heap-allocated name copy. */
	if (filename && (filename != &smallname[0]))
		FREE(filename, M_TEMP);
	if (vname)
		vnode_putname(vname);
	if (dvp != NULLVP)
		vnode_put(dvp);
	return (error);
}
2653
2654int
2655nfs_vnop_mmap(
2656	struct vnop_mmap_args /* {
2657		struct vnodeop_desc *a_desc;
2658		vnode_t a_vp;
2659		int a_fflags;
2660		vfs_context_t a_context;
2661	} */ *ap)
2662{
2663	vfs_context_t ctx = ap->a_context;
2664	vnode_t vp = ap->a_vp;
2665	nfsnode_t np = VTONFS(vp);
2666	int error = 0, accessMode, denyMode, delegated;
2667	struct nfsmount *nmp;
2668	struct nfs_open_owner *noop = NULL;
2669	struct nfs_open_file *nofp = NULL;
2670
2671	nmp = VTONMP(vp);
2672	if (!nmp)
2673		return (ENXIO);
2674
2675	if (!vnode_isreg(vp) || !(ap->a_fflags & (PROT_READ|PROT_WRITE)))
2676		return (EINVAL);
2677	if (np->n_flag & NREVOKE)
2678		return (EIO);
2679
2680	/*
2681	 * fflags contains some combination of: PROT_READ, PROT_WRITE
2682	 * Since it's not possible to mmap() without having the file open for reading,
2683	 * read access is always there (regardless if PROT_READ is not set).
2684	 */
2685	accessMode = NFS_OPEN_SHARE_ACCESS_READ;
2686	if (ap->a_fflags & PROT_WRITE)
2687		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
2688	denyMode = NFS_OPEN_SHARE_DENY_NONE;
2689
2690	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
2691	if (!noop)
2692		return (ENOMEM);
2693
2694restart:
2695	error = nfs_mount_state_in_use_start(nmp, NULL);
2696	if (error) {
2697		nfs_open_owner_rele(noop);
2698		return (error);
2699	}
2700	if (np->n_flag & NREVOKE) {
2701		error = EIO;
2702		nfs_mount_state_in_use_end(nmp, 0);
2703		nfs_open_owner_rele(noop);
2704		return (error);
2705	}
2706
2707	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
2708	if (error || (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))) {
2709		NP(np, "nfs_vnop_mmap: no open file for owner, error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
2710		error = EPERM;
2711	}
2712	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
2713		nfs_mount_state_in_use_end(nmp, 0);
2714		error = nfs4_reopen(nofp, NULL);
2715		nofp = NULL;
2716		if (!error)
2717			goto restart;
2718	}
2719	if (!error)
2720		error = nfs_open_file_set_busy(nofp, NULL);
2721	if (error) {
2722		nofp = NULL;
2723		goto out;
2724	}
2725
2726	/*
2727	 * The open reference for mmap must mirror an existing open because
2728	 * we may need to reclaim it after the file is closed.
2729	 * So grab another open count matching the accessMode passed in.
2730	 * If we already had an mmap open, prefer read/write without deny mode.
2731	 * This means we may have to drop the current mmap open first.
2732	 */
2733
2734	if (!nofp->nof_access) {
2735		if (accessMode != NFS_OPEN_SHARE_ACCESS_READ) {
2736			/* not asking for just read access -> fail */
2737			error = EPERM;
2738			goto out;
2739		}
2740		/* we don't have the file open, so open it for read access */
2741		if (nmp->nm_vers < NFS_VER4) {
2742			/* NFS v2/v3 opens are always allowed - so just add it. */
2743			nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
2744			error = 0;
2745		} else {
2746			error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
2747		}
2748		if (!error)
2749			nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
2750		if (error)
2751			goto out;
2752	}
2753
2754	/* determine deny mode for open */
2755	if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
2756		if (nofp->nof_d_rw || nofp->nof_d_rw_dw || nofp->nof_d_rw_drw) {
2757			delegated = 1;
2758			if (nofp->nof_d_rw)
2759				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2760			else if (nofp->nof_d_rw_dw)
2761				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2762			else if (nofp->nof_d_rw_drw)
2763				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2764		} else if (nofp->nof_rw || nofp->nof_rw_dw || nofp->nof_rw_drw) {
2765			delegated = 0;
2766			if (nofp->nof_rw)
2767				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2768			else if (nofp->nof_rw_dw)
2769				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2770			else if (nofp->nof_rw_drw)
2771				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2772		} else {
2773			error = EPERM;
2774		}
2775	} else { /* NFS_OPEN_SHARE_ACCESS_READ */
2776		if (nofp->nof_d_r || nofp->nof_d_r_dw || nofp->nof_d_r_drw) {
2777			delegated = 1;
2778			if (nofp->nof_d_r)
2779				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2780			else if (nofp->nof_d_r_dw)
2781				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2782			else if (nofp->nof_d_r_drw)
2783				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2784		} else if (nofp->nof_r || nofp->nof_r_dw || nofp->nof_r_drw) {
2785			delegated = 0;
2786			if (nofp->nof_r)
2787				denyMode = NFS_OPEN_SHARE_DENY_NONE;
2788			else if (nofp->nof_r_dw)
2789				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
2790			else if (nofp->nof_r_drw)
2791				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
2792		} else {
2793			error = EPERM;
2794		}
2795	}
2796	if (error) /* mmap mode without proper open mode */
2797		goto out;
2798
2799	/*
2800	 * If the existing mmap access is more than the new access OR the
2801	 * existing access is the same and the existing deny mode is less,
2802	 * then we'll stick with the existing mmap open mode.
2803	 */
2804	if ((nofp->nof_mmap_access > accessMode) ||
2805	    ((nofp->nof_mmap_access == accessMode) && (nofp->nof_mmap_deny <= denyMode)))
2806		goto out;
2807
2808	/* update mmap open mode */
2809	if (nofp->nof_mmap_access) {
2810		error = nfs_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
2811		if (error) {
2812			if (!nfs_mount_state_error_should_restart(error))
2813				NP(np, "nfs_vnop_mmap: close of previous mmap mode failed: %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
2814			NP(np, "nfs_vnop_mmap: update, close error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
2815			goto out;
2816		}
2817		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
2818	}
2819
2820	nfs_open_file_add_open(nofp, accessMode, denyMode, delegated);
2821	nofp->nof_mmap_access = accessMode;
2822	nofp->nof_mmap_deny = denyMode;
2823
2824out:
2825	if (nofp)
2826		nfs_open_file_clear_busy(nofp);
2827	if (nfs_mount_state_in_use_end(nmp, error)) {
2828		nofp = NULL;
2829		goto restart;
2830	}
2831	if (noop)
2832		nfs_open_owner_rele(noop);
2833
2834	if (!error) {
2835		int ismapped = 0;
2836		nfs_node_lock_force(np);
2837		if ((np->n_flag & NISMAPPED) == 0) {
2838			np->n_flag |= NISMAPPED;
2839			ismapped = 1;
2840		}
2841		nfs_node_unlock(np);
2842		if (ismapped) {
2843			lck_mtx_lock(&nmp->nm_lock);
2844			nmp->nm_state &= ~NFSSTA_SQUISHY;
2845			nmp->nm_curdeadtimeout = nmp->nm_deadtimeout;
2846			if (nmp->nm_curdeadtimeout <= 0)
2847				nmp->nm_deadto_start = 0;
2848			nmp->nm_mappers++;
2849			lck_mtx_unlock(&nmp->nm_lock);
2850		}
2851	}
2852
2853	return (error);
2854}
2855
2856
/*
 * NFS vnode op: the vnode is no longer mmapped (mnomap).
 *
 * Clear the node's mapped state and drop the mount's mapper count,
 * flush dirty data, then close the open(s) that were being held on
 * behalf of the mmap reference.
 */
int
nfs_vnop_mnomap(
	struct vnop_mnomap_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	struct nfs_open_file *nofp = NULL;
	off_t size;
	int error;
	int is_mapped_flag = 0;

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);

	/* clear the node's mapped flag (under the node lock), noting whether it was set */
	nfs_node_lock_force(np);
	if (np->n_flag & NISMAPPED) {
		is_mapped_flag = 1;
		np->n_flag &= ~NISMAPPED;
	}
	nfs_node_unlock(np);
	if (is_mapped_flag) {
		/* this node no longer counts toward the mount's mmapped files */
		lck_mtx_lock(&nmp->nm_lock);
		if (nmp->nm_mappers)
			nmp->nm_mappers--;
		else
			NP(np, "nfs_vnop_mnomap: removing mmap reference from mount, but mount has no files mmapped");
		lck_mtx_unlock(&nmp->nm_lock);
	}

	/* flush buffers/ubc before we drop the open (in case it's our last open) */
	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
	if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp)))
		ubc_msync(vp, 0, size, NULL, UBC_PUSHALL | UBC_SYNC);

	/* walk all open files and close all mmap opens */
loop:
	error = nfs_mount_state_in_use_start(nmp, NULL);
	if (error)
		return (error);
	lck_mtx_lock(&np->n_openlock);
	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
		if (!nofp->nof_mmap_access)
			continue;
		/*
		 * Found an open with mmap access.  We drop the open lock to
		 * work on it and restart the scan (goto loop) afterwards,
		 * since the list may have changed while unlocked.
		 */
		lck_mtx_unlock(&np->n_openlock);
		if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
			/* open needs to be reclaimed first (e.g. after server restart) */
			nfs_mount_state_in_use_end(nmp, 0);
			error = nfs4_reopen(nofp, NULL);
			if (!error)
				goto loop;
		}
		if (!error)
			error = nfs_open_file_set_busy(nofp, NULL);
		if (error) {
			/* retake the lock so the loop-exit unlock below is balanced */
			lck_mtx_lock(&np->n_openlock);
			break;
		}
		if (nofp->nof_mmap_access) {
			error = nfs_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
			if (!nfs_mount_state_error_should_restart(error)) {
				if (error) /* not a state-operation-restarting error, so just clear the access */
					NP(np, "nfs_vnop_mnomap: close of mmap mode failed: %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
				nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
			}
			if (error)
				NP(np, "nfs_vnop_mnomap: error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
		}
		nfs_open_file_clear_busy(nofp);
		nfs_mount_state_in_use_end(nmp, error);
		goto loop;
	}
	lck_mtx_unlock(&np->n_openlock);
	nfs_mount_state_in_use_end(nmp, error);
	return (error);
}
2938
2939/*
2940 * Search a node's lock owner list for the owner for this process.
2941 * If not found and "alloc" is set, then allocate a new one.
2942 */
struct nfs_lock_owner *
nfs_lock_owner_find(nfsnode_t np, proc_t p, int alloc)
{
	pid_t pid = proc_pid(p);
	struct nfs_lock_owner *nlop, *newnlop = NULL;

tryagain:
	lck_mtx_lock(&np->n_openlock);
	TAILQ_FOREACH(nlop, &np->n_lock_owners, nlo_link) {
		if (nlop->nlo_pid != pid)
			continue;
		/* same pid AND same process start time -> same process, use it */
		if (timevalcmp(&nlop->nlo_pid_start, &p->p_start, ==))
			break;
		/* stale lock owner... reuse it if we can */
		if (nlop->nlo_refcnt) {
			/* still referenced: unlink it and rescan the list from scratch */
			TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link);
			nlop->nlo_flags &= ~NFS_LOCK_OWNER_LINK;
			lck_mtx_unlock(&np->n_openlock);
			goto tryagain;
		}
		/* unreferenced: reset its state for the new process incarnation */
		nlop->nlo_pid_start = p->p_start;
		nlop->nlo_seqid = 0;
		nlop->nlo_stategenid = 0;
		break;
	}

	if (!nlop && !newnlop && alloc) {
		/* no match found; drop the lock to allocate a new lock owner */
		lck_mtx_unlock(&np->n_openlock);
		MALLOC(newnlop, struct nfs_lock_owner *, sizeof(struct nfs_lock_owner), M_TEMP, M_WAITOK);
		if (!newnlop)
			return (NULL);
		bzero(newnlop, sizeof(*newnlop));
		lck_mtx_init(&newnlop->nlo_lock, nfs_open_grp, LCK_ATTR_NULL);
		newnlop->nlo_pid = pid;
		newnlop->nlo_pid_start = p->p_start;
		newnlop->nlo_name = OSAddAtomic(1, &nfs_lock_owner_seqnum);
		TAILQ_INIT(&newnlop->nlo_locks);
		/* rescan: someone else may have added a matching owner while we allocated */
		goto tryagain;
	}
	if (!nlop && newnlop) {
		/* still no match; link in the one we allocated */
		newnlop->nlo_flags |= NFS_LOCK_OWNER_LINK;
		TAILQ_INSERT_HEAD(&np->n_lock_owners, newnlop, nlo_link);
		nlop = newnlop;
	}
	lck_mtx_unlock(&np->n_openlock);

	/* if our allocation lost the race to an existing owner, toss it */
	if (newnlop && (nlop != newnlop))
		nfs_lock_owner_destroy(newnlop);

	/* return the lock owner with a reference held for the caller */
	if (nlop)
		nfs_lock_owner_ref(nlop);

	return (nlop);
}
2997
2998/*
2999 * destroy a lock owner that's no longer needed
3000 */
3001void
3002nfs_lock_owner_destroy(struct nfs_lock_owner *nlop)
3003{
3004	if (nlop->nlo_open_owner) {
3005		nfs_open_owner_rele(nlop->nlo_open_owner);
3006		nlop->nlo_open_owner = NULL;
3007	}
3008	lck_mtx_destroy(&nlop->nlo_lock, nfs_open_grp);
3009	FREE(nlop, M_TEMP);
3010}
3011
3012/*
3013 * acquire a reference count on a lock owner
3014 */
3015void
3016nfs_lock_owner_ref(struct nfs_lock_owner *nlop)
3017{
3018	lck_mtx_lock(&nlop->nlo_lock);
3019	nlop->nlo_refcnt++;
3020	lck_mtx_unlock(&nlop->nlo_lock);
3021}
3022
3023/*
3024 * drop a reference count on a lock owner and destroy it if
3025 * it is no longer referenced and no longer on the mount's list.
3026 */
3027void
3028nfs_lock_owner_rele(struct nfs_lock_owner *nlop)
3029{
3030	lck_mtx_lock(&nlop->nlo_lock);
3031	if (nlop->nlo_refcnt < 1)
3032		panic("nfs_lock_owner_rele: no refcnt");
3033	nlop->nlo_refcnt--;
3034	if (!nlop->nlo_refcnt && (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY))
3035		panic("nfs_lock_owner_rele: busy");
3036	/* XXX we may potentially want to clean up idle/unused lock owner structures */
3037	if (nlop->nlo_refcnt || (nlop->nlo_flags & NFS_LOCK_OWNER_LINK)) {
3038		lck_mtx_unlock(&nlop->nlo_lock);
3039		return;
3040	}
3041	/* owner is no longer referenced or linked to mount, so destroy it */
3042	lck_mtx_unlock(&nlop->nlo_lock);
3043	nfs_lock_owner_destroy(nlop);
3044}
3045
3046/*
3047 * Mark a lock owner as busy because we are about to
3048 * start an operation that uses and updates lock owner state.
3049 */
int
nfs_lock_owner_set_busy(struct nfs_lock_owner *nlop, thread_t thd)
{
	struct nfsmount *nmp;
	struct timespec ts = {2, 0};
	int error = 0, slpflag;

	nmp = nlop->nlo_open_owner->noo_mount;
	if (!nmp)
		return (ENXIO);
	/* sleep is interruptible only for "intr" mounts when a thread is given */
	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;

	lck_mtx_lock(&nlop->nlo_lock);
	while (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY) {
		/* bail out if a signal or forced unmount interrupts the wait */
		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
			break;
		/* ask the current holder to wake us when it clears busy */
		nlop->nlo_flags |= NFS_LOCK_OWNER_WANT;
		msleep(nlop, &nlop->nlo_lock, slpflag, "nfs_lock_owner_set_busy", &ts);
		slpflag = 0;	/* only catch the signal on the first sleep */
	}
	if (!error)
		nlop->nlo_flags |= NFS_LOCK_OWNER_BUSY;
	lck_mtx_unlock(&nlop->nlo_lock);

	return (error);
}
3076
3077/*
3078 * Clear the busy flag on a lock owner and wake up anyone waiting
3079 * to mark it busy.
3080 */
3081void
3082nfs_lock_owner_clear_busy(struct nfs_lock_owner *nlop)
3083{
3084	int wanted;
3085
3086	lck_mtx_lock(&nlop->nlo_lock);
3087	if (!(nlop->nlo_flags & NFS_LOCK_OWNER_BUSY))
3088		panic("nfs_lock_owner_clear_busy");
3089	wanted = (nlop->nlo_flags & NFS_LOCK_OWNER_WANT);
3090	nlop->nlo_flags &= ~(NFS_LOCK_OWNER_BUSY|NFS_LOCK_OWNER_WANT);
3091	lck_mtx_unlock(&nlop->nlo_lock);
3092	if (wanted)
3093		wakeup(nlop);
3094}
3095
3096/*
3097 * Insert a held lock into a lock owner's sorted list.
3098 * (flock locks are always inserted at the head the list)
3099 */
3100void
3101nfs_lock_owner_insert_held_lock(struct nfs_lock_owner *nlop, struct nfs_file_lock *newnflp)
3102{
3103	struct nfs_file_lock *nflp;
3104
3105	/* insert new lock in lock owner's held lock list */
3106	lck_mtx_lock(&nlop->nlo_lock);
3107	if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) {
3108		TAILQ_INSERT_HEAD(&nlop->nlo_locks, newnflp, nfl_lolink);
3109	} else {
3110		TAILQ_FOREACH(nflp, &nlop->nlo_locks, nfl_lolink) {
3111			if (newnflp->nfl_start < nflp->nfl_start)
3112				break;
3113		}
3114		if (nflp)
3115			TAILQ_INSERT_BEFORE(nflp, newnflp, nfl_lolink);
3116		else
3117			TAILQ_INSERT_TAIL(&nlop->nlo_locks, newnflp, nfl_lolink);
3118	}
3119	lck_mtx_unlock(&nlop->nlo_lock);
3120}
3121
3122/*
3123 * Get a file lock structure for this lock owner.
3124 */
3125struct nfs_file_lock *
3126nfs_file_lock_alloc(struct nfs_lock_owner *nlop)
3127{
3128	struct nfs_file_lock *nflp = NULL;
3129
3130	lck_mtx_lock(&nlop->nlo_lock);
3131	if (!nlop->nlo_alock.nfl_owner) {
3132		nflp = &nlop->nlo_alock;
3133		nflp->nfl_owner = nlop;
3134	}
3135	lck_mtx_unlock(&nlop->nlo_lock);
3136	if (!nflp) {
3137		MALLOC(nflp, struct nfs_file_lock *, sizeof(struct nfs_file_lock), M_TEMP, M_WAITOK);
3138		if (!nflp)
3139			return (NULL);
3140		bzero(nflp, sizeof(*nflp));
3141		nflp->nfl_flags |= NFS_FILE_LOCK_ALLOC;
3142		nflp->nfl_owner = nlop;
3143	}
3144	nfs_lock_owner_ref(nlop);
3145	return (nflp);
3146}
3147
3148/*
3149 * destroy the given NFS file lock structure
3150 */
3151void
3152nfs_file_lock_destroy(struct nfs_file_lock *nflp)
3153{
3154	struct nfs_lock_owner *nlop = nflp->nfl_owner;
3155
3156	if (nflp->nfl_flags & NFS_FILE_LOCK_ALLOC) {
3157		nflp->nfl_owner = NULL;
3158		FREE(nflp, M_TEMP);
3159	} else {
3160		lck_mtx_lock(&nlop->nlo_lock);
3161		bzero(nflp, sizeof(nflp));
3162		lck_mtx_unlock(&nlop->nlo_lock);
3163	}
3164	nfs_lock_owner_rele(nlop);
3165}
3166
3167/*
3168 * Check if one file lock conflicts with another.
3169 * (nflp1 is the new lock.  nflp2 is the existing lock.)
3170 */
3171int
3172nfs_file_lock_conflict(struct nfs_file_lock *nflp1, struct nfs_file_lock *nflp2, int *willsplit)
3173{
3174	/* no conflict if lock is dead */
3175	if ((nflp1->nfl_flags & NFS_FILE_LOCK_DEAD) || (nflp2->nfl_flags & NFS_FILE_LOCK_DEAD))
3176		return (0);
3177	/* no conflict if it's ours - unless the lock style doesn't match */
3178	if ((nflp1->nfl_owner == nflp2->nfl_owner) &&
3179	    ((nflp1->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == (nflp2->nfl_flags & NFS_FILE_LOCK_STYLE_MASK))) {
3180		if (willsplit && (nflp1->nfl_type != nflp2->nfl_type) &&
3181		    (nflp1->nfl_start > nflp2->nfl_start) &&
3182		    (nflp1->nfl_end < nflp2->nfl_end))
3183			*willsplit = 1;
3184		return (0);
3185	}
3186	/* no conflict if ranges don't overlap */
3187	if ((nflp1->nfl_start > nflp2->nfl_end) || (nflp1->nfl_end < nflp2->nfl_start))
3188		return (0);
3189	/* no conflict if neither lock is exclusive */
3190	if ((nflp1->nfl_type != F_WRLCK) && (nflp2->nfl_type != F_WRLCK))
3191		return (0);
3192	/* conflict */
3193	return (1);
3194}
3195
3196/*
3197 * Send an NFSv4 LOCK RPC to the server.
3198 */
int
nfs4_setlock_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	struct nfs_file_lock *nflp,
	int reclaim,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	struct nfsmount *nmp;
	struct nfsm_chain nmreq, nmrep;
	uint64_t xid;
	uint32_t locktype;
	int error = 0, lockerror = ENOENT, newlocker, numops, status;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/* "new locker" if this lock owner has no state with the current server instance */
	newlocker = (nlop->nlo_stategenid != nmp->nm_stategenid);
	/* map lock type + blocking intent to the NFSv4 lock type */
	locktype = (nflp->nfl_flags & NFS_FILE_LOCK_WAIT) ?
			((nflp->nfl_type == F_WRLCK) ?
				NFS_LOCK_TYPE_WRITEW :
				NFS_LOCK_TYPE_READW) :
			((nflp->nfl_type == F_WRLCK) ?
				NFS_LOCK_TYPE_WRITE :
				NFS_LOCK_TYPE_READ);
	if (newlocker) {
		/* a new locker also uses the open file/owner seqids, so mark them busy too */
		error = nfs_open_file_set_busy(nofp, thd);
		if (error)
			return (error);
		error = nfs_open_owner_set_busy(nofp->nof_owner, thd);
		if (error) {
			nfs_open_file_clear_busy(nofp);
			return (error);
		}
		if (!nlop->nlo_open_owner) {
			/* make sure the lock owner knows its open owner */
			nfs_open_owner_ref(nofp->nof_owner);
			nlop->nlo_open_owner = nofp->nof_owner;
		}
	}
	error = nfs_lock_owner_set_busy(nlop, thd);
	if (error) {
		if (newlocker) {
			nfs_open_owner_clear_busy(nofp->nof_owner);
			nfs_open_file_clear_busy(nofp);
		}
		return (error);
	}

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, LOCK
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 33 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "lock", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCK);
	nfsm_chain_add_32(error, &nmreq, locktype);
	nfsm_chain_add_32(error, &nmreq, reclaim);
	nfsm_chain_add_64(error, &nmreq, nflp->nfl_start);
	nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(nflp->nfl_start, nflp->nfl_end));
	nfsm_chain_add_32(error, &nmreq, newlocker);
	if (newlocker) {
		/* new lock owner: identify via open owner seqid/stateid plus lock owner */
		nfsm_chain_add_32(error, &nmreq, nofp->nof_owner->noo_seqid);
		nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid);
		nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid);
		nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop);
	} else {
		/* existing lock owner: just the lock stateid and seqid */
		nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid);
		nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid);
	}
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCK);
	/* bump the appropriate seqid(s) based on the result */
	nfs_owner_seqid_increment(newlocker ? nofp->nof_owner : NULL, nlop, error);
	nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid);

	/* Update the lock owner's stategenid once it appears the server has state for it. */
	/* We determine this by noting the request was successful (we got a stateid). */
	if (newlocker && !error)
		nlop->nlo_stategenid = nmp->nm_stategenid;
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_lock_owner_clear_busy(nlop);
	if (newlocker) {
		nfs_open_owner_clear_busy(nofp->nof_owner);
		nfs_open_file_clear_busy(nofp);
	}
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
3319
3320/*
3321 * Send an NFSv4 LOCKU RPC to the server.
3322 */
int
nfs4_unlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	int type,
	uint64_t start,
	uint64_t end,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfsmount *nmp;
	struct nfsm_chain nmreq, nmrep;
	uint64_t xid;
	int error = 0, lockerror = ENOENT, numops, status;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/* serialize use of the lock owner's seqid/stateid */
	error = nfs_lock_owner_set_busy(nlop, NULL);
	if (error)
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, LOCKU
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "unlock", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKU);
	nfsm_chain_add_32(error, &nmreq, (type == F_WRLCK) ? NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ);
	nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid);
	nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid);
	nfsm_chain_add_64(error, &nmreq, start);
	nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end));
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKU);
	/* bump the lock owner's seqid and pick up the updated stateid */
	nfs_owner_seqid_increment(NULL, nlop, error);
	nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_lock_owner_clear_busy(nlop);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
3397
3398/*
3399 * Send an NFSv4 LOCKT RPC to the server.
3400 */
int
nfs4_getlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	struct nfsm_chain nmreq, nmrep;
	uint64_t xid, val64 = 0;
	uint32_t val = 0;
	int error = 0, lockerror, numops, status;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/* initialize so an early nfsmout doesn't try to unlock the node */
	lockerror = ENOENT;
	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, LOCKT
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "locktest", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKT);
	nfsm_chain_add_32(error, &nmreq, (fl->l_type == F_WRLCK) ? NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ);
	nfsm_chain_add_64(error, &nmreq, start);
	nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end));
	nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKT);
	if (error == NFSERR_DENIED) {
		/* a conflicting lock was found; decode it into the caller's flock */
		error = 0;
		nfsm_chain_get_64(error, &nmrep, fl->l_start);
		nfsm_chain_get_64(error, &nmrep, val64);
		/* the server reports length UINT64_MAX for "to end of file" -> l_len 0 */
		fl->l_len = (val64 == UINT64_MAX) ? 0 : val64;
		nfsm_chain_get_32(error, &nmrep, val);
		fl->l_type = (val == NFS_LOCK_TYPE_WRITE) ? F_WRLCK : F_RDLCK;
		fl->l_pid = 0;
		fl->l_whence = SEEK_SET;
	} else if (!error) {
		/* no conflicting lock on the server */
		fl->l_type = F_UNLCK;
	}
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
3479
3480
3481/*
3482 * Check for any conflicts with the given lock.
3483 *
3484 * Checking for a lock doesn't require the file to be opened.
3485 * So we skip all the open owner, open file, lock owner work
3486 * and just check for a conflicting lock.
3487 */
int
nfs_advlock_getlock(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	struct nfs_file_lock *nflp;
	int error = 0, answered = 0;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);

restart:
	if ((error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx))))
		return (error);

	lck_mtx_lock(&np->n_openlock);
	/* scan currently held locks for conflict */
	TAILQ_FOREACH(nflp, &np->n_locks, nfl_link) {
		if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
			continue;
		/* conflict: ranges overlap and at least one side wants/has a write lock */
		if ((start <= nflp->nfl_end) && (end >= nflp->nfl_start) &&
		    ((fl->l_type == F_WRLCK) || (nflp->nfl_type == F_WRLCK)))
			break;
	}
	if (nflp) {
		/* found a conflicting lock */
		fl->l_type = nflp->nfl_type;
		fl->l_pid = (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_FLOCK) ? -1 : nflp->nfl_owner->nlo_pid;
		fl->l_start = nflp->nfl_start;
		fl->l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
		fl->l_whence = SEEK_SET;
		answered = 1;
	} else if ((np->n_openflags & N_DELEG_WRITE) && !(np->n_openflags & N_DELEG_RETURN)) {
		/*
		 * If we have a write delegation, we know there can't be other
		 * locks on the server.  So the answer is no conflicting lock found.
		 */
		fl->l_type = F_UNLCK;
		answered = 1;
	}
	lck_mtx_unlock(&np->n_openlock);
	if (answered) {
		nfs_mount_state_in_use_end(nmp, 0);
		return (0);
	}

	/* no conflict found locally, so ask the server */
	error = nmp->nm_funcs->nf_getlock_rpc(np, nlop, fl, start, end, ctx);

	/* if mount state changed (e.g. recovery), redo the whole check */
	if (nfs_mount_state_in_use_end(nmp, error))
		goto restart;
	return (error);
}
3547
3548/*
3549 * Acquire a file lock for the given range.
3550 *
3551 * Add the lock (request) to the lock queue.
3552 * Scan the lock queue for any conflicting locks.
3553 * If a conflict is found, block or return an error.
3554 * Once end of queue is reached, send request to the server.
3555 * If the server grants the lock, scan the lock queue and
3556 * update any existing locks.  Then (optionally) scan the
3557 * queue again to coalesce any locks adjacent to the new one.
3558 */
3559int
3560nfs_advlock_setlock(
3561	nfsnode_t np,
3562	struct nfs_open_file *nofp,
3563	struct nfs_lock_owner *nlop,
3564	int op,
3565	uint64_t start,
3566	uint64_t end,
3567	int style,
3568	short type,
3569	vfs_context_t ctx)
3570{
3571	struct nfsmount *nmp;
3572	struct nfs_file_lock *newnflp, *nflp, *nflp2 = NULL, *nextnflp, *flocknflp = NULL;
3573	struct nfs_file_lock *coalnflp;
3574	int error = 0, error2, willsplit = 0, delay, slpflag, busy = 0, inuse = 0, restart, inqueue = 0;
3575	struct timespec ts = {1, 0};
3576
3577	nmp = NFSTONMP(np);
3578	if (!nmp)
3579		return (ENXIO);
3580	slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
3581
3582	if ((type != F_RDLCK) && (type != F_WRLCK))
3583		return (EINVAL);
3584
3585	/* allocate a new lock */
3586	newnflp = nfs_file_lock_alloc(nlop);
3587	if (!newnflp)
3588		return (ENOLCK);
3589	newnflp->nfl_start = start;
3590	newnflp->nfl_end = end;
3591	newnflp->nfl_type = type;
3592	if (op == F_SETLKW)
3593		newnflp->nfl_flags |= NFS_FILE_LOCK_WAIT;
3594	newnflp->nfl_flags |= style;
3595	newnflp->nfl_flags |= NFS_FILE_LOCK_BLOCKED;
3596
3597	if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && (type == F_WRLCK)) {
3598		/*
3599		 * For exclusive flock-style locks, if we block waiting for the
3600		 * lock, we need to first release any currently held shared
3601		 * flock-style lock.  So, the first thing we do is check if we
3602		 * have a shared flock-style lock.
3603		 */
3604		nflp = TAILQ_FIRST(&nlop->nlo_locks);
3605		if (nflp && ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_FLOCK))
3606			nflp = NULL;
3607		if (nflp && (nflp->nfl_type != F_RDLCK))
3608			nflp = NULL;
3609		flocknflp = nflp;
3610	}
3611
3612restart:
3613	restart = 0;
3614	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
3615	if (error)
3616		goto error_out;
3617	inuse = 1;
3618	if (np->n_flag & NREVOKE) {
3619		error = EIO;
3620		nfs_mount_state_in_use_end(nmp, 0);
3621		inuse = 0;
3622		goto error_out;
3623	}
3624	if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
3625		nfs_mount_state_in_use_end(nmp, 0);
3626		inuse = 0;
3627		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
3628		if (error)
3629			goto error_out;
3630		goto restart;
3631	}
3632
3633	lck_mtx_lock(&np->n_openlock);
3634	if (!inqueue) {
3635		/* insert new lock at beginning of list */
3636		TAILQ_INSERT_HEAD(&np->n_locks, newnflp, nfl_link);
3637		inqueue = 1;
3638	}
3639
3640	/* scan current list of locks (held and pending) for conflicts */
3641	for (nflp = TAILQ_NEXT(newnflp, nfl_link); nflp; nflp = nextnflp) {
3642		nextnflp = TAILQ_NEXT(nflp, nfl_link);
3643		if (!nfs_file_lock_conflict(newnflp, nflp, &willsplit))
3644			continue;
3645		/* Conflict */
3646		if (!(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) {
3647			error = EAGAIN;
3648			break;
3649		}
3650		/* Block until this lock is no longer held. */
3651		if (nflp->nfl_blockcnt == UINT_MAX) {
3652			error = ENOLCK;
3653			break;
3654		}
3655		nflp->nfl_blockcnt++;
3656		do {
3657			if (flocknflp) {
3658				/* release any currently held shared lock before sleeping */
3659				lck_mtx_unlock(&np->n_openlock);
3660				nfs_mount_state_in_use_end(nmp, 0);
3661				inuse = 0;
3662				error = nfs_advlock_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
3663				flocknflp = NULL;
3664				if (!error)
3665					error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
3666				if (error) {
3667					lck_mtx_lock(&np->n_openlock);
3668					break;
3669				}
3670				inuse = 1;
3671				lck_mtx_lock(&np->n_openlock);
3672				/* no need to block/sleep if the conflict is gone */
3673				if (!nfs_file_lock_conflict(newnflp, nflp, NULL))
3674					break;
3675			}
3676			msleep(nflp, &np->n_openlock, slpflag, "nfs_advlock_setlock_blocked", &ts);
3677			slpflag = 0;
3678			error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0);
3679			if (!error && (nmp->nm_state & NFSSTA_RECOVER)) {
3680				/* looks like we have a recover pending... restart */
3681				restart = 1;
3682				lck_mtx_unlock(&np->n_openlock);
3683				nfs_mount_state_in_use_end(nmp, 0);
3684				inuse = 0;
3685				lck_mtx_lock(&np->n_openlock);
3686				break;
3687			}
3688			if (!error && (np->n_flag & NREVOKE))
3689				error = EIO;
3690		} while (!error && nfs_file_lock_conflict(newnflp, nflp, NULL));
3691		nflp->nfl_blockcnt--;
3692		if ((nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !nflp->nfl_blockcnt) {
3693			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
3694			nfs_file_lock_destroy(nflp);
3695		}
3696		if (error || restart)
3697			break;
3698		/* We have released n_openlock and we can't trust that nextnflp is still valid. */
3699		/* So, start this lock-scanning loop over from where it started. */
3700		nextnflp = TAILQ_NEXT(newnflp, nfl_link);
3701	}
3702	lck_mtx_unlock(&np->n_openlock);
3703	if (restart)
3704		goto restart;
3705	if (error)
3706		goto error_out;
3707
3708	if (willsplit) {
3709		/*
3710		 * It looks like this operation is splitting a lock.
3711		 * We allocate a new lock now so we don't have to worry
3712		 * about the allocation failing after we've updated some state.
3713		 */
3714		nflp2 = nfs_file_lock_alloc(nlop);
3715		if (!nflp2) {
3716			error = ENOLCK;
3717			goto error_out;
3718		}
3719	}
3720
3721	/* once scan for local conflicts is clear, send request to server */
3722	if ((error = nfs_open_state_set_busy(np, vfs_context_thread(ctx))))
3723		goto error_out;
3724	busy = 1;
3725	delay = 0;
3726	do {
3727		/* do we have a delegation? (that we're not returning?) */
3728		if ((np->n_openflags & N_DELEG_MASK) && !(np->n_openflags & N_DELEG_RETURN)) {
3729			if (np->n_openflags & N_DELEG_WRITE) {
3730				/* with a write delegation, just take the lock delegated */
3731				newnflp->nfl_flags |= NFS_FILE_LOCK_DELEGATED;
3732				error = 0;
3733				/* make sure the lock owner knows its open owner */
3734				if (!nlop->nlo_open_owner) {
3735					nfs_open_owner_ref(nofp->nof_owner);
3736					nlop->nlo_open_owner = nofp->nof_owner;
3737				}
3738				break;
3739			} else {
3740				/*
3741				 * If we don't have any non-delegated opens but we do have
3742				 * delegated opens, then we need to first claim the delegated
3743				 * opens so that the lock request on the server can be associated
3744				 * with an open it knows about.
3745				 */
3746				if ((!nofp->nof_rw_drw && !nofp->nof_w_drw && !nofp->nof_r_drw &&
3747				     !nofp->nof_rw_dw && !nofp->nof_w_dw && !nofp->nof_r_dw &&
3748				     !nofp->nof_rw && !nofp->nof_w && !nofp->nof_r) &&
3749				    (nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
3750				     nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
3751				     nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r)) {
3752					error = nfs4_claim_delegated_state_for_open_file(nofp, 0);
3753					if (error)
3754						break;
3755				}
3756			}
3757		}
3758		if (np->n_flag & NREVOKE)
3759			error = EIO;
3760		if (!error)
3761			error = nmp->nm_funcs->nf_setlock_rpc(np, nofp, newnflp, 0, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
3762		if (!error || ((error != NFSERR_DENIED) && (error != NFSERR_GRACE)))
3763			break;
3764		/* request was denied due to either conflict or grace period */
3765		if ((error == NFSERR_DENIED) && !(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) {
3766			error = EAGAIN;
3767			break;
3768		}
3769		if (flocknflp) {
3770			/* release any currently held shared lock before sleeping */
3771			nfs_open_state_clear_busy(np);
3772			busy = 0;
3773			nfs_mount_state_in_use_end(nmp, 0);
3774			inuse = 0;
3775			error2 = nfs_advlock_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
3776			flocknflp = NULL;
3777			if (!error2)
3778				error2 = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
3779			if (!error2) {
3780				inuse = 1;
3781				error2 = nfs_open_state_set_busy(np, vfs_context_thread(ctx));
3782			}
3783			if (error2) {
3784				error = error2;
3785				break;
3786			}
3787			busy = 1;
3788		}
3789		/*
3790		 * Wait a little bit and send the request again.
3791		 * Except for retries of blocked v2/v3 request where we've already waited a bit.
3792		 */
3793		if ((nmp->nm_vers >= NFS_VER4) || (error == NFSERR_GRACE)) {
3794			if (error == NFSERR_GRACE)
3795				delay = 4;
3796			if (delay < 4)
3797				delay++;
3798			tsleep(newnflp, slpflag, "nfs_advlock_setlock_delay", delay * (hz/2));
3799			slpflag = 0;
3800		}
3801		error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0);
3802		if (!error && (nmp->nm_state & NFSSTA_RECOVER)) {
3803			/* looks like we have a recover pending... restart */
3804			nfs_open_state_clear_busy(np);
3805			busy = 0;
3806			nfs_mount_state_in_use_end(nmp, 0);
3807			inuse = 0;
3808			goto restart;
3809		}
3810		if (!error && (np->n_flag & NREVOKE))
3811			error = EIO;
3812	} while (!error);
3813
3814error_out:
3815	if (nfs_mount_state_error_should_restart(error)) {
3816		/* looks like we need to restart this operation */
3817		if (busy) {
3818			nfs_open_state_clear_busy(np);
3819			busy = 0;
3820		}
3821		if (inuse) {
3822			nfs_mount_state_in_use_end(nmp, error);
3823			inuse = 0;
3824		}
3825		goto restart;
3826	}
3827	lck_mtx_lock(&np->n_openlock);
3828	newnflp->nfl_flags &= ~NFS_FILE_LOCK_BLOCKED;
3829	if (error) {
3830		newnflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3831		if (newnflp->nfl_blockcnt) {
3832			/* wake up anyone blocked on this lock */
3833			wakeup(newnflp);
3834		} else {
3835			/* remove newnflp from lock list and destroy */
3836			if (inqueue)
3837				TAILQ_REMOVE(&np->n_locks, newnflp, nfl_link);
3838			nfs_file_lock_destroy(newnflp);
3839		}
3840		lck_mtx_unlock(&np->n_openlock);
3841		if (busy)
3842			nfs_open_state_clear_busy(np);
3843		if (inuse)
3844			nfs_mount_state_in_use_end(nmp, error);
3845		if (nflp2)
3846			nfs_file_lock_destroy(nflp2);
3847		return (error);
3848	}
3849
3850	/* server granted the lock */
3851
3852	/*
3853	 * Scan for locks to update.
3854	 *
3855	 * Locks completely covered are killed.
3856	 * At most two locks may need to be clipped.
3857	 * It's possible that a single lock may need to be split.
3858	 */
3859	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
3860		if (nflp == newnflp)
3861			continue;
3862		if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
3863			continue;
3864		if (nflp->nfl_owner != nlop)
3865			continue;
3866		if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK))
3867			continue;
3868		if ((newnflp->nfl_start > nflp->nfl_end) || (newnflp->nfl_end < nflp->nfl_start))
3869			continue;
3870		/* here's one to update */
3871		if ((newnflp->nfl_start <= nflp->nfl_start) && (newnflp->nfl_end >= nflp->nfl_end)) {
3872			/* The entire lock is being replaced. */
3873			nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3874			lck_mtx_lock(&nlop->nlo_lock);
3875			TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
3876			lck_mtx_unlock(&nlop->nlo_lock);
3877			/* lock will be destroyed below, if no waiters */
3878		} else if ((newnflp->nfl_start > nflp->nfl_start) && (newnflp->nfl_end < nflp->nfl_end)) {
3879			/* We're replacing a range in the middle of a lock. */
3880			/* The current lock will be split into two locks. */
3881			/* Update locks and insert new lock after current lock. */
3882			nflp2->nfl_flags |= (nflp->nfl_flags & (NFS_FILE_LOCK_STYLE_MASK|NFS_FILE_LOCK_DELEGATED));
3883			nflp2->nfl_type = nflp->nfl_type;
3884			nflp2->nfl_start = newnflp->nfl_end + 1;
3885			nflp2->nfl_end = nflp->nfl_end;
3886			nflp->nfl_end = newnflp->nfl_start - 1;
3887			TAILQ_INSERT_AFTER(&np->n_locks, nflp, nflp2, nfl_link);
3888			nfs_lock_owner_insert_held_lock(nlop, nflp2);
3889			nextnflp = nflp2;
3890			nflp2 = NULL;
3891		} else if (newnflp->nfl_start > nflp->nfl_start) {
3892			/* We're replacing the end of a lock. */
3893			nflp->nfl_end = newnflp->nfl_start - 1;
3894		} else if (newnflp->nfl_end < nflp->nfl_end) {
3895			/* We're replacing the start of a lock. */
3896			nflp->nfl_start = newnflp->nfl_end + 1;
3897		}
3898		if (nflp->nfl_blockcnt) {
3899			/* wake up anyone blocked on this lock */
3900			wakeup(nflp);
3901		} else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) {
3902			/* remove nflp from lock list and destroy */
3903			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
3904			nfs_file_lock_destroy(nflp);
3905		}
3906	}
3907
3908	nfs_lock_owner_insert_held_lock(nlop, newnflp);
3909
3910	/*
3911	 * POSIX locks should be coalesced when possible.
3912	 */
3913	if ((style == NFS_FILE_LOCK_STYLE_POSIX) && (nofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)) {
3914		/*
3915		 * Walk through the lock queue and check each of our held locks with
3916		 * the previous and next locks in the lock owner's "held lock list".
3917		 * If the two locks can be coalesced, we merge the current lock into
3918		 * the other (previous or next) lock.  Merging this way makes sure that
3919		 * lock ranges are always merged forward in the lock queue.  This is
3920		 * important because anyone blocked on the lock being "merged away"
3921		 * will still need to block on that range and it will simply continue
3922		 * checking locks that are further down the list.
3923		 */
3924		TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
3925			if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
3926				continue;
3927			if (nflp->nfl_owner != nlop)
3928				continue;
3929			if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_POSIX)
3930				continue;
3931			if (((coalnflp = TAILQ_PREV(nflp, nfs_file_lock_queue, nfl_lolink))) &&
3932			    ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) &&
3933			    (coalnflp->nfl_type == nflp->nfl_type) &&
3934			    (coalnflp->nfl_end == (nflp->nfl_start - 1))) {
3935				coalnflp->nfl_end = nflp->nfl_end;
3936				nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3937				lck_mtx_lock(&nlop->nlo_lock);
3938				TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
3939				lck_mtx_unlock(&nlop->nlo_lock);
3940			} else if (((coalnflp = TAILQ_NEXT(nflp, nfl_lolink))) &&
3941			    ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) &&
3942			    (coalnflp->nfl_type == nflp->nfl_type) &&
3943			    (coalnflp->nfl_start == (nflp->nfl_end + 1))) {
3944				coalnflp->nfl_start = nflp->nfl_start;
3945				nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
3946				lck_mtx_lock(&nlop->nlo_lock);
3947				TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
3948				lck_mtx_unlock(&nlop->nlo_lock);
3949			}
3950			if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD))
3951				continue;
3952			if (nflp->nfl_blockcnt) {
3953				/* wake up anyone blocked on this lock */
3954				wakeup(nflp);
3955			} else {
3956				/* remove nflp from lock list and destroy */
3957				TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
3958				nfs_file_lock_destroy(nflp);
3959			}
3960		}
3961	}
3962
3963	lck_mtx_unlock(&np->n_openlock);
3964	nfs_open_state_clear_busy(np);
3965	nfs_mount_state_in_use_end(nmp, error);
3966
3967	if (nflp2)
3968		nfs_file_lock_destroy(nflp2);
3969	return (error);
3970}
3971
3972/*
3973 * Release all (same style) locks within the given range.
3974 */
3975int
3976nfs_advlock_unlock(
3977	nfsnode_t np,
3978	struct nfs_open_file *nofp,
3979	struct nfs_lock_owner *nlop,
3980	uint64_t start,
3981	uint64_t end,
3982	int style,
3983	vfs_context_t ctx)
3984{
3985	struct nfsmount *nmp;
3986	struct nfs_file_lock *nflp, *nextnflp, *newnflp = NULL;
3987	int error = 0, willsplit = 0, send_unlock_rpcs = 1;
3988
3989	nmp = NFSTONMP(np);
3990	if (!nmp)
3991		return (ENXIO);
3992
3993restart:
3994	if ((error = nfs_mount_state_in_use_start(nmp, NULL)))
3995		return (error);
3996	if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
3997		nfs_mount_state_in_use_end(nmp, 0);
3998		error = nfs4_reopen(nofp, NULL);
3999		if (error)
4000			return (error);
4001		goto restart;
4002	}
4003	if ((error = nfs_open_state_set_busy(np, NULL))) {
4004		nfs_mount_state_in_use_end(nmp, error);
4005		return (error);
4006	}
4007
4008	lck_mtx_lock(&np->n_openlock);
4009	if ((start > 0) && (end < UINT64_MAX) && !willsplit) {
4010		/*
4011		 * We may need to allocate a new lock if an existing lock gets split.
4012		 * So, we first scan the list to check for a split, and if there's
4013		 * going to be one, we'll allocate one now.
4014		 */
4015		TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
4016			if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
4017				continue;
4018			if (nflp->nfl_owner != nlop)
4019				continue;
4020			if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style)
4021				continue;
4022			if ((start > nflp->nfl_end) || (end < nflp->nfl_start))
4023				continue;
4024			if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) {
4025				willsplit = 1;
4026				break;
4027			}
4028		}
4029		if (willsplit) {
4030			lck_mtx_unlock(&np->n_openlock);
4031			nfs_open_state_clear_busy(np);
4032			nfs_mount_state_in_use_end(nmp, 0);
4033			newnflp = nfs_file_lock_alloc(nlop);
4034			if (!newnflp)
4035				return (ENOMEM);
4036			goto restart;
4037		}
4038	}
4039
4040	/*
4041	 * Free all of our locks in the given range.
4042	 *
4043	 * Note that this process requires sending requests to the server.
4044	 * Because of this, we will release the n_openlock while performing
4045	 * the unlock RPCs.  The N_OPENBUSY state keeps the state of *held*
4046	 * locks from changing underneath us.  However, other entries in the
4047	 * list may be removed.  So we need to be careful walking the list.
4048	 */
4049
4050	/*
4051	 * Don't unlock ranges that are held by other-style locks.
4052	 * If style is posix, don't send any unlock rpcs if flock is held.
4053	 * If we unlock an flock, don't send unlock rpcs for any posix-style
4054	 * ranges held - instead send unlocks for the ranges not held.
4055	 */
4056	if ((style == NFS_FILE_LOCK_STYLE_POSIX) &&
4057	    ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) &&
4058	    ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK))
4059		send_unlock_rpcs = 0;
4060	if ((style == NFS_FILE_LOCK_STYLE_FLOCK) &&
4061	    ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) &&
4062	    ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) &&
4063	    ((nflp = TAILQ_NEXT(nflp, nfl_lolink))) &&
4064	    ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX)) {
4065		uint64_t s = 0;
4066		int type = TAILQ_FIRST(&nlop->nlo_locks)->nfl_type;
4067		int delegated = (TAILQ_FIRST(&nlop->nlo_locks)->nfl_flags & NFS_FILE_LOCK_DELEGATED);
4068		while (!delegated && nflp) {
4069			if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) {
4070				/* unlock the range preceding this lock */
4071				lck_mtx_unlock(&np->n_openlock);
4072				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, type, s, nflp->nfl_start-1, 0,
4073						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4074				if (nfs_mount_state_error_should_restart(error)) {
4075					nfs_open_state_clear_busy(np);
4076					nfs_mount_state_in_use_end(nmp, error);
4077					goto restart;
4078				}
4079				lck_mtx_lock(&np->n_openlock);
4080				if (error)
4081					goto out;
4082				s = nflp->nfl_end+1;
4083			}
4084			nflp = TAILQ_NEXT(nflp, nfl_lolink);
4085		}
4086		if (!delegated) {
4087			lck_mtx_unlock(&np->n_openlock);
4088			error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, type, s, end, 0,
4089					vfs_context_thread(ctx), vfs_context_ucred(ctx));
4090			if (nfs_mount_state_error_should_restart(error)) {
4091				nfs_open_state_clear_busy(np);
4092				nfs_mount_state_in_use_end(nmp, error);
4093				goto restart;
4094			}
4095			lck_mtx_lock(&np->n_openlock);
4096			if (error)
4097				goto out;
4098		}
4099		send_unlock_rpcs = 0;
4100	}
4101
4102	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
4103		if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
4104			continue;
4105		if (nflp->nfl_owner != nlop)
4106			continue;
4107		if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style)
4108			continue;
4109		if ((start > nflp->nfl_end) || (end < nflp->nfl_start))
4110			continue;
4111		/* here's one to unlock */
4112		if ((start <= nflp->nfl_start) && (end >= nflp->nfl_end)) {
4113			/* The entire lock is being unlocked. */
4114			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4115				lck_mtx_unlock(&np->n_openlock);
4116				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, nflp->nfl_end, 0,
4117						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4118				if (nfs_mount_state_error_should_restart(error)) {
4119					nfs_open_state_clear_busy(np);
4120					nfs_mount_state_in_use_end(nmp, error);
4121					goto restart;
4122				}
4123				lck_mtx_lock(&np->n_openlock);
4124			}
4125			nextnflp = TAILQ_NEXT(nflp, nfl_link);
4126			if (error)
4127				break;
4128			nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
4129			lck_mtx_lock(&nlop->nlo_lock);
4130			TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
4131			lck_mtx_unlock(&nlop->nlo_lock);
4132			/* lock will be destroyed below, if no waiters */
4133		} else if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) {
4134			/* We're unlocking a range in the middle of a lock. */
4135			/* The current lock will be split into two locks. */
4136			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4137				lck_mtx_unlock(&np->n_openlock);
4138				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, start, end, 0,
4139						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4140				if (nfs_mount_state_error_should_restart(error)) {
4141					nfs_open_state_clear_busy(np);
4142					nfs_mount_state_in_use_end(nmp, error);
4143					goto restart;
4144				}
4145				lck_mtx_lock(&np->n_openlock);
4146			}
4147			if (error)
4148				break;
4149			/* update locks and insert new lock after current lock */
4150			newnflp->nfl_flags |= (nflp->nfl_flags & (NFS_FILE_LOCK_STYLE_MASK|NFS_FILE_LOCK_DELEGATED));
4151			newnflp->nfl_type = nflp->nfl_type;
4152			newnflp->nfl_start = end + 1;
4153			newnflp->nfl_end = nflp->nfl_end;
4154			nflp->nfl_end = start - 1;
4155			TAILQ_INSERT_AFTER(&np->n_locks, nflp, newnflp, nfl_link);
4156			nfs_lock_owner_insert_held_lock(nlop, newnflp);
4157			nextnflp = newnflp;
4158			newnflp = NULL;
4159		} else if (start > nflp->nfl_start) {
4160			/* We're unlocking the end of a lock. */
4161			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4162				lck_mtx_unlock(&np->n_openlock);
4163				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, start, nflp->nfl_end, 0,
4164						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4165				if (nfs_mount_state_error_should_restart(error)) {
4166					nfs_open_state_clear_busy(np);
4167					nfs_mount_state_in_use_end(nmp, error);
4168					goto restart;
4169				}
4170				lck_mtx_lock(&np->n_openlock);
4171			}
4172			nextnflp = TAILQ_NEXT(nflp, nfl_link);
4173			if (error)
4174				break;
4175			nflp->nfl_end = start - 1;
4176		} else if (end < nflp->nfl_end) {
4177			/* We're unlocking the start of a lock. */
4178			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
4179				lck_mtx_unlock(&np->n_openlock);
4180				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, end, 0,
4181						vfs_context_thread(ctx), vfs_context_ucred(ctx));
4182				if (nfs_mount_state_error_should_restart(error)) {
4183					nfs_open_state_clear_busy(np);
4184					nfs_mount_state_in_use_end(nmp, error);
4185					goto restart;
4186				}
4187				lck_mtx_lock(&np->n_openlock);
4188			}
4189			nextnflp = TAILQ_NEXT(nflp, nfl_link);
4190			if (error)
4191				break;
4192			nflp->nfl_start = end + 1;
4193		}
4194		if (nflp->nfl_blockcnt) {
4195			/* wake up anyone blocked on this lock */
4196			wakeup(nflp);
4197		} else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) {
4198			/* remove nflp from lock list and destroy */
4199			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
4200			nfs_file_lock_destroy(nflp);
4201		}
4202	}
4203out:
4204	lck_mtx_unlock(&np->n_openlock);
4205	nfs_open_state_clear_busy(np);
4206	nfs_mount_state_in_use_end(nmp, 0);
4207
4208	if (newnflp)
4209		nfs_file_lock_destroy(newnflp);
4210	return (error);
4211}
4212
4213/*
4214 * NFSv4 advisory file locking
4215 */
4216int
4217nfs_vnop_advlock(
4218	struct vnop_advlock_args /* {
4219		struct vnodeop_desc *a_desc;
4220		vnode_t a_vp;
4221		caddr_t a_id;
4222		int a_op;
4223		struct flock *a_fl;
4224		int a_flags;
4225		vfs_context_t a_context;
4226	} */ *ap)
4227{
4228	vnode_t vp = ap->a_vp;
4229	nfsnode_t np = VTONFS(ap->a_vp);
4230	struct flock *fl = ap->a_fl;
4231	int op = ap->a_op;
4232	int flags = ap->a_flags;
4233	vfs_context_t ctx = ap->a_context;
4234	struct nfsmount *nmp;
4235	struct nfs_open_owner *noop = NULL;
4236	struct nfs_open_file *nofp = NULL;
4237	struct nfs_lock_owner *nlop = NULL;
4238	off_t lstart;
4239	uint64_t start, end;
4240	int error = 0, modified, style;
4241	enum vtype vtype;
4242#define OFF_MAX QUAD_MAX
4243
4244	nmp = VTONMP(ap->a_vp);
4245	if (!nmp)
4246		return (ENXIO);
4247	lck_mtx_lock(&nmp->nm_lock);
4248	if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED)) {
4249		lck_mtx_unlock(&nmp->nm_lock);
4250		return (ENOTSUP);
4251	}
4252	lck_mtx_unlock(&nmp->nm_lock);
4253
4254	if (np->n_flag & NREVOKE)
4255		return (EIO);
4256	vtype = vnode_vtype(ap->a_vp);
4257	if (vtype == VDIR) /* ignore lock requests on directories */
4258		return (0);
4259	if (vtype != VREG) /* anything other than regular files is invalid */
4260		return (EINVAL);
4261
4262	/* Convert the flock structure into a start and end. */
4263	switch (fl->l_whence) {
4264	case SEEK_SET:
4265	case SEEK_CUR:
4266		/*
4267		 * Caller is responsible for adding any necessary offset
4268		 * to fl->l_start when SEEK_CUR is used.
4269		 */
4270		lstart = fl->l_start;
4271		break;
4272	case SEEK_END:
4273		/* need to flush, and refetch attributes to make */
4274		/* sure we have the correct end of file offset   */
4275		if ((error = nfs_node_lock(np)))
4276			return (error);
4277		modified = (np->n_flag & NMODIFIED);
4278		nfs_node_unlock(np);
4279		if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1))))
4280			return (error);
4281		if ((error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED)))
4282			return (error);
4283		nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
4284		if ((np->n_size > OFF_MAX) ||
4285		    ((fl->l_start > 0) && (np->n_size > (u_quad_t)(OFF_MAX - fl->l_start))))
4286			error = EOVERFLOW;
4287		lstart = np->n_size + fl->l_start;
4288		nfs_data_unlock(np);
4289		if (error)
4290			return (error);
4291		break;
4292	default:
4293		return (EINVAL);
4294	}
4295	if (lstart < 0)
4296		return (EINVAL);
4297	start = lstart;
4298	if (fl->l_len == 0) {
4299		end = UINT64_MAX;
4300	} else if (fl->l_len > 0) {
4301		if ((fl->l_len - 1) > (OFF_MAX - lstart))
4302			return (EOVERFLOW);
4303		end = start - 1 + fl->l_len;
4304	} else { /* l_len is negative */
4305		if ((lstart + fl->l_len) < 0)
4306			return (EINVAL);
4307		end = start - 1;
4308		start += fl->l_len;
4309	}
4310	if ((nmp->nm_vers == NFS_VER2) && ((start > INT32_MAX) || (fl->l_len && (end > INT32_MAX))))
4311		return (EINVAL);
4312
4313	style = (flags & F_FLOCK) ? NFS_FILE_LOCK_STYLE_FLOCK : NFS_FILE_LOCK_STYLE_POSIX;
4314	if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && ((start != 0) || (end != UINT64_MAX)))
4315		return (EINVAL);
4316
4317	/* find the lock owner, alloc if not unlock */
4318	nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), (op != F_UNLCK));
4319	if (!nlop) {
4320		error = (op == F_UNLCK) ? 0 : ENOMEM;
4321		if (error)
4322			NP(np, "nfs_vnop_advlock: no lock owner, error %d", error);
4323		goto out;
4324	}
4325
4326	if (op == F_GETLK) {
4327		error = nfs_advlock_getlock(np, nlop, fl, start, end, ctx);
4328	} else {
4329		/* find the open owner */
4330		noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
4331		if (!noop) {
4332			NP(np, "nfs_vnop_advlock: no open owner %d", kauth_cred_getuid(vfs_context_ucred(ctx)));
4333			error = EPERM;
4334			goto out;
4335		}
4336		/* find the open file */
4337restart:
4338		error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0);
4339		if (error)
4340			error = EBADF;
4341		if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
4342			NP(np, "nfs_vnop_advlock: LOST %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
4343			error = EIO;
4344		}
4345		if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
4346			error = nfs4_reopen(nofp, ((op == F_UNLCK) ? NULL : vfs_context_thread(ctx)));
4347			nofp = NULL;
4348			if (!error)
4349				goto restart;
4350		}
4351		if (error) {
4352			NP(np, "nfs_vnop_advlock: no open file %d, %d", error, kauth_cred_getuid(noop->noo_cred));
4353			goto out;
4354		}
4355		if (op == F_UNLCK) {
4356			error = nfs_advlock_unlock(np, nofp, nlop, start, end, style, ctx);
4357		} else if ((op == F_SETLK) || (op == F_SETLKW)) {
4358			if ((op == F_SETLK) && (flags & F_WAIT))
4359				op = F_SETLKW;
4360			error = nfs_advlock_setlock(np, nofp, nlop, op, start, end, style, fl->l_type, ctx);
4361		} else {
4362			/* not getlk, unlock or lock? */
4363			error = EINVAL;
4364		}
4365	}
4366
4367out:
4368	if (nlop)
4369		nfs_lock_owner_rele(nlop);
4370	if (noop)
4371		nfs_open_owner_rele(noop);
4372	return (error);
4373}
4374
4375/*
4376 * Check if an open owner holds any locks on a file.
4377 */
4378int
4379nfs_check_for_locks(struct nfs_open_owner *noop, struct nfs_open_file *nofp)
4380{
4381	struct nfs_lock_owner *nlop;
4382
4383	TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) {
4384		if (nlop->nlo_open_owner != noop)
4385			continue;
4386		if (!TAILQ_EMPTY(&nlop->nlo_locks))
4387			break;
4388	}
4389	return (nlop ? 1 : 0);
4390}
4391
4392/*
4393 * Reopen simple (no deny, no locks) open state that was lost.
4394 */
4395int
4396nfs4_reopen(struct nfs_open_file *nofp, thread_t thd)
4397{
4398	struct nfs_open_owner *noop = nofp->nof_owner;
4399	struct nfsmount *nmp = NFSTONMP(nofp->nof_np);
4400	nfsnode_t np = nofp->nof_np;
4401	vnode_t vp = NFSTOV(np);
4402	vnode_t dvp = NULL;
4403	struct componentname cn;
4404	const char *vname = NULL;
4405	const char *name = NULL;
4406	size_t namelen;
4407	char smallname[128];
4408	char *filename = NULL;
4409	int error = 0, done = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
4410	struct timespec ts = { 1, 0 };
4411
4412	lck_mtx_lock(&nofp->nof_lock);
4413	while (nofp->nof_flags & NFS_OPEN_FILE_REOPENING) {
4414		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
4415			break;
4416		msleep(&nofp->nof_flags, &nofp->nof_lock, slpflag|(PZERO-1), "nfsreopenwait", &ts);
4417		slpflag = 0;
4418	}
4419	if (error || !(nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
4420		lck_mtx_unlock(&nofp->nof_lock);
4421		return (error);
4422	}
4423	nofp->nof_flags |= NFS_OPEN_FILE_REOPENING;
4424	lck_mtx_unlock(&nofp->nof_lock);
4425
4426	nfs_node_lock_force(np);
4427	if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) {
4428		/*
4429		 * The node's been sillyrenamed, so we need to use
4430		 * the sillyrename directory/name to do the open.
4431		 */
4432		struct nfs_sillyrename *nsp = np->n_sillyrename;
4433		dvp = NFSTOV(nsp->nsr_dnp);
4434		if ((error = vnode_get(dvp))) {
4435			nfs_node_unlock(np);
4436			goto out;
4437		}
4438		name = nsp->nsr_name;
4439	} else {
4440		/*
4441		 * [sigh] We can't trust VFS to get the parent right for named
4442		 * attribute nodes.  (It likes to reparent the nodes after we've
4443		 * created them.)  Luckily we can probably get the right parent
4444		 * from the n_parent we have stashed away.
4445		 */
4446		if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
4447		    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
4448			dvp = NULL;
4449		if (!dvp)
4450			dvp = vnode_getparent(vp);
4451		vname = vnode_getname(vp);
4452		if (!dvp || !vname) {
4453			if (!error)
4454				error = EIO;
4455			nfs_node_unlock(np);
4456			goto out;
4457		}
4458		name = vname;
4459	}
4460	filename = &smallname[0];
4461	namelen = snprintf(filename, sizeof(smallname), "%s", name);
4462	if (namelen >= sizeof(smallname)) {
4463		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
4464		if (!filename) {
4465			error = ENOMEM;
4466			goto out;
4467		}
4468		snprintf(filename, namelen+1, "%s", name);
4469	}
4470	nfs_node_unlock(np);
4471	bzero(&cn, sizeof(cn));
4472	cn.cn_nameptr = filename;
4473	cn.cn_namelen = namelen;
4474
4475restart:
4476	done = 0;
4477	if ((error = nfs_mount_state_in_use_start(nmp, thd)))
4478		goto out;
4479
4480	if (nofp->nof_rw)
4481		error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE);
4482	if (!error && nofp->nof_w)
4483		error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE);
4484	if (!error && nofp->nof_r)
4485		error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE);
4486
4487	if (nfs_mount_state_in_use_end(nmp, error)) {
4488		if (error == NFSERR_GRACE)
4489			goto restart;
4490		printf("nfs4_reopen: RPC failed, error %d, lost %d, %s\n", error,
4491			(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0, name ? name : "???");
4492		error = 0;
4493		goto out;
4494	}
4495	done = 1;
4496out:
4497	if (error && (error != EINTR) && (error != ERESTART))
4498		nfs_revoke_open_state_for_node(np);
4499	lck_mtx_lock(&nofp->nof_lock);
4500	nofp->nof_flags &= ~NFS_OPEN_FILE_REOPENING;
4501	if (done)
4502		nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
4503	else if (error)
4504		printf("nfs4_reopen: failed, error %d, lost %d, %s\n", error,
4505			(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0, name ? name : "???");
4506	lck_mtx_unlock(&nofp->nof_lock);
4507	if (filename && (filename != &smallname[0]))
4508		FREE(filename, M_TEMP);
4509	if (vname)
4510		vnode_putname(vname);
4511	if (dvp != NULLVP)
4512		vnode_put(dvp);
4513	return (error);
4514}
4515
4516/*
4517 * Send a normal OPEN RPC to open/create a file.
4518 */
4519int
4520nfs4_open_rpc(
4521	struct nfs_open_file *nofp,
4522	vfs_context_t ctx,
4523	struct componentname *cnp,
4524	struct vnode_attr *vap,
4525	vnode_t dvp,
4526	vnode_t *vpp,
4527	int create,
4528	int share_access,
4529	int share_deny)
4530{
4531	return (nfs4_open_rpc_internal(nofp, ctx, vfs_context_thread(ctx), vfs_context_ucred(ctx),
4532					cnp, vap, dvp, vpp, create, share_access, share_deny));
4533}
4534
4535/*
4536 * Send an OPEN RPC to reopen a file.
4537 */
4538int
4539nfs4_open_reopen_rpc(
4540	struct nfs_open_file *nofp,
4541	thread_t thd,
4542	kauth_cred_t cred,
4543	struct componentname *cnp,
4544	vnode_t dvp,
4545	vnode_t *vpp,
4546	int share_access,
4547	int share_deny)
4548{
4549	return (nfs4_open_rpc_internal(nofp, NULL, thd, cred, cnp, NULL, dvp, vpp, NFS_OPEN_NOCREATE, share_access, share_deny));
4550}
4551
4552/*
4553 * Send an OPEN_CONFIRM RPC to confirm an OPEN.
4554 */
4555int
4556nfs4_open_confirm_rpc(
4557	struct nfsmount *nmp,
4558	nfsnode_t dnp,
4559	u_char *fhp,
4560	int fhlen,
4561	struct nfs_open_owner *noop,
4562	nfs_stateid *sid,
4563	thread_t thd,
4564	kauth_cred_t cred,
4565	struct nfs_vattr *nvap,
4566	uint64_t *xidp)
4567{
4568	struct nfsm_chain nmreq, nmrep;
4569	int error = 0, status, numops;
4570	struct nfsreq_secinfo_args si;
4571
4572	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
4573	nfsm_chain_null(&nmreq);
4574	nfsm_chain_null(&nmrep);
4575
4576	// PUTFH, OPEN_CONFIRM, GETATTR
4577	numops = 3;
4578	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
4579	nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops);
4580	numops--;
4581	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
4582	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
4583	numops--;
4584	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM);
4585	nfsm_chain_add_stateid(error, &nmreq, sid);
4586	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
4587	numops--;
4588	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4589	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
4590	nfsm_chain_build_done(error, &nmreq);
4591	nfsm_assert(error, (numops == 0), EPROTO);
4592	nfsmout_if(error);
4593	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, R_NOINTR, &nmrep, xidp, &status);
4594
4595	nfsm_chain_skip_tag(error, &nmrep);
4596	nfsm_chain_get_32(error, &nmrep, numops);
4597	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
4598	nfsmout_if(error);
4599	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM);
4600	nfs_owner_seqid_increment(noop, NULL, error);
4601	nfsm_chain_get_stateid(error, &nmrep, sid);
4602	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4603	nfsmout_if(error);
4604	error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
4605nfsmout:
4606	nfsm_chain_cleanup(&nmreq);
4607	nfsm_chain_cleanup(&nmrep);
4608	return (error);
4609}
4610
4611/*
4612 * common OPEN RPC code
4613 *
4614 * If create is set, ctx must be passed in.
4615 * Returns a node on success if no node passed in.
4616 */
4617int
4618nfs4_open_rpc_internal(
4619	struct nfs_open_file *nofp,
4620	vfs_context_t ctx,
4621	thread_t thd,
4622	kauth_cred_t cred,
4623	struct componentname *cnp,
4624	struct vnode_attr *vap,
4625	vnode_t dvp,
4626	vnode_t *vpp,
4627	int create,
4628	int share_access,
4629	int share_deny)
4630{
4631	struct nfsmount *nmp;
4632	struct nfs_open_owner *noop = nofp->nof_owner;
4633	struct nfs_vattr nvattr;
4634	int error = 0, open_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status;
4635	int nfsvers, namedattrs, numops, exclusive = 0, gotuid, gotgid;
4636	u_int64_t xid, savedxid = 0;
4637	nfsnode_t dnp = VTONFS(dvp);
4638	nfsnode_t np, newnp = NULL;
4639	vnode_t newvp = NULL;
4640	struct nfsm_chain nmreq, nmrep;
4641	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
4642	uint32_t rflags, delegation, recall;
4643	struct nfs_stateid stateid, dstateid, *sid;
4644	fhandle_t fh;
4645	struct nfsreq rq, *req = &rq;
4646	struct nfs_dulookup dul;
4647	char sbuf[64], *s;
4648	uint32_t ace_type, ace_flags, ace_mask, len, slen;
4649	struct kauth_ace ace;
4650	struct nfsreq_secinfo_args si;
4651
4652	if (create && !ctx)
4653		return (EINVAL);
4654
4655	nmp = VTONMP(dvp);
4656	if (!nmp)
4657		return (ENXIO);
4658	nfsvers = nmp->nm_vers;
4659	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
4660	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
4661		return (EINVAL);
4662
4663	np = *vpp ? VTONFS(*vpp) : NULL;
4664	if (create && vap) {
4665		exclusive = (vap->va_vaflags & VA_EXCLUSIVE);
4666		nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
4667		gotuid = VATTR_IS_ACTIVE(vap, va_uid);
4668		gotgid = VATTR_IS_ACTIVE(vap, va_gid);
4669		if (exclusive && (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time)))
4670			vap->va_vaflags |= VA_UTIMES_NULL;
4671	} else {
4672		exclusive = gotuid = gotgid = 0;
4673	}
4674	if (nofp) {
4675		sid = &nofp->nof_stateid;
4676	} else {
4677		stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0;
4678		sid = &stateid;
4679	}
4680
4681	if ((error = nfs_open_owner_set_busy(noop, thd)))
4682		return (error);
4683again:
4684	rflags = delegation = recall = 0;
4685	ace.ace_flags = 0;
4686	s = sbuf;
4687	slen = sizeof(sbuf);
4688	NVATTR_INIT(&nvattr);
4689	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, cnp->cn_nameptr, cnp->cn_namelen);
4690
4691	nfsm_chain_null(&nmreq);
4692	nfsm_chain_null(&nmrep);
4693
4694	// PUTFH, SAVEFH, OPEN(CREATE?), GETATTR(FH), RESTOREFH, GETATTR
4695	numops = 6;
4696	nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen);
4697	nfsm_chain_add_compound_header(error, &nmreq, create ? "create" : "open", numops);
4698	numops--;
4699	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
4700	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4701	numops--;
4702	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
4703	numops--;
4704	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
4705	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
4706	nfsm_chain_add_32(error, &nmreq, share_access);
4707	nfsm_chain_add_32(error, &nmreq, share_deny);
4708	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
4709	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
4710	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred));
4711	nfsm_chain_add_32(error, &nmreq, create);
4712	if (create) {
4713		if (exclusive) {
4714			static uint32_t create_verf; // XXX need a better verifier
4715			create_verf++;
4716			nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
4717			/* insert 64 bit verifier */
4718			nfsm_chain_add_32(error, &nmreq, create_verf);
4719			nfsm_chain_add_32(error, &nmreq, create_verf);
4720		} else {
4721			nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
4722			nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
4723		}
4724	}
4725	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL);
4726	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
4727	numops--;
4728	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4729	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
4730	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
4731	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
4732	numops--;
4733	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
4734	numops--;
4735	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
4736	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
4737	nfsm_chain_build_done(error, &nmreq);
4738	nfsm_assert(error, (numops == 0), EPROTO);
4739	if (!error)
4740		error = busyerror = nfs_node_set_busy(dnp, thd);
4741	nfsmout_if(error);
4742
4743	if (create && !namedattrs)
4744		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
4745
4746	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, R_NOINTR, NULL, &req);
4747	if (!error) {
4748		if (create && !namedattrs)
4749			nfs_dulookup_start(&dul, dnp, ctx);
4750		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
4751		savedxid = xid;
4752	}
4753
4754	if (create && !namedattrs)
4755		nfs_dulookup_finish(&dul, dnp, ctx);
4756
4757	if ((lockerror = nfs_node_lock(dnp)))
4758		error = lockerror;
4759	nfsm_chain_skip_tag(error, &nmrep);
4760	nfsm_chain_get_32(error, &nmrep, numops);
4761	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
4762	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
4763	nfsmout_if(error);
4764	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
4765	nfs_owner_seqid_increment(noop, NULL, error);
4766	nfsm_chain_get_stateid(error, &nmrep, sid);
4767	nfsm_chain_check_change_info(error, &nmrep, dnp);
4768	nfsm_chain_get_32(error, &nmrep, rflags);
4769	bmlen = NFS_ATTR_BITMAP_LEN;
4770	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
4771	nfsm_chain_get_32(error, &nmrep, delegation);
4772	if (!error)
4773		switch (delegation) {
4774		case NFS_OPEN_DELEGATE_NONE:
4775			break;
4776		case NFS_OPEN_DELEGATE_READ:
4777		case NFS_OPEN_DELEGATE_WRITE:
4778			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
4779			nfsm_chain_get_32(error, &nmrep, recall);
4780			if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
4781				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
4782			/* if we have any trouble accepting the ACE, just invalidate it */
4783			ace_type = ace_flags = ace_mask = len = 0;
4784			nfsm_chain_get_32(error, &nmrep, ace_type);
4785			nfsm_chain_get_32(error, &nmrep, ace_flags);
4786			nfsm_chain_get_32(error, &nmrep, ace_mask);
4787			nfsm_chain_get_32(error, &nmrep, len);
4788			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
4789			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
4790			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
4791			if (!error && (len >= slen)) {
4792				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
4793				if (s)
4794					slen = len+1;
4795				else
4796					ace.ace_flags = 0;
4797			}
4798			if (s)
4799				nfsm_chain_get_opaque(error, &nmrep, len, s);
4800			else
4801				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
4802			if (!error && s) {
4803				s[len] = '\0';
4804				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
4805					ace.ace_flags = 0;
4806			}
4807			if (error || !s)
4808				ace.ace_flags = 0;
4809			if (s && (s != sbuf))
4810				FREE(s, M_TEMP);
4811			break;
4812		default:
4813			error = EBADRPC;
4814			break;
4815		}
4816	/* At this point if we have no error, the object was created/opened. */
4817	open_error = error;
4818	nfsmout_if(error);
4819	if (create && vap && !exclusive)
4820		nfs_vattr_set_supported(bitmap, vap);
4821	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4822	nfsmout_if(error);
4823	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
4824	nfsmout_if(error);
4825	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
4826		printf("nfs: open/create didn't return filehandle? %s\n", cnp->cn_nameptr);
4827		error = EBADRPC;
4828		goto nfsmout;
4829	}
4830	if (!create && np && !NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
4831		// XXX for the open case, what if fh doesn't match the vnode we think we're opening?
4832		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
4833		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
4834			NP(np, "nfs4_open_rpc: warning: file handle mismatch");
4835	}
4836	/* directory attributes: if we don't get them, make sure to invalidate */
4837	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
4838	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
4839	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
4840	if (error)
4841		NATTRINVALIDATE(dnp);
4842	nfsmout_if(error);
4843
4844	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
4845		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
4846
4847	if (rflags & NFS_OPEN_RESULT_CONFIRM) {
4848		nfs_node_unlock(dnp);
4849		lockerror = ENOENT;
4850		NVATTR_CLEANUP(&nvattr);
4851		error = nfs4_open_confirm_rpc(nmp, dnp, fh.fh_data, fh.fh_len, noop, sid, thd, cred, &nvattr, &xid);
4852		nfsmout_if(error);
4853		savedxid = xid;
4854		if ((lockerror = nfs_node_lock(dnp)))
4855			error = lockerror;
4856	}
4857
4858nfsmout:
4859	nfsm_chain_cleanup(&nmreq);
4860	nfsm_chain_cleanup(&nmrep);
4861
4862	if (!lockerror && create) {
4863		if (!open_error && (dnp->n_flag & NNEGNCENTRIES)) {
4864			dnp->n_flag &= ~NNEGNCENTRIES;
4865			cache_purge_negatives(dvp);
4866		}
4867		dnp->n_flag |= NMODIFIED;
4868		nfs_node_unlock(dnp);
4869		lockerror = ENOENT;
4870		nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
4871	}
4872	if (!lockerror)
4873		nfs_node_unlock(dnp);
4874	if (!error && !np && fh.fh_len) {
4875		/* create the vnode with the filehandle and attributes */
4876		xid = savedxid;
4877		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &newnp);
4878		if (!error)
4879			newvp = NFSTOV(newnp);
4880	}
4881	NVATTR_CLEANUP(&nvattr);
4882	if (!busyerror)
4883		nfs_node_clear_busy(dnp);
4884	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
4885		if (!np)
4886			np = newnp;
4887		if (!error && np && !recall) {
4888			/* stuff the delegation state in the node */
4889			lck_mtx_lock(&np->n_openlock);
4890			np->n_openflags &= ~N_DELEG_MASK;
4891			np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
4892			np->n_dstateid = dstateid;
4893			np->n_dace = ace;
4894			if (np->n_dlink.tqe_next == NFSNOLIST) {
4895				lck_mtx_lock(&nmp->nm_lock);
4896				if (np->n_dlink.tqe_next == NFSNOLIST)
4897					TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
4898				lck_mtx_unlock(&nmp->nm_lock);
4899			}
4900			lck_mtx_unlock(&np->n_openlock);
4901		} else {
4902			/* give the delegation back */
4903			if (np) {
4904				if (NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
4905					/* update delegation state and return it */
4906					lck_mtx_lock(&np->n_openlock);
4907					np->n_openflags &= ~N_DELEG_MASK;
4908					np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
4909					np->n_dstateid = dstateid;
4910					np->n_dace = ace;
4911					if (np->n_dlink.tqe_next == NFSNOLIST) {
4912						lck_mtx_lock(&nmp->nm_lock);
4913						if (np->n_dlink.tqe_next == NFSNOLIST)
4914							TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
4915						lck_mtx_unlock(&nmp->nm_lock);
4916					}
4917					lck_mtx_unlock(&np->n_openlock);
4918					/* don't need to send a separate delegreturn for fh */
4919					fh.fh_len = 0;
4920				}
4921				/* return np's current delegation */
4922				nfs4_delegation_return(np, 0, thd, cred);
4923			}
4924			if (fh.fh_len) /* return fh's delegation if it wasn't for np */
4925				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, 0, thd, cred);
4926		}
4927	}
4928	if (error) {
4929		if (exclusive && (error == NFSERR_NOTSUPP)) {
4930			exclusive = 0;
4931			goto again;
4932		}
4933		if (newvp) {
4934			nfs_node_unlock(newnp);
4935			vnode_put(newvp);
4936		}
4937	} else if (create) {
4938		nfs_node_unlock(newnp);
4939		if (exclusive) {
4940			error = nfs4_setattr_rpc(newnp, vap, ctx);
4941			if (error && (gotuid || gotgid)) {
4942				/* it's possible the server didn't like our attempt to set IDs. */
4943				/* so, let's try it again without those */
4944				VATTR_CLEAR_ACTIVE(vap, va_uid);
4945				VATTR_CLEAR_ACTIVE(vap, va_gid);
4946				error = nfs4_setattr_rpc(newnp, vap, ctx);
4947			}
4948		}
4949		if (error)
4950			vnode_put(newvp);
4951		else
4952			*vpp = newvp;
4953	}
4954	nfs_open_owner_clear_busy(noop);
4955	return (error);
4956}
4957
4958
4959/*
4960 * Send an OPEN RPC to claim a delegated open for a file
4961 */
4962int
4963nfs4_claim_delegated_open_rpc(
4964	struct nfs_open_file *nofp,
4965	int share_access,
4966	int share_deny,
4967	int flags)
4968{
4969	struct nfsmount *nmp;
4970	struct nfs_open_owner *noop = nofp->nof_owner;
4971	struct nfs_vattr nvattr;
4972	int error = 0, lockerror = ENOENT, status;
4973	int nfsvers, numops;
4974	u_int64_t xid;
4975	nfsnode_t np = nofp->nof_np;
4976	struct nfsm_chain nmreq, nmrep;
4977	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
4978	uint32_t rflags = 0, delegation, recall = 0;
4979	fhandle_t fh;
4980	struct nfs_stateid dstateid;
4981	char sbuf[64], *s = sbuf;
4982	uint32_t ace_type, ace_flags, ace_mask, len, slen = sizeof(sbuf);
4983	struct kauth_ace ace;
4984	vnode_t dvp = NULL;
4985	const char *vname = NULL;
4986	const char *name = NULL;
4987	size_t namelen;
4988	char smallname[128];
4989	char *filename = NULL;
4990	struct nfsreq_secinfo_args si;
4991
4992	nmp = NFSTONMP(np);
4993	if (!nmp)
4994		return (ENXIO);
4995	nfsvers = nmp->nm_vers;
4996
4997	nfs_node_lock_force(np);
4998	if ((vnode_vtype(NFSTOV(np)) != VDIR) && np->n_sillyrename) {
4999		/*
5000		 * The node's been sillyrenamed, so we need to use
5001		 * the sillyrename directory/name to do the open.
5002		 */
5003		struct nfs_sillyrename *nsp = np->n_sillyrename;
5004		dvp = NFSTOV(nsp->nsr_dnp);
5005		if ((error = vnode_get(dvp))) {
5006			nfs_node_unlock(np);
5007			goto out;
5008		}
5009		name = nsp->nsr_name;
5010	} else {
5011		/*
5012		 * [sigh] We can't trust VFS to get the parent right for named
5013		 * attribute nodes.  (It likes to reparent the nodes after we've
5014		 * created them.)  Luckily we can probably get the right parent
5015		 * from the n_parent we have stashed away.
5016		 */
5017		if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
5018		    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
5019			dvp = NULL;
5020		if (!dvp)
5021			dvp = vnode_getparent(NFSTOV(np));
5022		vname = vnode_getname(NFSTOV(np));
5023		if (!dvp || !vname) {
5024			if (!error)
5025				error = EIO;
5026			nfs_node_unlock(np);
5027			goto out;
5028		}
5029		name = vname;
5030	}
5031	filename = &smallname[0];
5032	namelen = snprintf(filename, sizeof(smallname), "%s", name);
5033	if (namelen >= sizeof(smallname)) {
5034		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
5035		if (!filename) {
5036			error = ENOMEM;
5037			goto out;
5038		}
5039		snprintf(filename, namelen+1, "%s", name);
5040	}
5041	nfs_node_unlock(np);
5042
5043	if ((error = nfs_open_owner_set_busy(noop, NULL)))
5044		return (error);
5045
5046	NVATTR_INIT(&nvattr);
5047	delegation = NFS_OPEN_DELEGATE_NONE;
5048	dstateid = np->n_dstateid;
5049	NFSREQ_SECINFO_SET(&si, VTONFS(dvp), NULL, 0, filename, namelen);
5050
5051	nfsm_chain_null(&nmreq);
5052	nfsm_chain_null(&nmrep);
5053
5054	// PUTFH, OPEN, GETATTR(FH)
5055	numops = 3;
5056	nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED);
5057	nfsm_chain_add_compound_header(error, &nmreq, "open_claim_d", numops);
5058	numops--;
5059	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
5060	nfsm_chain_add_fh(error, &nmreq, nfsvers, VTONFS(dvp)->n_fhp, VTONFS(dvp)->n_fhsize);
5061	numops--;
5062	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
5063	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
5064	nfsm_chain_add_32(error, &nmreq, share_access);
5065	nfsm_chain_add_32(error, &nmreq, share_deny);
5066	// open owner: clientid + uid
5067	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid
5068	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
5069	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner
5070	// openflag4
5071	nfsm_chain_add_32(error, &nmreq, NFS_OPEN_NOCREATE);
5072	// open_claim4
5073	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_DELEGATE_CUR);
5074	nfsm_chain_add_stateid(error, &nmreq, &np->n_dstateid);
5075	nfsm_chain_add_name(error, &nmreq, filename, namelen, nmp);
5076	numops--;
5077	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
5078	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
5079	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
5080	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
5081	nfsm_chain_build_done(error, &nmreq);
5082	nfsm_assert(error, (numops == 0), EPROTO);
5083	nfsmout_if(error);
5084
5085	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(),
5086			noop->noo_cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);
5087
5088	if ((lockerror = nfs_node_lock(np)))
5089		error = lockerror;
5090	nfsm_chain_skip_tag(error, &nmrep);
5091	nfsm_chain_get_32(error, &nmrep, numops);
5092	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
5093	nfsmout_if(error);
5094	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
5095	nfs_owner_seqid_increment(noop, NULL, error);
5096	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
5097	nfsm_chain_check_change_info(error, &nmrep, np);
5098	nfsm_chain_get_32(error, &nmrep, rflags);
5099	bmlen = NFS_ATTR_BITMAP_LEN;
5100	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
5101	nfsm_chain_get_32(error, &nmrep, delegation);
5102	if (!error)
5103		switch (delegation) {
5104		case NFS_OPEN_DELEGATE_NONE:
5105			// if (!(np->n_openflags & N_DELEG_RETURN)) /* don't warn if delegation is being returned */
5106			// 	printf("nfs: open delegated claim didn't return a delegation %s\n", filename ? filename : "???");
5107			break;
5108		case NFS_OPEN_DELEGATE_READ:
5109		case NFS_OPEN_DELEGATE_WRITE:
5110			if ((((np->n_openflags & N_DELEG_MASK) == N_DELEG_READ) &&
5111			     (delegation == NFS_OPEN_DELEGATE_WRITE)) ||
5112			    (((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) &&
5113			     (delegation == NFS_OPEN_DELEGATE_READ)))
5114				printf("nfs: open delegated claim returned a different delegation type! have %s got %s %s\n",
5115				     ((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) ? "W" : "R",
5116				     (delegation == NFS_OPEN_DELEGATE_WRITE) ? "W" : "R", filename ? filename : "???");
5117			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
5118			nfsm_chain_get_32(error, &nmrep, recall);
5119			if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
5120				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
5121			/* if we have any trouble accepting the ACE, just invalidate it */
5122			ace_type = ace_flags = ace_mask = len = 0;
5123			nfsm_chain_get_32(error, &nmrep, ace_type);
5124			nfsm_chain_get_32(error, &nmrep, ace_flags);
5125			nfsm_chain_get_32(error, &nmrep, ace_mask);
5126			nfsm_chain_get_32(error, &nmrep, len);
5127			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
5128			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
5129			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
5130			if (!error && (len >= slen)) {
5131				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
5132				if (s)
5133					slen = len+1;
5134				else
5135					ace.ace_flags = 0;
5136			}
5137			if (s)
5138				nfsm_chain_get_opaque(error, &nmrep, len, s);
5139			else
5140				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
5141			if (!error && s) {
5142				s[len] = '\0';
5143				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
5144					ace.ace_flags = 0;
5145			}
5146			if (error || !s)
5147				ace.ace_flags = 0;
5148			if (s && (s != sbuf))
5149				FREE(s, M_TEMP);
5150			if (!error) {
5151				/* stuff the latest delegation state in the node */
5152				lck_mtx_lock(&np->n_openlock);
5153				np->n_openflags &= ~N_DELEG_MASK;
5154				np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
5155				np->n_dstateid = dstateid;
5156				np->n_dace = ace;
5157				if (np->n_dlink.tqe_next == NFSNOLIST) {
5158					lck_mtx_lock(&nmp->nm_lock);
5159					if (np->n_dlink.tqe_next == NFSNOLIST)
5160						TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
5161					lck_mtx_unlock(&nmp->nm_lock);
5162				}
5163				lck_mtx_unlock(&np->n_openlock);
5164			}
5165			break;
5166		default:
5167			error = EBADRPC;
5168			break;
5169		}
5170	nfsmout_if(error);
5171	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
5172	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
5173	nfsmout_if(error);
5174	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
5175		printf("nfs: open reclaim didn't return filehandle? %s\n", filename ? filename : "???");
5176		error = EBADRPC;
5177		goto nfsmout;
5178	}
5179	if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
5180		// XXX what if fh doesn't match the vnode we think we're re-opening?
5181		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
5182		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
5183			printf("nfs4_claim_delegated_open_rpc: warning: file handle mismatch %s\n", filename ? filename : "???");
5184	}
5185	error = nfs_loadattrcache(np, &nvattr, &xid, 1);
5186	nfsmout_if(error);
5187	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
5188		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
5189nfsmout:
5190	NVATTR_CLEANUP(&nvattr);
5191	nfsm_chain_cleanup(&nmreq);
5192	nfsm_chain_cleanup(&nmrep);
5193	if (!lockerror)
5194		nfs_node_unlock(np);
5195	nfs_open_owner_clear_busy(noop);
5196	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
5197		if (recall) {
5198			/*
5199			 * We're making a delegated claim.
5200			 * Don't return the delegation here in case we have more to claim.
5201			 * Just make sure it's queued up to be returned.
5202			 */
5203			nfs4_delegation_return_enqueue(np);
5204		}
5205	}
5206out:
5207	// if (!error)
5208	// 	printf("nfs: open claim delegated (%d, %d) succeeded for %s\n", share_access, share_deny, filename ? filename : "???");
5209	if (filename && (filename != &smallname[0]))
5210		FREE(filename, M_TEMP);
5211	if (vname)
5212		vnode_putname(vname);
5213	if (dvp != NULLVP)
5214		vnode_put(dvp);
5215	return (error);
5216}
5217
5218/*
5219 * Send an OPEN RPC to reclaim an open file.
5220 */
5221int
5222nfs4_open_reclaim_rpc(
5223	struct nfs_open_file *nofp,
5224	int share_access,
5225	int share_deny)
5226{
5227	struct nfsmount *nmp;
5228	struct nfs_open_owner *noop = nofp->nof_owner;
5229	struct nfs_vattr nvattr;
5230	int error = 0, lockerror = ENOENT, status;
5231	int nfsvers, numops;
5232	u_int64_t xid;
5233	nfsnode_t np = nofp->nof_np;
5234	struct nfsm_chain nmreq, nmrep;
5235	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
5236	uint32_t rflags = 0, delegation, recall = 0;
5237	fhandle_t fh;
5238	struct nfs_stateid dstateid;
5239	char sbuf[64], *s = sbuf;
5240	uint32_t ace_type, ace_flags, ace_mask, len, slen = sizeof(sbuf);
5241	struct kauth_ace ace;
5242	struct nfsreq_secinfo_args si;
5243
5244	nmp = NFSTONMP(np);
5245	if (!nmp)
5246		return (ENXIO);
5247	nfsvers = nmp->nm_vers;
5248
5249	if ((error = nfs_open_owner_set_busy(noop, NULL)))
5250		return (error);
5251
5252	NVATTR_INIT(&nvattr);
5253	delegation = NFS_OPEN_DELEGATE_NONE;
5254	dstateid = np->n_dstateid;
5255	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
5256
5257	nfsm_chain_null(&nmreq);
5258	nfsm_chain_null(&nmrep);
5259
5260	// PUTFH, OPEN, GETATTR(FH)
5261	numops = 3;
5262	nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED);
5263	nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", numops);
5264	numops--;
5265	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
5266	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
5267	numops--;
5268	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
5269	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
5270	nfsm_chain_add_32(error, &nmreq, share_access);
5271	nfsm_chain_add_32(error, &nmreq, share_deny);
5272	// open owner: clientid + uid
5273	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid
5274	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
5275	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner
5276	// openflag4
5277	nfsm_chain_add_32(error, &nmreq, NFS_OPEN_NOCREATE);
5278	// open_claim4
5279	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_PREVIOUS);
5280	delegation = (np->n_openflags & N_DELEG_READ) ? NFS_OPEN_DELEGATE_READ :
5281			(np->n_openflags & N_DELEG_WRITE) ? NFS_OPEN_DELEGATE_WRITE :
5282			NFS_OPEN_DELEGATE_NONE;
5283	nfsm_chain_add_32(error, &nmreq, delegation);
5284	delegation = NFS_OPEN_DELEGATE_NONE;
5285	numops--;
5286	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
5287	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
5288	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
5289	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
5290	nfsm_chain_build_done(error, &nmreq);
5291	nfsm_assert(error, (numops == 0), EPROTO);
5292	nfsmout_if(error);
5293
5294	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(),
5295			noop->noo_cred, &si, R_RECOVER|R_NOINTR, &nmrep, &xid, &status);
5296
5297	if ((lockerror = nfs_node_lock(np)))
5298		error = lockerror;
5299	nfsm_chain_skip_tag(error, &nmrep);
5300	nfsm_chain_get_32(error, &nmrep, numops);
5301	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
5302	nfsmout_if(error);
5303	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
5304	nfs_owner_seqid_increment(noop, NULL, error);
5305	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
5306	nfsm_chain_check_change_info(error, &nmrep, np);
5307	nfsm_chain_get_32(error, &nmrep, rflags);
5308	bmlen = NFS_ATTR_BITMAP_LEN;
5309	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
5310	nfsm_chain_get_32(error, &nmrep, delegation);
5311	if (!error)
5312		switch (delegation) {
5313		case NFS_OPEN_DELEGATE_NONE:
5314			if (np->n_openflags & N_DELEG_MASK) {
5315				/*
5316				 * Hey!  We were supposed to get our delegation back even
5317				 * if it was getting immediately recalled.  Bad server!
5318				 *
5319				 * Just try to return the existing delegation.
5320				 */
5321				// NP(np, "nfs: open reclaim didn't return delegation?");
5322				delegation = (np->n_openflags & N_DELEG_WRITE) ? NFS_OPEN_DELEGATE_WRITE : NFS_OPEN_DELEGATE_READ;
5323				recall = 1;
5324			}
5325			break;
5326		case NFS_OPEN_DELEGATE_READ:
5327		case NFS_OPEN_DELEGATE_WRITE:
5328			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
5329			nfsm_chain_get_32(error, &nmrep, recall);
5330			if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
5331				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
5332			/* if we have any trouble accepting the ACE, just invalidate it */
5333			ace_type = ace_flags = ace_mask = len = 0;
5334			nfsm_chain_get_32(error, &nmrep, ace_type);
5335			nfsm_chain_get_32(error, &nmrep, ace_flags);
5336			nfsm_chain_get_32(error, &nmrep, ace_mask);
5337			nfsm_chain_get_32(error, &nmrep, len);
5338			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
5339			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
5340			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
5341			if (!error && (len >= slen)) {
5342				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
5343				if (s)
5344					slen = len+1;
5345				else
5346					ace.ace_flags = 0;
5347			}
5348			if (s)
5349				nfsm_chain_get_opaque(error, &nmrep, len, s);
5350			else
5351				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
5352			if (!error && s) {
5353				s[len] = '\0';
5354				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
5355					ace.ace_flags = 0;
5356			}
5357			if (error || !s)
5358				ace.ace_flags = 0;
5359			if (s && (s != sbuf))
5360				FREE(s, M_TEMP);
5361			if (!error) {
5362				/* stuff the delegation state in the node */
5363				lck_mtx_lock(&np->n_openlock);
5364				np->n_openflags &= ~N_DELEG_MASK;
5365				np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
5366				np->n_dstateid = dstateid;
5367				np->n_dace = ace;
5368				if (np->n_dlink.tqe_next == NFSNOLIST) {
5369					lck_mtx_lock(&nmp->nm_lock);
5370					if (np->n_dlink.tqe_next == NFSNOLIST)
5371						TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
5372					lck_mtx_unlock(&nmp->nm_lock);
5373				}
5374				lck_mtx_unlock(&np->n_openlock);
5375			}
5376			break;
5377		default:
5378			error = EBADRPC;
5379			break;
5380		}
5381	nfsmout_if(error);
5382	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
5383	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
5384	nfsmout_if(error);
5385	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
5386		NP(np, "nfs: open reclaim didn't return filehandle?");
5387		error = EBADRPC;
5388		goto nfsmout;
5389	}
5390	if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
5391		// XXX what if fh doesn't match the vnode we think we're re-opening?
5392		// That should be pretty hard in this case, given that we are doing
5393		// the open reclaim using the file handle (and not a dir/name pair).
5394		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
5395		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
5396			NP(np, "nfs4_open_reclaim_rpc: warning: file handle mismatch");
5397	}
5398	error = nfs_loadattrcache(np, &nvattr, &xid, 1);
5399	nfsmout_if(error);
5400	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
5401		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
5402nfsmout:
5403	// if (!error)
5404	// 	NP(np, "nfs: open reclaim (%d, %d) succeeded", share_access, share_deny);
5405	NVATTR_CLEANUP(&nvattr);
5406	nfsm_chain_cleanup(&nmreq);
5407	nfsm_chain_cleanup(&nmrep);
5408	if (!lockerror)
5409		nfs_node_unlock(np);
5410	nfs_open_owner_clear_busy(noop);
5411	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
5412		if (recall)
5413			nfs4_delegation_return_enqueue(np);
5414	}
5415	return (error);
5416}
5417
/*
 * Send an OPEN_DOWNGRADE RPC, reducing the share access/deny modes of the
 * open file on the server to the combination currently recorded in the
 * open file structure (nof_access/nof_deny).  The open stateid in nofp is
 * updated from the reply, and the node's cached attributes are refreshed.
 */
int
nfs4_open_downgrade_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	vfs_context_t ctx)
{
	struct nfs_open_owner *noop = nofp->nof_owner;
	struct nfsmount *nmp;
	int error, lockerror = ENOENT, status, nfsvers, numops;
	struct nfsm_chain nmreq, nmrep;
	u_int64_t xid;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;

	/* serialize on the open owner; its seqid must advance in order */
	if ((error = nfs_open_owner_set_busy(noop, NULL)))
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, OPEN_DOWNGRADE, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_DOWNGRADE);
	nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid);
	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
	nfsm_chain_add_32(error, &nmreq, nofp->nof_access);
	nfsm_chain_add_32(error, &nmreq, nofp->nof_deny);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
			vfs_context_thread(ctx), vfs_context_ucred(ctx),
			&si, R_NOINTR, &nmrep, &xid, &status);

	/* parse the reply: new stateid, then refreshed attributes */
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_DOWNGRADE);
	nfs_owner_seqid_increment(noop, NULL, error);
	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_open_owner_clear_busy(noop);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
5485
/*
 * Send a CLOSE RPC to release the open file's state on the server.
 * The open stateid in nofp is updated from the reply, and the node's
 * cached attributes are refreshed.
 */
int
nfs4_close_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	thread_t thd,
	kauth_cred_t cred,
	int flags)
{
	struct nfs_open_owner *noop = nofp->nof_owner;
	struct nfsmount *nmp;
	int error, lockerror = ENOENT, status, nfsvers, numops;
	struct nfsm_chain nmreq, nmrep;
	u_int64_t xid;
	struct nfsreq_secinfo_args si;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;

	/* serialize on the open owner; its seqid must advance in order */
	if ((error = nfs_open_owner_set_busy(noop, NULL)))
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, CLOSE, GETATTR
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "close", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_CLOSE);
	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
	nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);

	/* parse the reply: closed-file stateid, then refreshed attributes */
	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_CLOSE);
	nfs_owner_seqid_increment(noop, NULL, error);
	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_open_owner_clear_busy(noop);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
5551
5552
5553/*
5554 * Claim the delegated open combinations this open file holds.
5555 */
5556int
5557nfs4_claim_delegated_state_for_open_file(struct nfs_open_file *nofp, int flags)
5558{
5559	struct nfs_open_owner *noop = nofp->nof_owner;
5560	struct nfs_lock_owner *nlop;
5561	struct nfs_file_lock *nflp, *nextnflp;
5562	struct nfsmount *nmp;
5563	int error = 0, reopen = 0;
5564
5565	if (nofp->nof_d_rw_drw) {
5566		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_BOTH, flags);
5567		if (!error) {
5568			lck_mtx_lock(&nofp->nof_lock);
5569			nofp->nof_rw_drw += nofp->nof_d_rw_drw;
5570			nofp->nof_d_rw_drw = 0;
5571			lck_mtx_unlock(&nofp->nof_lock);
5572		}
5573	}
5574	if (!error && nofp->nof_d_w_drw) {
5575		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_BOTH, flags);
5576		if (!error) {
5577			lck_mtx_lock(&nofp->nof_lock);
5578			nofp->nof_w_drw += nofp->nof_d_w_drw;
5579			nofp->nof_d_w_drw = 0;
5580			lck_mtx_unlock(&nofp->nof_lock);
5581		}
5582	}
5583	if (!error && nofp->nof_d_r_drw) {
5584		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_BOTH, flags);
5585		if (!error) {
5586			lck_mtx_lock(&nofp->nof_lock);
5587			nofp->nof_r_drw += nofp->nof_d_r_drw;
5588			nofp->nof_d_r_drw = 0;
5589			lck_mtx_unlock(&nofp->nof_lock);
5590		}
5591	}
5592	if (!error && nofp->nof_d_rw_dw) {
5593		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_WRITE, flags);
5594		if (!error) {
5595			lck_mtx_lock(&nofp->nof_lock);
5596			nofp->nof_rw_dw += nofp->nof_d_rw_dw;
5597			nofp->nof_d_rw_dw = 0;
5598			lck_mtx_unlock(&nofp->nof_lock);
5599		}
5600	}
5601	if (!error && nofp->nof_d_w_dw) {
5602		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_WRITE, flags);
5603		if (!error) {
5604			lck_mtx_lock(&nofp->nof_lock);
5605			nofp->nof_w_dw += nofp->nof_d_w_dw;
5606			nofp->nof_d_w_dw = 0;
5607			lck_mtx_unlock(&nofp->nof_lock);
5608		}
5609	}
5610	if (!error && nofp->nof_d_r_dw) {
5611		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_WRITE, flags);
5612		if (!error) {
5613			lck_mtx_lock(&nofp->nof_lock);
5614			nofp->nof_r_dw += nofp->nof_d_r_dw;
5615			nofp->nof_d_r_dw = 0;
5616			lck_mtx_unlock(&nofp->nof_lock);
5617		}
5618	}
5619	/* non-deny-mode opens may be reopened if no locks are held */
5620	if (!error && nofp->nof_d_rw) {
5621		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, flags);
5622		/* for some errors, we should just try reopening the file */
5623		if (nfs_mount_state_error_delegation_lost(error))
5624			reopen = error;
5625		if (!error || reopen) {
5626			lck_mtx_lock(&nofp->nof_lock);
5627			nofp->nof_rw += nofp->nof_d_rw;
5628			nofp->nof_d_rw = 0;
5629			lck_mtx_unlock(&nofp->nof_lock);
5630		}
5631	}
5632	/* if we've already set reopen, we should move these other two opens from delegated to not delegated */
5633	if ((!error || reopen) && nofp->nof_d_w) {
5634		if (!error) {
5635			error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, flags);
5636			/* for some errors, we should just try reopening the file */
5637			if (nfs_mount_state_error_delegation_lost(error))
5638				reopen = error;
5639		}
5640		if (!error || reopen) {
5641			lck_mtx_lock(&nofp->nof_lock);
5642			nofp->nof_w += nofp->nof_d_w;
5643			nofp->nof_d_w = 0;
5644			lck_mtx_unlock(&nofp->nof_lock);
5645		}
5646	}
5647	if ((!error || reopen) && nofp->nof_d_r) {
5648		if (!error) {
5649			error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, flags);
5650			/* for some errors, we should just try reopening the file */
5651			if (nfs_mount_state_error_delegation_lost(error))
5652				reopen = error;
5653		}
5654		if (!error || reopen) {
5655			lck_mtx_lock(&nofp->nof_lock);
5656			nofp->nof_r += nofp->nof_d_r;
5657			nofp->nof_d_r = 0;
5658			lck_mtx_unlock(&nofp->nof_lock);
5659		}
5660	}
5661
5662	if (reopen) {
5663		/*
5664		 * Any problems with the delegation probably indicates that we
5665		 * should review/return all of our current delegation state.
5666		 */
5667		if ((nmp = NFSTONMP(nofp->nof_np))) {
5668			nfs4_delegation_return_enqueue(nofp->nof_np);
5669			lck_mtx_lock(&nmp->nm_lock);
5670			nfs_need_recover(nmp, NFSERR_EXPIRED);
5671			lck_mtx_unlock(&nmp->nm_lock);
5672		}
5673		if (reopen && (nfs_check_for_locks(noop, nofp) == 0)) {
5674			/* just reopen the file on next access */
5675			NP(nofp->nof_np, "nfs4_claim_delegated_state_for_open_file: %d, need reopen, %d",
5676				reopen, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5677			lck_mtx_lock(&nofp->nof_lock);
5678			nofp->nof_flags |= NFS_OPEN_FILE_REOPEN;
5679			lck_mtx_unlock(&nofp->nof_lock);
5680			return (0);
5681		}
5682		if (reopen)
5683			NP(nofp->nof_np, "nfs4_claim_delegated_state_for_open_file: %d, locks prevent reopen, %d",
5684				reopen, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5685	}
5686
5687	if (!error && ((nmp = NFSTONMP(nofp->nof_np)))) {
5688		/* claim delegated locks */
5689		TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) {
5690			if (nlop->nlo_open_owner != noop)
5691				continue;
5692			TAILQ_FOREACH_SAFE(nflp, &nlop->nlo_locks, nfl_lolink, nextnflp) {
5693				/* skip dead & blocked lock requests (shouldn't be any in the held lock list) */
5694				if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED))
5695					continue;
5696				/* skip non-delegated locks */
5697				if (!(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED))
5698					continue;
5699				error = nmp->nm_funcs->nf_setlock_rpc(nofp->nof_np, nofp, nflp, 0, flags, current_thread(), noop->noo_cred);
5700				if (error) {
5701					NP(nofp->nof_np, "nfs: delegated lock claim (0x%llx, 0x%llx) failed %d, %d",
5702						nflp->nfl_start, nflp->nfl_end, error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5703					break;
5704				}
5705				// else {
5706				// 	NP(nofp->nof_np, "nfs: delegated lock claim (0x%llx, 0x%llx) succeeded, %d",
5707				// 		nflp->nfl_start, nflp->nfl_end, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5708				// }
5709			}
5710			if (error)
5711				break;
5712		}
5713	}
5714
5715	if (!error)  /* all state claimed successfully! */
5716		return (0);
5717
5718	/* restart if it looks like a problem more than just losing the delegation */
5719	if (!nfs_mount_state_error_delegation_lost(error) &&
5720	    ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error))) {
5721		NP(nofp->nof_np, "nfs delegated lock claim error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5722		if ((error == ETIMEDOUT) && ((nmp = NFSTONMP(nofp->nof_np))))
5723			nfs_need_reconnect(nmp);
5724		return (error);
5725	}
5726
5727	/* delegated state lost (once held but now not claimable) */
5728	NP(nofp->nof_np, "nfs delegated state claim error %d, state lost, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
5729
5730	/*
5731	 * Any problems with the delegation probably indicates that we
5732	 * should review/return all of our current delegation state.
5733	 */
5734	if ((nmp = NFSTONMP(nofp->nof_np))) {
5735		nfs4_delegation_return_enqueue(nofp->nof_np);
5736		lck_mtx_lock(&nmp->nm_lock);
5737		nfs_need_recover(nmp, NFSERR_EXPIRED);
5738		lck_mtx_unlock(&nmp->nm_lock);
5739	}
5740
5741	/* revoke all open file state */
5742	nfs_revoke_open_state_for_node(nofp->nof_np);
5743
5744	return (error);
5745}
5746
5747/*
5748 * Release all open state for the given node.
5749 */
5750void
5751nfs_release_open_state_for_node(nfsnode_t np, int force)
5752{
5753	struct nfsmount *nmp = NFSTONMP(np);
5754	struct nfs_open_file *nofp;
5755	struct nfs_file_lock *nflp, *nextnflp;
5756
5757	/* drop held locks */
5758	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
5759		/* skip dead & blocked lock requests */
5760		if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED))
5761			continue;
5762		/* send an unlock if not a delegated lock */
5763		if (!force && nmp && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED))
5764			nmp->nm_funcs->nf_unlock_rpc(np, nflp->nfl_owner, F_WRLCK, nflp->nfl_start, nflp->nfl_end, R_RECOVER,
5765				NULL, nflp->nfl_owner->nlo_open_owner->noo_cred);
5766		/* kill/remove the lock */
5767		lck_mtx_lock(&np->n_openlock);
5768		nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
5769		lck_mtx_lock(&nflp->nfl_owner->nlo_lock);
5770		TAILQ_REMOVE(&nflp->nfl_owner->nlo_locks, nflp, nfl_lolink);
5771		lck_mtx_unlock(&nflp->nfl_owner->nlo_lock);
5772		if (nflp->nfl_blockcnt) {
5773			/* wake up anyone blocked on this lock */
5774			wakeup(nflp);
5775		} else {
5776			/* remove nflp from lock list and destroy */
5777			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
5778			nfs_file_lock_destroy(nflp);
5779		}
5780		lck_mtx_unlock(&np->n_openlock);
5781	}
5782
5783	lck_mtx_lock(&np->n_openlock);
5784
5785	/* drop all opens */
5786	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
5787		if (nofp->nof_flags & NFS_OPEN_FILE_LOST)
5788			continue;
5789		/* mark open state as lost */
5790		lck_mtx_lock(&nofp->nof_lock);
5791		nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
5792		nofp->nof_flags |= NFS_OPEN_FILE_LOST;
5793
5794		lck_mtx_unlock(&nofp->nof_lock);
5795		if (!force && nmp && (nmp->nm_vers >= NFS_VER4))
5796			nfs4_close_rpc(np, nofp, NULL, nofp->nof_owner->noo_cred, R_RECOVER);
5797	}
5798
5799	lck_mtx_unlock(&np->n_openlock);
5800}
5801
5802/*
5803 * State for a node has been lost, drop it, and revoke the node.
5804 * Attempt to return any state if possible in case the server
5805 * might somehow think we hold it.
5806 */
5807void
5808nfs_revoke_open_state_for_node(nfsnode_t np)
5809{
5810	struct nfsmount *nmp;
5811
5812	/* mark node as needing to be revoked */
5813	nfs_node_lock_force(np);
5814	if (np->n_flag & NREVOKE)  /* already revoked? */
5815	{
5816		NP(np, "nfs_revoke_open_state_for_node(): already revoked");
5817		nfs_node_unlock(np);
5818		return;
5819	}
5820	np->n_flag |= NREVOKE;
5821	nfs_node_unlock(np);
5822
5823	nfs_release_open_state_for_node(np, 0);
5824	NP(np, "nfs: state lost for %p 0x%x", np, np->n_flag);
5825
5826	/* mark mount as needing a revoke scan and have the socket thread do it. */
5827	if ((nmp = NFSTONMP(np))) {
5828		lck_mtx_lock(&nmp->nm_lock);
5829		nmp->nm_state |= NFSSTA_REVOKE;
5830		nfs_mount_sock_thread_wake(nmp);
5831		lck_mtx_unlock(&nmp->nm_lock);
5832	}
5833}
5834
5835/*
5836 * Claim the delegated open combinations that each of this node's open files hold.
5837 */
5838int
5839nfs4_claim_delegated_state_for_node(nfsnode_t np, int flags)
5840{
5841	struct nfs_open_file *nofp;
5842	int error = 0;
5843
5844	lck_mtx_lock(&np->n_openlock);
5845
5846	/* walk the open file list looking for opens with delegated state to claim */
5847restart:
5848	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
5849		if (!nofp->nof_d_rw_drw && !nofp->nof_d_w_drw && !nofp->nof_d_r_drw &&
5850		    !nofp->nof_d_rw_dw && !nofp->nof_d_w_dw && !nofp->nof_d_r_dw &&
5851		    !nofp->nof_d_rw && !nofp->nof_d_w && !nofp->nof_d_r)
5852			continue;
5853		lck_mtx_unlock(&np->n_openlock);
5854		error = nfs4_claim_delegated_state_for_open_file(nofp, flags);
5855		lck_mtx_lock(&np->n_openlock);
5856		if (error)
5857			break;
5858		goto restart;
5859	}
5860
5861	lck_mtx_unlock(&np->n_openlock);
5862
5863	return (error);
5864}
5865
5866/*
5867 * Mark a node as needed to have its delegation returned.
5868 * Queue it up on the delegation return queue.
5869 * Make sure the thread is running.
5870 */
5871void
5872nfs4_delegation_return_enqueue(nfsnode_t np)
5873{
5874	struct nfsmount *nmp;
5875
5876	nmp = NFSTONMP(np);
5877	if (!nmp)
5878		return;
5879
5880	lck_mtx_lock(&np->n_openlock);
5881	np->n_openflags |= N_DELEG_RETURN;
5882	lck_mtx_unlock(&np->n_openlock);
5883
5884	lck_mtx_lock(&nmp->nm_lock);
5885	if (np->n_dreturn.tqe_next == NFSNOLIST)
5886		TAILQ_INSERT_TAIL(&nmp->nm_dreturnq, np, n_dreturn);
5887	nfs_mount_sock_thread_wake(nmp);
5888	lck_mtx_unlock(&nmp->nm_lock);
5889}
5890
5891/*
5892 * return any delegation we may have for the given node
5893 */
5894int
5895nfs4_delegation_return(nfsnode_t np, int flags, thread_t thd, kauth_cred_t cred)
5896{
5897	struct nfsmount *nmp;
5898	fhandle_t fh;
5899	nfs_stateid dstateid;
5900	int error;
5901
5902	nmp = NFSTONMP(np);
5903	if (!nmp)
5904		return (ENXIO);
5905
5906	/* first, make sure the node's marked for delegation return */
5907	lck_mtx_lock(&np->n_openlock);
5908	np->n_openflags |= (N_DELEG_RETURN|N_DELEG_RETURNING);
5909	lck_mtx_unlock(&np->n_openlock);
5910
5911	/* make sure nobody else is using the delegation state */
5912	if ((error = nfs_open_state_set_busy(np, NULL)))
5913		goto out;
5914
5915	/* claim any delegated state */
5916	if ((error = nfs4_claim_delegated_state_for_node(np, flags)))
5917		goto out;
5918
5919	/* return the delegation */
5920	lck_mtx_lock(&np->n_openlock);
5921	dstateid = np->n_dstateid;
5922	fh.fh_len = np->n_fhsize;
5923	bcopy(np->n_fhp, &fh.fh_data, fh.fh_len);
5924	lck_mtx_unlock(&np->n_openlock);
5925	error = nfs4_delegreturn_rpc(NFSTONMP(np), fh.fh_data, fh.fh_len, &dstateid, flags, thd, cred);
5926	/* assume delegation is gone for all errors except ETIMEDOUT, NFSERR_*MOVED */
5927	if ((error != ETIMEDOUT) && (error != NFSERR_MOVED) && (error != NFSERR_LEASE_MOVED)) {
5928		lck_mtx_lock(&np->n_openlock);
5929		np->n_openflags &= ~N_DELEG_MASK;
5930		lck_mtx_lock(&nmp->nm_lock);
5931		if (np->n_dlink.tqe_next != NFSNOLIST) {
5932			TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
5933			np->n_dlink.tqe_next = NFSNOLIST;
5934		}
5935		lck_mtx_unlock(&nmp->nm_lock);
5936		lck_mtx_unlock(&np->n_openlock);
5937	}
5938
5939out:
5940	/* make sure it's no longer on the return queue and clear the return flags */
5941	lck_mtx_lock(&nmp->nm_lock);
5942	if (np->n_dreturn.tqe_next != NFSNOLIST) {
5943		TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
5944		np->n_dreturn.tqe_next = NFSNOLIST;
5945	}
5946	lck_mtx_unlock(&nmp->nm_lock);
5947	lck_mtx_lock(&np->n_openlock);
5948	np->n_openflags &= ~(N_DELEG_RETURN|N_DELEG_RETURNING);
5949	lck_mtx_unlock(&np->n_openlock);
5950
5951	if (error) {
5952		NP(np, "nfs4_delegation_return, error %d", error);
5953		if (error == ETIMEDOUT)
5954			nfs_need_reconnect(nmp);
5955		if (nfs_mount_state_error_should_restart(error)) {
5956			/* make sure recovery happens */
5957			lck_mtx_lock(&nmp->nm_lock);
5958			nfs_need_recover(nmp, nfs_mount_state_error_delegation_lost(error) ? NFSERR_EXPIRED : 0);
5959			lck_mtx_unlock(&nmp->nm_lock);
5960		}
5961	}
5962
5963	nfs_open_state_clear_busy(np);
5964
5965	return (error);
5966}
5967
5968/*
5969 * RPC to return a delegation for a file handle
5970 */
5971int
5972nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_stateid *sid, int flags, thread_t thd, kauth_cred_t cred)
5973{
5974	int error = 0, status, numops;
5975	uint64_t xid;
5976	struct nfsm_chain nmreq, nmrep;
5977	struct nfsreq_secinfo_args si;
5978
5979	NFSREQ_SECINFO_SET(&si, NULL, fhp, fhlen, NULL, 0);
5980	nfsm_chain_null(&nmreq);
5981	nfsm_chain_null(&nmrep);
5982
5983	// PUTFH, DELEGRETURN
5984	numops = 2;
5985	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
5986	nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops);
5987	numops--;
5988	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
5989	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
5990	numops--;
5991	nfsm_chain_add_32(error, &nmreq, NFS_OP_DELEGRETURN);
5992	nfsm_chain_add_stateid(error, &nmreq, sid);
5993	nfsm_chain_build_done(error, &nmreq);
5994	nfsm_assert(error, (numops == 0), EPROTO);
5995	nfsmout_if(error);
5996	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags, &nmrep, &xid, &status);
5997	nfsm_chain_skip_tag(error, &nmrep);
5998	nfsm_chain_get_32(error, &nmrep, numops);
5999	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
6000	nfsm_chain_op_check(error, &nmrep, NFS_OP_DELEGRETURN);
6001nfsmout:
6002	nfsm_chain_cleanup(&nmreq);
6003	nfsm_chain_cleanup(&nmrep);
6004	return (error);
6005}
6006
6007
6008/*
6009 * NFS read call.
6010 * Just call nfs_bioread() to do the work.
6011 *
6012 * Note: the exec code paths have a tendency to call VNOP_READ (and VNOP_MMAP)
6013 * without first calling VNOP_OPEN, so we make sure the file is open here.
6014 */
6015int
6016nfs_vnop_read(
6017	struct vnop_read_args /* {
6018		struct vnodeop_desc *a_desc;
6019		vnode_t a_vp;
6020		struct uio *a_uio;
6021		int a_ioflag;
6022		vfs_context_t a_context;
6023	} */ *ap)
6024{
6025	vnode_t vp = ap->a_vp;
6026	vfs_context_t ctx = ap->a_context;
6027	nfsnode_t np;
6028	struct nfsmount *nmp;
6029	struct nfs_open_owner *noop;
6030	struct nfs_open_file *nofp;
6031	int error;
6032
6033	if (vnode_vtype(ap->a_vp) != VREG)
6034		return (vnode_vtype(vp) == VDIR) ? EISDIR : EPERM;
6035
6036	np = VTONFS(vp);
6037	nmp = NFSTONMP(np);
6038	if (!nmp)
6039		return (ENXIO);
6040	if (np->n_flag & NREVOKE)
6041		return (EIO);
6042
6043	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
6044	if (!noop)
6045		return (ENOMEM);
6046restart:
6047	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
6048	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
6049		NP(np, "nfs_vnop_read: LOST %d", kauth_cred_getuid(noop->noo_cred));
6050		error = EIO;
6051	}
6052	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
6053		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
6054		nofp = NULL;
6055		if (!error)
6056			goto restart;
6057	}
6058	if (error) {
6059		nfs_open_owner_rele(noop);
6060		return (error);
6061	}
6062	if (!nofp->nof_access) {
6063		/* we don't have the file open, so open it for read access */
6064		error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
6065		if (error) {
6066			nfs_open_owner_rele(noop);
6067			return (error);
6068		}
6069		if (np->n_flag & NREVOKE) {
6070			error = EIO;
6071			nfs_mount_state_in_use_end(nmp, 0);
6072			nfs_open_owner_rele(noop);
6073			return (error);
6074		}
6075		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
6076		if (error)
6077			nofp = NULL;
6078		if (!error) {
6079			if (nmp->nm_vers < NFS_VER4) {
6080				/* NFS v2/v3 opens are always allowed - so just add it. */
6081				nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
6082			} else {
6083				error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
6084			}
6085		}
6086		if (!error)
6087			nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
6088		if (nofp)
6089			nfs_open_file_clear_busy(nofp);
6090		if (nfs_mount_state_in_use_end(nmp, error)) {
6091			nofp = NULL;
6092			goto restart;
6093		}
6094	}
6095	nfs_open_owner_rele(noop);
6096	if (error)
6097		return (error);
6098	return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context));
6099}
6100
6101/*
6102 * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
6103 * Files are created using the NFSv4 OPEN RPC.  So we must open the
6104 * file to create it and then close it.
6105 */
6106int
6107nfs4_vnop_create(
6108	struct vnop_create_args /* {
6109		struct vnodeop_desc *a_desc;
6110		vnode_t a_dvp;
6111		vnode_t *a_vpp;
6112		struct componentname *a_cnp;
6113		struct vnode_attr *a_vap;
6114		vfs_context_t a_context;
6115	} */ *ap)
6116{
6117	vfs_context_t ctx = ap->a_context;
6118	struct componentname *cnp = ap->a_cnp;
6119	struct vnode_attr *vap = ap->a_vap;
6120	vnode_t dvp = ap->a_dvp;
6121	vnode_t *vpp = ap->a_vpp;
6122	struct nfsmount *nmp;
6123	nfsnode_t np;
6124	int error = 0, busyerror = 0, accessMode, denyMode;
6125	struct nfs_open_owner *noop = NULL;
6126	struct nfs_open_file *newnofp = NULL, *nofp = NULL;
6127
6128	nmp = VTONMP(dvp);
6129	if (!nmp)
6130		return (ENXIO);
6131
6132	if (vap)
6133		nfs_avoid_needless_id_setting_on_create(VTONFS(dvp), vap, ctx);
6134
6135	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
6136	if (!noop)
6137		return (ENOMEM);
6138
6139restart:
6140	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
6141	if (error) {
6142		nfs_open_owner_rele(noop);
6143		return (error);
6144	}
6145
6146	/* grab a provisional, nodeless open file */
6147	error = nfs_open_file_find(NULL, noop, &newnofp, 0, 0, 1);
6148	if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_LOST)) {
6149		printf("nfs_vnop_create: LOST\n");
6150		error = EIO;
6151	}
6152	if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
6153		/* This shouldn't happen given that this is a new, nodeless nofp */
6154		nfs_mount_state_in_use_end(nmp, 0);
6155		error = nfs4_reopen(newnofp, vfs_context_thread(ctx));
6156		nfs_open_file_destroy(newnofp);
6157		newnofp = NULL;
6158		if (!error)
6159			goto restart;
6160	}
6161	if (!error)
6162		error = nfs_open_file_set_busy(newnofp, vfs_context_thread(ctx));
6163	if (error) {
6164		if (newnofp)
6165			nfs_open_file_destroy(newnofp);
6166		newnofp = NULL;
6167		goto out;
6168	}
6169
6170	/*
6171	 * We're just trying to create the file.
6172	 * We'll create/open it RW, and set NFS_OPEN_FILE_CREATE.
6173	 */
6174	accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
6175	denyMode = NFS_OPEN_SHARE_DENY_NONE;
6176
6177	/* Do the open/create */
6178	error = nfs4_open_rpc(newnofp, ctx, cnp, vap, dvp, vpp, NFS_OPEN_CREATE, accessMode, denyMode);
6179	if ((error == EACCES) && vap && !(vap->va_vaflags & VA_EXCLUSIVE) &&
6180	    VATTR_IS_ACTIVE(vap, va_mode) && !(vap->va_mode & S_IWUSR)) {
6181		/*
6182		 * Hmm... it looks like we may have a situation where the request was
6183		 * retransmitted because we didn't get the first response which successfully
6184		 * created/opened the file and then the second time we were denied the open
6185		 * because the mode the file was created with doesn't allow write access.
6186		 *
6187		 * We'll try to work around this by temporarily updating the mode and
6188		 * retrying the open.
6189		 */
6190		struct vnode_attr vattr;
6191
6192		/* first make sure it's there */
6193		int error2 = nfs_lookitup(VTONFS(dvp), cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
6194		if (!error2 && np) {
6195			nfs_node_unlock(np);
6196			*vpp = NFSTOV(np);
6197			if (vnode_vtype(NFSTOV(np)) == VREG) {
6198				VATTR_INIT(&vattr);
6199				VATTR_SET(&vattr, va_mode, (vap->va_mode | S_IWUSR));
6200				if (!nfs4_setattr_rpc(np, &vattr, ctx)) {
6201					error2 = nfs4_open_rpc(newnofp, ctx, cnp, NULL, dvp, vpp, NFS_OPEN_NOCREATE, accessMode, denyMode);
6202					VATTR_INIT(&vattr);
6203					VATTR_SET(&vattr, va_mode, vap->va_mode);
6204					nfs4_setattr_rpc(np, &vattr, ctx);
6205					if (!error2)
6206						error = 0;
6207				}
6208			}
6209			if (error) {
6210				vnode_put(*vpp);
6211				*vpp = NULL;
6212			}
6213		}
6214	}
6215	if (!error && !*vpp) {
6216		printf("nfs4_open_rpc returned without a node?\n");
6217		/* Hmmm... with no node, we have no filehandle and can't close it */
6218		error = EIO;
6219	}
6220	if (error) {
6221		/* need to cleanup our temporary nofp */
6222		nfs_open_file_clear_busy(newnofp);
6223		nfs_open_file_destroy(newnofp);
6224		newnofp = NULL;
6225		goto out;
6226	}
6227	/* After we have a node, add our open file struct to the node */
6228	np = VTONFS(*vpp);
6229	nfs_open_file_add_open(newnofp, accessMode, denyMode, 0);
6230	nofp = newnofp;
6231	error = nfs_open_file_find_internal(np, noop, &nofp, 0, 0, 0);
6232	if (error) {
6233		/* This shouldn't happen, because we passed in a new nofp to use. */
6234		printf("nfs_open_file_find_internal failed! %d\n", error);
6235		goto out;
6236	} else if (nofp != newnofp) {
6237		/*
6238		 * Hmm... an open file struct already exists.
6239		 * Mark the existing one busy and merge our open into it.
6240		 * Then destroy the one we created.
6241		 * Note: there's no chance of an open confict because the
6242		 * open has already been granted.
6243		 */
6244		busyerror = nfs_open_file_set_busy(nofp, NULL);
6245		nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
6246		nofp->nof_stateid = newnofp->nof_stateid;
6247		if (newnofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)
6248			nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
6249		nfs_open_file_clear_busy(newnofp);
6250		nfs_open_file_destroy(newnofp);
6251	}
6252	newnofp = NULL;
6253	/* mark the node as holding a create-initiated open */
6254	nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
6255	nofp->nof_creator = current_thread();
6256out:
6257	if (nofp && !busyerror)
6258		nfs_open_file_clear_busy(nofp);
6259	if (nfs_mount_state_in_use_end(nmp, error)) {
6260		nofp = newnofp = NULL;
6261		busyerror = 0;
6262		goto restart;
6263	}
6264	if (noop)
6265		nfs_open_owner_rele(noop);
6266	return (error);
6267}
6268
6269/*
6270 * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
6271 */
6272int
6273nfs4_create_rpc(
6274	vfs_context_t ctx,
6275	nfsnode_t dnp,
6276	struct componentname *cnp,
6277	struct vnode_attr *vap,
6278	int type,
6279	char *link,
6280	nfsnode_t *npp)
6281{
6282	struct nfsmount *nmp;
6283	struct nfs_vattr nvattr;
6284	int error = 0, create_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status;
6285	int nfsvers, namedattrs, numops;
6286	u_int64_t xid, savedxid = 0;
6287	nfsnode_t np = NULL;
6288	vnode_t newvp = NULL;
6289	struct nfsm_chain nmreq, nmrep;
6290	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
6291	const char *tag;
6292	nfs_specdata sd;
6293	fhandle_t fh;
6294	struct nfsreq rq, *req = &rq;
6295	struct nfs_dulookup dul;
6296	struct nfsreq_secinfo_args si;
6297
6298	nmp = NFSTONMP(dnp);
6299	if (!nmp)
6300		return (ENXIO);
6301	nfsvers = nmp->nm_vers;
6302	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
6303	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
6304		return (EINVAL);
6305
6306	sd.specdata1 = sd.specdata2 = 0;
6307
6308	switch (type) {
6309	case NFLNK:
6310		tag = "symlink";
6311		break;
6312	case NFBLK:
6313	case NFCHR:
6314		tag = "mknod";
6315		if (!VATTR_IS_ACTIVE(vap, va_rdev))
6316			return (EINVAL);
6317		sd.specdata1 = major(vap->va_rdev);
6318		sd.specdata2 = minor(vap->va_rdev);
6319		break;
6320	case NFSOCK:
6321	case NFFIFO:
6322		tag = "mknod";
6323		break;
6324	case NFDIR:
6325		tag = "mkdir";
6326		break;
6327	default:
6328		return (EINVAL);
6329	}
6330
6331	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
6332
6333	error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
6334	if (!namedattrs)
6335		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
6336
6337	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
6338	NVATTR_INIT(&nvattr);
6339	nfsm_chain_null(&nmreq);
6340	nfsm_chain_null(&nmrep);
6341
6342	// PUTFH, SAVEFH, CREATE, GETATTR(FH), RESTOREFH, GETATTR
6343	numops = 6;
6344	nfsm_chain_build_alloc_init(error, &nmreq, 66 * NFSX_UNSIGNED);
6345	nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
6346	numops--;
6347	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
6348	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
6349	numops--;
6350	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
6351	numops--;
6352	nfsm_chain_add_32(error, &nmreq, NFS_OP_CREATE);
6353	nfsm_chain_add_32(error, &nmreq, type);
6354	if (type == NFLNK) {
6355		nfsm_chain_add_name(error, &nmreq, link, strlen(link), nmp);
6356	} else if ((type == NFBLK) || (type == NFCHR)) {
6357		nfsm_chain_add_32(error, &nmreq, sd.specdata1);
6358		nfsm_chain_add_32(error, &nmreq, sd.specdata2);
6359	}
6360	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
6361	nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
6362	numops--;
6363	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
6364	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
6365	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
6366	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
6367	numops--;
6368	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
6369	numops--;
6370	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
6371	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
6372	nfsm_chain_build_done(error, &nmreq);
6373	nfsm_assert(error, (numops == 0), EPROTO);
6374	nfsmout_if(error);
6375
6376	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND,
6377			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
6378	if (!error) {
6379		if (!namedattrs)
6380			nfs_dulookup_start(&dul, dnp, ctx);
6381		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
6382	}
6383
6384	if ((lockerror = nfs_node_lock(dnp)))
6385		error = lockerror;
6386	nfsm_chain_skip_tag(error, &nmrep);
6387	nfsm_chain_get_32(error, &nmrep, numops);
6388	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
6389	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
6390	nfsmout_if(error);
6391	nfsm_chain_op_check(error, &nmrep, NFS_OP_CREATE);
6392	nfsm_chain_check_change_info(error, &nmrep, dnp);
6393	bmlen = NFS_ATTR_BITMAP_LEN;
6394	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
6395	/* At this point if we have no error, the object was created. */
6396	/* if we don't get attributes, then we should lookitup. */
6397	create_error = error;
6398	nfsmout_if(error);
6399	nfs_vattr_set_supported(bitmap, vap);
6400	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
6401	nfsmout_if(error);
6402	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
6403	nfsmout_if(error);
6404	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
6405		printf("nfs: create/%s didn't return filehandle? %s\n", tag, cnp->cn_nameptr);
6406		error = EBADRPC;
6407		goto nfsmout;
6408	}
6409	/* directory attributes: if we don't get them, make sure to invalidate */
6410	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
6411	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
6412	savedxid = xid;
6413	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
6414	if (error)
6415		NATTRINVALIDATE(dnp);
6416
6417nfsmout:
6418	nfsm_chain_cleanup(&nmreq);
6419	nfsm_chain_cleanup(&nmrep);
6420
6421	if (!lockerror) {
6422		if (!create_error && (dnp->n_flag & NNEGNCENTRIES)) {
6423			dnp->n_flag &= ~NNEGNCENTRIES;
6424			cache_purge_negatives(NFSTOV(dnp));
6425		}
6426		dnp->n_flag |= NMODIFIED;
6427		nfs_node_unlock(dnp);
6428		/* nfs_getattr() will check changed and purge caches */
6429		nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
6430	}
6431
6432	if (!error && fh.fh_len) {
6433		/* create the vnode with the filehandle and attributes */
6434		xid = savedxid;
6435		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
6436		if (!error)
6437			newvp = NFSTOV(np);
6438	}
6439	NVATTR_CLEANUP(&nvattr);
6440
6441	if (!namedattrs)
6442		nfs_dulookup_finish(&dul, dnp, ctx);
6443
6444	/*
6445	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
6446	 * if we can succeed in looking up the object.
6447	 */
6448	if ((create_error == EEXIST) || (!create_error && !newvp)) {
6449		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
6450		if (!error) {
6451			newvp = NFSTOV(np);
6452			if (vnode_vtype(newvp) != nfstov_type(type, nfsvers))
6453				error = EEXIST;
6454		}
6455	}
6456	if (!busyerror)
6457		nfs_node_clear_busy(dnp);
6458	if (error) {
6459		if (newvp) {
6460			nfs_node_unlock(np);
6461			vnode_put(newvp);
6462		}
6463	} else {
6464		nfs_node_unlock(np);
6465		*npp = np;
6466	}
6467	return (error);
6468}
6469
6470int
6471nfs4_vnop_mknod(
6472	struct vnop_mknod_args /* {
6473		struct vnodeop_desc *a_desc;
6474		vnode_t a_dvp;
6475		vnode_t *a_vpp;
6476		struct componentname *a_cnp;
6477		struct vnode_attr *a_vap;
6478		vfs_context_t a_context;
6479	} */ *ap)
6480{
6481	nfsnode_t np = NULL;
6482	struct nfsmount *nmp;
6483	int error;
6484
6485	nmp = VTONMP(ap->a_dvp);
6486	if (!nmp)
6487		return (ENXIO);
6488
6489	if (!VATTR_IS_ACTIVE(ap->a_vap, va_type))
6490		return (EINVAL);
6491	switch (ap->a_vap->va_type) {
6492	case VBLK:
6493	case VCHR:
6494	case VFIFO:
6495	case VSOCK:
6496		break;
6497	default:
6498		return (ENOTSUP);
6499	}
6500
6501	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
6502			vtonfs_type(ap->a_vap->va_type, nmp->nm_vers), NULL, &np);
6503	if (!error)
6504		*ap->a_vpp = NFSTOV(np);
6505	return (error);
6506}
6507
6508int
6509nfs4_vnop_mkdir(
6510	struct vnop_mkdir_args /* {
6511		struct vnodeop_desc *a_desc;
6512		vnode_t a_dvp;
6513		vnode_t *a_vpp;
6514		struct componentname *a_cnp;
6515		struct vnode_attr *a_vap;
6516		vfs_context_t a_context;
6517	} */ *ap)
6518{
6519	nfsnode_t np = NULL;
6520	int error;
6521
6522	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
6523			NFDIR, NULL, &np);
6524	if (!error)
6525		*ap->a_vpp = NFSTOV(np);
6526	return (error);
6527}
6528
6529int
6530nfs4_vnop_symlink(
6531	struct vnop_symlink_args /* {
6532		struct vnodeop_desc *a_desc;
6533		vnode_t a_dvp;
6534		vnode_t *a_vpp;
6535		struct componentname *a_cnp;
6536		struct vnode_attr *a_vap;
6537		char *a_target;
6538		vfs_context_t a_context;
6539	} */ *ap)
6540{
6541	nfsnode_t np = NULL;
6542	int error;
6543
6544	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
6545			NFLNK, ap->a_target, &np);
6546	if (!error)
6547		*ap->a_vpp = NFSTOV(np);
6548	return (error);
6549}
6550
/*
 * NFSv4 link vnode op: create a hard link to vp named cnp in directory tdvp
 * using a single COMPOUND:
 *   PUTFH(file), SAVEFH, PUTFH(dir), LINK, GETATTR(dir), RESTOREFH, GETATTR(file)
 */
int
nfs4_vnop_link(
	struct vnop_link_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		vnode_t a_tdvp;
		struct componentname *a_cnp;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	vnode_t tdvp = ap->a_tdvp;
	struct componentname *cnp = ap->a_cnp;
	int error = 0, lockerror = ENOENT, status;
	struct nfsmount *nmp;
	nfsnode_t np = VTONFS(vp);
	nfsnode_t tdnp = VTONFS(tdvp);
	int nfsvers, numops;
	u_int64_t xid, savedxid;
	struct nfsm_chain nmreq, nmrep;
	struct nfsreq_secinfo_args si;

	/* hard links can't cross mounts */
	if (vnode_mount(vp) != vnode_mount(tdvp))
		return (EXDEV);

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	/* referral trigger nodes can't be linked */
	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);
	if (tdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
		return (EINVAL);

	/*
	 * Push all writes to the server, so that the attribute cache
	 * doesn't get "out of sync" with the server.
	 * XXX There should be a better way!
	 */
	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);

	/* mark both nodes busy for the duration of the operation */
	if ((error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx))))
		return (error);

	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH(SOURCE), SAVEFH, PUTFH(DIR), LINK, GETATTR(DIR), RESTOREFH, GETATTR
	numops = 7;
	nfsm_chain_build_alloc_init(error, &nmreq, 29 * NFSX_UNSIGNED + cnp->cn_namelen);
	nfsm_chain_add_compound_header(error, &nmreq, "link", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LINK);
	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, tdnp);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
	nfsm_chain_build_done(error, &nmreq);
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);
	error = nfs_request(tdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);

	/* lock both nodes before parsing/caching the returned attributes */
	if ((lockerror = nfs_node_lock2(tdnp, np))) {
		error = lockerror;
		goto nfsmout;
	}
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LINK);
	nfsm_chain_check_change_info(error, &nmrep, tdnp);
	/* directory attributes: if we don't get them, make sure to invalidate */
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	savedxid = xid;
	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, &xid);
	if (error)
		NATTRINVALIDATE(tdnp);
	/* link attributes: if we don't get them, make sure to invalidate */
	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	/* reuse the saved xid so the file's attrs get the same timestamp */
	xid = savedxid;
	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	if (error)
		NATTRINVALIDATE(np);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	/* directory contents changed (only if we actually got the lock) */
	if (!lockerror)
		tdnp->n_flag |= NMODIFIED;
	/* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */
	if (error == EEXIST)
		error = 0;
	/* a new entry now exists, so drop any cached negative lookups */
	if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
		tdnp->n_flag &= ~NNEGNCENTRIES;
		cache_purge_negatives(tdvp);
	}
	if (!lockerror)
		nfs_node_unlock2(tdnp, np);
	nfs_node_clear_busy2(tdnp, np);
	return (error);
}
6669
/*
 * NFSv4 rmdir vnode op: remove directory vp (named cnp) from directory dvp
 * via the common NFSv4 REMOVE RPC, then clean up the name cache and,
 * on success, unhash the removed node so its filehandle can't be found again.
 */
int
nfs4_vnop_rmdir(
	struct vnop_rmdir_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_dvp;
		vnode_t a_vp;
		struct componentname *a_cnp;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	vnode_t dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nfsmount *nmp;
	int error = 0, namedattrs;
	nfsnode_t np = VTONFS(vp);
	nfsnode_t dnp = VTONFS(dvp);
	struct nfs_dulookup dul;

	if (vnode_vtype(vp) != VDIR)
		return (EINVAL);

	nmp = NFSTONMP(dnp);
	if (!nmp)
		return (ENXIO);
	/* named attribute dirs don't get "._" sidecar lookups */
	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);

	/* mark both nodes busy for the duration of the operation */
	if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx))))
		return (error);

	/* kick off an asynchronous lookup of the "._" sibling (AppleDouble) */
	if (!namedattrs) {
		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
		nfs_dulookup_start(&dul, dnp, ctx);
	}

	error = nfs4_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
			vfs_context_thread(ctx), vfs_context_ucred(ctx));

	/* drop the name cache entry regardless of RPC outcome */
	nfs_name_cache_purge(dnp, np, cnp, ctx);
	/* nfs_getattr() will check changed and purge caches */
	nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
	if (!namedattrs)
		nfs_dulookup_finish(&dul, dnp, ctx);
	nfs_node_clear_busy2(dnp, np);

	/*
	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
	 */
	if (error == ENOENT)
		error = 0;
	if (!error) {
		/*
		 * remove nfsnode from hash now so we can't accidentally find it
		 * again if another object gets created with the same filehandle
		 * before this vnode gets reclaimed
		 */
		lck_mtx_lock(nfs_node_hash_mutex);
		if (np->n_hflag & NHHASHED) {
			LIST_REMOVE(np, n_hash);
			np->n_hflag &= ~NHHASHED;
			FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
		}
		lck_mtx_unlock(nfs_node_hash_mutex);
	}
	return (error);
}
6737
6738/*
6739 * NFSv4 Named Attributes
6740 *
6741 * Both the extended attributes interface and the named streams interface
6742 * are backed by NFSv4 named attributes.  The implementations for both use
6743 * a common set of routines in an attempt to reduce code duplication, to
6744 * increase efficiency, to increase caching of both names and data, and to
6745 * confine the complexity.
6746 *
6747 * Each NFS node caches its named attribute directory's file handle.
6748 * The directory nodes for the named attribute directories are handled
6749 * exactly like regular directories (with a couple minor exceptions).
6750 * Named attribute nodes are also treated as much like regular files as
6751 * possible.
6752 *
6753 * Most of the heavy lifting is done by nfs4_named_attr_get().
6754 */
6755
6756/*
6757 * Get the given node's attribute directory node.
6758 * If !fetch, then only return a cached node.
6759 * Otherwise, we will attempt to fetch the node from the server.
6760 * (Note: the node should be marked busy.)
6761 */
6762nfsnode_t
6763nfs4_named_attr_dir_get(nfsnode_t np, int fetch, vfs_context_t ctx)
6764{
6765	nfsnode_t adnp = NULL;
6766	struct nfsmount *nmp;
6767	int error = 0, status, numops;
6768	struct nfsm_chain nmreq, nmrep;
6769	u_int64_t xid;
6770	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
6771	fhandle_t fh;
6772	struct nfs_vattr nvattr;
6773	struct componentname cn;
6774	struct nfsreq rq, *req = &rq;
6775	struct nfsreq_secinfo_args si;
6776
6777	nmp = NFSTONMP(np);
6778	if (!nmp)
6779		return (NULL);
6780	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
6781		return (NULL);
6782
6783	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
6784	NVATTR_INIT(&nvattr);
6785	nfsm_chain_null(&nmreq);
6786	nfsm_chain_null(&nmrep);
6787
6788	bzero(&cn, sizeof(cn));
6789	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(_PATH_FORKSPECIFIER, const, char *); /* "/..namedfork/" */
6790	cn.cn_namelen = strlen(_PATH_FORKSPECIFIER);
6791	cn.cn_nameiop = LOOKUP;
6792
6793	if (np->n_attrdirfh) {
6794		// XXX can't set parent correctly (to np) yet
6795		error = nfs_nget(nmp->nm_mountp, NULL, &cn, np->n_attrdirfh+1, *np->n_attrdirfh,
6796				NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &adnp);
6797		if (adnp)
6798			goto nfsmout;
6799	}
6800	if (!fetch) {
6801		error = ENOENT;
6802		goto nfsmout;
6803	}
6804
6805	// PUTFH, OPENATTR, GETATTR
6806	numops = 3;
6807	nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED);
6808	nfsm_chain_add_compound_header(error, &nmreq, "openattr", numops);
6809	numops--;
6810	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
6811	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
6812	numops--;
6813	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
6814	nfsm_chain_add_32(error, &nmreq, 0);
6815	numops--;
6816	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
6817	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
6818	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
6819	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
6820		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
6821	nfsm_chain_build_done(error, &nmreq);
6822	nfsm_assert(error, (numops == 0), EPROTO);
6823	nfsmout_if(error);
6824	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND,
6825			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
6826	if (!error)
6827		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
6828
6829	nfsm_chain_skip_tag(error, &nmrep);
6830	nfsm_chain_get_32(error, &nmrep, numops);
6831	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
6832	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
6833	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
6834	nfsmout_if(error);
6835	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
6836	nfsmout_if(error);
6837	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) || !fh.fh_len) {
6838		error = ENOENT;
6839		goto nfsmout;
6840	}
6841	if (!np->n_attrdirfh || (*np->n_attrdirfh != fh.fh_len)) {
6842		/* (re)allocate attrdir fh buffer */
6843		if (np->n_attrdirfh)
6844			FREE(np->n_attrdirfh, M_TEMP);
6845		MALLOC(np->n_attrdirfh, u_char*, fh.fh_len+1, M_TEMP, M_WAITOK);
6846	}
6847	if (!np->n_attrdirfh) {
6848		error = ENOMEM;
6849		goto nfsmout;
6850	}
6851	/* cache the attrdir fh in the node */
6852	*np->n_attrdirfh = fh.fh_len;
6853	bcopy(fh.fh_data, np->n_attrdirfh+1, fh.fh_len);
6854	/* create node for attrdir */
6855	// XXX can't set parent correctly (to np) yet
6856	error = nfs_nget(NFSTOMP(np), NULL, &cn, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, 0, &adnp);
6857nfsmout:
6858	NVATTR_CLEANUP(&nvattr);
6859	nfsm_chain_cleanup(&nmreq);
6860	nfsm_chain_cleanup(&nmrep);
6861
6862	if (adnp) {
6863		/* sanity check that this node is an attribute directory */
6864		if (adnp->n_vattr.nva_type != VDIR)
6865			error = EINVAL;
6866		if (!(adnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
6867			error = EINVAL;
6868		nfs_node_unlock(adnp);
6869		if (error)
6870			vnode_put(NFSTOV(adnp));
6871	}
6872	return (error ? NULL : adnp);
6873}
6874
6875/*
6876 * Get the given node's named attribute node for the name given.
6877 *
6878 * In an effort to increase the performance of named attribute access, we try
6879 * to reduce server requests by doing the following:
6880 *
6881 * - cache the node's named attribute directory file handle in the node
6882 * - maintain a directory vnode for the attribute directory
6883 * - use name cache entries (positive and negative) to speed up lookups
6884 * - optionally open the named attribute (with the given accessMode) in the same RPC
6885 * - combine attribute directory retrieval with the lookup/open RPC
6886 * - optionally prefetch the named attribute's first block of data in the same RPC
6887 *
6888 * Also, in an attempt to reduce the number of copies/variations of this code,
6889 * parts of the RPC building/processing code are conditionalized on what is
6890 * needed for any particular request (openattr, lookup vs. open, read).
6891 *
6892 * Note that because we may not have the attribute directory node when we start
6893 * the lookup/open, we lock both the node and the attribute directory node.
6894 */
6895
6896#define NFS_GET_NAMED_ATTR_CREATE		0x1
6897#define NFS_GET_NAMED_ATTR_CREATE_GUARDED	0x2
6898#define NFS_GET_NAMED_ATTR_TRUNCATE		0x4
6899#define NFS_GET_NAMED_ATTR_PREFETCH		0x8
6900
6901int
6902nfs4_named_attr_get(
6903	nfsnode_t np,
6904	struct componentname *cnp,
6905	uint32_t accessMode,
6906	int flags,
6907	vfs_context_t ctx,
6908	nfsnode_t *anpp,
6909	struct nfs_open_file **nofpp)
6910{
6911	struct nfsmount *nmp;
6912	int error = 0, open_error = EIO;
6913	int inuse = 0, adlockerror = ENOENT, busyerror = ENOENT, adbusyerror = ENOENT, nofpbusyerror = ENOENT;
6914	int create, guarded, prefetch, truncate, noopbusy = 0;
6915	int open, status, numops, hadattrdir, negnamecache;
6916	struct nfs_vattr nvattr;
6917	struct vnode_attr vattr;
6918	nfsnode_t adnp = NULL, anp = NULL;
6919	vnode_t avp = NULL;
6920	u_int64_t xid, savedxid = 0;
6921	struct nfsm_chain nmreq, nmrep;
6922	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
6923	uint32_t denyMode, rflags, delegation, recall, eof, rlen, retlen;
6924	nfs_stateid stateid, dstateid;
6925	fhandle_t fh;
6926	struct nfs_open_owner *noop = NULL;
6927	struct nfs_open_file *newnofp = NULL, *nofp = NULL;
6928	struct vnop_access_args naa;
6929	thread_t thd;
6930	kauth_cred_t cred;
6931	struct timeval now;
6932	char sbuf[64], *s;
6933	uint32_t ace_type, ace_flags, ace_mask, len, slen;
6934	struct kauth_ace ace;
6935	struct nfsreq rq, *req = &rq;
6936	struct nfsreq_secinfo_args si;
6937
6938	*anpp = NULL;
6939	fh.fh_len = 0;
6940	rflags = delegation = recall = eof = rlen = retlen = 0;
6941	ace.ace_flags = 0;
6942	s = sbuf;
6943	slen = sizeof(sbuf);
6944
6945	nmp = NFSTONMP(np);
6946	if (!nmp)
6947		return (ENXIO);
6948	NVATTR_INIT(&nvattr);
6949	negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
6950	thd = vfs_context_thread(ctx);
6951	cred = vfs_context_ucred(ctx);
6952	create = (flags & NFS_GET_NAMED_ATTR_CREATE) ? NFS_OPEN_CREATE : NFS_OPEN_NOCREATE;
6953	guarded = (flags & NFS_GET_NAMED_ATTR_CREATE_GUARDED) ? NFS_CREATE_GUARDED : NFS_CREATE_UNCHECKED;
6954	truncate = (flags & NFS_GET_NAMED_ATTR_TRUNCATE);
6955	prefetch = (flags & NFS_GET_NAMED_ATTR_PREFETCH);
6956
6957	if (!create) {
6958		error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED);
6959		if (error)
6960			return (error);
6961		if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
6962		    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
6963			return (ENOATTR);
6964	} else if (accessMode == NFS_OPEN_SHARE_ACCESS_NONE) {
6965		/* shouldn't happen... but just be safe */
6966		printf("nfs4_named_attr_get: create with no access %s\n", cnp->cn_nameptr);
6967		accessMode = NFS_OPEN_SHARE_ACCESS_READ;
6968	}
6969	open = (accessMode != NFS_OPEN_SHARE_ACCESS_NONE);
6970	if (open) {
6971		/*
6972		 * We're trying to open the file.
6973		 * We'll create/open it with the given access mode,
6974		 * and set NFS_OPEN_FILE_CREATE.
6975		 */
6976		denyMode = NFS_OPEN_SHARE_DENY_NONE;
6977		if (prefetch && guarded)
6978			prefetch = 0;  /* no sense prefetching data that can't be there */
6979
6980		noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
6981		if (!noop)
6982			return (ENOMEM);
6983	}
6984
6985	if ((error = busyerror = nfs_node_set_busy(np, vfs_context_thread(ctx))))
6986		return (error);
6987
6988	adnp = nfs4_named_attr_dir_get(np, 0, ctx);
6989	hadattrdir = (adnp != NULL);
6990	if (prefetch) {
6991		microuptime(&now);
6992		/* use the special state ID because we don't have a real one to send */
6993		stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0;
6994		rlen = MIN(nmp->nm_rsize, nmp->nm_biosize);
6995	}
6996	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
6997	nfsm_chain_null(&nmreq);
6998	nfsm_chain_null(&nmrep);
6999
7000	if (hadattrdir) {
7001		if ((error = adbusyerror = nfs_node_set_busy(adnp, vfs_context_thread(ctx))))
7002			goto nfsmout;
7003		/* nfs_getattr() will check changed and purge caches */
7004		error = nfs_getattr(adnp, NULL, ctx, NGA_CACHED);
7005		nfsmout_if(error);
7006		error = cache_lookup(NFSTOV(adnp), &avp, cnp);
7007		switch (error) {
7008		case ENOENT:
7009			/* negative cache entry */
7010			goto nfsmout;
7011		case 0:
7012			/* cache miss */
7013			/* try dir buf cache lookup */
7014			error = nfs_dir_buf_cache_lookup(adnp, &anp, cnp, ctx, 0);
7015			if (!error && anp) {
7016				/* dir buf cache hit */
7017				*anpp = anp;
7018				error = -1;
7019			}
7020			if (error != -1) /* cache miss */
7021				break;
7022			/* FALLTHROUGH */
7023		case -1:
7024			/* cache hit, not really an error */
7025			OSAddAtomic64(1, &nfsstats.lookupcache_hits);
7026			if (!anp && avp)
7027				*anpp = anp = VTONFS(avp);
7028
7029			nfs_node_clear_busy(adnp);
7030			adbusyerror = ENOENT;
7031
7032			/* check for directory access */
7033			naa.a_desc = &vnop_access_desc;
7034			naa.a_vp = NFSTOV(adnp);
7035			naa.a_action = KAUTH_VNODE_SEARCH;
7036			naa.a_context = ctx;
7037
7038			/* compute actual success/failure based on accessibility */
7039			error = nfs_vnop_access(&naa);
7040			/* FALLTHROUGH */
7041		default:
7042			/* we either found it, or hit an error */
7043			if (!error && guarded) {
7044				/* found cached entry but told not to use it */
7045				error = EEXIST;
7046				vnode_put(NFSTOV(anp));
7047				*anpp = anp = NULL;
7048			}
7049			/* we're done if error or we don't need to open */
7050			if (error || !open)
7051				goto nfsmout;
7052			/* no error and we need to open... */
7053		}
7054	}
7055
7056	if (open) {
7057restart:
7058		error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
7059		if (error) {
7060			nfs_open_owner_rele(noop);
7061			noop = NULL;
7062			goto nfsmout;
7063		}
7064		inuse = 1;
7065
7066		/* grab an open file - possibly provisional/nodeless if cache_lookup() failed */
7067		error = nfs_open_file_find(anp, noop, &newnofp, 0, 0, 1);
7068		if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_LOST)) {
7069			printf("nfs4_named_attr_get: LOST %d %s\n", kauth_cred_getuid(noop->noo_cred), cnp->cn_nameptr);
7070			error = EIO;
7071		}
7072		if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
7073			nfs_mount_state_in_use_end(nmp, 0);
7074			error = nfs4_reopen(newnofp, vfs_context_thread(ctx));
7075			nfs_open_file_destroy(newnofp);
7076			newnofp = NULL;
7077			if (!error)
7078				goto restart;
7079		}
7080		if (!error)
7081			error = nfs_open_file_set_busy(newnofp, vfs_context_thread(ctx));
7082		if (error) {
7083			if (newnofp)
7084				nfs_open_file_destroy(newnofp);
7085			newnofp = NULL;
7086			goto nfsmout;
7087		}
7088		if (anp) {
7089			/*
7090			 * We already have the node.  So we just need to open
7091			 * it - which we may be able to do with a delegation.
7092			 */
7093			open_error = error = nfs4_open(anp, newnofp, accessMode, denyMode, ctx);
7094			if (!error) {
7095				/* open succeeded, so our open file is no longer temporary */
7096				nofp = newnofp;
7097				nofpbusyerror = 0;
7098				newnofp = NULL;
7099				if (nofpp)
7100					*nofpp = nofp;
7101			}
7102			goto nfsmout;
7103		}
7104	}
7105
7106	/*
7107	 * We either don't have the attrdir or we didn't find the attribute
7108	 * in the name cache, so we need to talk to the server.
7109	 *
7110	 * If we don't have the attrdir, we'll need to ask the server for that too.
7111	 * If the caller is requesting that the attribute be created, we need to
7112	 * make sure the attrdir is created.
7113	 * The caller may also request that the first block of an existing attribute
7114	 * be retrieved at the same time.
7115	 */
7116
7117	if (open) {
7118		/* need to mark the open owner busy during the RPC */
7119		if ((error = nfs_open_owner_set_busy(noop, thd)))
7120			goto nfsmout;
7121		noopbusy = 1;
7122	}
7123
7124	/*
7125	 * We'd like to get updated post-open/lookup attributes for the
7126	 * directory and we may also want to prefetch some data via READ.
7127	 * We'd like the READ results to be last so that we can leave the
7128	 * data in the mbufs until the end.
7129	 *
7130	 * At a minimum we're sending: PUTFH, LOOKUP/OPEN, GETATTR, PUTFH, GETATTR
7131	 */
7132	numops = 5;
7133	if (!hadattrdir)
7134		numops += 3;	// also sending: OPENATTR, GETATTR, OPENATTR
7135	if (prefetch)
7136		numops += 4;	// also sending: SAVEFH, RESTOREFH, NVERIFY, READ
7137	nfsm_chain_build_alloc_init(error, &nmreq, 64 * NFSX_UNSIGNED + cnp->cn_namelen);
7138	nfsm_chain_add_compound_header(error, &nmreq, "getnamedattr", numops);
7139	if (hadattrdir) {
7140		numops--;
7141		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7142		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, adnp->n_fhp, adnp->n_fhsize);
7143	} else {
7144		numops--;
7145		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7146		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
7147		numops--;
7148		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
7149		nfsm_chain_add_32(error, &nmreq, create ? 1 : 0);
7150		numops--;
7151		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
7152		NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
7153		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
7154		nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
7155			NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
7156	}
7157	if (open) {
7158		numops--;
7159		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
7160		nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
7161		nfsm_chain_add_32(error, &nmreq, accessMode);
7162		nfsm_chain_add_32(error, &nmreq, denyMode);
7163		nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
7164		nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
7165		nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred));
7166		nfsm_chain_add_32(error, &nmreq, create);
7167		if (create) {
7168			nfsm_chain_add_32(error, &nmreq, guarded);
7169			VATTR_INIT(&vattr);
7170			if (truncate)
7171				VATTR_SET(&vattr, va_data_size, 0);
7172			nfsm_chain_add_fattr4(error, &nmreq, &vattr, nmp);
7173		}
7174		nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL);
7175		nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
7176	} else {
7177		numops--;
7178		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
7179		nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
7180	}
7181	numops--;
7182	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
7183	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
7184	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
7185	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
7186		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
7187	if (prefetch) {
7188		numops--;
7189		nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
7190	}
7191	if (hadattrdir) {
7192		numops--;
7193		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7194		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, adnp->n_fhp, adnp->n_fhsize);
7195	} else {
7196		numops--;
7197		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
7198		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
7199		numops--;
7200		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
7201		nfsm_chain_add_32(error, &nmreq, 0);
7202	}
7203	numops--;
7204	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
7205	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
7206		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
7207	if (prefetch) {
7208		numops--;
7209		nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
7210		numops--;
7211		nfsm_chain_add_32(error, &nmreq, NFS_OP_NVERIFY);
7212		VATTR_INIT(&vattr);
7213		VATTR_SET(&vattr, va_data_size, 0);
7214		nfsm_chain_add_fattr4(error, &nmreq, &vattr, nmp);
7215		numops--;
7216		nfsm_chain_add_32(error, &nmreq, NFS_OP_READ);
7217		nfsm_chain_add_stateid(error, &nmreq, &stateid);
7218		nfsm_chain_add_64(error, &nmreq, 0);
7219		nfsm_chain_add_32(error, &nmreq, rlen);
7220	}
7221	nfsm_chain_build_done(error, &nmreq);
7222	nfsm_assert(error, (numops == 0), EPROTO);
7223	nfsmout_if(error);
7224	error = nfs_request_async(hadattrdir ? adnp : np, NULL, &nmreq, NFSPROC4_COMPOUND,
7225			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, open ? R_NOINTR: 0, NULL, &req);
7226	if (!error)
7227		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
7228
7229	if (hadattrdir && ((adlockerror = nfs_node_lock(adnp))))
7230		error = adlockerror;
7231	savedxid = xid;
7232	nfsm_chain_skip_tag(error, &nmrep);
7233	nfsm_chain_get_32(error, &nmrep, numops);
7234	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
7235	if (!hadattrdir) {
7236		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
7237		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
7238		nfsmout_if(error);
7239		error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
7240		nfsmout_if(error);
7241		if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) && fh.fh_len) {
7242			if (!np->n_attrdirfh || (*np->n_attrdirfh != fh.fh_len)) {
7243				/* (re)allocate attrdir fh buffer */
7244				if (np->n_attrdirfh)
7245					FREE(np->n_attrdirfh, M_TEMP);
7246				MALLOC(np->n_attrdirfh, u_char*, fh.fh_len+1, M_TEMP, M_WAITOK);
7247			}
7248			if (np->n_attrdirfh) {
7249				/* remember the attrdir fh in the node */
7250				*np->n_attrdirfh = fh.fh_len;
7251				bcopy(fh.fh_data, np->n_attrdirfh+1, fh.fh_len);
7252				/* create busied node for attrdir */
7253				struct componentname cn;
7254				bzero(&cn, sizeof(cn));
7255				cn.cn_nameptr = __CAST_AWAY_QUALIFIER(_PATH_FORKSPECIFIER, const, char *); /* "/..namedfork/" */
7256				cn.cn_namelen = strlen(_PATH_FORKSPECIFIER);
7257				cn.cn_nameiop = LOOKUP;
7258				// XXX can't set parent correctly (to np) yet
7259				error = nfs_nget(NFSTOMP(np), NULL, &cn, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, 0, &adnp);
7260				if (!error) {
7261					adlockerror = 0;
7262					/* set the node busy */
7263					SET(adnp->n_flag, NBUSY);
7264					adbusyerror = 0;
7265				}
7266				/* if no adnp, oh well... */
7267				error = 0;
7268			}
7269		}
7270		NVATTR_CLEANUP(&nvattr);
7271		fh.fh_len = 0;
7272	}
7273	if (open) {
7274		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
7275		nfs_owner_seqid_increment(noop, NULL, error);
7276		nfsm_chain_get_stateid(error, &nmrep, &newnofp->nof_stateid);
7277		nfsm_chain_check_change_info(error, &nmrep, adnp);
7278		nfsm_chain_get_32(error, &nmrep, rflags);
7279		bmlen = NFS_ATTR_BITMAP_LEN;
7280		nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
7281		nfsm_chain_get_32(error, &nmrep, delegation);
7282		if (!error)
7283			switch (delegation) {
7284			case NFS_OPEN_DELEGATE_NONE:
7285				break;
7286			case NFS_OPEN_DELEGATE_READ:
7287			case NFS_OPEN_DELEGATE_WRITE:
7288				nfsm_chain_get_stateid(error, &nmrep, &dstateid);
7289				nfsm_chain_get_32(error, &nmrep, recall);
7290				if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
7291					nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
7292				/* if we have any trouble accepting the ACE, just invalidate it */
7293				ace_type = ace_flags = ace_mask = len = 0;
7294				nfsm_chain_get_32(error, &nmrep, ace_type);
7295				nfsm_chain_get_32(error, &nmrep, ace_flags);
7296				nfsm_chain_get_32(error, &nmrep, ace_mask);
7297				nfsm_chain_get_32(error, &nmrep, len);
7298				ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
7299				ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
7300				ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
7301				if (!error && (len >= slen)) {
7302					MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
7303					if (s)
7304						slen = len+1;
7305					else
7306						ace.ace_flags = 0;
7307				}
7308				if (s)
7309					nfsm_chain_get_opaque(error, &nmrep, len, s);
7310				else
7311					nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
7312				if (!error && s) {
7313					s[len] = '\0';
7314					if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
7315						ace.ace_flags = 0;
7316				}
7317				if (error || !s)
7318					ace.ace_flags = 0;
7319				if (s && (s != sbuf))
7320					FREE(s, M_TEMP);
7321				break;
7322			default:
7323				error = EBADRPC;
7324				break;
7325			}
7326		/* At this point if we have no error, the object was created/opened. */
7327		open_error = error;
7328	} else {
7329		nfsm_chain_op_check(error, &nmrep, NFS_OP_LOOKUP);
7330	}
7331	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
7332	nfsmout_if(error);
7333	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
7334	nfsmout_if(error);
7335	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) || !fh.fh_len) {
7336		error = EIO;
7337		goto nfsmout;
7338	}
7339	if (prefetch)
7340		nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
7341	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
7342	if (!hadattrdir)
7343		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
7344	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
7345	nfsmout_if(error);
7346	xid = savedxid;
7347	nfsm_chain_loadattr(error, &nmrep, adnp, nmp->nm_vers, &xid);
7348	nfsmout_if(error);
7349
7350	if (open) {
7351		if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
7352			newnofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
7353		if (rflags & NFS_OPEN_RESULT_CONFIRM) {
7354			if (adnp) {
7355				nfs_node_unlock(adnp);
7356				adlockerror = ENOENT;
7357			}
7358			NVATTR_CLEANUP(&nvattr);
7359			error = nfs4_open_confirm_rpc(nmp, adnp ? adnp : np, fh.fh_data, fh.fh_len, noop, &newnofp->nof_stateid, thd, cred, &nvattr, &xid);
7360			nfsmout_if(error);
7361			savedxid = xid;
7362			if ((adlockerror = nfs_node_lock(adnp)))
7363				error = adlockerror;
7364		}
7365	}
7366
7367nfsmout:
7368	if (open && adnp && !adlockerror) {
7369		if (!open_error && (adnp->n_flag & NNEGNCENTRIES)) {
7370			adnp->n_flag &= ~NNEGNCENTRIES;
7371			cache_purge_negatives(NFSTOV(adnp));
7372		}
7373		adnp->n_flag |= NMODIFIED;
7374		nfs_node_unlock(adnp);
7375		adlockerror = ENOENT;
7376		nfs_getattr(adnp, NULL, ctx, NGA_CACHED);
7377	}
7378	if (adnp && !adlockerror && (error == ENOENT) &&
7379	    (cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != CREATE) && negnamecache) {
7380		/* add a negative entry in the name cache */
7381		cache_enter(NFSTOV(adnp), NULL, cnp);
7382		adnp->n_flag |= NNEGNCENTRIES;
7383	}
7384	if (adnp && !adlockerror) {
7385		nfs_node_unlock(adnp);
7386		adlockerror = ENOENT;
7387	}
7388	if (!error && !anp && fh.fh_len) {
7389		/* create the vnode with the filehandle and attributes */
7390		xid = savedxid;
7391		error = nfs_nget(NFSTOMP(np), adnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &anp);
7392		if (!error) {
7393			*anpp = anp;
7394			nfs_node_unlock(anp);
7395		}
7396		if (!error && open) {
7397			nfs_open_file_add_open(newnofp, accessMode, denyMode, 0);
7398			/* After we have a node, add our open file struct to the node */
7399			nofp = newnofp;
7400			error = nfs_open_file_find_internal(anp, noop, &nofp, 0, 0, 0);
7401			if (error) {
7402				/* This shouldn't happen, because we passed in a new nofp to use. */
7403				printf("nfs_open_file_find_internal failed! %d\n", error);
7404				nofp = NULL;
7405			} else if (nofp != newnofp) {
7406				/*
7407				 * Hmm... an open file struct already exists.
7408				 * Mark the existing one busy and merge our open into it.
7409				 * Then destroy the one we created.
7410				 * Note: there's no chance of an open confict because the
7411				 * open has already been granted.
7412				 */
7413				nofpbusyerror = nfs_open_file_set_busy(nofp, NULL);
7414				nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
7415				nofp->nof_stateid = newnofp->nof_stateid;
7416				if (newnofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)
7417					nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
7418				nfs_open_file_clear_busy(newnofp);
7419				nfs_open_file_destroy(newnofp);
7420				newnofp = NULL;
7421			}
7422			if (!error) {
7423				newnofp = NULL;
7424				nofpbusyerror = 0;
7425				/* mark the node as holding a create-initiated open */
7426				nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
7427				nofp->nof_creator = current_thread();
7428				if (nofpp)
7429					*nofpp = nofp;
7430			}
7431		}
7432	}
7433	NVATTR_CLEANUP(&nvattr);
7434	if (open && ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE))) {
7435		if (!error && anp && !recall) {
7436			/* stuff the delegation state in the node */
7437			lck_mtx_lock(&anp->n_openlock);
7438			anp->n_openflags &= ~N_DELEG_MASK;
7439			anp->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
7440			anp->n_dstateid = dstateid;
7441			anp->n_dace = ace;
7442			if (anp->n_dlink.tqe_next == NFSNOLIST) {
7443				lck_mtx_lock(&nmp->nm_lock);
7444				if (anp->n_dlink.tqe_next == NFSNOLIST)
7445					TAILQ_INSERT_TAIL(&nmp->nm_delegations, anp, n_dlink);
7446				lck_mtx_unlock(&nmp->nm_lock);
7447			}
7448			lck_mtx_unlock(&anp->n_openlock);
7449		} else {
7450			/* give the delegation back */
7451			if (anp) {
7452				if (NFS_CMPFH(anp, fh.fh_data, fh.fh_len)) {
7453					/* update delegation state and return it */
7454					lck_mtx_lock(&anp->n_openlock);
7455					anp->n_openflags &= ~N_DELEG_MASK;
7456					anp->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
7457					anp->n_dstateid = dstateid;
7458					anp->n_dace = ace;
7459					if (anp->n_dlink.tqe_next == NFSNOLIST) {
7460						lck_mtx_lock(&nmp->nm_lock);
7461						if (anp->n_dlink.tqe_next == NFSNOLIST)
7462							TAILQ_INSERT_TAIL(&nmp->nm_delegations, anp, n_dlink);
7463						lck_mtx_unlock(&nmp->nm_lock);
7464					}
7465					lck_mtx_unlock(&anp->n_openlock);
7466					/* don't need to send a separate delegreturn for fh */
7467					fh.fh_len = 0;
7468				}
7469				/* return anp's current delegation */
7470				nfs4_delegation_return(anp, 0, thd, cred);
7471			}
7472			if (fh.fh_len) /* return fh's delegation if it wasn't for anp */
7473				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, 0, thd, cred);
7474		}
7475	}
7476	if (open) {
7477		if (newnofp) {
7478			/* need to cleanup our temporary nofp */
7479			nfs_open_file_clear_busy(newnofp);
7480			nfs_open_file_destroy(newnofp);
7481			newnofp = NULL;
7482		} else if (nofp && !nofpbusyerror) {
7483			nfs_open_file_clear_busy(nofp);
7484			nofpbusyerror = ENOENT;
7485		}
7486		if (inuse && nfs_mount_state_in_use_end(nmp, error)) {
7487			inuse = 0;
7488			nofp = newnofp = NULL;
7489			rflags = delegation = recall = eof = rlen = retlen = 0;
7490			ace.ace_flags = 0;
7491			s = sbuf;
7492			slen = sizeof(sbuf);
7493			nfsm_chain_cleanup(&nmreq);
7494			nfsm_chain_cleanup(&nmrep);
7495			if (anp) {
7496				vnode_put(NFSTOV(anp));
7497				*anpp = anp = NULL;
7498			}
7499			hadattrdir = (adnp != NULL);
7500			if (noopbusy) {
7501				nfs_open_owner_clear_busy(noop);
7502				noopbusy = 0;
7503			}
7504			goto restart;
7505		}
7506		if (noop) {
7507			if (noopbusy) {
7508				nfs_open_owner_clear_busy(noop);
7509				noopbusy = 0;
7510			}
7511			nfs_open_owner_rele(noop);
7512		}
7513	}
7514	if (!error && prefetch && nmrep.nmc_mhead) {
7515		nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
7516		nfsm_chain_op_check(error, &nmrep, NFS_OP_NVERIFY);
7517		nfsm_chain_op_check(error, &nmrep, NFS_OP_READ);
7518		nfsm_chain_get_32(error, &nmrep, eof);
7519		nfsm_chain_get_32(error, &nmrep, retlen);
7520		if (!error && anp) {
7521			/*
7522			 * There can be one problem with doing the prefetch.
7523			 * Because we don't have the node before we start the RPC, we
7524			 * can't have the buffer busy while the READ is performed.
7525			 * So there is a chance that other I/O occured on the same
7526			 * range of data while we were performing this RPC.  If that
7527			 * happens, then it's possible the data we have in the READ
7528			 * response is no longer up to date.
7529			 * Once we have the node and the buffer, we need to make sure
7530			 * that there's no chance we could be putting stale data in
7531			 * the buffer.
7532			 * So, we check if the range read is dirty or if any I/O may
7533			 * have occured on it while we were performing our RPC.
7534			 */
7535			struct nfsbuf *bp = NULL;
7536			int lastpg;
7537			uint32_t pagemask;
7538
7539			retlen = MIN(retlen, rlen);
7540
7541			/* check if node needs size update or invalidation */
7542			if (ISSET(anp->n_flag, NUPDATESIZE))
7543				nfs_data_update_size(anp, 0);
7544			if (!(error = nfs_node_lock(anp))) {
7545				if (anp->n_flag & NNEEDINVALIDATE) {
7546					anp->n_flag &= ~NNEEDINVALIDATE;
7547					nfs_node_unlock(anp);
7548					error = nfs_vinvalbuf(NFSTOV(anp), V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
7549					if (!error) /* lets play it safe and just drop the data */
7550						error = EIO;
7551				} else {
7552					nfs_node_unlock(anp);
7553				}
7554			}
7555
7556			/* calculate page mask for the range of data read */
7557			lastpg = (trunc_page_32(retlen) - 1) / PAGE_SIZE;
7558			pagemask = ((1 << (lastpg + 1)) - 1);
7559
7560			if (!error)
7561				error = nfs_buf_get(anp, 0, nmp->nm_biosize, thd, NBLK_READ|NBLK_NOWAIT, &bp);
7562			/* don't save the data if dirty or potential I/O conflict */
7563			if (!error && bp && !bp->nb_dirtyoff && !(bp->nb_dirty & pagemask) &&
7564			    timevalcmp(&anp->n_lastio, &now, <)) {
7565				OSAddAtomic64(1, &nfsstats.read_bios);
7566				CLR(bp->nb_flags, (NB_DONE|NB_ASYNC));
7567				SET(bp->nb_flags, NB_READ);
7568				NFS_BUF_MAP(bp);
7569				nfsm_chain_get_opaque(error, &nmrep, retlen, bp->nb_data);
7570				if (error) {
7571					bp->nb_error = error;
7572					SET(bp->nb_flags, NB_ERROR);
7573				} else {
7574					bp->nb_offio = 0;
7575					bp->nb_endio = rlen;
7576					if ((retlen > 0) && (bp->nb_endio < (int)retlen))
7577						bp->nb_endio = retlen;
7578					if (eof || (retlen == 0)) {
7579						/* zero out the remaining data (up to EOF) */
7580						off_t rpcrem, eofrem, rem;
7581						rpcrem = (rlen - retlen);
7582						eofrem = anp->n_size - (NBOFF(bp) + retlen);
7583						rem = (rpcrem < eofrem) ? rpcrem : eofrem;
7584						if (rem > 0)
7585							bzero(bp->nb_data + retlen, rem);
7586					} else if ((retlen < rlen) && !ISSET(bp->nb_flags, NB_ERROR)) {
7587						/* ugh... short read ... just invalidate for now... */
7588						SET(bp->nb_flags, NB_INVAL);
7589					}
7590				}
7591				nfs_buf_read_finish(bp);
7592				microuptime(&anp->n_lastio);
7593			}
7594			if (bp)
7595				nfs_buf_release(bp, 1);
7596		}
7597		error = 0; /* ignore any transient error in processing the prefetch */
7598	}
7599	if (adnp && !adbusyerror) {
7600		nfs_node_clear_busy(adnp);
7601		adbusyerror = ENOENT;
7602	}
7603	if (!busyerror) {
7604		nfs_node_clear_busy(np);
7605		busyerror = ENOENT;
7606	}
7607	if (adnp)
7608		vnode_put(NFSTOV(adnp));
7609	if (error && *anpp) {
7610		vnode_put(NFSTOV(*anpp));
7611		*anpp = NULL;
7612	}
7613	nfsm_chain_cleanup(&nmreq);
7614	nfsm_chain_cleanup(&nmrep);
7615	return (error);
7616}
7617
7618/*
7619 * Remove a named attribute.
7620 */
7621int
7622nfs4_named_attr_remove(nfsnode_t np, nfsnode_t anp, const char *name, vfs_context_t ctx)
7623{
7624	nfsnode_t adnp = NULL;
7625	struct nfsmount *nmp;
7626	struct componentname cn;
7627	struct vnop_remove_args vra;
7628	int error, putanp = 0;
7629
7630	nmp = NFSTONMP(np);
7631	if (!nmp)
7632		return (ENXIO);
7633
7634	bzero(&cn, sizeof(cn));
7635	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(name, const, char *);
7636	cn.cn_namelen = strlen(name);
7637	cn.cn_nameiop = DELETE;
7638	cn.cn_flags = 0;
7639
7640	if (!anp) {
7641		error = nfs4_named_attr_get(np, &cn, NFS_OPEN_SHARE_ACCESS_NONE,
7642				0, ctx, &anp, NULL);
7643		if ((!error && !anp) || (error == ENOATTR))
7644			error = ENOENT;
7645		if (error) {
7646			if (anp) {
7647				vnode_put(NFSTOV(anp));
7648				anp = NULL;
7649			}
7650			goto out;
7651		}
7652		putanp = 1;
7653	}
7654
7655	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
7656		goto out;
7657	adnp = nfs4_named_attr_dir_get(np, 1, ctx);
7658	nfs_node_clear_busy(np);
7659	if (!adnp) {
7660		error = ENOENT;
7661		goto out;
7662	}
7663
7664	vra.a_desc = &vnop_remove_desc;
7665	vra.a_dvp = NFSTOV(adnp);
7666	vra.a_vp = NFSTOV(anp);
7667	vra.a_cnp = &cn;
7668	vra.a_flags = 0;
7669	vra.a_context = ctx;
7670	error = nfs_vnop_remove(&vra);
7671out:
7672	if (adnp)
7673		vnode_put(NFSTOV(adnp));
7674	if (putanp)
7675		vnode_put(NFSTOV(anp));
7676	return (error);
7677}
7678
7679int
7680nfs4_vnop_getxattr(
7681	struct vnop_getxattr_args /* {
7682		struct vnodeop_desc *a_desc;
7683		vnode_t a_vp;
7684		const char * a_name;
7685		uio_t a_uio;
7686		size_t *a_size;
7687		int a_options;
7688		vfs_context_t a_context;
7689	} */ *ap)
7690{
7691	vfs_context_t ctx = ap->a_context;
7692	struct nfsmount *nmp;
7693	struct nfs_vattr nvattr;
7694	struct componentname cn;
7695	nfsnode_t anp;
7696	int error = 0, isrsrcfork;
7697
7698	nmp = VTONMP(ap->a_vp);
7699	if (!nmp)
7700		return (ENXIO);
7701
7702	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
7703		return (ENOTSUP);
7704	error = nfs_getattr(VTONFS(ap->a_vp), &nvattr, ctx, NGA_CACHED);
7705	if (error)
7706		return (error);
7707	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
7708	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
7709		return (ENOATTR);
7710
7711	bzero(&cn, sizeof(cn));
7712	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
7713	cn.cn_namelen = strlen(ap->a_name);
7714	cn.cn_nameiop = LOOKUP;
7715	cn.cn_flags = MAKEENTRY;
7716
7717	/* we'll normally try to prefetch data for xattrs... the resource fork is really a stream */
7718	isrsrcfork = (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0);
7719
7720	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_NONE,
7721			!isrsrcfork ? NFS_GET_NAMED_ATTR_PREFETCH : 0, ctx, &anp, NULL);
7722	if ((!error && !anp) || (error == ENOENT))
7723		error = ENOATTR;
7724	if (!error) {
7725		if (ap->a_uio)
7726			error = nfs_bioread(anp, ap->a_uio, 0, ctx);
7727		else
7728			*ap->a_size = anp->n_size;
7729	}
7730	if (anp)
7731		vnode_put(NFSTOV(anp));
7732	return (error);
7733}
7734
/*
 * Set the data of an extended attribute using NFSv4 named attributes.
 *
 * The attribute is created/opened (truncated on open for everything except
 * the resource fork), the data is written out and flushed, and the attribute
 * is closed.  FinderInfo is special-cased: it must be exactly 32 bytes, and
 * setting an all-zeroes FinderInfo really means removing it.
 * Returns 0 on success; ENOENT is mapped to ENOATTR for xattr callers.
 */
int
nfs4_vnop_setxattr(
	struct vnop_setxattr_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		const char * a_name;
		uio_t a_uio;
		int a_options;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	int options = ap->a_options;
	uio_t uio = ap->a_uio;
	const char *name = ap->a_name;
	struct nfsmount *nmp;
	struct componentname cn;
	nfsnode_t anp = NULL;
	int error = 0, closeerror = 0, flags, isrsrcfork, isfinderinfo, empty = 0, i;
#define FINDERINFOSIZE 32
	uint8_t finfo[FINDERINFOSIZE];	/* local copy of the FinderInfo payload */
	uint32_t *finfop;
	struct nfs_open_file *nofp = NULL;
	char uio_buf [ UIO_SIZEOF(1) ];
	uio_t auio;
	struct vnop_write_args vwa;

	nmp = VTONMP(ap->a_vp);
	if (!nmp)
		return (ENXIO);

	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
		return (ENOTSUP);

	/* XATTR_CREATE and XATTR_REPLACE are mutually exclusive */
	if ((options & XATTR_CREATE) && (options & XATTR_REPLACE))
		return (EINVAL);

	/* XXX limitation based on need to back up uio on short write */
	if (uio_iovcnt(uio) > 1) {
		printf("nfs4_vnop_setxattr: iovcnt > 1\n");
		return (EINVAL);
	}

	/* set up a CREATE componentname for the attribute's name */
	bzero(&cn, sizeof(cn));
	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(name, const, char *);
	cn.cn_namelen = strlen(name);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = MAKEENTRY;

	/* classify the attribute: FinderInfo and the resource fork get special handling */
	isfinderinfo = (bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0);
	isrsrcfork = isfinderinfo ? 0 : (bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0);
	if (!isrsrcfork)
		uio_setoffset(uio, 0);	/* only resource fork writes keep their offset */
	if (isfinderinfo) {
		/* FinderInfo must be exactly FINDERINFOSIZE bytes */
		if (uio_resid(uio) != sizeof(finfo))
			return (ERANGE);
		error = uiomove((char*)&finfo, sizeof(finfo), uio);
		if (error)
			return (error);
		/* setting a FinderInfo of all zeroes means remove the FinderInfo */
		empty = 1;
		for (i=0, finfop=(uint32_t*)&finfo; i < (int)(sizeof(finfo)/sizeof(uint32_t)); i++)
			if (finfop[i]) {
				empty = 0;
				break;
			}
		if (empty && !(options & (XATTR_CREATE|XATTR_REPLACE))) {
			/* no create/replace semantics requested: simply remove it */
			error = nfs4_named_attr_remove(VTONFS(ap->a_vp), anp, name, ctx);
			if (error == ENOENT)
				error = 0;
			return (error);
		}
		/* first, let's see if we get a create/replace error */
	}

	/*
	 * create/open the xattr
	 *
	 * We need to make sure not to create it if XATTR_REPLACE.
	 * For all xattrs except the resource fork, we also want to
	 * truncate the xattr to remove any current data.  We'll do
	 * that by setting the size to 0 on create/open.
	 */
	flags = 0;
	if (!(options & XATTR_REPLACE))
		flags |= NFS_GET_NAMED_ATTR_CREATE;
	if (options & XATTR_CREATE)
		flags |= NFS_GET_NAMED_ATTR_CREATE_GUARDED;
	if (!isrsrcfork)
		flags |= NFS_GET_NAMED_ATTR_TRUNCATE;

	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_BOTH,
			flags, ctx, &anp, &nofp);
	if (!error && !anp)
		error = ENOATTR;
	if (error)
		goto out;
	/* grab the open state from the get/create/open */
	if (nofp && !(error = nfs_open_file_set_busy(nofp, NULL))) {
		/* clear the create-initiated markers now that we own the open */
		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
		nofp->nof_creator = NULL;
		nfs_open_file_clear_busy(nofp);
	}

	/* Setting an empty FinderInfo really means remove it, skip to the close/remove */
	if (isfinderinfo && empty)
		goto doclose;

	/*
	 * Write the data out and flush.
	 *
	 * For FinderInfo, we've already copied the data to finfo, so do I/O from there.
	 */
	vwa.a_desc = &vnop_write_desc;
	vwa.a_vp = NFSTOV(anp);
	vwa.a_uio = NULL;
	vwa.a_ioflag = 0;
	vwa.a_context = ctx;
	if (isfinderinfo) {
		/* write the 32-byte FinderInfo from the local copy via a one-iov uio */
		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE, &uio_buf, sizeof(uio_buf));
		uio_addiov(auio, (uintptr_t)&finfo, sizeof(finfo));
		vwa.a_uio = auio;
	} else if (uio_resid(uio) > 0) {
		vwa.a_uio = uio;
	}
	if (vwa.a_uio) {
		error = nfs_vnop_write(&vwa);
		if (!error)
			error = nfs_flush(anp, MNT_WAIT, vfs_context_thread(ctx), 0);
	}
doclose:
	/* Close the xattr, preserving any error from the write/flush above. */
	if (nofp) {
		int busyerror = nfs_open_file_set_busy(nofp, NULL);
		closeerror = nfs_close(anp, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
		if (!busyerror)
			nfs_open_file_clear_busy(nofp);
	}
	if (!error && isfinderinfo && empty) { /* Setting an empty FinderInfo really means remove it */
		error = nfs4_named_attr_remove(VTONFS(ap->a_vp), anp, name, ctx);
		if (error == ENOENT)
			error = 0;
	}
	if (!error)
		error = closeerror;
out:
	if (anp)
		vnode_put(NFSTOV(anp));
	/* xattr callers expect ENOATTR for a missing attribute */
	if (error == ENOENT)
		error = ENOATTR;
	return (error);
}
7887
7888int
7889nfs4_vnop_removexattr(
7890	struct vnop_removexattr_args /* {
7891		struct vnodeop_desc *a_desc;
7892		vnode_t a_vp;
7893		const char * a_name;
7894		int a_options;
7895		vfs_context_t a_context;
7896	} */ *ap)
7897{
7898	struct nfsmount *nmp = VTONMP(ap->a_vp);
7899	int error;
7900
7901	if (!nmp)
7902		return (ENXIO);
7903	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
7904		return (ENOTSUP);
7905
7906	error = nfs4_named_attr_remove(VTONFS(ap->a_vp), NULL, ap->a_name, ap->a_context);
7907	if (error == ENOENT)
7908		error = ENOATTR;
7909	return (error);
7910}
7911
/*
 * List the names of a file's extended attributes.
 *
 * Reads the NFSv4 named attribute directory via the directory buffer cache
 * and either copies each (non-protected) name out through the uio or, when
 * no uio is supplied, accumulates the required size in *a_size.
 * Returns 0 with nothing listed when the file has no attribute directory.
 */
int
nfs4_vnop_listxattr(
	struct vnop_listxattr_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		uio_t a_uio;
		size_t *a_size;
		int a_options;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	nfsnode_t np = VTONFS(ap->a_vp);
	uio_t uio = ap->a_uio;
	nfsnode_t adnp = NULL;		/* named attribute directory node */
	struct nfsmount *nmp;
	int error, done, i;
	struct nfs_vattr nvattr;
	uint64_t cookie, nextcookie, lbn = 0;
	struct nfsbuf *bp = NULL;
	struct nfs_dir_buf_header *ndbhp;
	struct direntry *dp;

	nmp = VTONMP(ap->a_vp);
	if (!nmp)
		return (ENXIO);

	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
		return (ENOTSUP);

	/* Quick out: cached attributes may say there are no named attributes. */
	error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED);
	if (error)
		return (error);
	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
		return (0);

	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
		return (error);
	adnp = nfs4_named_attr_dir_get(np, 1, ctx);
	nfs_node_clear_busy(np);
	if (!adnp)
		goto out;	/* no attribute directory: error is 0, nothing to list */

	if ((error = nfs_node_lock(adnp)))
		goto out;

	/* Honor a pending invalidate request on the attribute directory. */
	if (adnp->n_flag & NNEEDINVALIDATE) {
		adnp->n_flag &= ~NNEEDINVALIDATE;
		nfs_invaldir(adnp);
		nfs_node_unlock(adnp);
		error = nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1);
		if (!error)
			error = nfs_node_lock(adnp);
		if (error)
			goto out;
	}

	/*
	 * check for need to invalidate when (re)starting at beginning
	 */
	if (adnp->n_flag & NMODIFIED) {
		nfs_invaldir(adnp);
		nfs_node_unlock(adnp);
		if ((error = nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1)))
			goto out;
	} else {
		nfs_node_unlock(adnp);
	}
	/* nfs_getattr() will check changed and purge caches */
	if ((error = nfs_getattr(adnp, &nvattr, ctx, NGA_UNCACHED)))
		goto out;

	/* a zero-length buffer means the caller wants nothing copied */
	if (uio && (uio_resid(uio) == 0))
		goto out;

	done = 0;
	nextcookie = lbn = 0;

	/* Walk the directory one buffer at a time until EOF or error. */
	while (!error && !done) {
		OSAddAtomic64(1, &nfsstats.biocache_readdirs);
		cookie = nextcookie;
getbuffer:
		error = nfs_buf_get(adnp, lbn, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
		if (error)
			goto out;
		ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
		if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) {
			if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */
				ndbhp->ndbh_flags = 0;
				ndbhp->ndbh_count = 0;
				ndbhp->ndbh_entry_end = sizeof(*ndbhp);
				ndbhp->ndbh_ncgen = adnp->n_ncgen;
			}
			error = nfs_buf_readdir(bp, ctx);
			/* the buffer was dropped out from under us; retry with a fresh one */
			if (error == NFSERR_DIRBUFDROPPED)
				goto getbuffer;
			if (error)
				nfs_buf_release(bp, 1);
			if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) {
				/* hard failure: toss the cached directory state */
				if (!nfs_node_lock(adnp)) {
					nfs_invaldir(adnp);
					nfs_node_unlock(adnp);
				}
				nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1);
				if (error == NFSERR_BAD_COOKIE)
					error = ENOENT;
			}
			if (error)
				goto out;
		}

		/* go through all the entries copying/counting */
		dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
		for (i=0; i < ndbhp->ndbh_count; i++) {
			if (!xattr_protected(dp->d_name)) {
				if (uio == NULL) {
					/* size-only query: count name plus NUL terminator */
					*ap->a_size += dp->d_namlen + 1;
				} else if (uio_resid(uio) < (dp->d_namlen + 1)) {
					error = ERANGE;
				} else {
					error = uiomove(dp->d_name, dp->d_namlen+1, uio);
					if (error && (error != EFAULT))
						error = ERANGE;
				}
			}
			nextcookie = dp->d_seekoff;
			dp = NFS_DIRENTRY_NEXT(dp);
		}

		if (i == ndbhp->ndbh_count) {
			/* hit end of buffer, move to next buffer */
			lbn = nextcookie;
			/* if we also hit EOF, we're done */
			if (ISSET(ndbhp->ndbh_flags, NDB_EOF))
				done = 1;
		}
		/* guard against a server that never advances the readdir cookie */
		if (!error && !done && (nextcookie == cookie)) {
			printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count);
			error = EIO;
		}
		nfs_buf_release(bp, 1);
	}
out:
	if (adnp)
		vnode_put(NFSTOV(adnp));
	return (error);
}
8060
8061#if NAMEDSTREAMS
8062int
8063nfs4_vnop_getnamedstream(
8064	struct vnop_getnamedstream_args /* {
8065		struct vnodeop_desc *a_desc;
8066		vnode_t a_vp;
8067		vnode_t *a_svpp;
8068		const char *a_name;
8069		enum nsoperation a_operation;
8070		int a_flags;
8071		vfs_context_t a_context;
8072	} */ *ap)
8073{
8074	vfs_context_t ctx = ap->a_context;
8075	struct nfsmount *nmp;
8076	struct nfs_vattr nvattr;
8077	struct componentname cn;
8078	nfsnode_t anp;
8079	int error = 0;
8080
8081	nmp = VTONMP(ap->a_vp);
8082	if (!nmp)
8083		return (ENXIO);
8084
8085	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
8086		return (ENOTSUP);
8087	error = nfs_getattr(VTONFS(ap->a_vp), &nvattr, ctx, NGA_CACHED);
8088	if (error)
8089		return (error);
8090	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
8091	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
8092		return (ENOATTR);
8093
8094	bzero(&cn, sizeof(cn));
8095	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
8096	cn.cn_namelen = strlen(ap->a_name);
8097	cn.cn_nameiop = LOOKUP;
8098	cn.cn_flags = MAKEENTRY;
8099
8100	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_NONE,
8101			0, ctx, &anp, NULL);
8102	if ((!error && !anp) || (error == ENOENT))
8103		error = ENOATTR;
8104	if (!error && anp)
8105		*ap->a_svpp = NFSTOV(anp);
8106	else if (anp)
8107		vnode_put(NFSTOV(anp));
8108	return (error);
8109}
8110
8111int
8112nfs4_vnop_makenamedstream(
8113	struct vnop_makenamedstream_args /* {
8114		struct vnodeop_desc *a_desc;
8115		vnode_t *a_svpp;
8116		vnode_t a_vp;
8117		const char *a_name;
8118		int a_flags;
8119		vfs_context_t a_context;
8120	} */ *ap)
8121{
8122	vfs_context_t ctx = ap->a_context;
8123	struct nfsmount *nmp;
8124	struct componentname cn;
8125	nfsnode_t anp;
8126	int error = 0;
8127
8128	nmp = VTONMP(ap->a_vp);
8129	if (!nmp)
8130		return (ENXIO);
8131
8132	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
8133		return (ENOTSUP);
8134
8135	bzero(&cn, sizeof(cn));
8136	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
8137	cn.cn_namelen = strlen(ap->a_name);
8138	cn.cn_nameiop = CREATE;
8139	cn.cn_flags = MAKEENTRY;
8140
8141	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_BOTH,
8142			NFS_GET_NAMED_ATTR_CREATE, ctx, &anp, NULL);
8143	if ((!error && !anp) || (error == ENOENT))
8144		error = ENOATTR;
8145	if (!error && anp)
8146		*ap->a_svpp = NFSTOV(anp);
8147	else if (anp)
8148		vnode_put(NFSTOV(anp));
8149	return (error);
8150}
8151
8152int
8153nfs4_vnop_removenamedstream(
8154	struct vnop_removenamedstream_args /* {
8155		struct vnodeop_desc *a_desc;
8156		vnode_t a_vp;
8157		vnode_t a_svp;
8158		const char *a_name;
8159		int a_flags;
8160		vfs_context_t a_context;
8161	} */ *ap)
8162{
8163	struct nfsmount *nmp = VTONMP(ap->a_vp);
8164	nfsnode_t np = ap->a_vp ? VTONFS(ap->a_vp) : NULL;
8165	nfsnode_t anp = ap->a_svp ? VTONFS(ap->a_svp) : NULL;
8166
8167	if (!nmp)
8168		return (ENXIO);
8169
8170	/*
8171	 * Given that a_svp is a named stream, checking for
8172	 * named attribute support is kinda pointless.
8173	 */
8174	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
8175		return (ENOTSUP);
8176
8177	return (nfs4_named_attr_remove(np, anp, ap->a_name, ap->a_context));
8178}
8179
8180#endif
8181