/*
 * Copyright (c) 2002-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>	/* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>
#include <kern/host.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#include <mach/host_priv.h>
#include <mach/mig_errors.h>
#include <mach/host_special_ports.h>
#include <lockd/lockd_mach.h>

extern void ipc_port_release_send(ipc_port_t);

/*
 * Pending lock request messages are kept in this queue, which is
 * sorted by transaction ID (xid).
 */
static uint64_t nfs_lockxid = 0;
static LOCKD_MSG_QUEUE nfs_pendlockq;

/* list of mounts that are (potentially) making lockd requests */
TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list;

static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;
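/*
 * nfs_lock_mutex guards the global lockd state in this file: the
 * pending-request queue (nfs_pendlockq), the xid counter (nfs_lockxid),
 * the lockd mount list, and the nfs_lockd_mounts/nfs_lockd_request_sent
 * counters manipulated below.
 */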

void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
uint64_t nfs_lockxid_get(void);
int nfs_lockd_send_request(LOCKD_MSG *, int);

/*
 * initialize global nfs lock state
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	TAILQ_INIT(&nfs_lockd_mount_list);

	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
}

/*
 * Register a mount as (potentially) making lockd requests.
 */
void
nfs_lockd_mount_register(struct nfsmount *nmp)
{
	lck_mtx_lock(nfs_lock_mutex);
	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nfs_lockd_mounts++;
	lck_mtx_unlock(nfs_lock_mutex);
}

/*
 * Unregister a mount as (potentially) making lockd requests.
 *
 * When the lockd mount count drops to zero, send a shutdown request to
 * lockd if we've sent any requests to it.
 */
void
nfs_lockd_mount_unregister(struct nfsmount *nmp)
{
	int send_shutdown;
	mach_port_t lockd_port = IPC_PORT_NULL;
	kern_return_t kr;

	lck_mtx_lock(nfs_lock_mutex);
	if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
		lck_mtx_unlock(nfs_lock_mutex);
		return;
	}

	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nmp->nm_ldlink.tqe_next = NFSNOLIST;

	nfs_lockd_mounts--;

	/* send a shutdown request if there are no more lockd mounts */
	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	if (send_shutdown)
		nfs_lockd_request_sent = 0;

	lck_mtx_unlock(nfs_lock_mutex);

	if (!send_shutdown)
		return;

	/*
	 * Let lockd know that it is no longer needed for any NFS mounts
	 */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
		printf("nfs_lockd_mount_unregister: shutdown couldn't get port, kr %d, port %s\n",
			kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
			(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
		return;
	}

	kr = lockd_shutdown(lockd_port);
	if (kr != KERN_SUCCESS)
		printf("nfs_lockd_mount_unregister: shutdown %d\n", kr);

	ipc_port_release_send(lockd_port);
}

/*
 * insert a lock request message into the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: need to walk list to find insertion point */
	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr) {
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	} else {
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	}
}

/*
 * remove a lock request message from the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}

/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests.  We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid)
			return mr;
		if (mr->lmr_msg.lm_xid > lockxid)
			return NULL;
	}
	return mr;
}

/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test
 * if an nlm_granted answer matches the lock request.  We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
		return 1;
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
		return 1;
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
		return 1;
	return 0;
}

/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data.  We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests.  We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return NULL;
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
			break;
	}
	return mr;
}

/*
 * return the next unique lock request transaction ID
 * (nfs_lock_mutex must be held)
 */
uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0.  So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
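		/*
		 * Shifting the boot-time seconds left 12 bits leaves the
		 * low 12 bits for sequencing, so xids issued before a
		 * reboot are unlikely to collide with ones issued after.
		 */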
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen.  */
		if (++nfs_lockxid == 0)
			nfs_lockxid++;
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		     (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return nfs_lockxid;
}

#define MACH_MAX_TRIES 3
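/*
 * MACH_MAX_TRIES bounds how many times a request is resent when the
 * MIG server (lockd) dies mid-call; see the retry loop below.
 */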

int
nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
{
	kern_return_t kr;
	int retries = 0;
	mach_port_t lockd_port = IPC_PORT_NULL;

	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port))
		return (ENOTSUP);

	do {
		/* In the kernel all mach messaging is interruptable */
		do {
			kr = lockd_request(
				lockd_port,
				msg->lm_version,
				msg->lm_flags,
				msg->lm_xid,
				msg->lm_fl.l_start,
				msg->lm_fl.l_len,
				msg->lm_fl.l_pid,
				msg->lm_fl.l_type,
				msg->lm_fl.l_whence,
				(uint32_t *)&msg->lm_addr,
				(uint32_t *)&msg->lm_cred,
				msg->lm_fh_len,
				msg->lm_fh);
			if (kr != KERN_SUCCESS)
				printf("lockd_request received %d!\n", kr);
		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);

	ipc_port_release_send(lockd_port);
	switch (kr) {
	case KERN_SUCCESS:
		return (0);
	case MACH_SEND_INTERRUPTED:
		return (EINTR);
	default:
		/*
		 * Other MACH or MIG errors we will retry. Eventually
		 * we will call nfs_down and allow the user to disable
		 * locking.
		 */
		return (EAGAIN);
	}
}


/*
 * NFS advisory byte-level locks (client)
 */
int
nfs3_lockd_request(
	nfsnode_t np,
	int type,
	LOCKD_MSG_REQUEST *msgreq,
	int flags,
	thread_t thd)
{
	LOCKD_MSG *msg = &msgreq->lmr_msg;
	int error, error2;
	int interruptable, slpflag;
	struct nfsmount *nmp;
	struct timeval now;
	int timeo, starttime, endtime, lastmsg, wentdown = 0;
	struct timespec ts;
	struct sockaddr *saddr;

	nmp = NFSTONMP(np);
	if (!nmp || !nmp->nm_saddr)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	saddr = nmp->nm_saddr;
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	if (nmp->nm_vers == NFS_VER3)
		msg->lm_flags |= LOCKD_MSG_NFSV3;

	if (nmp->nm_sotype != SOCK_DGRAM)
		msg->lm_flags |= LOCKD_MSG_TCP;

	microuptime(&now);
	starttime = now.tv_sec;
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = NMFLAG(nmp, INTR);
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(msgreq);

	timeo = 4;

	for (;;) {
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
		lck_mtx_unlock(nfs_lock_mutex);
		error = nfs_lockd_send_request(msg, interruptable);
		lck_mtx_lock(nfs_lock_mutex);
		if (error && error != EAGAIN)
			break;

		/*
		 * Always wait for an answer.  Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 4 and double each timeout with a max of 30 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves.  This allows
		 * us to pick up on mount changes quicker.
		 */
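		/*
		 * So the successive waits for an answer run roughly
		 * 4, 8, 16, 30, 30, ... seconds; R_RECOVER requests skip
		 * the doubling and keep their initial timeout.
		 */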
wait_for_granted:
		error = EWOULDBLOCK;
		slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq->lmr_answered) {
				error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
				slpflag = 0;
			}
			if (msgreq->lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = NFSTONMP(np);
				if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP.  This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			/* ...and that there isn't a recovery pending */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (type == F_UNLCK)
					printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return an error that'll get this operation restarted */
				error = NFSERR_GRACE;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (type == F_UNLCK)
						printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
					break;
				}
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			/* ...and that there isn't a recovery pending */
			if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return to allow recovery to occur */
				error = NFSERR_DENIED;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			if ((error != EWOULDBLOCK) ||
			    ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
			    ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
				if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
					/* give up if this is for recovery and taking too long */
					error = ETIMEDOUT;
				} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
					/* recovery pending... return an error that'll get this operation restarted */
					error = NFSERR_GRACE;
				}
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq->lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(msgreq);
					msgreq->lmr_saved_errno = error;
					msgreq->lmr_errno = 0;
					msgreq->lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}

			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq->lmr_errno != EINPROGRESS) &&
			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 0);
				wentdown = 1;
			} else
				lck_mtx_unlock(&nmp->nm_lock);

			if (msgreq->lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry.  First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(msgreq);
				msgreq->lmr_saved_errno = msgreq->lmr_errno;
				msgreq->lmr_errno = 0;
				msgreq->lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}

			/*
			 * We timed out, so we will resend the request.
			 */
			if (!(flags & R_RECOVER))
				timeo *= 2;
			if (timeo > 30)
				timeo = 30;
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
			wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
			/*
			 * The lock request was denied because the server lockd is
			 * still in its grace period.  So, we need to try the
			 * request again in a little bit.  Return the GRACE error so
			 * the higher levels can perform the retry.
			 */
			msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
		}

		if (msgreq->lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 30;
			msgreq->lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq->lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and return a DENIED error so the
			 * higher levels can resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			nfs_lockdmsg_dequeue(msgreq);
			error = NFSERR_DENIED;
			break;
		}

		/*
		 * If the blocked lock request was cancelled,
		 * restore the error condition from when we
		 * originally bailed on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq->lmr_saved_errno;
		} else {
			error = msgreq->lmr_errno;
		}

		nmp = NFSTONMP(np);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP.  Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			nfs_lockdmsg_dequeue(msgreq);
			lck_mtx_unlock(nfs_lock_mutex);
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
				nfs_lockd_mount_unregister(nmp);
			}
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			return (error);
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				lck_mtx_unlock(&nmp->nm_lock);
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(msgreq);

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}

/*
 * Send an NLM LOCK message to the server
 */
int
nfs3_setlock_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	struct nfs_file_lock *nflp,
	int reclaim,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	if (!nlop->nlo_open_owner) {
		nfs_open_owner_ref(nofp->nof_owner);
		nlop->nlo_open_owner = nofp->nof_owner;
	}
	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
		return (error);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (reclaim)
		msg->lm_flags |= LOCKD_MSG_RECLAIM;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

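	/*
	 * Note: NFS_FLOCK_LENGTH presumably folds the inclusive
	 * [start, end] byte range into a POSIX-style l_len, with the
	 * "to end of file" case mapping to a length of 0.
	 */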
	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = nflp->nfl_start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
	msg->lm_fl.l_type = nflp->nfl_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);

	nfs_lock_owner_clear_busy(nlop);
	return (error);
}

/*
 * Send an NLM UNLOCK message to the server
 */
int
nfs3_unlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	__unused int type,
	uint64_t start,
	uint64_t end,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfsmount *nmp;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = F_UNLCK;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
}

/*
 * Send an NLM LOCK TEST message to the server
 */
int
nfs3_getlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags |= LOCKD_MSG_TEST;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = fl->l_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));

	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
		if (msg->lm_fl.l_type != F_UNLCK) {
			fl->l_type = msg->lm_fl.l_type;
			fl->l_pid = msg->lm_fl.l_pid;
			fl->l_start = msg->lm_fl.l_start;
			fl->l_len = msg->lm_fl.l_len;
			fl->l_whence = SEEK_SET;
		} else
			fl->l_type = F_UNLCK;
	}

	return (error);
}

/*
 * nfslockdans --
 *      NFS advisory byte-level locks answer from the lock daemon.
 */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION)
		return (EINVAL);

	lck_mtx_lock(nfs_lock_mutex);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
			msgreq = NULL;
	}
	if (!msgreq) {
		lck_mtx_unlock(nfs_lock_mutex);
		return (EPIPE);
	}

	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			else
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}
	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
		msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;

	msgreq->lmr_answered = 1;
	lck_mtx_unlock(nfs_lock_mutex);
	wakeup(msgreq);

	return (0);
}
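
/*
 * Note: rpc.lockd appears to deliver these answers from user space via
 * the nfsclnt() syscall (NFSCLNT_LOCKDANS), which calls nfslockdans()
 * above to wake the thread sleeping in nfs3_lockd_request().
 */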

/*
 * nfslockdnotify --
 *      NFS host restart notification from the lock daemon.
 *
 * Used to initiate reclaiming of held locks when a server we
 * have mounted reboots.
 */
int
nfslockdnotify(proc_t p, user_addr_t argp)
{
	int error, i, headsize;
	struct lockd_notify ln;
	struct nfsmount *nmp;
	struct sockaddr *saddr;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

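	/*
	 * Copy in the fixed-size header (everything up to the first
	 * address), then pull the addresses in one at a time below.
	 */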
	headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
	error = copyin(argp, &ln, headsize);
	if (error)
		return (error);
	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
		return (EINVAL);
	if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128))
		return (EINVAL);
	argp += headsize;
	saddr = (struct sockaddr *)&ln.ln_addr[0];

	lck_mtx_lock(nfs_lock_mutex);

	for (i=0; i < ln.ln_addrcount; i++) {
		error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
		if (error)
			break;
		argp += sizeof(ln.ln_addr[0]);
		/* scan lockd mount list for match to this address */
		TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
			/* check if address matches this mount's server address */
			if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
				continue;
			/* We have a match!  Mark it as needing recovery. */
			lck_mtx_lock(&nmp->nm_lock);
			nfs_need_recover(nmp, 0);
			lck_mtx_unlock(&nmp->nm_lock);
		}
	}

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}
