/*
 * Copyright (c) 2002-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>	/* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>
#include <kern/host.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#include <mach/host_priv.h>
#include <mach/mig_errors.h>
#include <mach/host_special_ports.h>
#include <lockd/lockd_mach.h>

/* from Mach IPC; releases the send right returned by host_get_lockd_port() */
extern void ipc_port_release_send(ipc_port_t);

/*
 * Pending lock request messages are kept on this queue, which is
 * sorted by transaction ID (xid).
 */
static uint64_t nfs_lockxid = 0;
static LOCKD_MSG_QUEUE nfs_pendlockq;

/* list of mounts that are (potentially) making lockd requests */
TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list;

static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;

void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
uint64_t nfs_lockxid_get(void);
int nfs_lockd_send_request(LOCKD_MSG *, int);

/*
 * initialize global nfs lock state
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	TAILQ_INIT(&nfs_lockd_mount_list);

	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
}

/*
 * Register a mount as (potentially) making lockd requests.
 */
void
nfs_lockd_mount_register(struct nfsmount *nmp)
{
	lck_mtx_lock(nfs_lock_mutex);
	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nfs_lockd_mounts++;
	lck_mtx_unlock(nfs_lock_mutex);
}

/*
 * Unregister a mount as (potentially) making lockd requests.
 *
 * When the lockd mount count drops to zero, send a shutdown request to
 * lockd if we've sent it any requests.
 */
void
nfs_lockd_mount_unregister(struct nfsmount *nmp)
{
	int send_shutdown;
	mach_port_t lockd_port = IPC_PORT_NULL;
	kern_return_t kr;

	lck_mtx_lock(nfs_lock_mutex);
	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nfs_lockd_mounts--;

	/* send a shutdown request if there are no more lockd mounts */
	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	if (send_shutdown)
		nfs_lockd_request_sent = 0;

	lck_mtx_unlock(nfs_lock_mutex);

	if (!send_shutdown)
		return;

	/*
	 * Let lockd know that it is no longer needed for any NFS mounts
	 */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
		printf("nfs_lockd_mount_unregister: shutdown couldn't get port, kr %d, port %s\n",
			kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
			(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
		return;
	}

	kr = lockd_shutdown(lockd_port);
	if (kr != KERN_SUCCESS)
		printf("nfs_lockd_mount_unregister: shutdown %d\n", kr);

	ipc_port_release_send(lockd_port);
}

/*
 * insert a lock request message into the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: walk backwards from the tail to find the insertion point */
	while (mr && (mr->lmr_msg.lm_xid > msgreq->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr) {
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	} else {
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	}
}

/*
 * remove a lock request message from the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}

/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests.  We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid)
			return (mr);
		if (mr->lmr_msg.lm_xid > lockxid)
			return (NULL);
	}
	return (mr);
}

/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test
 * if an nlm_granted answer matches the lock request.  We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return (1);
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
		return (1);
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
		return (1);
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
		return (1);
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
		return (1);
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
		return (1);
	return (0);
}

/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data.  We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests.  We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return (NULL);
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
			break;
	}
	return (mr);
}

/*
 * return the next unique lock request transaction ID
 * (nfs_lock_mutex must be held)
 */
uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0.  So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}
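		/*
		 * The low 12 bits are left zero here as counter space for the
		 * increments below, so (assuming the clock moved forward) xids
		 * issued after a reboot won't collide with earlier ones.
		 */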
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen.  */
		if (++nfs_lockxid == 0)
			nfs_lockxid++;
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		     (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return (nfs_lockxid);
}

#define MACH_MAX_TRIES 3

int
nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
{
	kern_return_t kr;
	int retries = 0;
	mach_port_t lockd_port = IPC_PORT_NULL;

	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port))
		return (ENOTSUP);

	do {
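		/*
		 * Outer loop: if lockd's MIG server dies mid-request, resend
		 * up to MACH_MAX_TRIES times before giving up.
		 */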
		/* In the kernel all mach messaging is interruptible */
		do {
			kr = lockd_request(
				lockd_port,
				msg->lm_version,
				msg->lm_flags,
				msg->lm_xid,
				msg->lm_fl.l_start,
				msg->lm_fl.l_len,
				msg->lm_fl.l_pid,
				msg->lm_fl.l_type,
				msg->lm_fl.l_whence,
				(uint32_t *)&msg->lm_addr,
				(uint32_t *)&msg->lm_cred,
				msg->lm_fh_len,
				msg->lm_fh);
			if (kr != KERN_SUCCESS)
				printf("lockd_request received %d!\n", kr);
		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);

	ipc_port_release_send(lockd_port);
	switch (kr) {
	case KERN_SUCCESS:
		return (0);
	case MACH_SEND_INTERRUPTED:
		return (EINTR);
	default:
		/*
		 * Other MACH or MIG errors we will retry. Eventually
		 * we will call nfs_down and allow the user to disable
		 * locking.
		 */
		return (EAGAIN);
	}
}


/*
 * NFS advisory byte-level locks (client)
 */
int
nfs3_lockd_request(
	nfsnode_t np,
	int type,
	LOCKD_MSG_REQUEST *msgreq,
	int flags,
	thread_t thd)
{
	LOCKD_MSG *msg = &msgreq->lmr_msg;
	int error, error2;
	int interruptable, slpflag;
	struct nfsmount *nmp;
	struct timeval now;
	int timeo, starttime, endtime, lastmsg, wentdown = 0;
	struct timespec ts;
	struct sockaddr *saddr;

	nmp = NFSTONMP(np);
	if (!nmp || !nmp->nm_saddr)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	saddr = nmp->nm_saddr;
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
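	/* the server's address rides along in the message so lockd knows which host to contact */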
	if (nmp->nm_vers == NFS_VER3)
		msg->lm_flags |= LOCKD_MSG_NFSV3;

	if (nmp->nm_sotype != SOCK_DGRAM)
		msg->lm_flags |= LOCKD_MSG_TCP;

	microuptime(&now);
	starttime = now.tv_sec;
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = NMFLAG(nmp, INTR);
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(msgreq);

	timeo = 4;

	for (;;) {
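		/*
		 * Each pass sends the current message: the initial request,
		 * a resend after a timeout, or a cancel of a blocked request.
		 */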
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
		lck_mtx_unlock(nfs_lock_mutex);
		error = nfs_lockd_send_request(msg, interruptable);
		lck_mtx_lock(nfs_lock_mutex);
		if (error && error != EAGAIN)
			break;

		/*
		 * Always wait for an answer.  Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 4 and double on each timeout, capped at 30 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves.  This allows
		 * us to pick up on mount changes more quickly.
		 */
wait_for_granted:
		error = EWOULDBLOCK;
		slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq->lmr_answered) {
				error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
				slpflag = 0;
			}
			if (msgreq->lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = NFSTONMP(np);
				if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP.  This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			/* ...and that there isn't a recovery pending */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (type == F_UNLCK)
					printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return an error that'll get this operation restarted */
				error = NFSERR_GRACE;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (type == F_UNLCK)
						printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
					break;
				}
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			/* ...and that there isn't a recovery pending */
			if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return an error that'll get this operation restarted */
				error = NFSERR_GRACE;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			if ((error != EWOULDBLOCK) ||
			    ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
			    ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
				if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
					/* give up if this is for recovery and taking too long */
					error = ETIMEDOUT;
				} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
					/* recovery pending... return an error that'll get this operation restarted */
					error = NFSERR_GRACE;
				}
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq->lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(msgreq);
					msgreq->lmr_saved_errno = error;
					msgreq->lmr_errno = 0;
					msgreq->lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}

			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq->lmr_errno != EINPROGRESS) &&
			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
				wentdown = 1;
			} else
				lck_mtx_unlock(&nmp->nm_lock);

			if (msgreq->lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry.  First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(msgreq);
				msgreq->lmr_saved_errno = msgreq->lmr_errno;
				msgreq->lmr_errno = 0;
				msgreq->lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}

			/*
			 * We timed out, so we will resend the request.
			 */
			if (!(flags & R_RECOVER))
				timeo *= 2;
			if (timeo > 30)
				timeo = 30;
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
			wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
			/*
			 * The lock request was denied because the server lockd is
			 * still in its grace period.  So, we need to try the
			 * request again in a little bit.  Return the GRACE error so
			 * the higher levels can perform the retry.
			 */
			msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
		}

		if (msgreq->lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 30;
			msgreq->lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq->lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and return a DENIED error so the
			 * higher levels can resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			nfs_lockdmsg_dequeue(msgreq);
			error = NFSERR_DENIED;
			break;
		}

		/*
		 * If the blocked lock request was cancelled, restore
		 * the error condition from when we originally bailed
		 * on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq->lmr_saved_errno;
		} else {
			error = msgreq->lmr_errno;
		}

		nmp = NFSTONMP(np);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP.  Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			nfs_lockdmsg_dequeue(msgreq);
			lck_mtx_unlock(nfs_lock_mutex);
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
				nfs_lockd_mount_unregister(nmp);
			}
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			return (error);
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				lck_mtx_unlock(&nmp->nm_lock);
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(msgreq);

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}

/*
 * Send an NLM LOCK message to the server
 */
int
nfs3_setlock_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	struct nfs_file_lock *nflp,
	int reclaim,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);

	if (!nlop->nlo_open_owner) {
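		/* first lock for this owner: bind it to the file's open owner and hold a reference */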
		nfs_open_owner_ref(nofp->nof_owner);
		nlop->nlo_open_owner = nofp->nof_owner;
	}
	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
		return (error);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (reclaim)
		msg->lm_flags |= LOCKD_MSG_RECLAIM;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
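	/* NFSv2 file handles are fixed-size; NFSv3 handles carry an explicit length */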
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = nflp->nfl_start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
	msg->lm_fl.l_type = nflp->nfl_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);

	nfs_lock_owner_clear_busy(nlop);
	return (error);
}

/*
 * Send an NLM UNLOCK message to the server
 */
int
nfs3_unlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	__unused int type,
	uint64_t start,
	uint64_t end,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfsmount *nmp;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = F_UNLCK;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
}

/*
 * Send an NLM LOCK TEST message to the server
 */
int
nfs3_getlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags |= LOCKD_MSG_TEST;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = fl->l_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));

	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
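		/* lockd answered the TEST: lm_fl now describes any conflicting lock */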
		if (msg->lm_fl.l_type != F_UNLCK) {
			fl->l_type = msg->lm_fl.l_type;
			fl->l_pid = msg->lm_fl.l_pid;
			fl->l_start = msg->lm_fl.l_start;
			fl->l_len = msg->lm_fl.l_len;
			fl->l_whence = SEEK_SET;
		} else
			fl->l_type = F_UNLCK;
	}

	return (error);
}

/*
 * nfslockdans --
 *      NFS advisory byte-level locks answer from the lock daemon.
 */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Only root may make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION)
		return (EINVAL);

	lck_mtx_lock(nfs_lock_mutex);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
			msgreq = NULL;
	}
	if (!msgreq) {
		lck_mtx_unlock(nfs_lock_mutex);
		return (EPIPE);
	}

	msgreq->lmr_errno = ansp->la_errno;
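	/* for a TEST request, copy the conflicting lock's info (if any) from the answer */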
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			else
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}
	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
		msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;

	msgreq->lmr_answered = 1;
	lck_mtx_unlock(nfs_lock_mutex);
	wakeup(msgreq);

	return (0);
}

/*
 * nfslockdnotify --
 *      NFS host restart notification from the lock daemon.
 *
 * Used to initiate reclaiming of held locks when a server we
 * have mounted reboots.
 */
int
nfslockdnotify(proc_t p, user_addr_t argp)
{
	int error, i, headsize;
	struct lockd_notify ln;
	struct nfsmount *nmp;
	struct sockaddr *saddr;

	/* Only root may make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
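	/* headsize covers only the fixed-size fields that precede the variable-length address array */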
	error = copyin(argp, &ln, headsize);
	if (error)
		return (error);
	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
		return (EINVAL);
	if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128))
		return (EINVAL);
	argp += headsize;
	saddr = (struct sockaddr *)&ln.ln_addr[0];

	lck_mtx_lock(nfs_lock_mutex);

	for (i = 0; i < ln.ln_addrcount; i++) {
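		/* copy in the next server address and check it against each registered lockd mount */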
		error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
		if (error)
			break;
		argp += sizeof(ln.ln_addr[0]);
		/* scan lockd mount list for match to this address */
		TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
			/* check if address matches this mount's server address */
			if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
				continue;
			/* We have a match!  Mark it as needing recovery. */
			lck_mtx_lock(&nmp->nm_lock);
			nfs_need_recover(nmp, 0);
			lck_mtx_unlock(&nmp->nm_lock);
		}
	}

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}