1191783Srmacklem/*-
2191783Srmacklem * Copyright (c) 1989, 1991, 1993, 1995
3191783Srmacklem *	The Regents of the University of California.  All rights reserved.
4191783Srmacklem *
5191783Srmacklem * This code is derived from software contributed to Berkeley by
6191783Srmacklem * Rick Macklem at The University of Guelph.
7191783Srmacklem *
8191783Srmacklem * Redistribution and use in source and binary forms, with or without
9191783Srmacklem * modification, are permitted provided that the following conditions
10191783Srmacklem * are met:
11191783Srmacklem * 1. Redistributions of source code must retain the above copyright
12191783Srmacklem *    notice, this list of conditions and the following disclaimer.
13191783Srmacklem * 2. Redistributions in binary form must reproduce the above copyright
14191783Srmacklem *    notice, this list of conditions and the following disclaimer in the
15191783Srmacklem *    documentation and/or other materials provided with the distribution.
16191783Srmacklem * 4. Neither the name of the University nor the names of its contributors
17191783Srmacklem *    may be used to endorse or promote products derived from this software
18191783Srmacklem *    without specific prior written permission.
19191783Srmacklem *
20191783Srmacklem * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21191783Srmacklem * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22191783Srmacklem * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23191783Srmacklem * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24191783Srmacklem * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25191783Srmacklem * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26191783Srmacklem * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27191783Srmacklem * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28191783Srmacklem * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29191783Srmacklem * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30191783Srmacklem * SUCH DAMAGE.
31191783Srmacklem *
32191783Srmacklem */
33191783Srmacklem
34191783Srmacklem#include <sys/cdefs.h>
35191783Srmacklem__FBSDID("$FreeBSD: stable/10/sys/fs/nfs/nfs_commonkrpc.c 338308 2018-08-24 22:48:19Z rmacklem $");
36191783Srmacklem
37191783Srmacklem/*
38191783Srmacklem * Socket operations for use by nfs
39191783Srmacklem */
40191783Srmacklem
41223280Srmacklem#include "opt_kdtrace.h"
42191783Srmacklem#include "opt_kgssapi.h"
43191783Srmacklem#include "opt_nfs.h"
44191783Srmacklem
45191783Srmacklem#include <sys/param.h>
46191783Srmacklem#include <sys/systm.h>
47191783Srmacklem#include <sys/kernel.h>
48191783Srmacklem#include <sys/limits.h>
49191783Srmacklem#include <sys/lock.h>
50191783Srmacklem#include <sys/malloc.h>
51191783Srmacklem#include <sys/mbuf.h>
52191783Srmacklem#include <sys/mount.h>
53191783Srmacklem#include <sys/mutex.h>
54191783Srmacklem#include <sys/proc.h>
55191783Srmacklem#include <sys/signalvar.h>
56191783Srmacklem#include <sys/syscallsubr.h>
57191783Srmacklem#include <sys/sysctl.h>
58191783Srmacklem#include <sys/syslog.h>
59191783Srmacklem#include <sys/vnode.h>
60191783Srmacklem
61191783Srmacklem#include <rpc/rpc.h>
62269398Srmacklem#include <rpc/krpc.h>
63191783Srmacklem
64191783Srmacklem#include <kgssapi/krb5/kcrypto.h>
65191783Srmacklem
66191783Srmacklem#include <fs/nfs/nfsport.h>
67191783Srmacklem
#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * DTrace hook function pointers for NFS client RPCs.  newnfs_request()
 * only calls through the start hook when it is non-NULL.
 */
dtrace_nfsclient_nfs23_start_probe_func_t dtrace_nfscl_nfs234_start_probe;
dtrace_nfsclient_nfs23_done_probe_func_t dtrace_nfscl_nfs234_done_probe;

/*
 * Registered probe ids, one table per NFS version, indexed by RPC
 * procedure number.  newnfs_request() treats an id of 0 as "no probe".
 */
uint32_t	nfscl_nfs2_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs3_start_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs4_start_probes[NFSV41_NPROCS + 1];

uint32_t	nfscl_nfs2_done_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs3_done_probes[NFSV41_NPROCS + 1];
uint32_t	nfscl_nfs4_done_probes[NFSV41_NPROCS + 1];
#endif /* KDTRACE_HOOKS */
89223280Srmacklem
90191783SrmacklemNFSSTATESPINLOCK;
91191783SrmacklemNFSREQSPINLOCK;
92244042SrmacklemNFSDLOCKMUTEX;
93317404SrmacklemNFSCLSTATEMUTEX;
94306663Srmacklemextern struct nfsstats newnfsstats;
95191783Srmacklemextern struct nfsreqhead nfsd_reqq;
96191783Srmacklemextern int nfscl_ticks;
97191783Srmacklemextern void (*ncl_call_invalcaches)(struct vnode *);
98244042Srmacklemextern int nfs_numnfscbd;
99244042Srmacklemextern int nfscl_debuglevel;
100191783Srmacklem
101244042SrmacklemSVCPOOL		*nfscbd_pool;
102191783Srmacklemstatic int	nfsrv_gsscallbackson = 0;
103191783Srmacklemstatic int	nfs_bufpackets = 4;
104191783Srmacklemstatic int	nfs_reconnects;
105191783Srmacklemstatic int	nfs3_jukebox_delay = 10;
106191783Srmacklemstatic int	nfs_skip_wcc_data_onerr = 1;
107191783Srmacklem
108221973SrmacklemSYSCTL_DECL(_vfs_nfs);
109191783Srmacklem
110221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
111191783Srmacklem    "Buffer reservation size 2 < x < 64");
112221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
113191783Srmacklem    "Number of times the nfs client has had to reconnect");
114221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
115191783Srmacklem    "Number of seconds to delay a retry after receiving EJUKEBOX");
116221973SrmacklemSYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
117191783Srmacklem    "Disable weak cache consistency checking when server returns an error");
118191783Srmacklem
/*
 * Forward declarations of the static notification helpers.
 * nfs_feedback() calls nfs_down() when the server stops responding and
 * nfs_up() once it answers again; both take the mount, the thread, a
 * message string and two int arguments.
 */
static int	nfs_msg(struct thread *, const char *, const char *, int);
static void	nfs_down(struct nfsmount *, struct thread *, const char *,
		    int, int);
static void	nfs_up(struct nfsmount *, struct thread *, const char *,
		    int, int);
124191783Srmacklem
125191783Srmacklemstruct nfs_cached_auth {
126191783Srmacklem	int		ca_refs; /* refcount, including 1 from the cache */
127191783Srmacklem	uid_t		ca_uid;	 /* uid that corresponds to this auth */
128191783Srmacklem	AUTH		*ca_auth; /* RPC auth handle */
129191783Srmacklem};
130191783Srmacklem
131207764Srmacklemstatic int nfsv2_procid[NFS_V3NPROCS] = {
132207764Srmacklem	NFSV2PROC_NULL,
133207764Srmacklem	NFSV2PROC_GETATTR,
134207764Srmacklem	NFSV2PROC_SETATTR,
135207764Srmacklem	NFSV2PROC_LOOKUP,
136207764Srmacklem	NFSV2PROC_NOOP,
137207764Srmacklem	NFSV2PROC_READLINK,
138207764Srmacklem	NFSV2PROC_READ,
139207764Srmacklem	NFSV2PROC_WRITE,
140207764Srmacklem	NFSV2PROC_CREATE,
141207764Srmacklem	NFSV2PROC_MKDIR,
142207764Srmacklem	NFSV2PROC_SYMLINK,
143207764Srmacklem	NFSV2PROC_CREATE,
144207764Srmacklem	NFSV2PROC_REMOVE,
145207764Srmacklem	NFSV2PROC_RMDIR,
146207764Srmacklem	NFSV2PROC_RENAME,
147207764Srmacklem	NFSV2PROC_LINK,
148207764Srmacklem	NFSV2PROC_READDIR,
149207764Srmacklem	NFSV2PROC_NOOP,
150207764Srmacklem	NFSV2PROC_STATFS,
151207764Srmacklem	NFSV2PROC_NOOP,
152207764Srmacklem	NFSV2PROC_NOOP,
153207764Srmacklem	NFSV2PROC_NOOP,
154207764Srmacklem};
155207764Srmacklem
156191783Srmacklem/*
157191783Srmacklem * Initialize sockets and congestion for a new NFS connection.
158191783Srmacklem * We do not free the sockaddr if error.
159191783Srmacklem */
160191783Srmacklemint
161191783Srmacklemnewnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
162191783Srmacklem    struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
163191783Srmacklem{
164191783Srmacklem	int rcvreserve, sndreserve;
165320637Srmacklem	int pktscale, pktscalesav;
166191783Srmacklem	struct sockaddr *saddr;
167191783Srmacklem	struct ucred *origcred;
168191783Srmacklem	CLIENT *client;
169191783Srmacklem	struct netconfig *nconf;
170191783Srmacklem	struct socket *so;
171224086Szack	int one = 1, retries, error = 0;
172191783Srmacklem	struct thread *td = curthread;
173244042Srmacklem	SVCXPRT *xprt;
174228757Srmacklem	struct timeval timo;
175191783Srmacklem
176191783Srmacklem	/*
177191783Srmacklem	 * We need to establish the socket using the credentials of
178191783Srmacklem	 * the mountpoint.  Some parts of this process (such as
179191783Srmacklem	 * sobind() and soconnect()) will use the curent thread's
180191783Srmacklem	 * credential instead of the socket credential.  To work
181191783Srmacklem	 * around this, temporarily change the current thread's
182191783Srmacklem	 * credential to that of the mountpoint.
183191783Srmacklem	 *
184191783Srmacklem	 * XXX: It would be better to explicitly pass the correct
185191783Srmacklem	 * credential to sobind() and soconnect().
186191783Srmacklem	 */
187191783Srmacklem	origcred = td->td_ucred;
188191783Srmacklem
189191783Srmacklem	/*
190191783Srmacklem	 * Use the credential in nr_cred, if not NULL.
191191783Srmacklem	 */
192191783Srmacklem	if (nrp->nr_cred != NULL)
193191783Srmacklem		td->td_ucred = nrp->nr_cred;
194191783Srmacklem	else
195191783Srmacklem		td->td_ucred = cred;
196191783Srmacklem	saddr = nrp->nr_nam;
197191783Srmacklem
198191783Srmacklem	if (saddr->sa_family == AF_INET)
199191783Srmacklem		if (nrp->nr_sotype == SOCK_DGRAM)
200191783Srmacklem			nconf = getnetconfigent("udp");
201191783Srmacklem		else
202191783Srmacklem			nconf = getnetconfigent("tcp");
203191783Srmacklem	else
204191783Srmacklem		if (nrp->nr_sotype == SOCK_DGRAM)
205191783Srmacklem			nconf = getnetconfigent("udp6");
206191783Srmacklem		else
207191783Srmacklem			nconf = getnetconfigent("tcp6");
208191783Srmacklem
209191783Srmacklem	pktscale = nfs_bufpackets;
210191783Srmacklem	if (pktscale < 2)
211191783Srmacklem		pktscale = 2;
212191783Srmacklem	if (pktscale > 64)
213191783Srmacklem		pktscale = 64;
214320637Srmacklem	pktscalesav = pktscale;
215191783Srmacklem	/*
216191783Srmacklem	 * soreserve() can fail if sb_max is too small, so shrink pktscale
217191783Srmacklem	 * and try again if there is an error.
218191783Srmacklem	 * Print a log message suggesting increasing sb_max.
219191783Srmacklem	 * Creating a socket and doing this is necessary since, if the
220191783Srmacklem	 * reservation sizes are too large and will make soreserve() fail,
221191783Srmacklem	 * the connection will work until a large send is attempted and
222191783Srmacklem	 * then it will loop in the krpc code.
223191783Srmacklem	 */
224191783Srmacklem	so = NULL;
225191783Srmacklem	saddr = NFSSOCKADDR(nrp->nr_nam, struct sockaddr *);
226191783Srmacklem	error = socreate(saddr->sa_family, &so, nrp->nr_sotype,
227191783Srmacklem	    nrp->nr_soproto, td->td_ucred, td);
228191783Srmacklem	if (error) {
229191783Srmacklem		td->td_ucred = origcred;
230224086Szack		goto out;
231191783Srmacklem	}
232191783Srmacklem	do {
233320637Srmacklem	    if (error != 0 && pktscale > 2) {
234320637Srmacklem		if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
235320637Srmacklem		    pktscale == pktscalesav)
236320637Srmacklem		    printf("Consider increasing kern.ipc.maxsockbuf\n");
237191783Srmacklem		pktscale--;
238320637Srmacklem	    }
239191783Srmacklem	    if (nrp->nr_sotype == SOCK_DGRAM) {
240191783Srmacklem		if (nmp != NULL) {
241191783Srmacklem			sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
242191783Srmacklem			    pktscale;
243191783Srmacklem			rcvreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
244191783Srmacklem			    pktscale;
245191783Srmacklem		} else {
246191783Srmacklem			sndreserve = rcvreserve = 1024 * pktscale;
247191783Srmacklem		}
248191783Srmacklem	    } else {
249191783Srmacklem		if (nrp->nr_sotype != SOCK_STREAM)
250191783Srmacklem			panic("nfscon sotype");
251191783Srmacklem		if (nmp != NULL) {
252320637Srmacklem			sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
253191783Srmacklem			    sizeof (u_int32_t)) * pktscale;
254320637Srmacklem			rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
255191783Srmacklem			    sizeof (u_int32_t)) * pktscale;
256191783Srmacklem		} else {
257191783Srmacklem			sndreserve = rcvreserve = 1024 * pktscale;
258191783Srmacklem		}
259191783Srmacklem	    }
260191783Srmacklem	    error = soreserve(so, sndreserve, rcvreserve);
261320637Srmacklem	    if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
262320637Srmacklem		pktscale <= 2)
263320637Srmacklem		printf("Must increase kern.ipc.maxsockbuf or reduce"
264320637Srmacklem		    " rsize, wsize\n");
265191783Srmacklem	} while (error != 0 && pktscale > 2);
266191783Srmacklem	soclose(so);
267191783Srmacklem	if (error) {
268191783Srmacklem		td->td_ucred = origcred;
269224086Szack		goto out;
270191783Srmacklem	}
271191783Srmacklem
272191783Srmacklem	client = clnt_reconnect_create(nconf, saddr, nrp->nr_prog,
273191783Srmacklem	    nrp->nr_vers, sndreserve, rcvreserve);
274191783Srmacklem	CLNT_CONTROL(client, CLSET_WAITCHAN, "newnfsreq");
275191783Srmacklem	if (nmp != NULL) {
276191783Srmacklem		if ((nmp->nm_flag & NFSMNT_INT))
277191783Srmacklem			CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
278191783Srmacklem		if ((nmp->nm_flag & NFSMNT_RESVPORT))
279191783Srmacklem			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
280228757Srmacklem		if (NFSHASSOFT(nmp)) {
281228757Srmacklem			if (nmp->nm_sotype == SOCK_DGRAM)
282228757Srmacklem				/*
283228757Srmacklem				 * For UDP, the large timeout for a reconnect
284228757Srmacklem				 * will be set to "nm_retry * nm_timeo / 2", so
285228757Srmacklem				 * we only want to do 2 reconnect timeout
286228757Srmacklem				 * retries.
287228757Srmacklem				 */
288228757Srmacklem				retries = 2;
289228757Srmacklem			else
290228757Srmacklem				retries = nmp->nm_retry;
291228757Srmacklem		} else
292191783Srmacklem			retries = INT_MAX;
293317927Srmacklem		/* cred == NULL for DS connects. */
294317927Srmacklem		if (NFSHASNFSV4N(nmp) && cred != NULL) {
295244042Srmacklem			/*
296244042Srmacklem			 * Make sure the nfscbd_pool doesn't get destroyed
297244042Srmacklem			 * while doing this.
298244042Srmacklem			 */
299244042Srmacklem			NFSD_LOCK();
300244042Srmacklem			if (nfs_numnfscbd > 0) {
301244042Srmacklem				nfs_numnfscbd++;
302244042Srmacklem				NFSD_UNLOCK();
303244042Srmacklem				xprt = svc_vc_create_backchannel(nfscbd_pool);
304244042Srmacklem				CLNT_CONTROL(client, CLSET_BACKCHANNEL, xprt);
305244042Srmacklem				NFSD_LOCK();
306244042Srmacklem				nfs_numnfscbd--;
307244042Srmacklem				if (nfs_numnfscbd == 0)
308244042Srmacklem					wakeup(&nfs_numnfscbd);
309244042Srmacklem			}
310244042Srmacklem			NFSD_UNLOCK();
311244042Srmacklem		}
312191783Srmacklem	} else {
313191783Srmacklem		/*
314191783Srmacklem		 * Three cases:
315191783Srmacklem		 * - Null RPC callback to client
316191783Srmacklem		 * - Non-Null RPC callback to client, wait a little longer
317191783Srmacklem		 * - upcalls to nfsuserd and gssd (clp == NULL)
318191783Srmacklem		 */
319191783Srmacklem		if (callback_retry_mult == 0) {
320191783Srmacklem			retries = NFSV4_UPCALLRETRY;
321191783Srmacklem			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
322191783Srmacklem		} else {
323191783Srmacklem			retries = NFSV4_CALLBACKRETRY * callback_retry_mult;
324191783Srmacklem		}
325191783Srmacklem	}
326191783Srmacklem	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
327191783Srmacklem
328228757Srmacklem	if (nmp != NULL) {
329228757Srmacklem		/*
330228757Srmacklem		 * For UDP, there are 2 timeouts:
331228757Srmacklem		 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
332228757Srmacklem		 *   that does a retransmit of an RPC request using the same
333228757Srmacklem		 *   socket and xid. This is what you normally want to do,
334228757Srmacklem		 *   since NFS servers depend on "same xid" for their
335228757Srmacklem		 *   Duplicate Request Cache.
336228757Srmacklem		 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
337228757Srmacklem		 *   retransmits on the same socket should fail and a fresh
338228757Srmacklem		 *   socket created. Each of these timeouts counts as one
339228757Srmacklem		 *   CLSET_RETRIES as set above.
340228757Srmacklem		 * Set the initial retransmit timeout for UDP. This timeout
341228757Srmacklem		 * doesn't exist for TCP and the following call just fails,
342228757Srmacklem		 * which is ok.
343228757Srmacklem		 */
344228757Srmacklem		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
345228757Srmacklem		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
346228757Srmacklem		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
347228757Srmacklem	}
348228757Srmacklem
349191783Srmacklem	mtx_lock(&nrp->nr_mtx);
350191783Srmacklem	if (nrp->nr_client != NULL) {
351259207Srmacklem		mtx_unlock(&nrp->nr_mtx);
352191783Srmacklem		/*
353191783Srmacklem		 * Someone else already connected.
354191783Srmacklem		 */
355191783Srmacklem		CLNT_RELEASE(client);
356191783Srmacklem	} else {
357191783Srmacklem		nrp->nr_client = client;
358259207Srmacklem		/*
359259207Srmacklem		 * Protocols that do not require connections may be optionally
360259207Srmacklem		 * left unconnected for servers that reply from a port other
361259207Srmacklem		 * than NFS_PORT.
362259207Srmacklem		 */
363259207Srmacklem		if (nmp == NULL || (nmp->nm_flag & NFSMNT_NOCONN) == 0) {
364259207Srmacklem			mtx_unlock(&nrp->nr_mtx);
365259207Srmacklem			CLNT_CONTROL(client, CLSET_CONNECT, &one);
366259207Srmacklem		} else
367259207Srmacklem			mtx_unlock(&nrp->nr_mtx);
368191783Srmacklem	}
369191783Srmacklem
370191783Srmacklem
371191783Srmacklem	/* Restore current thread's credentials. */
372191783Srmacklem	td->td_ucred = origcred;
373224086Szack
374224086Szackout:
375224086Szack	NFSEXITCODE(error);
376224086Szack	return (error);
377191783Srmacklem}
378191783Srmacklem
379191783Srmacklem/*
380191783Srmacklem * NFS disconnect. Clean up and unlink.
381191783Srmacklem */
382191783Srmacklemvoid
383191783Srmacklemnewnfs_disconnect(struct nfssockreq *nrp)
384191783Srmacklem{
385191783Srmacklem	CLIENT *client;
386191783Srmacklem
387191783Srmacklem	mtx_lock(&nrp->nr_mtx);
388191783Srmacklem	if (nrp->nr_client != NULL) {
389191783Srmacklem		client = nrp->nr_client;
390191783Srmacklem		nrp->nr_client = NULL;
391191783Srmacklem		mtx_unlock(&nrp->nr_mtx);
392223309Srmacklem		rpc_gss_secpurge_call(client);
393191783Srmacklem		CLNT_CLOSE(client);
394191783Srmacklem		CLNT_RELEASE(client);
395191783Srmacklem	} else {
396191783Srmacklem		mtx_unlock(&nrp->nr_mtx);
397191783Srmacklem	}
398191783Srmacklem}
399191783Srmacklem
400191783Srmacklemstatic AUTH *
401191783Srmacklemnfs_getauth(struct nfssockreq *nrp, int secflavour, char *clnt_principal,
402191783Srmacklem    char *srv_principal, gss_OID mech_oid, struct ucred *cred)
403191783Srmacklem{
404191783Srmacklem	rpc_gss_service_t svc;
405191783Srmacklem	AUTH *auth;
406191783Srmacklem
407191783Srmacklem	switch (secflavour) {
408191783Srmacklem	case RPCSEC_GSS_KRB5:
409191783Srmacklem	case RPCSEC_GSS_KRB5I:
410191783Srmacklem	case RPCSEC_GSS_KRB5P:
411191783Srmacklem		if (!mech_oid) {
412223309Srmacklem			if (!rpc_gss_mech_to_oid_call("kerberosv5", &mech_oid))
413191783Srmacklem				return (NULL);
414191783Srmacklem		}
415191783Srmacklem		if (secflavour == RPCSEC_GSS_KRB5)
416191783Srmacklem			svc = rpc_gss_svc_none;
417191783Srmacklem		else if (secflavour == RPCSEC_GSS_KRB5I)
418191783Srmacklem			svc = rpc_gss_svc_integrity;
419191783Srmacklem		else
420191783Srmacklem			svc = rpc_gss_svc_privacy;
421191783Srmacklem
422192616Srmacklem		if (clnt_principal == NULL)
423223309Srmacklem			auth = rpc_gss_secfind_call(nrp->nr_client, cred,
424192616Srmacklem			    srv_principal, mech_oid, svc);
425253049Srmacklem		else {
426253049Srmacklem			auth = rpc_gss_seccreate_call(nrp->nr_client, cred,
427253049Srmacklem			    clnt_principal, srv_principal, "kerberosv5",
428253049Srmacklem			    svc, NULL, NULL, NULL);
429253049Srmacklem			return (auth);
430253049Srmacklem		}
431192675Srmacklem		if (auth != NULL)
432192675Srmacklem			return (auth);
433192675Srmacklem		/* fallthrough */
434191783Srmacklem	case AUTH_SYS:
435191783Srmacklem	default:
436191783Srmacklem		return (authunix_create(cred));
437191783Srmacklem
438191783Srmacklem	}
439191783Srmacklem}
440191783Srmacklem
/*
 * Per-request state handed to nfs_feedback(), the callback the RPC code
 * uses to generate up/down notifications.
 */
struct nfs_feedback_arg {
	/* Mount the request belongs to. */
	struct nfsmount *nf_mount;
	/* NFSD_MONOSEC value recorded at the last "not responding" report. */
	int		nf_lastmsg;
	/* Set to TRUE once a "not responding" report has been issued. */
	int		nf_tprintfmsg;
	/* Thread passed through to nfs_down()/nfs_up(). */
	struct thread	*nf_td;
};
451191783Srmacklem
452191783Srmacklemstatic void
453191783Srmacklemnfs_feedback(int type, int proc, void *arg)
454191783Srmacklem{
455191783Srmacklem	struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
456191783Srmacklem	struct nfsmount *nmp = nf->nf_mount;
457245909Sjhb	time_t now;
458191783Srmacklem
459191783Srmacklem	switch (type) {
460191783Srmacklem	case FEEDBACK_REXMIT2:
461191783Srmacklem	case FEEDBACK_RECONNECT:
462245909Sjhb		now = NFSD_MONOSEC;
463245909Sjhb		if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now) {
464191783Srmacklem			nfs_down(nmp, nf->nf_td,
465191783Srmacklem			    "not responding", 0, NFSSTA_TIMEO);
466191783Srmacklem			nf->nf_tprintfmsg = TRUE;
467245909Sjhb			nf->nf_lastmsg = now;
468191783Srmacklem		}
469191783Srmacklem		break;
470191783Srmacklem
471191783Srmacklem	case FEEDBACK_OK:
472191783Srmacklem		nfs_up(nf->nf_mount, nf->nf_td,
473191783Srmacklem		    "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
474191783Srmacklem		break;
475191783Srmacklem	}
476191783Srmacklem}
477191783Srmacklem
478191783Srmacklem/*
479191783Srmacklem * newnfs_request - goes something like this
480191783Srmacklem *	- does the rpc by calling the krpc layer
481191783Srmacklem *	- break down rpc header and return with nfs reply
482191783Srmacklem * nb: always frees up nd_mreq mbuf list
483191783Srmacklem */
484191783Srmacklemint
485191783Srmacklemnewnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
486191783Srmacklem    struct nfsclient *clp, struct nfssockreq *nrp, vnode_t vp,
487191783Srmacklem    struct thread *td, struct ucred *cred, u_int32_t prog, u_int32_t vers,
488317404Srmacklem    u_char *retsum, int toplevel, u_int64_t *xidp, struct nfsclsession *dssep)
489191783Srmacklem{
490317404Srmacklem	uint32_t retseq, retval, slotseq, *tl;
491191783Srmacklem	time_t waituntil;
492244042Srmacklem	int i = 0, j = 0, opcnt, set_sigset = 0, slot;
493191783Srmacklem	int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS;
494317404Srmacklem	int freeslot, maxslot, reterr, slotpos, timeo;
495191783Srmacklem	u_int16_t procnum;
496191783Srmacklem	u_int trylater_delay = 1;
497191783Srmacklem	struct nfs_feedback_arg nf;
498245909Sjhb	struct timeval timo;
499191783Srmacklem	AUTH *auth;
500191783Srmacklem	struct rpc_callextra ext;
501191783Srmacklem	enum clnt_stat stat;
502191783Srmacklem	struct nfsreq *rep = NULL;
503253049Srmacklem	char *srv_principal = NULL, *clnt_principal = NULL;
504195642Srmacklem	sigset_t oldset;
505230345Srmacklem	struct ucred *authcred;
506317404Srmacklem	struct nfsclsession *sep;
507317404Srmacklem	uint8_t sessionid[NFSX_V4SESSIONID];
508191783Srmacklem
509317404Srmacklem	sep = dssep;
510191783Srmacklem	if (xidp != NULL)
511191783Srmacklem		*xidp = 0;
512191783Srmacklem	/* Reject requests while attempting a forced unmount. */
513191783Srmacklem	if (nmp != NULL && (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)) {
514191783Srmacklem		m_freem(nd->nd_mreq);
515191783Srmacklem		return (ESTALE);
516191783Srmacklem	}
517191783Srmacklem
518230345Srmacklem	/*
519230345Srmacklem	 * Set authcred, which is used to acquire RPC credentials to
520230345Srmacklem	 * the cred argument, by default. The crhold() should not be
521230345Srmacklem	 * necessary, but will ensure that some future code change
522230345Srmacklem	 * doesn't result in the credential being free'd prematurely.
523230345Srmacklem	 */
524230345Srmacklem	authcred = crhold(cred);
525230345Srmacklem
526195642Srmacklem	/* For client side interruptible mounts, mask off the signals. */
527195642Srmacklem	if (nmp != NULL && td != NULL && NFSHASINT(nmp)) {
528195642Srmacklem		newnfs_set_sigmask(td, &oldset);
529195642Srmacklem		set_sigset = 1;
530195642Srmacklem	}
531195642Srmacklem
532191783Srmacklem	/*
533192675Srmacklem	 * XXX if not already connected call nfs_connect now. Longer
534192675Srmacklem	 * term, change nfs_mount to call nfs_connect unconditionally
535192675Srmacklem	 * and let clnt_reconnect_create handle reconnects.
536192675Srmacklem	 */
537192675Srmacklem	if (nrp->nr_client == NULL)
538192675Srmacklem		newnfs_connect(nmp, nrp, cred, td, 0);
539192675Srmacklem
540192675Srmacklem	/*
541191783Srmacklem	 * For a client side mount, nmp is != NULL and clp == NULL. For
542191783Srmacklem	 * server calls (callbacks or upcalls), nmp == NULL.
543191783Srmacklem	 */
544191783Srmacklem	if (clp != NULL) {
545191783Srmacklem		NFSLOCKSTATE();
546191783Srmacklem		if ((clp->lc_flags & LCL_GSS) && nfsrv_gsscallbackson) {
547191783Srmacklem			secflavour = RPCSEC_GSS_KRB5;
548191783Srmacklem			if (nd->nd_procnum != NFSPROC_NULL) {
549191783Srmacklem				if (clp->lc_flags & LCL_GSSINTEGRITY)
550191783Srmacklem					secflavour = RPCSEC_GSS_KRB5I;
551191783Srmacklem				else if (clp->lc_flags & LCL_GSSPRIVACY)
552191783Srmacklem					secflavour = RPCSEC_GSS_KRB5P;
553191783Srmacklem			}
554191783Srmacklem		}
555191783Srmacklem		NFSUNLOCKSTATE();
556191783Srmacklem	} else if (nmp != NULL && NFSHASKERB(nmp) &&
557191783Srmacklem	     nd->nd_procnum != NFSPROC_NULL) {
558191783Srmacklem		if (NFSHASALLGSSNAME(nmp) && nmp->nm_krbnamelen > 0)
559191783Srmacklem			nd->nd_flag |= ND_USEGSSNAME;
560192675Srmacklem		if ((nd->nd_flag & ND_USEGSSNAME) != 0) {
561192675Srmacklem			/*
562192675Srmacklem			 * If there is a client side host based credential,
563192675Srmacklem			 * use that, otherwise use the system uid, if set.
564230345Srmacklem			 * The system uid is in the nmp->nm_sockreq.nr_cred
565230345Srmacklem			 * credentials.
566192675Srmacklem			 */
567192675Srmacklem			if (nmp->nm_krbnamelen > 0) {
568192675Srmacklem				usegssname = 1;
569253049Srmacklem				clnt_principal = nmp->nm_krbname;
570192675Srmacklem			} else if (nmp->nm_uid != (uid_t)-1) {
571230345Srmacklem				KASSERT(nmp->nm_sockreq.nr_cred != NULL,
572230345Srmacklem				    ("newnfs_request: NULL nr_cred"));
573230345Srmacklem				crfree(authcred);
574230345Srmacklem				authcred = crhold(nmp->nm_sockreq.nr_cred);
575192675Srmacklem			}
576192675Srmacklem		} else if (nmp->nm_krbnamelen == 0 &&
577192675Srmacklem		    nmp->nm_uid != (uid_t)-1 && cred->cr_uid == (uid_t)0) {
578192675Srmacklem			/*
579192675Srmacklem			 * If there is no host based principal name and
580192675Srmacklem			 * the system uid is set and this is root, use the
581192675Srmacklem			 * system uid, since root won't have user
582192675Srmacklem			 * credentials in a credentials cache file.
583230345Srmacklem			 * The system uid is in the nmp->nm_sockreq.nr_cred
584230345Srmacklem			 * credentials.
585192675Srmacklem			 */
586230345Srmacklem			KASSERT(nmp->nm_sockreq.nr_cred != NULL,
587230345Srmacklem			    ("newnfs_request: NULL nr_cred"));
588230345Srmacklem			crfree(authcred);
589230345Srmacklem			authcred = crhold(nmp->nm_sockreq.nr_cred);
590192675Srmacklem		}
591191783Srmacklem		if (NFSHASINTEGRITY(nmp))
592191783Srmacklem			secflavour = RPCSEC_GSS_KRB5I;
593191783Srmacklem		else if (NFSHASPRIVACY(nmp))
594191783Srmacklem			secflavour = RPCSEC_GSS_KRB5P;
595191783Srmacklem		else
596191783Srmacklem			secflavour = RPCSEC_GSS_KRB5;
597191783Srmacklem		srv_principal = NFSMNT_SRVKRBNAME(nmp);
598223436Srmacklem	} else if (nmp != NULL && !NFSHASKERB(nmp) &&
599223436Srmacklem	    nd->nd_procnum != NFSPROC_NULL &&
600223436Srmacklem	    (nd->nd_flag & ND_USEGSSNAME) != 0) {
601223436Srmacklem		/*
602223436Srmacklem		 * Use the uid that did the mount when the RPC is doing
603223436Srmacklem		 * NFSv4 system operations, as indicated by the
604223436Srmacklem		 * ND_USEGSSNAME flag, for the AUTH_SYS case.
605230345Srmacklem		 * The credentials in nm_sockreq.nr_cred were used for the
606230345Srmacklem		 * mount.
607223436Srmacklem		 */
608230345Srmacklem		KASSERT(nmp->nm_sockreq.nr_cred != NULL,
609230345Srmacklem		    ("newnfs_request: NULL nr_cred"));
610230345Srmacklem		crfree(authcred);
611230345Srmacklem		authcred = crhold(nmp->nm_sockreq.nr_cred);
612191783Srmacklem	}
613191783Srmacklem
614191783Srmacklem	if (nmp != NULL) {
615191783Srmacklem		bzero(&nf, sizeof(struct nfs_feedback_arg));
616191783Srmacklem		nf.nf_mount = nmp;
617191783Srmacklem		nf.nf_td = td;
618245909Sjhb		nf.nf_lastmsg = NFSD_MONOSEC -
619191783Srmacklem		    ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay));
620191783Srmacklem	}
621191783Srmacklem
622192181Srmacklem	if (nd->nd_procnum == NFSPROC_NULL)
623192181Srmacklem		auth = authnone_create();
624253049Srmacklem	else if (usegssname) {
625253049Srmacklem		/*
626253049Srmacklem		 * For this case, the authenticator is held in the
627253049Srmacklem		 * nfssockreq structure, so don't release the reference count
628253049Srmacklem		 * held on it. --> Don't AUTH_DESTROY() it in this function.
629253049Srmacklem		 */
630253049Srmacklem		if (nrp->nr_auth == NULL)
631253049Srmacklem			nrp->nr_auth = nfs_getauth(nrp, secflavour,
632253049Srmacklem			    clnt_principal, srv_principal, NULL, authcred);
633253049Srmacklem		else
634253049Srmacklem			rpc_gss_refresh_auth_call(nrp->nr_auth);
635253049Srmacklem		auth = nrp->nr_auth;
636253049Srmacklem	} else
637191783Srmacklem		auth = nfs_getauth(nrp, secflavour, NULL,
638230345Srmacklem		    srv_principal, NULL, authcred);
639230345Srmacklem	crfree(authcred);
640191783Srmacklem	if (auth == NULL) {
641191783Srmacklem		m_freem(nd->nd_mreq);
642195642Srmacklem		if (set_sigset)
643195642Srmacklem			newnfs_restore_sigmask(td, &oldset);
644191783Srmacklem		return (EACCES);
645191783Srmacklem	}
646191783Srmacklem	bzero(&ext, sizeof(ext));
647191783Srmacklem	ext.rc_auth = auth;
648191783Srmacklem	if (nmp != NULL) {
649191783Srmacklem		ext.rc_feedback = nfs_feedback;
650191783Srmacklem		ext.rc_feedback_arg = &nf;
651191783Srmacklem	}
652191783Srmacklem
653191783Srmacklem	procnum = nd->nd_procnum;
654191783Srmacklem	if ((nd->nd_flag & ND_NFSV4) &&
655192181Srmacklem	    nd->nd_procnum != NFSPROC_NULL &&
656191783Srmacklem	    nd->nd_procnum != NFSV4PROC_CBCOMPOUND)
657191783Srmacklem		procnum = NFSV4PROC_COMPOUND;
658191783Srmacklem
659191783Srmacklem	if (nmp != NULL) {
660306663Srmacklem		NFSINCRGLOBAL(newnfsstats.rpcrequests);
661207764Srmacklem
662207764Srmacklem		/* Map the procnum to the old NFSv2 one, as required. */
663207764Srmacklem		if ((nd->nd_flag & ND_NFSV2) != 0) {
664207764Srmacklem			if (nd->nd_procnum < NFS_V3NPROCS)
665207764Srmacklem				procnum = nfsv2_procid[nd->nd_procnum];
666207764Srmacklem			else
667207764Srmacklem				procnum = NFSV2PROC_NOOP;
668207764Srmacklem		}
669207764Srmacklem
670191783Srmacklem		/*
671191783Srmacklem		 * Now only used for the R_DONTRECOVER case, but until that is
672191783Srmacklem		 * supported within the krpc code, I need to keep a queue of
673191783Srmacklem		 * outstanding RPCs for nfsv4 client requests.
674191783Srmacklem		 */
675191783Srmacklem		if ((nd->nd_flag & ND_NFSV4) && procnum == NFSV4PROC_COMPOUND)
676191783Srmacklem			MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq),
677191783Srmacklem			    M_NFSDREQ, M_WAITOK);
678223280Srmacklem#ifdef KDTRACE_HOOKS
679223280Srmacklem		if (dtrace_nfscl_nfs234_start_probe != NULL) {
680223280Srmacklem			uint32_t probe_id;
681223280Srmacklem			int probe_procnum;
682223280Srmacklem
683223280Srmacklem			if (nd->nd_flag & ND_NFSV4) {
684223280Srmacklem				probe_id =
685223280Srmacklem				    nfscl_nfs4_start_probes[nd->nd_procnum];
686223280Srmacklem				probe_procnum = nd->nd_procnum;
687223280Srmacklem			} else if (nd->nd_flag & ND_NFSV3) {
688223280Srmacklem				probe_id = nfscl_nfs3_start_probes[procnum];
689223280Srmacklem				probe_procnum = procnum;
690223280Srmacklem			} else {
691223280Srmacklem				probe_id =
692223280Srmacklem				    nfscl_nfs2_start_probes[nd->nd_procnum];
693223280Srmacklem				probe_procnum = procnum;
694223280Srmacklem			}
695223280Srmacklem			if (probe_id != 0)
696223280Srmacklem				(dtrace_nfscl_nfs234_start_probe)
697223280Srmacklem				    (probe_id, vp, nd->nd_mreq, cred,
698223280Srmacklem				     probe_procnum);
699223280Srmacklem		}
700223280Srmacklem#endif
701191783Srmacklem	}
702191783Srmacklem	trycnt = 0;
703244042Srmacklem	freeslot = -1;		/* Set to slot that needs to be free'd */
704191783Srmacklemtryagain:
705244042Srmacklem	slot = -1;		/* Slot that needs a sequence# increment. */
706228757Srmacklem	/*
707228757Srmacklem	 * This timeout specifies when a new socket should be created,
708228757Srmacklem	 * along with new xid values. For UDP, this should be done
709228757Srmacklem	 * infrequently, since retransmits of RPC requests should normally
710228757Srmacklem	 * use the same xid.
711228757Srmacklem	 */
712191783Srmacklem	if (nmp == NULL) {
713191783Srmacklem		timo.tv_usec = 0;
714191783Srmacklem		if (clp == NULL)
715191783Srmacklem			timo.tv_sec = NFSV4_UPCALLTIMEO;
716191783Srmacklem		else
717191783Srmacklem			timo.tv_sec = NFSV4_CALLBACKTIMEO;
718191783Srmacklem	} else {
719191783Srmacklem		if (nrp->nr_sotype != SOCK_DGRAM) {
720191783Srmacklem			timo.tv_usec = 0;
721191783Srmacklem			if ((nmp->nm_flag & NFSMNT_NFSV4))
722191783Srmacklem				timo.tv_sec = INT_MAX;
723191783Srmacklem			else
724191783Srmacklem				timo.tv_sec = NFS_TCPTIMEO;
725191783Srmacklem		} else {
726228757Srmacklem			if (NFSHASSOFT(nmp)) {
727228757Srmacklem				/*
728228757Srmacklem				 * CLSET_RETRIES is set to 2, so this should be
729228757Srmacklem				 * half of the total timeout required.
730228757Srmacklem				 */
731228757Srmacklem				timeo = nmp->nm_retry * nmp->nm_timeo / 2;
732228757Srmacklem				if (timeo < 1)
733228757Srmacklem					timeo = 1;
734228757Srmacklem				timo.tv_sec = timeo / NFS_HZ;
735228757Srmacklem				timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
736228757Srmacklem				    NFS_HZ;
737228757Srmacklem			} else {
738228757Srmacklem				/* For UDP hard mounts, use a large value. */
739228757Srmacklem				timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
740228757Srmacklem				timo.tv_usec = 0;
741228757Srmacklem			}
742191783Srmacklem		}
743191783Srmacklem
744191783Srmacklem		if (rep != NULL) {
745191783Srmacklem			rep->r_flags = 0;
746191783Srmacklem			rep->r_nmp = nmp;
747191783Srmacklem			/*
748191783Srmacklem			 * Chain request into list of outstanding requests.
749191783Srmacklem			 */
750191783Srmacklem			NFSLOCKREQ();
751191783Srmacklem			TAILQ_INSERT_TAIL(&nfsd_reqq, rep, r_chain);
752191783Srmacklem			NFSUNLOCKREQ();
753191783Srmacklem		}
754191783Srmacklem	}
755191783Srmacklem
756191783Srmacklem	nd->nd_mrep = NULL;
757269398Srmacklem	if (clp != NULL && sep != NULL)
758269398Srmacklem		stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
759269398Srmacklem		    nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
760269398Srmacklem	else
761269398Srmacklem		stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
762269398Srmacklem		    nd->nd_mreq, &nd->nd_mrep, timo);
763191783Srmacklem
764191783Srmacklem	if (rep != NULL) {
765191783Srmacklem		/*
766191783Srmacklem		 * RPC done, unlink the request.
767191783Srmacklem		 */
768191783Srmacklem		NFSLOCKREQ();
769191783Srmacklem		TAILQ_REMOVE(&nfsd_reqq, rep, r_chain);
770191783Srmacklem		NFSUNLOCKREQ();
771191783Srmacklem	}
772191783Srmacklem
773191783Srmacklem	/*
774191783Srmacklem	 * If there was a successful reply and a tprintf msg.
775191783Srmacklem	 * tprintf a response.
776191783Srmacklem	 */
777191783Srmacklem	if (stat == RPC_SUCCESS) {
778191783Srmacklem		error = 0;
779191783Srmacklem	} else if (stat == RPC_TIMEDOUT) {
780306663Srmacklem		NFSINCRGLOBAL(newnfsstats.rpctimeouts);
781191783Srmacklem		error = ETIMEDOUT;
782191783Srmacklem	} else if (stat == RPC_VERSMISMATCH) {
783306663Srmacklem		NFSINCRGLOBAL(newnfsstats.rpcinvalid);
784191783Srmacklem		error = EOPNOTSUPP;
785191783Srmacklem	} else if (stat == RPC_PROGVERSMISMATCH) {
786306663Srmacklem		NFSINCRGLOBAL(newnfsstats.rpcinvalid);
787191783Srmacklem		error = EPROTONOSUPPORT;
788245476Sjhb	} else if (stat == RPC_INTR) {
789245476Sjhb		error = EINTR;
790191783Srmacklem	} else {
791306663Srmacklem		NFSINCRGLOBAL(newnfsstats.rpcinvalid);
792191783Srmacklem		error = EACCES;
793191783Srmacklem	}
794191783Srmacklem	if (error) {
795191783Srmacklem		m_freem(nd->nd_mreq);
796253049Srmacklem		if (usegssname == 0)
797253049Srmacklem			AUTH_DESTROY(auth);
798191783Srmacklem		if (rep != NULL)
799191783Srmacklem			FREE((caddr_t)rep, M_NFSDREQ);
800195642Srmacklem		if (set_sigset)
801195642Srmacklem			newnfs_restore_sigmask(td, &oldset);
802191783Srmacklem		return (error);
803191783Srmacklem	}
804191783Srmacklem
805191783Srmacklem	KASSERT(nd->nd_mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
806191783Srmacklem
807192695Srmacklem	/*
808192695Srmacklem	 * Search for any mbufs that are not a multiple of 4 bytes long
809192695Srmacklem	 * or with m_data not longword aligned.
810192695Srmacklem	 * These could cause pointer alignment problems, so copy them to
811192695Srmacklem	 * well aligned mbufs.
812192695Srmacklem	 */
813249592Sken	newnfs_realign(&nd->nd_mrep, M_WAITOK);
814191783Srmacklem	nd->nd_md = nd->nd_mrep;
815191783Srmacklem	nd->nd_dpos = NFSMTOD(nd->nd_md, caddr_t);
816191783Srmacklem	nd->nd_repstat = 0;
817269398Srmacklem	if (nd->nd_procnum != NFSPROC_NULL &&
818269398Srmacklem	    nd->nd_procnum != NFSV4PROC_CBNULL) {
819244042Srmacklem		/* If sep == NULL, set it to the default in nmp. */
820244042Srmacklem		if (sep == NULL && nmp != NULL)
821317404Srmacklem			sep = nfsmnt_mdssession(nmp);
822191783Srmacklem		/*
823191783Srmacklem		 * and now the actual NFS xdr.
824191783Srmacklem		 */
825191783Srmacklem		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
826191783Srmacklem		nd->nd_repstat = fxdr_unsigned(u_int32_t, *tl);
827244042Srmacklem		if (nd->nd_repstat >= 10000)
828244042Srmacklem			NFSCL_DEBUG(1, "proc=%d reps=%d\n", (int)nd->nd_procnum,
829244042Srmacklem			    (int)nd->nd_repstat);
830244042Srmacklem
831244042Srmacklem		/*
832244042Srmacklem		 * Get rid of the tag, return count and SEQUENCE result for
833244042Srmacklem		 * NFSv4.
834244042Srmacklem		 */
835244042Srmacklem		if ((nd->nd_flag & ND_NFSV4) != 0) {
836244042Srmacklem			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
837244042Srmacklem			i = fxdr_unsigned(int, *tl);
838244042Srmacklem			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
839244042Srmacklem			if (error)
840244042Srmacklem				goto nfsmout;
841244042Srmacklem			NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
842244042Srmacklem			opcnt = fxdr_unsigned(int, *tl++);
843244042Srmacklem			i = fxdr_unsigned(int, *tl++);
844244042Srmacklem			j = fxdr_unsigned(int, *tl);
845244042Srmacklem			if (j >= 10000)
846244042Srmacklem				NFSCL_DEBUG(1, "fop=%d fst=%d\n", i, j);
847244042Srmacklem			/*
848244042Srmacklem			 * If the first op is Sequence, free up the slot.
849244042Srmacklem			 */
850269398Srmacklem			if ((nmp != NULL && i == NFSV4OP_SEQUENCE && j != 0) ||
851269398Srmacklem			    (clp != NULL && i == NFSV4OP_CBSEQUENCE && j != 0))
852244042Srmacklem				NFSCL_DEBUG(1, "failed seq=%d\n", j);
853336899Srmacklem			if (((nmp != NULL && i == NFSV4OP_SEQUENCE && j == 0) ||
854336899Srmacklem			    (clp != NULL && i == NFSV4OP_CBSEQUENCE &&
855336899Srmacklem			    j == 0)) && sep != NULL) {
856269398Srmacklem				if (i == NFSV4OP_SEQUENCE)
857269398Srmacklem					NFSM_DISSECT(tl, uint32_t *,
858269398Srmacklem					    NFSX_V4SESSIONID +
859269398Srmacklem					    5 * NFSX_UNSIGNED);
860269398Srmacklem				else
861269398Srmacklem					NFSM_DISSECT(tl, uint32_t *,
862269398Srmacklem					    NFSX_V4SESSIONID +
863269398Srmacklem					    4 * NFSX_UNSIGNED);
864244042Srmacklem				mtx_lock(&sep->nfsess_mtx);
865317404Srmacklem				if (bcmp(tl, sep->nfsess_sessionid,
866317404Srmacklem				    NFSX_V4SESSIONID) == 0) {
867317404Srmacklem					tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
868317404Srmacklem					retseq = fxdr_unsigned(uint32_t, *tl++);
869317404Srmacklem					slot = fxdr_unsigned(int, *tl++);
870317404Srmacklem					freeslot = slot;
871317404Srmacklem					if (retseq != sep->nfsess_slotseq[slot])
872317404Srmacklem						printf("retseq diff 0x%x\n",
873317404Srmacklem						    retseq);
874317404Srmacklem					retval = fxdr_unsigned(uint32_t, *++tl);
875317404Srmacklem					if ((retval + 1) < sep->nfsess_foreslots
876317404Srmacklem					    )
877317404Srmacklem						sep->nfsess_foreslots = (retval
878317404Srmacklem						    + 1);
879317404Srmacklem					else if ((retval + 1) >
880317404Srmacklem					    sep->nfsess_foreslots)
881317404Srmacklem						sep->nfsess_foreslots = (retval
882317404Srmacklem						    < 64) ? (retval + 1) : 64;
883317404Srmacklem				}
884244042Srmacklem				mtx_unlock(&sep->nfsess_mtx);
885244042Srmacklem
886244042Srmacklem				/* Grab the op and status for the next one. */
887244042Srmacklem				if (opcnt > 1) {
888244042Srmacklem					NFSM_DISSECT(tl, uint32_t *,
889244042Srmacklem					    2 * NFSX_UNSIGNED);
890244042Srmacklem					i = fxdr_unsigned(int, *tl++);
891244042Srmacklem					j = fxdr_unsigned(int, *tl);
892244042Srmacklem				}
893244042Srmacklem			}
894244042Srmacklem		}
895191783Srmacklem		if (nd->nd_repstat != 0) {
896317404Srmacklem			if (nd->nd_repstat == NFSERR_BADSESSION &&
897336899Srmacklem			    nmp != NULL && dssep == NULL &&
898336899Srmacklem			    (nd->nd_flag & ND_NFSV41) != 0) {
899317404Srmacklem				/*
900317404Srmacklem				 * If this is a client side MDS RPC, mark
901317404Srmacklem				 * the MDS session defunct and initiate
902317404Srmacklem				 * recovery, as required.
903317404Srmacklem				 * The nfsess_defunct field is protected by
904317404Srmacklem				 * the NFSLOCKMNT()/nm_mtx lock and not the
905317404Srmacklem				 * nfsess_mtx lock to simplify its handling,
906317404Srmacklem				 * for the MDS session. This lock is also
907317404Srmacklem				 * sufficient for nfsess_sessionid, since it
908317404Srmacklem				 * never changes in the structure.
909317404Srmacklem				 */
910317404Srmacklem				NFSCL_DEBUG(1, "Got badsession\n");
911317404Srmacklem				NFSLOCKCLSTATE();
912317404Srmacklem				NFSLOCKMNT(nmp);
913317404Srmacklem				sep = NFSMNT_MDSSESSION(nmp);
914317404Srmacklem				if (bcmp(sep->nfsess_sessionid, nd->nd_sequence,
915317404Srmacklem				    NFSX_V4SESSIONID) == 0) {
916317404Srmacklem					/* Initiate recovery. */
917317404Srmacklem					sep->nfsess_defunct = 1;
918317404Srmacklem					NFSCL_DEBUG(1, "Marked defunct\n");
919317404Srmacklem					if (nmp->nm_clp != NULL) {
920317404Srmacklem						nmp->nm_clp->nfsc_flags |=
921317404Srmacklem						    NFSCLFLAGS_RECOVER;
922317404Srmacklem						wakeup(nmp->nm_clp);
923317404Srmacklem					}
924317404Srmacklem				}
925317404Srmacklem				NFSUNLOCKCLSTATE();
926317404Srmacklem				/*
927317404Srmacklem				 * Sleep for up to 1sec waiting for a new
928317404Srmacklem				 * session.
929317404Srmacklem				 */
930317404Srmacklem				mtx_sleep(&nmp->nm_sess, &nmp->nm_mtx, PZERO,
931317404Srmacklem				    "nfsbadsess", hz);
932317404Srmacklem				/*
933317404Srmacklem				 * Get the session again, in case a new one
934317404Srmacklem				 * has been created during the sleep.
935317404Srmacklem				 */
936317404Srmacklem				sep = NFSMNT_MDSSESSION(nmp);
937317404Srmacklem				NFSUNLOCKMNT(nmp);
938317404Srmacklem				if ((nd->nd_flag & ND_LOOPBADSESS) != 0) {
939317404Srmacklem					reterr = nfsv4_sequencelookup(nmp, sep,
940317404Srmacklem					    &slotpos, &maxslot, &slotseq,
941317404Srmacklem					    sessionid);
942317404Srmacklem					if (reterr == 0) {
943317404Srmacklem						/* Fill in new session info. */
944317404Srmacklem						NFSCL_DEBUG(1,
945317404Srmacklem						  "Filling in new sequence\n");
946317404Srmacklem						tl = nd->nd_sequence;
947317404Srmacklem						bcopy(sessionid, tl,
948317404Srmacklem						    NFSX_V4SESSIONID);
949317404Srmacklem						tl += NFSX_V4SESSIONID /
950317404Srmacklem						    NFSX_UNSIGNED;
951317404Srmacklem						*tl++ = txdr_unsigned(slotseq);
952317404Srmacklem						*tl++ = txdr_unsigned(slotpos);
953317404Srmacklem						*tl = txdr_unsigned(maxslot);
954317404Srmacklem					}
955317404Srmacklem					if (reterr == NFSERR_BADSESSION ||
956317404Srmacklem					    reterr == 0) {
957317404Srmacklem						NFSCL_DEBUG(1,
958317404Srmacklem						    "Badsession looping\n");
959317404Srmacklem						m_freem(nd->nd_mrep);
960317404Srmacklem						nd->nd_mrep = NULL;
961317404Srmacklem						goto tryagain;
962317404Srmacklem					}
963317404Srmacklem					nd->nd_repstat = reterr;
964317404Srmacklem					NFSCL_DEBUG(1, "Got err=%d\n", reterr);
965317404Srmacklem				}
966317404Srmacklem			}
967338308Srmacklem			/*
968338308Srmacklem			 * When clp != NULL, it is a callback and all
969338308Srmacklem			 * callback operations can be retried for NFSERR_DELAY.
970338308Srmacklem			 */
971224117Srmacklem			if (((nd->nd_repstat == NFSERR_DELAY ||
972224117Srmacklem			      nd->nd_repstat == NFSERR_GRACE) &&
973338308Srmacklem			     (nd->nd_flag & ND_NFSV4) && (clp != NULL ||
974338308Srmacklem			     (nd->nd_procnum != NFSPROC_DELEGRETURN &&
975191783Srmacklem			     nd->nd_procnum != NFSPROC_SETATTR &&
976191783Srmacklem			     nd->nd_procnum != NFSPROC_READ &&
977244042Srmacklem			     nd->nd_procnum != NFSPROC_READDS &&
978191783Srmacklem			     nd->nd_procnum != NFSPROC_WRITE &&
979244042Srmacklem			     nd->nd_procnum != NFSPROC_WRITEDS &&
980191783Srmacklem			     nd->nd_procnum != NFSPROC_OPEN &&
981191783Srmacklem			     nd->nd_procnum != NFSPROC_CREATE &&
982191783Srmacklem			     nd->nd_procnum != NFSPROC_OPENCONFIRM &&
983191783Srmacklem			     nd->nd_procnum != NFSPROC_OPENDOWNGRADE &&
984191783Srmacklem			     nd->nd_procnum != NFSPROC_CLOSE &&
985191783Srmacklem			     nd->nd_procnum != NFSPROC_LOCK &&
986338308Srmacklem			     nd->nd_procnum != NFSPROC_LOCKU))) ||
987191783Srmacklem			    (nd->nd_repstat == NFSERR_DELAY &&
988191783Srmacklem			     (nd->nd_flag & ND_NFSV4) == 0) ||
989191783Srmacklem			    nd->nd_repstat == NFSERR_RESOURCE) {
990191783Srmacklem				if (trylater_delay > NFS_TRYLATERDEL)
991191783Srmacklem					trylater_delay = NFS_TRYLATERDEL;
992191783Srmacklem				waituntil = NFSD_MONOSEC + trylater_delay;
993191783Srmacklem				while (NFSD_MONOSEC < waituntil)
994207170Srmacklem					(void) nfs_catnap(PZERO, 0, "nfstry");
995191783Srmacklem				trylater_delay *= 2;
996244042Srmacklem				if (slot != -1) {
997244042Srmacklem					mtx_lock(&sep->nfsess_mtx);
998244042Srmacklem					sep->nfsess_slotseq[slot]++;
999244042Srmacklem					*nd->nd_slotseq = txdr_unsigned(
1000244042Srmacklem					    sep->nfsess_slotseq[slot]);
1001244042Srmacklem					mtx_unlock(&sep->nfsess_mtx);
1002244042Srmacklem				}
1003223441Srmacklem				m_freem(nd->nd_mrep);
1004223441Srmacklem				nd->nd_mrep = NULL;
1005191783Srmacklem				goto tryagain;
1006191783Srmacklem			}
1007191783Srmacklem
1008191783Srmacklem			/*
1009191783Srmacklem			 * If the File Handle was stale, invalidate the
1010191783Srmacklem			 * lookup cache, just in case.
1011191783Srmacklem			 * (vp != NULL implies a client side call)
1012191783Srmacklem			 */
1013191783Srmacklem			if (nd->nd_repstat == ESTALE && vp != NULL) {
1014191783Srmacklem				cache_purge(vp);
1015191783Srmacklem				if (ncl_call_invalcaches != NULL)
1016191783Srmacklem					(*ncl_call_invalcaches)(vp);
1017191783Srmacklem			}
1018191783Srmacklem		}
1019244042Srmacklem		if ((nd->nd_flag & ND_NFSV4) != 0) {
1020244042Srmacklem			/* Free the slot, as required. */
1021244042Srmacklem			if (freeslot != -1)
1022244042Srmacklem				nfsv4_freeslot(sep, freeslot);
1023191783Srmacklem			/*
1024244042Srmacklem			 * If this op is Putfh, throw its results away.
1025191783Srmacklem			 */
1026244042Srmacklem			if (j >= 10000)
1027244042Srmacklem				NFSCL_DEBUG(1, "nop=%d nst=%d\n", i, j);
1028244042Srmacklem			if (nmp != NULL && i == NFSV4OP_PUTFH && j == 0) {
1029191783Srmacklem				NFSM_DISSECT(tl,u_int32_t *,2 * NFSX_UNSIGNED);
1030191783Srmacklem				i = fxdr_unsigned(int, *tl++);
1031191783Srmacklem				j = fxdr_unsigned(int, *tl);
1032244042Srmacklem				if (j >= 10000)
1033244042Srmacklem					NFSCL_DEBUG(1, "n2op=%d n2st=%d\n", i,
1034244042Srmacklem					    j);
1035191783Srmacklem				/*
1036191783Srmacklem				 * All Compounds that do an Op that must
1037191783Srmacklem				 * be in sequence consist of NFSV4OP_PUTFH
1038191783Srmacklem				 * followed by one of these. As such, we
1039191783Srmacklem				 * can determine if the seqid# should be
1040191783Srmacklem				 * incremented, here.
1041191783Srmacklem				 */
1042191783Srmacklem				if ((i == NFSV4OP_OPEN ||
1043191783Srmacklem				     i == NFSV4OP_OPENCONFIRM ||
1044191783Srmacklem				     i == NFSV4OP_OPENDOWNGRADE ||
1045191783Srmacklem				     i == NFSV4OP_CLOSE ||
1046191783Srmacklem				     i == NFSV4OP_LOCK ||
1047191783Srmacklem				     i == NFSV4OP_LOCKU) &&
1048191783Srmacklem				    (j == 0 ||
1049191783Srmacklem				     (j != NFSERR_STALECLIENTID &&
1050191783Srmacklem				      j != NFSERR_STALESTATEID &&
1051191783Srmacklem				      j != NFSERR_BADSTATEID &&
1052191783Srmacklem				      j != NFSERR_BADSEQID &&
1053191783Srmacklem				      j != NFSERR_BADXDR &&
1054191783Srmacklem				      j != NFSERR_RESOURCE &&
1055191783Srmacklem				      j != NFSERR_NOFILEHANDLE)))
1056191783Srmacklem					nd->nd_flag |= ND_INCRSEQID;
1057191783Srmacklem			}
1058244042Srmacklem			/*
1059244042Srmacklem			 * If this op's status is non-zero, mark
1060244042Srmacklem			 * that there is no more data to process.
1061317930Srmacklem			 * The exception is Setattr, which always has xdr
1062317930Srmacklem			 * when it has failed.
1063244042Srmacklem			 */
1064317930Srmacklem			if (j != 0 && i != NFSV4OP_SETATTR)
1065244042Srmacklem				nd->nd_flag |= ND_NOMOREDATA;
1066191783Srmacklem
1067191783Srmacklem			/*
1068191783Srmacklem			 * If R_DONTRECOVER is set, replace the stale error
1069191783Srmacklem			 * reply, so that recovery isn't initiated.
1070191783Srmacklem			 */
1071191783Srmacklem			if ((nd->nd_repstat == NFSERR_STALECLIENTID ||
1072244042Srmacklem			     nd->nd_repstat == NFSERR_BADSESSION ||
1073191783Srmacklem			     nd->nd_repstat == NFSERR_STALESTATEID) &&
1074191783Srmacklem			    rep != NULL && (rep->r_flags & R_DONTRECOVER))
1075191783Srmacklem				nd->nd_repstat = NFSERR_STALEDONTRECOVER;
1076191783Srmacklem		}
1077192181Srmacklem	}
1078191783Srmacklem
1079223280Srmacklem#ifdef KDTRACE_HOOKS
1080223280Srmacklem	if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) {
1081223280Srmacklem		uint32_t probe_id;
1082223280Srmacklem		int probe_procnum;
1083223280Srmacklem
1084223280Srmacklem		if (nd->nd_flag & ND_NFSV4) {
1085223280Srmacklem			probe_id = nfscl_nfs4_done_probes[nd->nd_procnum];
1086223280Srmacklem			probe_procnum = nd->nd_procnum;
1087223280Srmacklem		} else if (nd->nd_flag & ND_NFSV3) {
1088223280Srmacklem			probe_id = nfscl_nfs3_done_probes[procnum];
1089223280Srmacklem			probe_procnum = procnum;
1090223280Srmacklem		} else {
1091223280Srmacklem			probe_id = nfscl_nfs2_done_probes[nd->nd_procnum];
1092223280Srmacklem			probe_procnum = procnum;
1093223280Srmacklem		}
1094223280Srmacklem		if (probe_id != 0)
1095223280Srmacklem			(dtrace_nfscl_nfs234_done_probe)(probe_id, vp,
1096223280Srmacklem			    nd->nd_mreq, cred, probe_procnum, 0);
1097223280Srmacklem	}
1098223280Srmacklem#endif
1099223280Srmacklem
1100192181Srmacklem	m_freem(nd->nd_mreq);
1101253049Srmacklem	if (usegssname == 0)
1102253049Srmacklem		AUTH_DESTROY(auth);
1103192181Srmacklem	if (rep != NULL)
1104192181Srmacklem		FREE((caddr_t)rep, M_NFSDREQ);
1105195642Srmacklem	if (set_sigset)
1106195642Srmacklem		newnfs_restore_sigmask(td, &oldset);
1107192181Srmacklem	return (0);
1108191783Srmacklemnfsmout:
1109191783Srmacklem	mbuf_freem(nd->nd_mrep);
1110191783Srmacklem	mbuf_freem(nd->nd_mreq);
1111253049Srmacklem	if (usegssname == 0)
1112253049Srmacklem		AUTH_DESTROY(auth);
1113191783Srmacklem	if (rep != NULL)
1114191783Srmacklem		FREE((caddr_t)rep, M_NFSDREQ);
1115195642Srmacklem	if (set_sigset)
1116195642Srmacklem		newnfs_restore_sigmask(td, &oldset);
1117191783Srmacklem	return (error);
1118191783Srmacklem}
1119191783Srmacklem
1120191783Srmacklem/*
1121191783Srmacklem * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
1122191783Srmacklem * wait for all requests to complete. This is used by forced unmounts
1123191783Srmacklem * to terminate any outstanding RPCs.
1124191783Srmacklem */
1125191783Srmacklemint
1126191783Srmacklemnewnfs_nmcancelreqs(struct nfsmount *nmp)
1127191783Srmacklem{
1128325407Srmacklem	struct nfsclds *dsp;
1129325407Srmacklem	struct __rpc_client *cl;
1130191783Srmacklem
1131191783Srmacklem	if (nmp->nm_sockreq.nr_client != NULL)
1132191783Srmacklem		CLNT_CLOSE(nmp->nm_sockreq.nr_client);
1133325407Srmacklemlookformore:
1134325407Srmacklem	NFSLOCKMNT(nmp);
1135325407Srmacklem	TAILQ_FOREACH(dsp, &nmp->nm_sess, nfsclds_list) {
1136325407Srmacklem		NFSLOCKDS(dsp);
1137325407Srmacklem		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1138325407Srmacklem		    (dsp->nfsclds_flags & NFSCLDS_CLOSED) == 0 &&
1139325407Srmacklem		    dsp->nfsclds_sockp != NULL &&
1140325407Srmacklem		    dsp->nfsclds_sockp->nr_client != NULL) {
1141325407Srmacklem			dsp->nfsclds_flags |= NFSCLDS_CLOSED;
1142325407Srmacklem			cl = dsp->nfsclds_sockp->nr_client;
1143325407Srmacklem			NFSUNLOCKDS(dsp);
1144325407Srmacklem			NFSUNLOCKMNT(nmp);
1145325407Srmacklem			CLNT_CLOSE(cl);
1146325407Srmacklem			goto lookformore;
1147325407Srmacklem		}
1148325407Srmacklem		NFSUNLOCKDS(dsp);
1149325407Srmacklem	}
1150325407Srmacklem	NFSUNLOCKMNT(nmp);
1151191783Srmacklem	return (0);
1152191783Srmacklem}
1153191783Srmacklem
/*
 * The set of signals that may interrupt an NFS operation on an intr
 * mount; add any further such signal here.  SIGSTOP and SIGKILL cannot
 * be masked.
 * The list is scanned by nfs_sig_pending() and by newnfs_set_sigmask().
 */
int newnfs_sig_set[] = { SIGINT, SIGTERM, SIGHUP, SIGKILL, SIGQUIT };
1165191783Srmacklem
1166191783Srmacklem/*
1167191783Srmacklem * Check to see if one of the signals in our subset is pending on
1168191783Srmacklem * the process (in an intr mount).
1169191783Srmacklem */
1170191783Srmacklemstatic int
1171191783Srmacklemnfs_sig_pending(sigset_t set)
1172191783Srmacklem{
1173191783Srmacklem	int i;
1174191783Srmacklem
1175191783Srmacklem	for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++)
1176191783Srmacklem		if (SIGISMEMBER(set, newnfs_sig_set[i]))
1177191783Srmacklem			return (1);
1178191783Srmacklem	return (0);
1179191783Srmacklem}
1180191783Srmacklem
1181191783Srmacklem/*
1182191783Srmacklem * The set/restore sigmask functions are used to (temporarily) overwrite
1183246417Sjhb * the thread td_sigmask during an RPC call (for example). These are also
1184191783Srmacklem * used in other places in the NFS client that might tsleep().
1185191783Srmacklem */
1186191783Srmacklemvoid
1187191783Srmacklemnewnfs_set_sigmask(struct thread *td, sigset_t *oldset)
1188191783Srmacklem{
1189191783Srmacklem	sigset_t newset;
1190191783Srmacklem	int i;
1191191783Srmacklem	struct proc *p;
1192191783Srmacklem
1193191783Srmacklem	SIGFILLSET(newset);
1194191783Srmacklem	if (td == NULL)
1195191783Srmacklem		td = curthread; /* XXX */
1196191783Srmacklem	p = td->td_proc;
1197191783Srmacklem	/* Remove the NFS set of signals from newset */
1198191783Srmacklem	PROC_LOCK(p);
1199191783Srmacklem	mtx_lock(&p->p_sigacts->ps_mtx);
1200191783Srmacklem	for (i = 0 ; i < sizeof(newnfs_sig_set)/sizeof(int) ; i++) {
1201191783Srmacklem		/*
1202191783Srmacklem		 * But make sure we leave the ones already masked
1203191783Srmacklem		 * by the process, ie. remove the signal from the
1204191783Srmacklem		 * temporary signalmask only if it wasn't already
1205191783Srmacklem		 * in p_sigmask.
1206191783Srmacklem		 */
1207191783Srmacklem		if (!SIGISMEMBER(td->td_sigmask, newnfs_sig_set[i]) &&
1208191783Srmacklem		    !SIGISMEMBER(p->p_sigacts->ps_sigignore, newnfs_sig_set[i]))
1209191783Srmacklem			SIGDELSET(newset, newnfs_sig_set[i]);
1210191783Srmacklem	}
1211191783Srmacklem	mtx_unlock(&p->p_sigacts->ps_mtx);
1212246417Sjhb	kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
1213246417Sjhb	    SIGPROCMASK_PROC_LOCKED);
1214191783Srmacklem	PROC_UNLOCK(p);
1215191783Srmacklem}
1216191783Srmacklem
1217191783Srmacklemvoid
1218191783Srmacklemnewnfs_restore_sigmask(struct thread *td, sigset_t *set)
1219191783Srmacklem{
1220191783Srmacklem	if (td == NULL)
1221191783Srmacklem		td = curthread; /* XXX */
1222191783Srmacklem	kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
1223191783Srmacklem}
1224191783Srmacklem
1225191783Srmacklem/*
1226191783Srmacklem * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
1227191783Srmacklem * old one after msleep() returns.
1228191783Srmacklem */
1229191783Srmacklemint
1230191783Srmacklemnewnfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
1231191783Srmacklem{
1232191783Srmacklem	sigset_t oldset;
1233191783Srmacklem	int error;
1234191783Srmacklem	struct proc *p;
1235191783Srmacklem
1236191783Srmacklem	if ((priority & PCATCH) == 0)
1237191783Srmacklem		return msleep(ident, mtx, priority, wmesg, timo);
1238191783Srmacklem	if (td == NULL)
1239191783Srmacklem		td = curthread; /* XXX */
1240191783Srmacklem	newnfs_set_sigmask(td, &oldset);
1241191783Srmacklem	error = msleep(ident, mtx, priority, wmesg, timo);
1242191783Srmacklem	newnfs_restore_sigmask(td, &oldset);
1243191783Srmacklem	p = td->td_proc;
1244191783Srmacklem	return (error);
1245191783Srmacklem}
1246191783Srmacklem
1247191783Srmacklem/*
1248191783Srmacklem * Test for a termination condition pending on the process.
1249191783Srmacklem * This is used for NFSMNT_INT mounts.
1250191783Srmacklem */
1251191783Srmacklemint
1252191783Srmacklemnewnfs_sigintr(struct nfsmount *nmp, struct thread *td)
1253191783Srmacklem{
1254191783Srmacklem	struct proc *p;
1255191783Srmacklem	sigset_t tmpset;
1256191783Srmacklem
1257191783Srmacklem	/* Terminate all requests while attempting a forced unmount. */
1258191783Srmacklem	if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
1259191783Srmacklem		return (EIO);
1260191783Srmacklem	if (!(nmp->nm_flag & NFSMNT_INT))
1261191783Srmacklem		return (0);
1262191783Srmacklem	if (td == NULL)
1263191783Srmacklem		return (0);
1264191783Srmacklem	p = td->td_proc;
1265191783Srmacklem	PROC_LOCK(p);
1266191783Srmacklem	tmpset = p->p_siglist;
1267191783Srmacklem	SIGSETOR(tmpset, td->td_siglist);
1268191783Srmacklem	SIGSETNAND(tmpset, td->td_sigmask);
1269191783Srmacklem	mtx_lock(&p->p_sigacts->ps_mtx);
1270191783Srmacklem	SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1271191783Srmacklem	mtx_unlock(&p->p_sigacts->ps_mtx);
1272191783Srmacklem	if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
1273191783Srmacklem	    && nfs_sig_pending(tmpset)) {
1274191783Srmacklem		PROC_UNLOCK(p);
1275191783Srmacklem		return (EINTR);
1276191783Srmacklem	}
1277191783Srmacklem	PROC_UNLOCK(p);
1278191783Srmacklem	return (0);
1279191783Srmacklem}
1280191783Srmacklem
1281191783Srmacklemstatic int
1282191783Srmacklemnfs_msg(struct thread *td, const char *server, const char *msg, int error)
1283191783Srmacklem{
1284191783Srmacklem	struct proc *p;
1285191783Srmacklem
1286191783Srmacklem	p = td ? td->td_proc : NULL;
1287191783Srmacklem	if (error) {
1288191783Srmacklem		tprintf(p, LOG_INFO, "newnfs server %s: %s, error %d\n",
1289191783Srmacklem		    server, msg, error);
1290191783Srmacklem	} else {
1291191783Srmacklem		tprintf(p, LOG_INFO, "newnfs server %s: %s\n", server, msg);
1292191783Srmacklem	}
1293191783Srmacklem	return (0);
1294191783Srmacklem}
1295191783Srmacklem
1296191783Srmacklemstatic void
1297191783Srmacklemnfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
1298191783Srmacklem    int error, int flags)
1299191783Srmacklem{
1300191783Srmacklem	if (nmp == NULL)
1301191783Srmacklem		return;
1302191783Srmacklem	mtx_lock(&nmp->nm_mtx);
1303191783Srmacklem	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
1304191783Srmacklem		nmp->nm_state |= NFSSTA_TIMEO;
1305191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1306191783Srmacklem		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1307191783Srmacklem		    VQ_NOTRESP, 0);
1308191783Srmacklem	} else
1309191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1310191783Srmacklem	mtx_lock(&nmp->nm_mtx);
1311191783Srmacklem	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1312191783Srmacklem		nmp->nm_state |= NFSSTA_LOCKTIMEO;
1313191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1314191783Srmacklem		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1315191783Srmacklem		    VQ_NOTRESPLOCK, 0);
1316191783Srmacklem	} else
1317191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1318191783Srmacklem	nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
1319191783Srmacklem}
1320191783Srmacklem
1321191783Srmacklemstatic void
1322191783Srmacklemnfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
1323191783Srmacklem    int flags, int tprintfmsg)
1324191783Srmacklem{
1325191783Srmacklem	if (nmp == NULL)
1326191783Srmacklem		return;
1327191783Srmacklem	if (tprintfmsg) {
1328191783Srmacklem		nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
1329191783Srmacklem	}
1330191783Srmacklem
1331191783Srmacklem	mtx_lock(&nmp->nm_mtx);
1332191783Srmacklem	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
1333191783Srmacklem		nmp->nm_state &= ~NFSSTA_TIMEO;
1334191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1335191783Srmacklem		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1336191783Srmacklem		    VQ_NOTRESP, 1);
1337191783Srmacklem	} else
1338191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1339191783Srmacklem
1340191783Srmacklem	mtx_lock(&nmp->nm_mtx);
1341191783Srmacklem	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
1342191783Srmacklem		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
1343191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1344191783Srmacklem		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
1345191783Srmacklem		    VQ_NOTRESPLOCK, 1);
1346191783Srmacklem	} else
1347191783Srmacklem		mtx_unlock(&nmp->nm_mtx);
1348191783Srmacklem}
1349191783Srmacklem
1350