nfs_krpc.c revision 190293
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_krpc.c 190293 2009-03-22 22:07:52Z rwatson $");
37
38/*
39 * Socket operations for use by nfs
40 */
41
42#include "opt_inet6.h"
43#include "opt_kdtrace.h"
44#include "opt_kgssapi.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/mount.h>
54#include <sys/mutex.h>
55#include <sys/proc.h>
56#include <sys/signalvar.h>
57#include <sys/syscallsubr.h>
58#include <sys/sysctl.h>
59#include <sys/syslog.h>
60#include <sys/vnode.h>
61
62#include <rpc/rpc.h>
63#include <rpc/rpcclnt.h>
64
65#include <nfs/rpcv2.h>
66#include <nfs/nfsproto.h>
67#include <nfsclient/nfs.h>
68#include <nfs/xdr_subs.h>
69#include <nfsclient/nfsm_subs.h>
70#include <nfsclient/nfsmount.h>
71#include <nfsclient/nfsnode.h>
72
73#include <nfs4client/nfs4.h>
74
75#ifndef NFS_LEGACYRPC
76
77#ifdef KDTRACE_HOOKS
78#include <sys/dtrace_bsd.h>
79
/*
 * Optional DTrace hook invoked by nfs_request() just before an RPC is
 * issued.  nfs_request() only calls it when the pointer is non-NULL.
 */
dtrace_nfsclient_nfs23_start_probe_func_t
    dtrace_nfsclient_nfs23_start_probe;

/*
 * Optional DTrace hook invoked by nfs_request() when an RPC completes
 * (successfully or not).  Only called when the pointer is non-NULL.
 */
dtrace_nfsclient_nfs23_done_probe_func_t
    dtrace_nfsclient_nfs23_done_probe;

/*
 * Registered probes by RPC type.
 *
 * Each array is indexed by the procnum passed to nfs_request(); an entry
 * of 0 means no probe is fired for that procedure.
 */
uint32_t	nfsclient_nfs2_start_probes[NFS_NPROCS];
uint32_t	nfsclient_nfs2_done_probes[NFS_NPROCS];

uint32_t	nfsclient_nfs3_start_probes[NFS_NPROCS];
uint32_t	nfsclient_nfs3_done_probes[NFS_NPROCS];
94#endif
95
/*
 * Tunables and counters exported under vfs.nfs.
 *
 * Within the code visible in this file only nfs_bufpackets,
 * nfs3_jukebox_delay and nfs_skip_wcc_data_onerr are consulted; the
 * realign and reconnect counters are exported but not updated here.
 */
static int	nfs_realign_test;
static int	nfs_realign_count;
/* Socket buffer scale factor; nfs_connect() clamps it to the range 2..64. */
static int	nfs_bufpackets = 4;
static int	nfs_reconnects;
/* Seconds nfs_request() waits before retrying after NFSERR_TRYLATER. */
static int	nfs3_jukebox_delay = 10;
/* When non-zero, nfs_request() frees an NFSv3 error reply instead of returning it. */
static int	nfs_skip_wcc_data_onerr = 1;
/* Address used only as the tsleep() wait channel for the jukebox delay. */
static int	fake_wchan;

SYSCTL_DECL(_vfs_nfs);

SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0,
    "Number of realign tests done");
SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0,
    "Number of mbuf realignments done");
SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
    "Buffer reservation size 2 < x < 64");
SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
    "Number of times the nfs client has had to reconnect");
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
    "Number of seconds to delay a retry after receiving EJUKEBOX");
SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
    "Disable weak cache consistency checking when server returns an error");
118
119static void	nfs_down(struct nfsmount *, struct thread *, const char *,
120    int, int);
121static void	nfs_up(struct nfsmount *, struct thread *, const char *,
122    int, int);
123static int	nfs_msg(struct thread *, const char *, const char *, int);
124
125extern int nfsv2_procid[];
126
/*
 * A cached RPC auth handle keyed by uid.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; confirm whether it is still needed.
 */
struct nfs_cached_auth {
	int		ca_refs; /* refcount, including 1 from the cache */
	uid_t		ca_uid;	 /* uid that corresponds to this auth */
	AUTH		*ca_auth; /* RPC auth handle */
};
132
133/*
134 * RTT estimator
135 */
136
/*
 * Per-procedure choice of RTT timer class, indexed by NFS procedure
 * number (see nfs_rto_timer()).  Procedures mapped to NFS_DEFAULT_TIMER
 * get no per-mount timer state in nfs_request() (rc_timers is NULL).
 */
static enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = {
	NFS_DEFAULT_TIMER,	/* NULL */
	NFS_GETATTR_TIMER,	/* GETATTR */
	NFS_DEFAULT_TIMER,	/* SETATTR */
	NFS_LOOKUP_TIMER,	/* LOOKUP */
	NFS_GETATTR_TIMER,	/* ACCESS */
	NFS_READ_TIMER,		/* READLINK */
	NFS_READ_TIMER,		/* READ */
	NFS_WRITE_TIMER,	/* WRITE */
	NFS_DEFAULT_TIMER,	/* CREATE */
	NFS_DEFAULT_TIMER,	/* MKDIR */
	NFS_DEFAULT_TIMER,	/* SYMLINK */
	NFS_DEFAULT_TIMER,	/* MKNOD */
	NFS_DEFAULT_TIMER,	/* REMOVE */
	NFS_DEFAULT_TIMER,	/* RMDIR */
	NFS_DEFAULT_TIMER,	/* RENAME */
	NFS_DEFAULT_TIMER,	/* LINK */
	NFS_READ_TIMER,		/* READDIR */
	NFS_READ_TIMER,		/* READDIRPLUS */
	NFS_DEFAULT_TIMER,	/* FSSTAT */
	NFS_DEFAULT_TIMER,	/* FSINFO */
	NFS_DEFAULT_TIMER,	/* PATHCONF */
	NFS_DEFAULT_TIMER,	/* COMMIT */
	NFS_DEFAULT_TIMER,	/* NOOP */
};
162
163/*
164 * Choose the correct RTT timer for this NFS procedure.
165 */
166static inline enum nfs_rto_timer_t
167nfs_rto_timer(u_int32_t procnum)
168{
169	return nfs_proct[procnum];
170}
171
172/*
173 * Initialize the RTT estimator state for a new mount point.
174 */
175static void
176nfs_init_rtt(struct nfsmount *nmp)
177{
178	int i;
179
180	for (i = 0; i < NFS_MAX_TIMER; i++) {
181		nmp->nm_timers[i].rt_srtt = hz;
182		nmp->nm_timers[i].rt_deviate = 0;
183		nmp->nm_timers[i].rt_rtxcur = hz;
184	}
185}
186
187/*
188 * Initialize sockets and congestion for a new NFS connection.
189 * We do not free the sockaddr if error.
190 */
191int
192nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
193{
194	int rcvreserve, sndreserve;
195	int pktscale;
196	struct sockaddr *saddr;
197	struct ucred *origcred;
198	struct thread *td = curthread;
199	CLIENT *client;
200	struct netconfig *nconf;
201	rpcvers_t vers;
202	int one = 1, retries;
203
204	/*
205	 * We need to establish the socket using the credentials of
206	 * the mountpoint.  Some parts of this process (such as
207	 * sobind() and soconnect()) will use the curent thread's
208	 * credential instead of the socket credential.  To work
209	 * around this, temporarily change the current thread's
210	 * credential to that of the mountpoint.
211	 *
212	 * XXX: It would be better to explicitly pass the correct
213	 * credential to sobind() and soconnect().
214	 */
215	origcred = td->td_ucred;
216	td->td_ucred = nmp->nm_mountp->mnt_cred;
217	saddr = nmp->nm_nam;
218
219	vers = NFS_VER2;
220	if (nmp->nm_flag & NFSMNT_NFSV3)
221		vers = NFS_VER3;
222	else if (nmp->nm_flag & NFSMNT_NFSV4)
223		vers = NFS_VER4;
224	if (saddr->sa_family == AF_INET)
225		if (nmp->nm_sotype == SOCK_DGRAM)
226			nconf = getnetconfigent("udp");
227		else
228			nconf = getnetconfigent("tcp");
229	else
230		if (nmp->nm_sotype == SOCK_DGRAM)
231			nconf = getnetconfigent("udp6");
232		else
233			nconf = getnetconfigent("tcp6");
234
235	/*
236	 * Get buffer reservation size from sysctl, but impose reasonable
237	 * limits.
238	 */
239	pktscale = nfs_bufpackets;
240	if (pktscale < 2)
241		pktscale = 2;
242	if (pktscale > 64)
243		pktscale = 64;
244	mtx_lock(&nmp->nm_mtx);
245	if (nmp->nm_sotype == SOCK_DGRAM) {
246		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
247		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
248		    NFS_MAXPKTHDR) * pktscale;
249	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
250		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
251		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
252		    NFS_MAXPKTHDR) * pktscale;
253	} else {
254		if (nmp->nm_sotype != SOCK_STREAM)
255			panic("nfscon sotype");
256		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
257		    sizeof (u_int32_t)) * pktscale;
258		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
259		    sizeof (u_int32_t)) * pktscale;
260	}
261	mtx_unlock(&nmp->nm_mtx);
262
263	client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers,
264	    sndreserve, rcvreserve);
265	CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
266	if (nmp->nm_flag & NFSMNT_INT)
267		CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
268	if (nmp->nm_flag & NFSMNT_RESVPORT)
269		CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
270	if (nmp->nm_flag & NFSMNT_SOFT)
271		retries = nmp->nm_retry;
272	else
273		retries = INT_MAX;
274	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
275
276	mtx_lock(&nmp->nm_mtx);
277	if (nmp->nm_client) {
278		/*
279		 * Someone else already connected.
280		 */
281		CLNT_RELEASE(client);
282	} else {
283		nmp->nm_client = client;
284	}
285
286	/*
287	 * Protocols that do not require connections may be optionally left
288	 * unconnected for servers that reply from a port other than NFS_PORT.
289	 */
290	if (!(nmp->nm_flag & NFSMNT_NOCONN)) {
291		mtx_unlock(&nmp->nm_mtx);
292		CLNT_CONTROL(client, CLSET_CONNECT, &one);
293	} else {
294		mtx_unlock(&nmp->nm_mtx);
295	}
296
297	/* Restore current thread's credentials. */
298	td->td_ucred = origcred;
299
300	mtx_lock(&nmp->nm_mtx);
301	/* Initialize other non-zero congestion variables */
302	nfs_init_rtt(nmp);
303	mtx_unlock(&nmp->nm_mtx);
304	return (0);
305}
306
307/*
308 * NFS disconnect. Clean up and unlink.
309 */
310void
311nfs_disconnect(struct nfsmount *nmp)
312{
313	CLIENT *client;
314
315	mtx_lock(&nmp->nm_mtx);
316	if (nmp->nm_client) {
317		client = nmp->nm_client;
318		nmp->nm_client = NULL;
319		mtx_unlock(&nmp->nm_mtx);
320#ifdef KGSSAPI
321		rpc_gss_secpurge(client);
322#endif
323		CLNT_CLOSE(client);
324		CLNT_RELEASE(client);
325	} else {
326		mtx_unlock(&nmp->nm_mtx);
327	}
328}
329
/*
 * Disconnect the mount's RPC client.  Currently a plain wrapper around
 * nfs_disconnect().
 */
void
nfs_safedisconnect(struct nfsmount *nmp)
{
	nfs_disconnect(nmp);
}
336
337static AUTH *
338nfs_getauth(struct nfsmount *nmp, struct ucred *cred)
339{
340#ifdef KGSSAPI
341	rpc_gss_service_t svc;
342	AUTH *auth;
343#endif
344
345	switch (nmp->nm_secflavor) {
346#ifdef KGSSAPI
347	case RPCSEC_GSS_KRB5:
348	case RPCSEC_GSS_KRB5I:
349	case RPCSEC_GSS_KRB5P:
350		if (!nmp->nm_mech_oid) {
351			if (!rpc_gss_mech_to_oid("kerberosv5",
352				&nmp->nm_mech_oid))
353				return (NULL);
354		}
355		if (nmp->nm_secflavor == RPCSEC_GSS_KRB5)
356			svc = rpc_gss_svc_none;
357		else if (nmp->nm_secflavor == RPCSEC_GSS_KRB5I)
358			svc = rpc_gss_svc_integrity;
359		else
360			svc = rpc_gss_svc_privacy;
361		auth = rpc_gss_secfind(nmp->nm_client, cred,
362		    nmp->nm_principal, nmp->nm_mech_oid, svc);
363		if (auth)
364			return (auth);
365		/* fallthrough */
366#endif
367	case AUTH_SYS:
368	default:
369		return (authunix_create(cred));
370
371	}
372}
373
374/*
375 * Callback from the RPC code to generate up/down notifications.
376 */
377
/*
 * Per-request state handed to nfs_feedback() through rc_feedback_arg.
 * nfs_request() keeps one of these on its stack for the duration of a call.
 */
struct nfs_feedback_arg {
	struct nfsmount *nf_mount;	/* mount the request belongs to */
	int		nf_lastmsg;	/* last tprintf */
	int		nf_tprintfmsg;	/* set once a "not responding" message was issued */
	struct thread	*nf_td;		/* thread passed on to nfs_down()/nfs_up() */
};
384
385static void
386nfs_feedback(int type, int proc, void *arg)
387{
388	struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
389	struct nfsmount *nmp = nf->nf_mount;
390	struct timeval now;
391
392	getmicrouptime(&now);
393
394	switch (type) {
395	case FEEDBACK_REXMIT2:
396	case FEEDBACK_RECONNECT:
397		if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
398			nfs_down(nmp, nf->nf_td,
399			    "not responding", 0, NFSSTA_TIMEO);
400			nf->nf_tprintfmsg = TRUE;
401			nf->nf_lastmsg = now.tv_sec;
402		}
403		break;
404
405	case FEEDBACK_OK:
406		nfs_up(nf->nf_mount, nf->nf_td,
407		    "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
408		break;
409	}
410}
411
412/*
413 * nfs_request - goes something like this
414 *	- fill in request struct
415 *	- links it into list
416 *	- calls nfs_send() for first transmit
417 *	- calls nfs_receive() to get reply
418 *	- break down rpc header and return with nfs reply pointed to
419 *	  by mrep or error
420 * nb: always frees up mreq mbuf list
421 */
422int
423nfs_request(struct vnode *vp, struct mbuf *mreq, int procnum,
424    struct thread *td, struct ucred *cred, struct mbuf **mrp,
425    struct mbuf **mdp, caddr_t *dposp)
426{
427	struct mbuf *mrep;
428	u_int32_t *tl;
429	struct nfsmount *nmp;
430	struct mbuf *md;
431	time_t waituntil;
432	caddr_t dpos;
433	int error = 0;
434	struct timeval now;
435	AUTH *auth = NULL;
436	enum nfs_rto_timer_t timer;
437	struct nfs_feedback_arg nf;
438	struct rpc_callextra ext;
439	enum clnt_stat stat;
440	struct timeval timo;
441
442	/* Reject requests while attempting a forced unmount. */
443	if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
444		m_freem(mreq);
445		return (ESTALE);
446	}
447	nmp = VFSTONFS(vp->v_mount);
448	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
449		return nfs4_request(vp, mreq, procnum, td, cred, mrp, mdp, dposp);
450	bzero(&nf, sizeof(struct nfs_feedback_arg));
451	nf.nf_mount = nmp;
452	nf.nf_td = td;
453	getmicrouptime(&now);
454	nf.nf_lastmsg = now.tv_sec -
455		((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
456
457	/*
458	 * XXX if not already connected call nfs_connect now. Longer
459	 * term, change nfs_mount to call nfs_connect unconditionally
460	 * and let clnt_reconnect_create handle reconnects.
461	 */
462	if (!nmp->nm_client)
463		nfs_connect(nmp, NULL);
464
465	auth = nfs_getauth(nmp, cred);
466	if (!auth) {
467		m_freem(mreq);
468		return (EACCES);
469	}
470	bzero(&ext, sizeof(ext));
471	ext.rc_auth = auth;
472
473	ext.rc_feedback = nfs_feedback;
474	ext.rc_feedback_arg = &nf;
475
476	/*
477	 * Use a conservative timeout for RPCs other than getattr,
478	 * lookup, read or write. The justification for doing "other"
479	 * this way is that these RPCs happen so infrequently that
480	 * timer est. would probably be stale.  Also, since many of
481	 * these RPCs are non-idempotent, a conservative timeout is
482	 * desired.
483	 */
484	timer = nfs_rto_timer(procnum);
485	if (timer != NFS_DEFAULT_TIMER) {
486		ext.rc_timers = &nmp->nm_timers[timer - 1];
487	} else {
488		ext.rc_timers = NULL;
489	}
490
491#ifdef KDTRACE_HOOKS
492	if (dtrace_nfsclient_nfs23_start_probe != NULL) {
493		uint32_t probe_id;
494		int probe_procnum;
495
496		if (nmp->nm_flag & NFSMNT_NFSV3) {
497			probe_id = nfsclient_nfs3_start_probes[procnum];
498			probe_procnum = procnum;
499		} else {
500			probe_id = nfsclient_nfs2_start_probes[procnum];
501			probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ?
502			    procnum : nfsv2_procid[procnum];
503		}
504		if (probe_id != 0)
505			(dtrace_nfsclient_nfs23_start_probe)(probe_id, vp,
506			    mreq, cred, probe_procnum);
507	}
508#endif
509
510	nfsstats.rpcrequests++;
511tryagain:
512	timo.tv_sec = nmp->nm_timeo / NFS_HZ;
513	timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ;
514	mrep = NULL;
515	stat = CLNT_CALL_MBUF(nmp->nm_client, &ext,
516	    (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum],
517	    mreq, &mrep, timo);
518
519	/*
520	 * If there was a successful reply and a tprintf msg.
521	 * tprintf a response.
522	 */
523	if (stat == RPC_SUCCESS) {
524		error = 0;
525	} else if (stat == RPC_TIMEDOUT) {
526		error = ETIMEDOUT;
527	} else if (stat == RPC_VERSMISMATCH) {
528		error = EOPNOTSUPP;
529	} else if (stat == RPC_PROGVERSMISMATCH) {
530		error = EPROTONOSUPPORT;
531	} else {
532		error = EACCES;
533	}
534	md = mrep;
535	if (error)
536		goto nfsmout;
537
538	KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
539
540	dpos = mtod(mrep, caddr_t);
541	tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
542	if (*tl != 0) {
543		error = fxdr_unsigned(int, *tl);
544		if ((nmp->nm_flag & NFSMNT_NFSV3) &&
545		    error == NFSERR_TRYLATER) {
546			m_freem(mrep);
547			error = 0;
548			waituntil = time_second + nfs3_jukebox_delay;
549			while (time_second < waituntil) {
550				(void) tsleep(&fake_wchan, PSOCK, "nqnfstry", hz);
551			}
552			goto tryagain;
553		}
554
555		/*
556		 * If the File Handle was stale, invalidate the lookup
557		 * cache, just in case.
558		 */
559		if (error == ESTALE)
560			cache_purge(vp);
561		/*
562		 * Skip wcc data on NFS errors for now. NetApp filers
563		 * return corrupt postop attrs in the wcc data for NFS
564		 * err EROFS. Not sure if they could return corrupt
565		 * postop attrs for others errors.
566		 */
567		if ((nmp->nm_flag & NFSMNT_NFSV3) && !nfs_skip_wcc_data_onerr) {
568			*mrp = mrep;
569			*mdp = md;
570			*dposp = dpos;
571			error |= NFSERR_RETERR;
572		} else
573			m_freem(mrep);
574		goto nfsmout;
575	}
576
577#ifdef KDTRACE_HOOKS
578	if (dtrace_nfsclient_nfs23_done_probe != NULL) {
579		uint32_t probe_id;
580		int probe_procnum;
581
582		if (nmp->nm_flag & NFSMNT_NFSV3) {
583			probe_id = nfsclient_nfs3_done_probes[procnum];
584			probe_procnum = procnum;
585		} else {
586			probe_id = nfsclient_nfs2_done_probes[procnum];
587			probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ?
588			    procnum : nfsv2_procid[procnum];
589		}
590		if (probe_id != 0)
591			(dtrace_nfsclient_nfs23_done_probe)(probe_id, vp,
592			    mreq, cred, probe_procnum, 0);
593	}
594#endif
595	m_freem(mreq);
596	*mrp = mrep;
597	*mdp = md;
598	*dposp = dpos;
599	AUTH_DESTROY(auth);
600	return (0);
601
602nfsmout:
603#ifdef KDTRACE_HOOKS
604	if (dtrace_nfsclient_nfs23_done_probe != NULL) {
605		uint32_t probe_id;
606		int probe_procnum;
607
608		if (nmp->nm_flag & NFSMNT_NFSV3) {
609			probe_id = nfsclient_nfs3_done_probes[procnum];
610			probe_procnum = procnum;
611		} else {
612			probe_id = nfsclient_nfs2_done_probes[procnum];
613			probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ?
614			    procnum : nfsv2_procid[procnum];
615		}
616		if (probe_id != 0)
617			(dtrace_nfsclient_nfs23_done_probe)(probe_id, vp,
618			    mreq, cred, probe_procnum, error);
619	}
620#endif
621	m_freem(mreq);
622	if (auth)
623		AUTH_DESTROY(auth);
624	return (error);
625}
626
627/*
628 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
629 * wait for all requests to complete. This is used by forced unmounts
630 * to terminate any outstanding RPCs.
631 */
632int
633nfs_nmcancelreqs(struct nfsmount *nmp)
634{
635
636	if (nmp->nm_client)
637		CLNT_CLOSE(nmp->nm_client);
638	return (0);
639}
640
/*
 * Any signal that can interrupt an NFS operation in an intr mount
 * should be added to this set. SIGSTOP and SIGKILL cannot be masked.
 *
 * The set is consulted by nfs_sig_pending() and nfs_set_sigmask().
 */
int nfs_sig_set[] = {
	SIGINT,
	SIGTERM,
	SIGHUP,
	SIGKILL,
	SIGSTOP,
	SIGQUIT
};
653
654/*
655 * Check to see if one of the signals in our subset is pending on
656 * the process (in an intr mount).
657 */
658static int
659nfs_sig_pending(sigset_t set)
660{
661	int i;
662
663	for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++)
664		if (SIGISMEMBER(set, nfs_sig_set[i]))
665			return (1);
666	return (0);
667}
668
669/*
670 * The set/restore sigmask functions are used to (temporarily) overwrite
671 * the process p_sigmask during an RPC call (for example). These are also
672 * used in other places in the NFS client that might tsleep().
673 */
674void
675nfs_set_sigmask(struct thread *td, sigset_t *oldset)
676{
677	sigset_t newset;
678	int i;
679	struct proc *p;
680
681	SIGFILLSET(newset);
682	if (td == NULL)
683		td = curthread; /* XXX */
684	p = td->td_proc;
685	/* Remove the NFS set of signals from newset */
686	PROC_LOCK(p);
687	mtx_lock(&p->p_sigacts->ps_mtx);
688	for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) {
689		/*
690		 * But make sure we leave the ones already masked
691		 * by the process, ie. remove the signal from the
692		 * temporary signalmask only if it wasn't already
693		 * in p_sigmask.
694		 */
695		if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) &&
696		    !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i]))
697			SIGDELSET(newset, nfs_sig_set[i]);
698	}
699	mtx_unlock(&p->p_sigacts->ps_mtx);
700	PROC_UNLOCK(p);
701	kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0);
702}
703
704void
705nfs_restore_sigmask(struct thread *td, sigset_t *set)
706{
707	if (td == NULL)
708		td = curthread; /* XXX */
709	kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
710}
711
712/*
713 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
714 * old one after msleep() returns.
715 */
716int
717nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
718{
719	sigset_t oldset;
720	int error;
721	struct proc *p;
722
723	if ((priority & PCATCH) == 0)
724		return msleep(ident, mtx, priority, wmesg, timo);
725	if (td == NULL)
726		td = curthread; /* XXX */
727	nfs_set_sigmask(td, &oldset);
728	error = msleep(ident, mtx, priority, wmesg, timo);
729	nfs_restore_sigmask(td, &oldset);
730	p = td->td_proc;
731	return (error);
732}
733
734/*
735 * Test for a termination condition pending on the process.
736 * This is used for NFSMNT_INT mounts.
737 */
738int
739nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
740{
741	struct proc *p;
742	sigset_t tmpset;
743
744	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
745		return nfs4_sigintr(nmp, rep, td);
746	/* Terminate all requests while attempting a forced unmount. */
747	if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
748		return (EIO);
749	if (!(nmp->nm_flag & NFSMNT_INT))
750		return (0);
751	if (td == NULL)
752		return (0);
753	p = td->td_proc;
754	PROC_LOCK(p);
755	tmpset = p->p_siglist;
756	SIGSETOR(tmpset, td->td_siglist);
757	SIGSETNAND(tmpset, td->td_sigmask);
758	mtx_lock(&p->p_sigacts->ps_mtx);
759	SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
760	mtx_unlock(&p->p_sigacts->ps_mtx);
761	if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
762	    && nfs_sig_pending(tmpset)) {
763		PROC_UNLOCK(p);
764		return (EINTR);
765	}
766	PROC_UNLOCK(p);
767	return (0);
768}
769
770static int
771nfs_msg(struct thread *td, const char *server, const char *msg, int error)
772{
773	struct proc *p;
774
775	p = td ? td->td_proc : NULL;
776	if (error) {
777		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server,
778		    msg, error);
779	} else {
780		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
781	}
782	return (0);
783}
784
785static void
786nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
787    int error, int flags)
788{
789	if (nmp == NULL)
790		return;
791	mtx_lock(&nmp->nm_mtx);
792	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
793		nmp->nm_state |= NFSSTA_TIMEO;
794		mtx_unlock(&nmp->nm_mtx);
795		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
796		    VQ_NOTRESP, 0);
797	} else
798		mtx_unlock(&nmp->nm_mtx);
799	mtx_lock(&nmp->nm_mtx);
800	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
801		nmp->nm_state |= NFSSTA_LOCKTIMEO;
802		mtx_unlock(&nmp->nm_mtx);
803		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
804		    VQ_NOTRESPLOCK, 0);
805	} else
806		mtx_unlock(&nmp->nm_mtx);
807	nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
808}
809
810static void
811nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
812    int flags, int tprintfmsg)
813{
814	if (nmp == NULL)
815		return;
816	if (tprintfmsg) {
817		nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
818	}
819
820	mtx_lock(&nmp->nm_mtx);
821	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
822		nmp->nm_state &= ~NFSSTA_TIMEO;
823		mtx_unlock(&nmp->nm_mtx);
824		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
825		    VQ_NOTRESP, 1);
826	} else
827		mtx_unlock(&nmp->nm_mtx);
828
829	mtx_lock(&nmp->nm_mtx);
830	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
831		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
832		mtx_unlock(&nmp->nm_mtx);
833		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
834		    VQ_NOTRESPLOCK, 1);
835	} else
836		mtx_unlock(&nmp->nm_mtx);
837}
838
839#endif /* !NFS_LEGACYRPC */
840