nfs_krpc.c revision 192686
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/nfsclient/nfs_krpc.c 192686 2009-05-24 13:22:00Z dfr $");
37
38/*
39 * Socket operations for use by nfs
40 */
41
42#include "opt_inet6.h"
43#include "opt_kdtrace.h"
44#include "opt_kgssapi.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/limits.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/mount.h>
54#include <sys/mutex.h>
55#include <sys/proc.h>
56#include <sys/signalvar.h>
57#include <sys/syscallsubr.h>
58#include <sys/sysctl.h>
59#include <sys/syslog.h>
60#include <sys/vnode.h>
61
62#include <rpc/rpc.h>
63
64#include <nfs/rpcv2.h>
65#include <nfs/nfsproto.h>
66#include <nfsclient/nfs.h>
67#include <nfs/xdr_subs.h>
68#include <nfsclient/nfsm_subs.h>
69#include <nfsclient/nfsmount.h>
70#include <nfsclient/nfsnode.h>
71
72#ifndef NFS_LEGACYRPC
73
74#ifdef KDTRACE_HOOKS
75#include <sys/dtrace_bsd.h>
76
77dtrace_nfsclient_nfs23_start_probe_func_t
78    dtrace_nfsclient_nfs23_start_probe;
79
80dtrace_nfsclient_nfs23_done_probe_func_t
81    dtrace_nfsclient_nfs23_done_probe;
82
83/*
84 * Registered probes by RPC type.
85 */
86uint32_t	nfsclient_nfs2_start_probes[NFS_NPROCS];
87uint32_t	nfsclient_nfs2_done_probes[NFS_NPROCS];
88
89uint32_t	nfsclient_nfs3_start_probes[NFS_NPROCS];
90uint32_t	nfsclient_nfs3_done_probes[NFS_NPROCS];
91#endif
92
93static int	nfs_realign_test;
94static int	nfs_realign_count;
95static int	nfs_bufpackets = 4;
96static int	nfs_reconnects;
97static int	nfs3_jukebox_delay = 10;
98static int	nfs_skip_wcc_data_onerr = 1;
99static int	fake_wchan;
100
101SYSCTL_DECL(_vfs_nfs);
102
103SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0,
104    "Number of realign tests done");
105SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0,
106    "Number of mbuf realignments done");
107SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0,
108    "Buffer reservation size 2 < x < 64");
109SYSCTL_INT(_vfs_nfs, OID_AUTO, reconnects, CTLFLAG_RD, &nfs_reconnects, 0,
110    "Number of times the nfs client has had to reconnect");
111SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs3_jukebox_delay, CTLFLAG_RW, &nfs3_jukebox_delay, 0,
112    "Number of seconds to delay a retry after receiving EJUKEBOX");
113SYSCTL_INT(_vfs_nfs, OID_AUTO, skip_wcc_data_onerr, CTLFLAG_RW, &nfs_skip_wcc_data_onerr, 0,
114    "Disable weak cache consistency checking when server returns an error");
115
/* Forward declarations for the up/down notification helpers. */
static void	nfs_down(struct nfsmount *nmp, struct thread *td,
    const char *msg, int error, int flags);
static void	nfs_up(struct nfsmount *nmp, struct thread *td,
    const char *msg, int flags, int tprintfmsg);
static int	nfs_msg(struct thread *td, const char *server,
    const char *msg, int error);

/*
 * Defined elsewhere.  nfs_request() indexes it by procnum to obtain the
 * procedure number sent on mounts that are not NFSv3.
 */
extern int nfsv2_procid[];
123
124struct nfs_cached_auth {
125	int		ca_refs; /* refcount, including 1 from the cache */
126	uid_t		ca_uid;	 /* uid that corresponds to this auth */
127	AUTH		*ca_auth; /* RPC auth handle */
128};
129
130/*
131 * RTT estimator
132 */
133
134static enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = {
135	NFS_DEFAULT_TIMER,	/* NULL */
136	NFS_GETATTR_TIMER,	/* GETATTR */
137	NFS_DEFAULT_TIMER,	/* SETATTR */
138	NFS_LOOKUP_TIMER,	/* LOOKUP */
139	NFS_GETATTR_TIMER,	/* ACCESS */
140	NFS_READ_TIMER,		/* READLINK */
141	NFS_READ_TIMER,		/* READ */
142	NFS_WRITE_TIMER,	/* WRITE */
143	NFS_DEFAULT_TIMER,	/* CREATE */
144	NFS_DEFAULT_TIMER,	/* MKDIR */
145	NFS_DEFAULT_TIMER,	/* SYMLINK */
146	NFS_DEFAULT_TIMER,	/* MKNOD */
147	NFS_DEFAULT_TIMER,	/* REMOVE */
148	NFS_DEFAULT_TIMER,	/* RMDIR */
149	NFS_DEFAULT_TIMER,	/* RENAME */
150	NFS_DEFAULT_TIMER,	/* LINK */
151	NFS_READ_TIMER,		/* READDIR */
152	NFS_READ_TIMER,		/* READDIRPLUS */
153	NFS_DEFAULT_TIMER,	/* FSSTAT */
154	NFS_DEFAULT_TIMER,	/* FSINFO */
155	NFS_DEFAULT_TIMER,	/* PATHCONF */
156	NFS_DEFAULT_TIMER,	/* COMMIT */
157	NFS_DEFAULT_TIMER,	/* NOOP */
158};
159
160/*
161 * Choose the correct RTT timer for this NFS procedure.
162 */
163static inline enum nfs_rto_timer_t
164nfs_rto_timer(u_int32_t procnum)
165{
166	return nfs_proct[procnum];
167}
168
169/*
170 * Initialize the RTT estimator state for a new mount point.
171 */
172static void
173nfs_init_rtt(struct nfsmount *nmp)
174{
175	int i;
176
177	for (i = 0; i < NFS_MAX_TIMER; i++) {
178		nmp->nm_timers[i].rt_srtt = hz;
179		nmp->nm_timers[i].rt_deviate = 0;
180		nmp->nm_timers[i].rt_rtxcur = hz;
181	}
182}
183
184/*
185 * Initialize sockets and congestion for a new NFS connection.
186 * We do not free the sockaddr if error.
187 */
188int
189nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
190{
191	int rcvreserve, sndreserve;
192	int pktscale;
193	struct sockaddr *saddr;
194	struct ucred *origcred;
195	struct thread *td = curthread;
196	CLIENT *client;
197	struct netconfig *nconf;
198	rpcvers_t vers;
199	int one = 1, retries;
200
201	/*
202	 * We need to establish the socket using the credentials of
203	 * the mountpoint.  Some parts of this process (such as
204	 * sobind() and soconnect()) will use the curent thread's
205	 * credential instead of the socket credential.  To work
206	 * around this, temporarily change the current thread's
207	 * credential to that of the mountpoint.
208	 *
209	 * XXX: It would be better to explicitly pass the correct
210	 * credential to sobind() and soconnect().
211	 */
212	origcred = td->td_ucred;
213	td->td_ucred = nmp->nm_mountp->mnt_cred;
214	saddr = nmp->nm_nam;
215
216	vers = NFS_VER2;
217	if (nmp->nm_flag & NFSMNT_NFSV3)
218		vers = NFS_VER3;
219	else if (nmp->nm_flag & NFSMNT_NFSV4)
220		vers = NFS_VER4;
221	if (saddr->sa_family == AF_INET)
222		if (nmp->nm_sotype == SOCK_DGRAM)
223			nconf = getnetconfigent("udp");
224		else
225			nconf = getnetconfigent("tcp");
226	else
227		if (nmp->nm_sotype == SOCK_DGRAM)
228			nconf = getnetconfigent("udp6");
229		else
230			nconf = getnetconfigent("tcp6");
231
232	/*
233	 * Get buffer reservation size from sysctl, but impose reasonable
234	 * limits.
235	 */
236	pktscale = nfs_bufpackets;
237	if (pktscale < 2)
238		pktscale = 2;
239	if (pktscale > 64)
240		pktscale = 64;
241	mtx_lock(&nmp->nm_mtx);
242	if (nmp->nm_sotype == SOCK_DGRAM) {
243		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
244		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
245		    NFS_MAXPKTHDR) * pktscale;
246	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
247		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
248		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
249		    NFS_MAXPKTHDR) * pktscale;
250	} else {
251		if (nmp->nm_sotype != SOCK_STREAM)
252			panic("nfscon sotype");
253		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
254		    sizeof (u_int32_t)) * pktscale;
255		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
256		    sizeof (u_int32_t)) * pktscale;
257	}
258	mtx_unlock(&nmp->nm_mtx);
259
260	client = clnt_reconnect_create(nconf, saddr, NFS_PROG, vers,
261	    sndreserve, rcvreserve);
262	CLNT_CONTROL(client, CLSET_WAITCHAN, "nfsreq");
263	if (nmp->nm_flag & NFSMNT_INT)
264		CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
265	if (nmp->nm_flag & NFSMNT_RESVPORT)
266		CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
267	if (nmp->nm_flag & NFSMNT_SOFT)
268		retries = nmp->nm_retry;
269	else
270		retries = INT_MAX;
271	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
272
273	mtx_lock(&nmp->nm_mtx);
274	if (nmp->nm_client) {
275		/*
276		 * Someone else already connected.
277		 */
278		CLNT_RELEASE(client);
279	} else {
280		nmp->nm_client = client;
281	}
282
283	/*
284	 * Protocols that do not require connections may be optionally left
285	 * unconnected for servers that reply from a port other than NFS_PORT.
286	 */
287	if (!(nmp->nm_flag & NFSMNT_NOCONN)) {
288		mtx_unlock(&nmp->nm_mtx);
289		CLNT_CONTROL(client, CLSET_CONNECT, &one);
290	} else {
291		mtx_unlock(&nmp->nm_mtx);
292	}
293
294	/* Restore current thread's credentials. */
295	td->td_ucred = origcred;
296
297	mtx_lock(&nmp->nm_mtx);
298	/* Initialize other non-zero congestion variables */
299	nfs_init_rtt(nmp);
300	mtx_unlock(&nmp->nm_mtx);
301	return (0);
302}
303
304/*
305 * NFS disconnect. Clean up and unlink.
306 */
307void
308nfs_disconnect(struct nfsmount *nmp)
309{
310	CLIENT *client;
311
312	mtx_lock(&nmp->nm_mtx);
313	if (nmp->nm_client) {
314		client = nmp->nm_client;
315		nmp->nm_client = NULL;
316		mtx_unlock(&nmp->nm_mtx);
317#ifdef KGSSAPI
318		rpc_gss_secpurge(client);
319#endif
320		CLNT_CLOSE(client);
321		CLNT_RELEASE(client);
322	} else {
323		mtx_unlock(&nmp->nm_mtx);
324	}
325}
326
/*
 * Disconnect the mount's RPC client.  This is a plain forwarder to
 * nfs_disconnect().
 */
void
nfs_safedisconnect(struct nfsmount *nmp)
{

	nfs_disconnect(nmp);
}
333
334static AUTH *
335nfs_getauth(struct nfsmount *nmp, struct ucred *cred)
336{
337#ifdef KGSSAPI
338	rpc_gss_service_t svc;
339	AUTH *auth;
340#endif
341
342	switch (nmp->nm_secflavor) {
343#ifdef KGSSAPI
344	case RPCSEC_GSS_KRB5:
345	case RPCSEC_GSS_KRB5I:
346	case RPCSEC_GSS_KRB5P:
347		if (!nmp->nm_mech_oid) {
348			if (!rpc_gss_mech_to_oid("kerberosv5",
349				&nmp->nm_mech_oid))
350				return (NULL);
351		}
352		if (nmp->nm_secflavor == RPCSEC_GSS_KRB5)
353			svc = rpc_gss_svc_none;
354		else if (nmp->nm_secflavor == RPCSEC_GSS_KRB5I)
355			svc = rpc_gss_svc_integrity;
356		else
357			svc = rpc_gss_svc_privacy;
358		auth = rpc_gss_secfind(nmp->nm_client, cred,
359		    nmp->nm_principal, nmp->nm_mech_oid, svc);
360		if (auth)
361			return (auth);
362		/* fallthrough */
363#endif
364	case AUTH_SYS:
365	default:
366		return (authunix_create(cred));
367
368	}
369}
370
/*
 * Per-request state handed to nfs_feedback(), the callback from the RPC
 * code that generates up/down notifications.
 */
struct nfs_feedback_arg {
	/* mount the request belongs to */
	struct nfsmount *nf_mount;
	/* last tprintf */
	int		nf_lastmsg;
	/* set to TRUE once a "not responding" message was issued */
	int		nf_tprintfmsg;
	/* thread passed on to nfs_down()/nfs_up() */
	struct thread	*nf_td;
};
381
382static void
383nfs_feedback(int type, int proc, void *arg)
384{
385	struct nfs_feedback_arg *nf = (struct nfs_feedback_arg *) arg;
386	struct nfsmount *nmp = nf->nf_mount;
387	struct timeval now;
388
389	getmicrouptime(&now);
390
391	switch (type) {
392	case FEEDBACK_REXMIT2:
393	case FEEDBACK_RECONNECT:
394		if (nf->nf_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) {
395			nfs_down(nmp, nf->nf_td,
396			    "not responding", 0, NFSSTA_TIMEO);
397			nf->nf_tprintfmsg = TRUE;
398			nf->nf_lastmsg = now.tv_sec;
399		}
400		break;
401
402	case FEEDBACK_OK:
403		nfs_up(nf->nf_mount, nf->nf_td,
404		    "is alive again", NFSSTA_TIMEO, nf->nf_tprintfmsg);
405		break;
406	}
407}
408
409/*
410 *	nfs_realign:
411 *
412 *	Check for badly aligned mbuf data and realign by copying the unaligned
413 *	portion of the data into a new mbuf chain and freeing the portions
414 *	of the old chain that were replaced.
415 *
416 *	We cannot simply realign the data within the existing mbuf chain
417 *	because the underlying buffers may contain other rpc commands and
418 *	we cannot afford to overwrite them.
419 *
420 *	We would prefer to avoid this situation entirely.  The situation does
421 *	not occur with NFS/UDP and is supposed to only occassionally occur
422 *	with TCP.  Use vfs.nfs.realign_count and realign_test to check this.
423 *
424 */
425static int
426nfs_realign(struct mbuf **pm, int hsiz)
427{
428	struct mbuf *m, *n;
429	int off, space;
430
431	++nfs_realign_test;
432	while ((m = *pm) != NULL) {
433		if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
434			/*
435			 * NB: we can't depend on m_pkthdr.len to help us
436			 * decide what to do here.  May not be worth doing
437			 * the m_length calculation as m_copyback will
438			 * expand the mbuf chain below as needed.
439			 */
440			space = m_length(m, NULL);
441			if (space >= MINCLSIZE) {
442				/* NB: m_copyback handles space > MCLBYTES */
443				n = m_getcl(M_DONTWAIT, MT_DATA, 0);
444			} else
445				n = m_get(M_DONTWAIT, MT_DATA);
446			if (n == NULL)
447				return (ENOMEM);
448			/*
449			 * Align the remainder of the mbuf chain.
450			 */
451			n->m_len = 0;
452			off = 0;
453			while (m != NULL) {
454				m_copyback(n, off, m->m_len, mtod(m, caddr_t));
455				off += m->m_len;
456				m = m->m_next;
457			}
458			m_freem(*pm);
459			*pm = n;
460			++nfs_realign_count;
461			break;
462		}
463		pm = &m->m_next;
464	}
465	return (0);
466}
467
468/*
469 * nfs_request - goes something like this
470 *	- fill in request struct
471 *	- links it into list
472 *	- calls nfs_send() for first transmit
473 *	- calls nfs_receive() to get reply
474 *	- break down rpc header and return with nfs reply pointed to
475 *	  by mrep or error
476 * nb: always frees up mreq mbuf list
477 */
478int
479nfs_request(struct vnode *vp, struct mbuf *mreq, int procnum,
480    struct thread *td, struct ucred *cred, struct mbuf **mrp,
481    struct mbuf **mdp, caddr_t *dposp)
482{
483	struct mbuf *mrep;
484	u_int32_t *tl;
485	struct nfsmount *nmp;
486	struct mbuf *md;
487	time_t waituntil;
488	caddr_t dpos;
489	int error = 0;
490	struct timeval now;
491	AUTH *auth = NULL;
492	enum nfs_rto_timer_t timer;
493	struct nfs_feedback_arg nf;
494	struct rpc_callextra ext;
495	enum clnt_stat stat;
496	struct timeval timo;
497
498	/* Reject requests while attempting a forced unmount. */
499	if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
500		m_freem(mreq);
501		return (ESTALE);
502	}
503	nmp = VFSTONFS(vp->v_mount);
504	bzero(&nf, sizeof(struct nfs_feedback_arg));
505	nf.nf_mount = nmp;
506	nf.nf_td = td;
507	getmicrouptime(&now);
508	nf.nf_lastmsg = now.tv_sec -
509		((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
510
511	/*
512	 * XXX if not already connected call nfs_connect now. Longer
513	 * term, change nfs_mount to call nfs_connect unconditionally
514	 * and let clnt_reconnect_create handle reconnects.
515	 */
516	if (!nmp->nm_client)
517		nfs_connect(nmp, NULL);
518
519	auth = nfs_getauth(nmp, cred);
520	if (!auth) {
521		m_freem(mreq);
522		return (EACCES);
523	}
524	bzero(&ext, sizeof(ext));
525	ext.rc_auth = auth;
526
527	ext.rc_feedback = nfs_feedback;
528	ext.rc_feedback_arg = &nf;
529
530	/*
531	 * Use a conservative timeout for RPCs other than getattr,
532	 * lookup, read or write. The justification for doing "other"
533	 * this way is that these RPCs happen so infrequently that
534	 * timer est. would probably be stale.  Also, since many of
535	 * these RPCs are non-idempotent, a conservative timeout is
536	 * desired.
537	 */
538	timer = nfs_rto_timer(procnum);
539	if (timer != NFS_DEFAULT_TIMER) {
540		ext.rc_timers = &nmp->nm_timers[timer - 1];
541	} else {
542		ext.rc_timers = NULL;
543	}
544
545#ifdef KDTRACE_HOOKS
546	if (dtrace_nfsclient_nfs23_start_probe != NULL) {
547		uint32_t probe_id;
548		int probe_procnum;
549
550		if (nmp->nm_flag & NFSMNT_NFSV3) {
551			probe_id = nfsclient_nfs3_start_probes[procnum];
552			probe_procnum = procnum;
553		} else {
554			probe_id = nfsclient_nfs2_start_probes[procnum];
555			probe_procnum = nfsv2_procid[procnum];
556		}
557		if (probe_id != 0)
558			(dtrace_nfsclient_nfs23_start_probe)(probe_id, vp,
559			    mreq, cred, probe_procnum);
560	}
561#endif
562
563	nfsstats.rpcrequests++;
564tryagain:
565	timo.tv_sec = nmp->nm_timeo / NFS_HZ;
566	timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ;
567	mrep = NULL;
568	stat = CLNT_CALL_MBUF(nmp->nm_client, &ext,
569	    (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum],
570	    mreq, &mrep, timo);
571
572	/*
573	 * If there was a successful reply and a tprintf msg.
574	 * tprintf a response.
575	 */
576	if (stat == RPC_SUCCESS) {
577		error = 0;
578	} else if (stat == RPC_TIMEDOUT) {
579		error = ETIMEDOUT;
580	} else if (stat == RPC_VERSMISMATCH) {
581		error = EOPNOTSUPP;
582	} else if (stat == RPC_PROGVERSMISMATCH) {
583		error = EPROTONOSUPPORT;
584	} else {
585		error = EACCES;
586	}
587	if (error)
588		goto nfsmout;
589
590	KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n"));
591
592	/*
593	 * Search for any mbufs that are not a multiple of 4 bytes long
594	 * or with m_data not longword aligned.
595	 * These could cause pointer alignment problems, so copy them to
596	 * well aligned mbufs.
597	 */
598	error = nfs_realign(&mrep, 2 * NFSX_UNSIGNED);
599	if (error == ENOMEM) {
600		m_freem(mrep);
601		AUTH_DESTROY(auth);
602		return (error);
603	}
604
605	md = mrep;
606	dpos = mtod(mrep, caddr_t);
607	tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
608	if (*tl != 0) {
609		error = fxdr_unsigned(int, *tl);
610		if ((nmp->nm_flag & NFSMNT_NFSV3) &&
611		    error == NFSERR_TRYLATER) {
612			m_freem(mrep);
613			error = 0;
614			waituntil = time_second + nfs3_jukebox_delay;
615			while (time_second < waituntil) {
616				(void) tsleep(&fake_wchan, PSOCK, "nqnfstry", hz);
617			}
618			goto tryagain;
619		}
620
621		/*
622		 * If the File Handle was stale, invalidate the lookup
623		 * cache, just in case.
624		 */
625		if (error == ESTALE)
626			nfs_purgecache(vp);
627		/*
628		 * Skip wcc data on NFS errors for now. NetApp filers
629		 * return corrupt postop attrs in the wcc data for NFS
630		 * err EROFS. Not sure if they could return corrupt
631		 * postop attrs for others errors.
632		 */
633		if ((nmp->nm_flag & NFSMNT_NFSV3) && !nfs_skip_wcc_data_onerr) {
634			*mrp = mrep;
635			*mdp = md;
636			*dposp = dpos;
637			error |= NFSERR_RETERR;
638		} else
639			m_freem(mrep);
640		goto nfsmout;
641	}
642
643#ifdef KDTRACE_HOOKS
644	if (dtrace_nfsclient_nfs23_done_probe != NULL) {
645		uint32_t probe_id;
646		int probe_procnum;
647
648		if (nmp->nm_flag & NFSMNT_NFSV3) {
649			probe_id = nfsclient_nfs3_done_probes[procnum];
650			probe_procnum = procnum;
651		} else {
652			probe_id = nfsclient_nfs2_done_probes[procnum];
653			probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ?
654			    procnum : nfsv2_procid[procnum];
655		}
656		if (probe_id != 0)
657			(dtrace_nfsclient_nfs23_done_probe)(probe_id, vp,
658			    mreq, cred, probe_procnum, 0);
659	}
660#endif
661	m_freem(mreq);
662	*mrp = mrep;
663	*mdp = md;
664	*dposp = dpos;
665	AUTH_DESTROY(auth);
666	return (0);
667
668nfsmout:
669#ifdef KDTRACE_HOOKS
670	if (dtrace_nfsclient_nfs23_done_probe != NULL) {
671		uint32_t probe_id;
672		int probe_procnum;
673
674		if (nmp->nm_flag & NFSMNT_NFSV3) {
675			probe_id = nfsclient_nfs3_done_probes[procnum];
676			probe_procnum = procnum;
677		} else {
678			probe_id = nfsclient_nfs2_done_probes[procnum];
679			probe_procnum = (nmp->nm_flag & NFSMNT_NFSV3) ?
680			    procnum : nfsv2_procid[procnum];
681		}
682		if (probe_id != 0)
683			(dtrace_nfsclient_nfs23_done_probe)(probe_id, vp,
684			    mreq, cred, probe_procnum, error);
685	}
686#endif
687	m_freem(mreq);
688	if (auth)
689		AUTH_DESTROY(auth);
690	return (error);
691}
692
693/*
694 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
695 * wait for all requests to complete. This is used by forced unmounts
696 * to terminate any outstanding RPCs.
697 */
698int
699nfs_nmcancelreqs(struct nfsmount *nmp)
700{
701
702	if (nmp->nm_client)
703		CLNT_CLOSE(nmp->nm_client);
704	return (0);
705}
706
707/*
708 * Any signal that can interrupt an NFS operation in an intr mount
709 * should be added to this set. SIGSTOP and SIGKILL cannot be masked.
710 */
711int nfs_sig_set[] = {
712	SIGINT,
713	SIGTERM,
714	SIGHUP,
715	SIGKILL,
716	SIGSTOP,
717	SIGQUIT
718};
719
720/*
721 * Check to see if one of the signals in our subset is pending on
722 * the process (in an intr mount).
723 */
724static int
725nfs_sig_pending(sigset_t set)
726{
727	int i;
728
729	for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++)
730		if (SIGISMEMBER(set, nfs_sig_set[i]))
731			return (1);
732	return (0);
733}
734
735/*
736 * The set/restore sigmask functions are used to (temporarily) overwrite
737 * the process p_sigmask during an RPC call (for example). These are also
738 * used in other places in the NFS client that might tsleep().
739 */
740void
741nfs_set_sigmask(struct thread *td, sigset_t *oldset)
742{
743	sigset_t newset;
744	int i;
745	struct proc *p;
746
747	SIGFILLSET(newset);
748	if (td == NULL)
749		td = curthread; /* XXX */
750	p = td->td_proc;
751	/* Remove the NFS set of signals from newset */
752	PROC_LOCK(p);
753	mtx_lock(&p->p_sigacts->ps_mtx);
754	for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) {
755		/*
756		 * But make sure we leave the ones already masked
757		 * by the process, ie. remove the signal from the
758		 * temporary signalmask only if it wasn't already
759		 * in p_sigmask.
760		 */
761		if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) &&
762		    !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i]))
763			SIGDELSET(newset, nfs_sig_set[i]);
764	}
765	mtx_unlock(&p->p_sigacts->ps_mtx);
766	PROC_UNLOCK(p);
767	kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0);
768}
769
770void
771nfs_restore_sigmask(struct thread *td, sigset_t *set)
772{
773	if (td == NULL)
774		td = curthread; /* XXX */
775	kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
776}
777
778/*
779 * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
780 * old one after msleep() returns.
781 */
782int
783nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
784{
785	sigset_t oldset;
786	int error;
787	struct proc *p;
788
789	if ((priority & PCATCH) == 0)
790		return msleep(ident, mtx, priority, wmesg, timo);
791	if (td == NULL)
792		td = curthread; /* XXX */
793	nfs_set_sigmask(td, &oldset);
794	error = msleep(ident, mtx, priority, wmesg, timo);
795	nfs_restore_sigmask(td, &oldset);
796	p = td->td_proc;
797	return (error);
798}
799
800/*
801 * Test for a termination condition pending on the process.
802 * This is used for NFSMNT_INT mounts.
803 */
804int
805nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
806{
807	struct proc *p;
808	sigset_t tmpset;
809
810	/* Terminate all requests while attempting a forced unmount. */
811	if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
812		return (EIO);
813	if (!(nmp->nm_flag & NFSMNT_INT))
814		return (0);
815	if (td == NULL)
816		return (0);
817	p = td->td_proc;
818	PROC_LOCK(p);
819	tmpset = p->p_siglist;
820	SIGSETOR(tmpset, td->td_siglist);
821	SIGSETNAND(tmpset, td->td_sigmask);
822	mtx_lock(&p->p_sigacts->ps_mtx);
823	SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
824	mtx_unlock(&p->p_sigacts->ps_mtx);
825	if ((SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist))
826	    && nfs_sig_pending(tmpset)) {
827		PROC_UNLOCK(p);
828		return (EINTR);
829	}
830	PROC_UNLOCK(p);
831	return (0);
832}
833
834static int
835nfs_msg(struct thread *td, const char *server, const char *msg, int error)
836{
837	struct proc *p;
838
839	p = td ? td->td_proc : NULL;
840	if (error) {
841		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server,
842		    msg, error);
843	} else {
844		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
845	}
846	return (0);
847}
848
849static void
850nfs_down(struct nfsmount *nmp, struct thread *td, const char *msg,
851    int error, int flags)
852{
853	if (nmp == NULL)
854		return;
855	mtx_lock(&nmp->nm_mtx);
856	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) {
857		nmp->nm_state |= NFSSTA_TIMEO;
858		mtx_unlock(&nmp->nm_mtx);
859		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
860		    VQ_NOTRESP, 0);
861	} else
862		mtx_unlock(&nmp->nm_mtx);
863	mtx_lock(&nmp->nm_mtx);
864	if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
865		nmp->nm_state |= NFSSTA_LOCKTIMEO;
866		mtx_unlock(&nmp->nm_mtx);
867		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
868		    VQ_NOTRESPLOCK, 0);
869	} else
870		mtx_unlock(&nmp->nm_mtx);
871	nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
872}
873
874static void
875nfs_up(struct nfsmount *nmp, struct thread *td, const char *msg,
876    int flags, int tprintfmsg)
877{
878	if (nmp == NULL)
879		return;
880	if (tprintfmsg) {
881		nfs_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
882	}
883
884	mtx_lock(&nmp->nm_mtx);
885	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) {
886		nmp->nm_state &= ~NFSSTA_TIMEO;
887		mtx_unlock(&nmp->nm_mtx);
888		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
889		    VQ_NOTRESP, 1);
890	} else
891		mtx_unlock(&nmp->nm_mtx);
892
893	mtx_lock(&nmp->nm_mtx);
894	if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) {
895		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
896		mtx_unlock(&nmp->nm_mtx);
897		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
898		    VQ_NOTRESPLOCK, 1);
899	} else
900		mtx_unlock(&nmp->nm_mtx);
901}
902
903#endif /* !NFS_LEGACYRPC */
904