nfs_srvsocket.c revision 1.1
1/*	$NetBSD: nfs_srvsocket.c,v 1.1 2008/11/19 18:36:09 ad Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1991, 1993, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
35 */
36
37/*
38 * Socket operations for use by nfs
39 */
40
41#include <sys/cdefs.h>
42__KERNEL_RCSID(0, "$NetBSD: nfs_srvsocket.c,v 1.1 2008/11/19 18:36:09 ad Exp $");
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/evcnt.h>
47#include <sys/callout.h>
48#include <sys/proc.h>
49#include <sys/mount.h>
50#include <sys/kernel.h>
51#include <sys/kmem.h>
52#include <sys/mbuf.h>
53#include <sys/vnode.h>
54#include <sys/domain.h>
55#include <sys/protosw.h>
56#include <sys/socket.h>
57#include <sys/socketvar.h>
58#include <sys/syslog.h>
59#include <sys/tprintf.h>
60#include <sys/namei.h>
61#include <sys/signal.h>
62#include <sys/signalvar.h>
63#include <sys/kauth.h>
64
65#include <netinet/in.h>
66#include <netinet/tcp.h>
67
68#include <nfs/rpcv2.h>
69#include <nfs/nfsproto.h>
70#include <nfs/nfs.h>
71#include <nfs/xdr_subs.h>
72#include <nfs/nfsm_subs.h>
73#include <nfs/nfsmount.h>
74#include <nfs/nfsnode.h>
75#include <nfs/nfsrtt.h>
76#include <nfs/nfs_var.h>
77
78static void nfsrv_wakenfsd_locked(struct nfssvc_sock *);
79
80int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
81				    struct nfssvc_sock *, struct lwp *,
82				    struct mbuf **)) = {
83	nfsrv_null,
84	nfsrv_getattr,
85	nfsrv_setattr,
86	nfsrv_lookup,
87	nfsrv3_access,
88	nfsrv_readlink,
89	nfsrv_read,
90	nfsrv_write,
91	nfsrv_create,
92	nfsrv_mkdir,
93	nfsrv_symlink,
94	nfsrv_mknod,
95	nfsrv_remove,
96	nfsrv_rmdir,
97	nfsrv_rename,
98	nfsrv_link,
99	nfsrv_readdir,
100	nfsrv_readdirplus,
101	nfsrv_statfs,
102	nfsrv_fsinfo,
103	nfsrv_pathconf,
104	nfsrv_commit,
105	nfsrv_noop
106};
107
108/*
109 * Socket upcall routine for the nfsd sockets.
110 * The void *arg is a pointer to the "struct nfssvc_sock".
111 */
112void
113nfsrv_soupcall(struct socket *so, void *arg, int waitflag)
114{
115	struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
116
117	nfsdsock_setbits(slp, SLP_A_NEEDQ);
118	nfsrv_wakenfsd(slp);
119}
120
121void
122nfsrv_rcv(struct nfssvc_sock *slp)
123{
124	struct socket *so;
125	struct mbuf *m;
126	struct mbuf *mp, *nam;
127	struct uio auio;
128	int flags;
129	int error;
130	int setflags = 0;
131
132	error = nfsdsock_lock(slp, true);
133	if (error) {
134		setflags |= SLP_A_NEEDQ;
135		goto dorecs_unlocked;
136	}
137
138	nfsdsock_clearbits(slp, SLP_A_NEEDQ);
139
140	so = slp->ns_so;
141	if (so->so_type == SOCK_STREAM) {
142		/*
143		 * Do soreceive().
144		 */
145		auio.uio_resid = 1000000000;
146		/* not need to setup uio_vmspace */
147		flags = MSG_DONTWAIT;
148		error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags);
149		if (error || mp == NULL) {
150			if (error == EWOULDBLOCK)
151				setflags |= SLP_A_NEEDQ;
152			else
153				setflags |= SLP_A_DISCONN;
154			goto dorecs;
155		}
156		m = mp;
157		m_claimm(m, &nfs_mowner);
158		if (slp->ns_rawend) {
159			slp->ns_rawend->m_next = m;
160			slp->ns_cc += 1000000000 - auio.uio_resid;
161		} else {
162			slp->ns_raw = m;
163			slp->ns_cc = 1000000000 - auio.uio_resid;
164		}
165		while (m->m_next)
166			m = m->m_next;
167		slp->ns_rawend = m;
168
169		/*
170		 * Now try and parse record(s) out of the raw stream data.
171		 */
172		error = nfsrv_getstream(slp, M_WAIT);
173		if (error) {
174			if (error == EPERM)
175				setflags |= SLP_A_DISCONN;
176			else
177				setflags |= SLP_A_NEEDQ;
178		}
179	} else {
180		do {
181			auio.uio_resid = 1000000000;
182			/* not need to setup uio_vmspace */
183			flags = MSG_DONTWAIT;
184			error = (*so->so_receive)(so, &nam, &auio, &mp, NULL,
185			    &flags);
186			if (mp) {
187				if (nam) {
188					m = nam;
189					m->m_next = mp;
190				} else
191					m = mp;
192				m_claimm(m, &nfs_mowner);
193				if (slp->ns_recend)
194					slp->ns_recend->m_nextpkt = m;
195				else
196					slp->ns_rec = m;
197				slp->ns_recend = m;
198				m->m_nextpkt = (struct mbuf *)0;
199			}
200			if (error) {
201				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
202				    && error != EWOULDBLOCK) {
203					setflags |= SLP_A_DISCONN;
204					goto dorecs;
205				}
206			}
207		} while (mp);
208	}
209dorecs:
210	nfsdsock_unlock(slp);
211
212dorecs_unlocked:
213	if (setflags) {
214		nfsdsock_setbits(slp, setflags);
215	}
216}
217
218int
219nfsdsock_lock(struct nfssvc_sock *slp, bool waitok)
220{
221
222	mutex_enter(&slp->ns_lock);
223	while ((~slp->ns_flags & (SLP_BUSY|SLP_VALID)) == 0) {
224		if (!waitok) {
225			mutex_exit(&slp->ns_lock);
226			return EWOULDBLOCK;
227		}
228		cv_wait(&slp->ns_cv, &slp->ns_lock);
229	}
230	if ((slp->ns_flags & SLP_VALID) == 0) {
231		mutex_exit(&slp->ns_lock);
232		return EINVAL;
233	}
234	KASSERT((slp->ns_flags & SLP_BUSY) == 0);
235	slp->ns_flags |= SLP_BUSY;
236	mutex_exit(&slp->ns_lock);
237
238	return 0;
239}
240
241void
242nfsdsock_unlock(struct nfssvc_sock *slp)
243{
244
245	mutex_enter(&slp->ns_lock);
246	KASSERT((slp->ns_flags & SLP_BUSY) != 0);
247	cv_broadcast(&slp->ns_cv);
248	slp->ns_flags &= ~SLP_BUSY;
249	mutex_exit(&slp->ns_lock);
250}
251
252int
253nfsdsock_drain(struct nfssvc_sock *slp)
254{
255	int error = 0;
256
257	mutex_enter(&slp->ns_lock);
258	if ((slp->ns_flags & SLP_VALID) == 0) {
259		error = EINVAL;
260		goto done;
261	}
262	slp->ns_flags &= ~SLP_VALID;
263	while ((slp->ns_flags & SLP_BUSY) != 0) {
264		cv_wait(&slp->ns_cv, &slp->ns_lock);
265	}
266done:
267	mutex_exit(&slp->ns_lock);
268
269	return error;
270}
271
272/*
273 * Try and extract an RPC request from the mbuf data list received on a
274 * stream socket. The "waitflag" argument indicates whether or not it
275 * can sleep.
276 */
277int
278nfsrv_getstream(slp, waitflag)
279	struct nfssvc_sock *slp;
280	int waitflag;
281{
282	struct mbuf *m, **mpp;
283	struct mbuf *recm;
284	u_int32_t recmark;
285	int error = 0;
286
287	KASSERT((slp->ns_flags & SLP_BUSY) != 0);
288	for (;;) {
289		if (slp->ns_reclen == 0) {
290			if (slp->ns_cc < NFSX_UNSIGNED) {
291				break;
292			}
293			m = slp->ns_raw;
294			m_copydata(m, 0, NFSX_UNSIGNED, (void *)&recmark);
295			m_adj(m, NFSX_UNSIGNED);
296			slp->ns_cc -= NFSX_UNSIGNED;
297			recmark = ntohl(recmark);
298			slp->ns_reclen = recmark & ~0x80000000;
299			if (recmark & 0x80000000)
300				slp->ns_sflags |= SLP_S_LASTFRAG;
301			else
302				slp->ns_sflags &= ~SLP_S_LASTFRAG;
303			if (slp->ns_reclen > NFS_MAXPACKET) {
304				error = EPERM;
305				break;
306			}
307		}
308
309		/*
310		 * Now get the record part.
311		 *
312		 * Note that slp->ns_reclen may be 0.  Linux sometimes
313		 * generates 0-length records.
314		 */
315		if (slp->ns_cc == slp->ns_reclen) {
316			recm = slp->ns_raw;
317			slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
318			slp->ns_cc = slp->ns_reclen = 0;
319		} else if (slp->ns_cc > slp->ns_reclen) {
320			recm = slp->ns_raw;
321			m = m_split(recm, slp->ns_reclen, waitflag);
322			if (m == NULL) {
323				error = EWOULDBLOCK;
324				break;
325			}
326			m_claimm(recm, &nfs_mowner);
327			slp->ns_raw = m;
328			if (m->m_next == NULL)
329				slp->ns_rawend = m;
330			slp->ns_cc -= slp->ns_reclen;
331			slp->ns_reclen = 0;
332		} else {
333			break;
334		}
335
336		/*
337		 * Accumulate the fragments into a record.
338		 */
339		mpp = &slp->ns_frag;
340		while (*mpp)
341			mpp = &((*mpp)->m_next);
342		*mpp = recm;
343		if (slp->ns_sflags & SLP_S_LASTFRAG) {
344			if (slp->ns_recend)
345				slp->ns_recend->m_nextpkt = slp->ns_frag;
346			else
347				slp->ns_rec = slp->ns_frag;
348			slp->ns_recend = slp->ns_frag;
349			slp->ns_frag = NULL;
350		}
351	}
352
353	return error;
354}
355
356/*
357 * Parse an RPC header.
358 */
359int
360nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
361    struct nfsrv_descript **ndp, bool *more)
362{
363	struct mbuf *m, *nam;
364	struct nfsrv_descript *nd;
365	int error;
366
367	*ndp = NULL;
368	*more = false;
369
370	if (nfsdsock_lock(slp, true)) {
371		return ENOBUFS;
372	}
373	m = slp->ns_rec;
374	if (m == NULL) {
375		nfsdsock_unlock(slp);
376		return ENOBUFS;
377	}
378	slp->ns_rec = m->m_nextpkt;
379	if (slp->ns_rec) {
380		m->m_nextpkt = NULL;
381		*more = true;
382	} else {
383		slp->ns_recend = NULL;
384	}
385	nfsdsock_unlock(slp);
386
387	if (m->m_type == MT_SONAME) {
388		nam = m;
389		m = m->m_next;
390		nam->m_next = NULL;
391	} else
392		nam = NULL;
393	nd = nfsdreq_alloc();
394	nd->nd_md = nd->nd_mrep = m;
395	nd->nd_nam2 = nam;
396	nd->nd_dpos = mtod(m, void *);
397	error = nfs_getreq(nd, nfsd, true);
398	if (error) {
399		m_freem(nam);
400		nfsdreq_free(nd);
401		return (error);
402	}
403	*ndp = nd;
404	nfsd->nfsd_nd = nd;
405	return (0);
406}
407
408bool
409nfsrv_timer(void)
410{
411	struct timeval tv;
412	struct nfssvc_sock *slp;
413	u_quad_t cur_usec;
414	struct nfsrv_descript *nd;
415	bool more;
416
417	/*
418	 * Scan the write gathering queues for writes that need to be
419	 * completed now.
420	 */
421	getmicrotime(&tv);
422	cur_usec = (u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec;
423	more = false;
424	mutex_enter(&nfsd_lock);
425	TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
426		nd = LIST_FIRST(&slp->ns_tq);
427		if (nd != NULL) {
428			if (nd->nd_time <= cur_usec) {
429				nfsrv_wakenfsd_locked(slp);
430			}
431			more = true;
432		}
433	}
434	mutex_exit(&nfsd_lock);
435	return more;
436}
437
438/*
439 * Search for a sleeping nfsd and wake it up.
440 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
441 * running nfsds will go look for the work in the nfssvc_sock list.
442 */
443static void
444nfsrv_wakenfsd_locked(struct nfssvc_sock *slp)
445{
446	struct nfsd *nd;
447
448	KASSERT(mutex_owned(&nfsd_lock));
449
450	if ((slp->ns_flags & SLP_VALID) == 0)
451		return;
452	if (slp->ns_gflags & SLP_G_DOREC)
453		return;
454	nd = SLIST_FIRST(&nfsd_idle_head);
455	if (nd) {
456		SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
457		if (nd->nfsd_slp)
458			panic("nfsd wakeup");
459		slp->ns_sref++;
460		KASSERT(slp->ns_sref > 0);
461		nd->nfsd_slp = slp;
462		cv_signal(&nd->nfsd_cv);
463	} else {
464		slp->ns_gflags |= SLP_G_DOREC;
465		nfsd_head_flag |= NFSD_CHECKSLP;
466		TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
467	}
468}
469
470void
471nfsrv_wakenfsd(struct nfssvc_sock *slp)
472{
473
474	mutex_enter(&nfsd_lock);
475	nfsrv_wakenfsd_locked(slp);
476	mutex_exit(&nfsd_lock);
477}
478
479int
480nfsdsock_sendreply(struct nfssvc_sock *slp, struct nfsrv_descript *nd)
481{
482	int error;
483
484	if (nd->nd_mrep != NULL) {
485		m_freem(nd->nd_mrep);
486		nd->nd_mrep = NULL;
487	}
488
489	mutex_enter(&slp->ns_lock);
490	if ((slp->ns_flags & SLP_SENDING) != 0) {
491		SIMPLEQ_INSERT_TAIL(&slp->ns_sendq, nd, nd_sendq);
492		mutex_exit(&slp->ns_lock);
493		return 0;
494	}
495	KASSERT(SIMPLEQ_EMPTY(&slp->ns_sendq));
496	slp->ns_flags |= SLP_SENDING;
497	mutex_exit(&slp->ns_lock);
498
499again:
500	error = nfs_send(slp->ns_so, nd->nd_nam2, nd->nd_mreq, NULL, curlwp);
501	if (nd->nd_nam2) {
502		m_free(nd->nd_nam2);
503	}
504	nfsdreq_free(nd);
505
506	mutex_enter(&slp->ns_lock);
507	KASSERT((slp->ns_flags & SLP_SENDING) != 0);
508	nd = SIMPLEQ_FIRST(&slp->ns_sendq);
509	if (nd != NULL) {
510		SIMPLEQ_REMOVE_HEAD(&slp->ns_sendq, nd_sendq);
511		mutex_exit(&slp->ns_lock);
512		goto again;
513	}
514	slp->ns_flags &= ~SLP_SENDING;
515	mutex_exit(&slp->ns_lock);
516
517	return error;
518}
519
520void
521nfsdsock_setbits(struct nfssvc_sock *slp, int bits)
522{
523
524	mutex_enter(&slp->ns_alock);
525	slp->ns_aflags |= bits;
526	mutex_exit(&slp->ns_alock);
527}
528
529void
530nfsdsock_clearbits(struct nfssvc_sock *slp, int bits)
531{
532
533	mutex_enter(&slp->ns_alock);
534	slp->ns_aflags &= ~bits;
535	mutex_exit(&slp->ns_alock);
536}
537
538bool
539nfsdsock_testbits(struct nfssvc_sock *slp, int bits)
540{
541
542	return (slp->ns_aflags & bits);
543}
544