svc_vc.c revision 261055
127008Sbrian/*	$NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $	*/
227008Sbrian
327008Sbrian/*-
427747Sbrian * Copyright (c) 2009, Sun Microsystems, Inc.
527008Sbrian * All rights reserved.
627008Sbrian *
727008Sbrian * Redistribution and use in source and binary forms, with or without
827008Sbrian * modification, are permitted provided that the following conditions are met:
927008Sbrian * - Redistributions of source code must retain the above copyright notice,
1027008Sbrian *   this list of conditions and the following disclaimer.
1127008Sbrian * - Redistributions in binary form must reproduce the above copyright notice,
1227008Sbrian *   this list of conditions and the following disclaimer in the documentation
1327008Sbrian *   and/or other materials provided with the distribution.
1427008Sbrian * - Neither the name of Sun Microsystems, Inc. nor the names of its
1527008Sbrian *   contributors may be used to endorse or promote products derived
1627008Sbrian *   from this software without specific prior written permission.
1727008Sbrian *
1831006Sbrian * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
1927008Sbrian * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2027008Sbrian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2127008Sbrian * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
2227008Sbrian * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2327008Sbrian * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2427008Sbrian * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2527008Sbrian * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2627008Sbrian * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2727008Sbrian * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2827008Sbrian * POSSIBILITY OF SUCH DAMAGE.
2927008Sbrian */
3027008Sbrian
3127008Sbrian#if defined(LIBC_SCCS) && !defined(lint)
3227008Sbrianstatic char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro";
3327008Sbrianstatic char *sccsid = "@(#)svc_tcp.c	2.2 88/08/01 4.0 RPCSRC";
3427008Sbrian#endif
3527008Sbrian#include <sys/cdefs.h>
3627008Sbrian__FBSDID("$FreeBSD: stable/10/sys/rpc/svc_vc.c 261055 2014-01-22 23:55:25Z mav $");
3727008Sbrian
3827008Sbrian/*
3927008Sbrian * svc_vc.c, Server side for Connection Oriented based RPC.
4027008Sbrian *
4127008Sbrian * Actually implements two flavors of transporter -
 * a tcp rendezvouser (a listener and connection establisher)
4327008Sbrian * and a record/tcp stream.
4427008Sbrian */
4527008Sbrian
4627008Sbrian#include <sys/param.h>
4727008Sbrian#include <sys/lock.h>
4827008Sbrian#include <sys/kernel.h>
4927008Sbrian#include <sys/malloc.h>
5027008Sbrian#include <sys/mbuf.h>
5127008Sbrian#include <sys/mutex.h>
5227008Sbrian#include <sys/proc.h>
5327008Sbrian#include <sys/protosw.h>
5427008Sbrian#include <sys/queue.h>
5527008Sbrian#include <sys/socket.h>
5627008Sbrian#include <sys/socketvar.h>
5727008Sbrian#include <sys/sx.h>
5827008Sbrian#include <sys/systm.h>
5927008Sbrian#include <sys/uio.h>
6027008Sbrian
6127008Sbrian#include <net/vnet.h>
6227008Sbrian
6327008Sbrian#include <netinet/tcp.h>
6427008Sbrian
6527008Sbrian#include <rpc/rpc.h>
6627008Sbrian
6727008Sbrian#include <rpc/krpc.h>
6827008Sbrian#include <rpc/rpc_com.h>
6927008Sbrian
7027008Sbrian#include <security/mac/mac_framework.h>
7127008Sbrian
7227008Sbrianstatic bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
7327008Sbrian    struct sockaddr **, struct mbuf **);
7427008Sbrianstatic enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
7527008Sbrianstatic void svc_vc_rendezvous_destroy(SVCXPRT *);
7627008Sbrianstatic bool_t svc_vc_null(void);
7727008Sbrianstatic void svc_vc_destroy(SVCXPRT *);
7827008Sbrianstatic enum xprt_stat svc_vc_stat(SVCXPRT *);
7927008Sbrianstatic bool_t svc_vc_ack(SVCXPRT *, uint32_t *);
8027008Sbrianstatic bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
8127008Sbrian    struct sockaddr **, struct mbuf **);
8227008Sbrianstatic bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
8327008Sbrian    struct sockaddr *, struct mbuf *, uint32_t *seq);
8427008Sbrianstatic bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
8527008Sbrianstatic bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
8627008Sbrian    void *in);
8727008Sbrianstatic void svc_vc_backchannel_destroy(SVCXPRT *);
8827008Sbrianstatic enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
8927008Sbrianstatic bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
9027008Sbrian    struct sockaddr **, struct mbuf **);
9127008Sbrianstatic bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
9227008Sbrian    struct sockaddr *, struct mbuf *, uint32_t *);
9327008Sbrianstatic bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
9427008Sbrian    void *in);
9527008Sbrianstatic SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
9627008Sbrian    struct sockaddr *raddr);
9727008Sbrianstatic int svc_vc_accept(struct socket *head, struct socket **sop);
9827008Sbrianstatic int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
9927008Sbrian
10027008Sbrianstatic struct xp_ops svc_vc_rendezvous_ops = {
10127008Sbrian	.xp_recv =	svc_vc_rendezvous_recv,
10227008Sbrian	.xp_stat =	svc_vc_rendezvous_stat,
10327008Sbrian	.xp_reply =	(bool_t (*)(SVCXPRT *, struct rpc_msg *,
10427008Sbrian		struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null,
10527008Sbrian	.xp_destroy =	svc_vc_rendezvous_destroy,
10627008Sbrian	.xp_control =	svc_vc_rendezvous_control
10727008Sbrian};
10827008Sbrian
10927008Sbrianstatic struct xp_ops svc_vc_ops = {
11027008Sbrian	.xp_recv =	svc_vc_recv,
11127008Sbrian	.xp_stat =	svc_vc_stat,
11227008Sbrian	.xp_ack =	svc_vc_ack,
11327008Sbrian	.xp_reply =	svc_vc_reply,
11427008Sbrian	.xp_destroy =	svc_vc_destroy,
11527008Sbrian	.xp_control =	svc_vc_control
11627008Sbrian};
11727008Sbrian
11827008Sbrianstatic struct xp_ops svc_vc_backchannel_ops = {
11927008Sbrian	.xp_recv =	svc_vc_backchannel_recv,
12027008Sbrian	.xp_stat =	svc_vc_backchannel_stat,
12127008Sbrian	.xp_reply =	svc_vc_backchannel_reply,
12227008Sbrian	.xp_destroy =	svc_vc_backchannel_destroy,
12327008Sbrian	.xp_control =	svc_vc_backchannel_control
12427008Sbrian};
12527008Sbrian
12627008Sbrian/*
12727008Sbrian * Usage:
12827008Sbrian *	xprt = svc_vc_create(sock, send_buf_size, recv_buf_size);
12927008Sbrian *
13027008Sbrian * Creates, registers, and returns a (rpc) tcp based transporter.
13127008Sbrian * Once *xprt is initialized, it is registered as a transporter
13227008Sbrian * see (svc.h, xprt_register).  This routine returns
13327008Sbrian * a NULL if a problem occurred.
13427008Sbrian *
13527008Sbrian * The filedescriptor passed in is expected to refer to a bound, but
13627008Sbrian * not yet connected socket.
13727008Sbrian *
13827008Sbrian * Since streams do buffered io similar to stdio, the caller can specify
13927008Sbrian * how big the send and receive buffers are via the second and third parms;
14027008Sbrian * 0 => use the system default.
14127008Sbrian */
14227008SbrianSVCXPRT *
14327008Sbriansvc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
14427008Sbrian    size_t recvsize)
14527008Sbrian{
14627008Sbrian	SVCXPRT *xprt;
14727008Sbrian	struct sockaddr* sa;
14827008Sbrian	int error;
14927008Sbrian
15027008Sbrian	SOCK_LOCK(so);
15127008Sbrian	if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
15227008Sbrian		SOCK_UNLOCK(so);
15331006Sbrian		error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
15427008Sbrian		if (error)
15527008Sbrian			return (NULL);
15627008Sbrian		xprt = svc_vc_create_conn(pool, so, sa);
15727008Sbrian		free(sa, M_SONAME);
15827008Sbrian		return (xprt);
15927008Sbrian	}
16027008Sbrian	SOCK_UNLOCK(so);
16127008Sbrian
16227008Sbrian	xprt = svc_xprt_alloc();
16327008Sbrian	sx_init(&xprt->xp_lock, "xprt->xp_lock");
16427008Sbrian	xprt->xp_pool = pool;
16527008Sbrian	xprt->xp_socket = so;
16627008Sbrian	xprt->xp_p1 = NULL;
16727008Sbrian	xprt->xp_p2 = NULL;
16827008Sbrian	xprt->xp_ops = &svc_vc_rendezvous_ops;
16927008Sbrian
17027008Sbrian	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
17127008Sbrian	if (error) {
17227008Sbrian		goto cleanup_svc_vc_create;
17327347Sbrian	}
17427747Sbrian
17527347Sbrian	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
17627347Sbrian	free(sa, M_SONAME);
17727347Sbrian
17827347Sbrian	xprt_register(xprt);
17927347Sbrian
18027347Sbrian	solisten(so, SOMAXCONN, curthread);
18127347Sbrian
18227347Sbrian	SOCKBUF_LOCK(&so->so_rcv);
18327747Sbrian	xprt->xp_upcallset = 1;
18427347Sbrian	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
18527008Sbrian	SOCKBUF_UNLOCK(&so->so_rcv);
18627008Sbrian
18727747Sbrian	return (xprt);
18827008Sbriancleanup_svc_vc_create:
18927747Sbrian	if (xprt) {
19027747Sbrian		sx_destroy(&xprt->xp_lock);
19127747Sbrian		svc_xprt_free(xprt);
19227747Sbrian	}
19327747Sbrian	return (NULL);
19427747Sbrian}
19527347Sbrian
19627008Sbrian/*
 * Create a new transport for a socket obtained via soaccept().
19827747Sbrian */
19927747SbrianSVCXPRT *
20027747Sbriansvc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
20127347Sbrian{
20227347Sbrian	SVCXPRT *xprt = NULL;
20327008Sbrian	struct cf_conn *cd = NULL;
20427347Sbrian	struct sockaddr* sa = NULL;
20527347Sbrian	struct sockopt opt;
20627347Sbrian	int one = 1;
20727347Sbrian	int error;
20827008Sbrian
20927008Sbrian	bzero(&opt, sizeof(struct sockopt));
21027008Sbrian	opt.sopt_dir = SOPT_SET;
21127008Sbrian	opt.sopt_level = SOL_SOCKET;
21227008Sbrian	opt.sopt_name = SO_KEEPALIVE;
21327008Sbrian	opt.sopt_val = &one;
21427008Sbrian	opt.sopt_valsize = sizeof(one);
21527008Sbrian	error = sosetopt(so, &opt);
21627008Sbrian	if (error) {
21727008Sbrian		return (NULL);
21827008Sbrian	}
21927008Sbrian
22027008Sbrian	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
22127008Sbrian		bzero(&opt, sizeof(struct sockopt));
22227008Sbrian		opt.sopt_dir = SOPT_SET;
22327008Sbrian		opt.sopt_level = IPPROTO_TCP;
22427008Sbrian		opt.sopt_name = TCP_NODELAY;
22527008Sbrian		opt.sopt_val = &one;
22627008Sbrian		opt.sopt_valsize = sizeof(one);
22727008Sbrian		error = sosetopt(so, &opt);
22827008Sbrian		if (error) {
22927008Sbrian			return (NULL);
23027008Sbrian		}
23127008Sbrian	}
23227008Sbrian
23327008Sbrian	cd = mem_alloc(sizeof(*cd));
23427008Sbrian	cd->strm_stat = XPRT_IDLE;
23527008Sbrian
23627008Sbrian	xprt = svc_xprt_alloc();
23727008Sbrian	sx_init(&xprt->xp_lock, "xprt->xp_lock");
23827008Sbrian	xprt->xp_pool = pool;
23927008Sbrian	xprt->xp_socket = so;
24027008Sbrian	xprt->xp_p1 = cd;
24127008Sbrian	xprt->xp_p2 = NULL;
24227008Sbrian	xprt->xp_ops = &svc_vc_ops;
24327008Sbrian
24427008Sbrian	/*
24527008Sbrian	 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
24627008Sbrian	 * has a 5 minute timer, server has a 6 minute timer.
24727008Sbrian	 */
24827008Sbrian	xprt->xp_idletimeout = 6 * 60;
24927008Sbrian
25027008Sbrian	memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
25127008Sbrian
25227008Sbrian	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
25327008Sbrian	if (error)
25427008Sbrian		goto cleanup_svc_vc_create;
25527008Sbrian
25627008Sbrian	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
25727008Sbrian	free(sa, M_SONAME);
25827008Sbrian
25927008Sbrian	xprt_register(xprt);
26027008Sbrian
26127008Sbrian	SOCKBUF_LOCK(&so->so_rcv);
26227008Sbrian	xprt->xp_upcallset = 1;
26327008Sbrian	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
26431006Sbrian	SOCKBUF_UNLOCK(&so->so_rcv);
26531006Sbrian
26631006Sbrian	/*
26731006Sbrian	 * Throw the transport into the active list in case it already
26831006Sbrian	 * has some data buffered.
26931006Sbrian	 */
27031006Sbrian	sx_xlock(&xprt->xp_lock);
27131006Sbrian	xprt_active(xprt);
27231006Sbrian	sx_xunlock(&xprt->xp_lock);
27331006Sbrian
27431006Sbrian	return (xprt);
27531006Sbriancleanup_svc_vc_create:
27631006Sbrian	if (xprt) {
27731006Sbrian		sx_destroy(&xprt->xp_lock);
27831006Sbrian		svc_xprt_free(xprt);
27931006Sbrian	}
28031006Sbrian	if (cd)
28131006Sbrian		mem_free(cd, sizeof(*cd));
28231006Sbrian	return (NULL);
28331006Sbrian}
28431006Sbrian
28531006Sbrian/*
28631006Sbrian * Create a new transport for a backchannel on a clnt_vc socket.
28731006Sbrian */
28831006SbrianSVCXPRT *
28931006Sbriansvc_vc_create_backchannel(SVCPOOL *pool)
29031006Sbrian{
29131006Sbrian	SVCXPRT *xprt = NULL;
29231006Sbrian	struct cf_conn *cd = NULL;
29331006Sbrian
29431006Sbrian	cd = mem_alloc(sizeof(*cd));
29531006Sbrian	cd->strm_stat = XPRT_IDLE;
29631006Sbrian
29731006Sbrian	xprt = svc_xprt_alloc();
29831006Sbrian	sx_init(&xprt->xp_lock, "xprt->xp_lock");
29931006Sbrian	xprt->xp_pool = pool;
30031006Sbrian	xprt->xp_socket = NULL;
30127008Sbrian	xprt->xp_p1 = cd;
30231006Sbrian	xprt->xp_p2 = NULL;
30331006Sbrian	xprt->xp_ops = &svc_vc_backchannel_ops;
30427008Sbrian	return (xprt);
30527008Sbrian}
30627008Sbrian
30727008Sbrian/*
30827008Sbrian * This does all of the accept except the final call to soaccept. The
30927008Sbrian * caller will call soaccept after dropping its locks (soaccept may
31027008Sbrian * call malloc).
31127008Sbrian */
31227008Sbrianint
31327008Sbriansvc_vc_accept(struct socket *head, struct socket **sop)
31427008Sbrian{
315	int error = 0;
316	struct socket *so;
317
318	if ((head->so_options & SO_ACCEPTCONN) == 0) {
319		error = EINVAL;
320		goto done;
321	}
322#ifdef MAC
323	error = mac_socket_check_accept(curthread->td_ucred, head);
324	if (error != 0)
325		goto done;
326#endif
327	ACCEPT_LOCK();
328	if (TAILQ_EMPTY(&head->so_comp)) {
329		ACCEPT_UNLOCK();
330		error = EWOULDBLOCK;
331		goto done;
332	}
333	so = TAILQ_FIRST(&head->so_comp);
334	KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
335	KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
336
337	/*
338	 * Before changing the flags on the socket, we have to bump the
339	 * reference count.  Otherwise, if the protocol calls sofree(),
340	 * the socket will be released due to a zero refcount.
341	 * XXX might not need soref() since this is simpler than kern_accept.
342	 */
343	SOCK_LOCK(so);			/* soref() and so_state update */
344	soref(so);			/* file descriptor reference */
345
346	TAILQ_REMOVE(&head->so_comp, so, so_list);
347	head->so_qlen--;
348	so->so_state |= (head->so_state & SS_NBIO);
349	so->so_qstate &= ~SQ_COMP;
350	so->so_head = NULL;
351
352	SOCK_UNLOCK(so);
353	ACCEPT_UNLOCK();
354
355	*sop = so;
356
357	/* connection has been removed from the listen queue */
358	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
359done:
360	return (error);
361}
362
363/*ARGSUSED*/
364static bool_t
365svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
366    struct sockaddr **addrp, struct mbuf **mp)
367{
368	struct socket *so = NULL;
369	struct sockaddr *sa = NULL;
370	int error;
371	SVCXPRT *new_xprt;
372
373	/*
374	 * The socket upcall calls xprt_active() which will eventually
375	 * cause the server to call us here. We attempt to accept a
376	 * connection from the socket and turn it into a new
377	 * transport. If the accept fails, we have drained all pending
378	 * connections so we call xprt_inactive().
379	 */
380	sx_xlock(&xprt->xp_lock);
381
382	error = svc_vc_accept(xprt->xp_socket, &so);
383
384	if (error == EWOULDBLOCK) {
385		/*
386		 * We must re-test for new connections after taking
387		 * the lock to protect us in the case where a new
388		 * connection arrives after our call to accept fails
389		 * with EWOULDBLOCK.
390		 */
391		ACCEPT_LOCK();
392		if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
393			xprt_inactive_self(xprt);
394		ACCEPT_UNLOCK();
395		sx_xunlock(&xprt->xp_lock);
396		return (FALSE);
397	}
398
399	if (error) {
400		SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
401		if (xprt->xp_upcallset) {
402			xprt->xp_upcallset = 0;
403			soupcall_clear(xprt->xp_socket, SO_RCV);
404		}
405		SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
406		xprt_inactive_self(xprt);
407		sx_xunlock(&xprt->xp_lock);
408		return (FALSE);
409	}
410
411	sx_xunlock(&xprt->xp_lock);
412
413	sa = 0;
414	error = soaccept(so, &sa);
415
416	if (error) {
417		/*
418		 * XXX not sure if I need to call sofree or soclose here.
419		 */
420		if (sa)
421			free(sa, M_SONAME);
422		return (FALSE);
423	}
424
425	/*
426	 * svc_vc_create_conn will call xprt_register - we don't need
427	 * to do anything with the new connection except derefence it.
428	 */
429	new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa);
430	if (!new_xprt) {
431		soclose(so);
432	} else {
433		SVC_RELEASE(new_xprt);
434	}
435
436	free(sa, M_SONAME);
437
438	return (FALSE); /* there is never an rpc msg to be processed */
439}
440
441/*ARGSUSED*/
442static enum xprt_stat
443svc_vc_rendezvous_stat(SVCXPRT *xprt)
444{
445
446	return (XPRT_IDLE);
447}
448
449static void
450svc_vc_destroy_common(SVCXPRT *xprt)
451{
452	SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
453	if (xprt->xp_upcallset) {
454		xprt->xp_upcallset = 0;
455		soupcall_clear(xprt->xp_socket, SO_RCV);
456	}
457	SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
458
459	if (xprt->xp_socket)
460		(void)soclose(xprt->xp_socket);
461
462	if (xprt->xp_netid)
463		(void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
464	svc_xprt_free(xprt);
465}
466
467static void
468svc_vc_rendezvous_destroy(SVCXPRT *xprt)
469{
470
471	svc_vc_destroy_common(xprt);
472}
473
474static void
475svc_vc_destroy(SVCXPRT *xprt)
476{
477	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
478
479	svc_vc_destroy_common(xprt);
480
481	if (cd->mreq)
482		m_freem(cd->mreq);
483	if (cd->mpending)
484		m_freem(cd->mpending);
485	mem_free(cd, sizeof(*cd));
486}
487
488static void
489svc_vc_backchannel_destroy(SVCXPRT *xprt)
490{
491	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
492	struct mbuf *m, *m2;
493
494	svc_xprt_free(xprt);
495	m = cd->mreq;
496	while (m != NULL) {
497		m2 = m;
498		m = m->m_nextpkt;
499		m_freem(m2);
500	}
501	mem_free(cd, sizeof(*cd));
502}
503
504/*ARGSUSED*/
505static bool_t
506svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
507{
508	return (FALSE);
509}
510
511static bool_t
512svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
513{
514
515	return (FALSE);
516}
517
518static bool_t
519svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
520{
521
522	return (FALSE);
523}
524
525static enum xprt_stat
526svc_vc_stat(SVCXPRT *xprt)
527{
528	struct cf_conn *cd;
529
530	cd = (struct cf_conn *)(xprt->xp_p1);
531
532	if (cd->strm_stat == XPRT_DIED)
533		return (XPRT_DIED);
534
535	if (cd->mreq != NULL && cd->resid == 0 && cd->eor)
536		return (XPRT_MOREREQS);
537
538	if (soreadable(xprt->xp_socket))
539		return (XPRT_MOREREQS);
540
541	return (XPRT_IDLE);
542}
543
544static bool_t
545svc_vc_ack(SVCXPRT *xprt, uint32_t *ack)
546{
547
548	*ack = atomic_load_acq_32(&xprt->xp_snt_cnt);
549	*ack -= xprt->xp_socket->so_snd.sb_cc;
550	return (TRUE);
551}
552
553static enum xprt_stat
554svc_vc_backchannel_stat(SVCXPRT *xprt)
555{
556	struct cf_conn *cd;
557
558	cd = (struct cf_conn *)(xprt->xp_p1);
559
560	if (cd->mreq != NULL)
561		return (XPRT_MOREREQS);
562
563	return (XPRT_IDLE);
564}
565
566/*
567 * If we have an mbuf chain in cd->mpending, try to parse a record from it,
568 * leaving the result in cd->mreq. If we don't have a complete record, leave
569 * the partial result in cd->mreq and try to read more from the socket.
570 */
571static int
572svc_vc_process_pending(SVCXPRT *xprt)
573{
574	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
575	struct socket *so = xprt->xp_socket;
576	struct mbuf *m;
577
578	/*
579	 * If cd->resid is non-zero, we have part of the
580	 * record already, otherwise we are expecting a record
581	 * marker.
582	 */
583	if (!cd->resid && cd->mpending) {
584		/*
585		 * See if there is enough data buffered to
586		 * make up a record marker. Make sure we can
587		 * handle the case where the record marker is
588		 * split across more than one mbuf.
589		 */
590		size_t n = 0;
591		uint32_t header;
592
593		m = cd->mpending;
594		while (n < sizeof(uint32_t) && m) {
595			n += m->m_len;
596			m = m->m_next;
597		}
598		if (n < sizeof(uint32_t)) {
599			so->so_rcv.sb_lowat = sizeof(uint32_t) - n;
600			return (FALSE);
601		}
602		m_copydata(cd->mpending, 0, sizeof(header),
603		    (char *)&header);
604		header = ntohl(header);
605		cd->eor = (header & 0x80000000) != 0;
606		cd->resid = header & 0x7fffffff;
607		m_adj(cd->mpending, sizeof(uint32_t));
608	}
609
610	/*
611	 * Start pulling off mbufs from cd->mpending
612	 * until we either have a complete record or
613	 * we run out of data. We use m_split to pull
614	 * data - it will pull as much as possible and
615	 * split the last mbuf if necessary.
616	 */
617	while (cd->mpending && cd->resid) {
618		m = cd->mpending;
619		if (cd->mpending->m_next
620		    || cd->mpending->m_len > cd->resid)
621			cd->mpending = m_split(cd->mpending,
622			    cd->resid, M_WAITOK);
623		else
624			cd->mpending = NULL;
625		if (cd->mreq)
626			m_last(cd->mreq)->m_next = m;
627		else
628			cd->mreq = m;
629		while (m) {
630			cd->resid -= m->m_len;
631			m = m->m_next;
632		}
633	}
634
635	/*
636	 * Block receive upcalls if we have more data pending,
637	 * otherwise report our need.
638	 */
639	if (cd->mpending)
640		so->so_rcv.sb_lowat = INT_MAX;
641	else
642		so->so_rcv.sb_lowat =
643		    imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2));
644	return (TRUE);
645}
646
647static bool_t
648svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
649    struct sockaddr **addrp, struct mbuf **mp)
650{
651	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
652	struct uio uio;
653	struct mbuf *m;
654	struct socket* so = xprt->xp_socket;
655	XDR xdrs;
656	int error, rcvflag;
657
658	/*
659	 * Serialise access to the socket and our own record parsing
660	 * state.
661	 */
662	sx_xlock(&xprt->xp_lock);
663
664	for (;;) {
665		/* If we have no request ready, check pending queue. */
666		while (cd->mpending &&
667		    (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) {
668			if (!svc_vc_process_pending(xprt))
669				break;
670		}
671
672		/* Process and return complete request in cd->mreq. */
673		if (cd->mreq != NULL && cd->resid == 0 && cd->eor) {
674
675			xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
676			cd->mreq = NULL;
677
678			/* Check for next request in a pending queue. */
679			svc_vc_process_pending(xprt);
680			if (cd->mreq == NULL || cd->resid != 0) {
681				SOCKBUF_LOCK(&so->so_rcv);
682				if (!soreadable(so))
683					xprt_inactive_self(xprt);
684				SOCKBUF_UNLOCK(&so->so_rcv);
685			}
686
687			sx_xunlock(&xprt->xp_lock);
688
689			if (! xdr_callmsg(&xdrs, msg)) {
690				XDR_DESTROY(&xdrs);
691				return (FALSE);
692			}
693
694			*addrp = NULL;
695			*mp = xdrmbuf_getall(&xdrs);
696			XDR_DESTROY(&xdrs);
697
698			return (TRUE);
699		}
700
701		/*
702		 * The socket upcall calls xprt_active() which will eventually
703		 * cause the server to call us here. We attempt to
704		 * read as much as possible from the socket and put
705		 * the result in cd->mpending. If the read fails,
706		 * we have drained both cd->mpending and the socket so
707		 * we can call xprt_inactive().
708		 */
709		uio.uio_resid = 1000000000;
710		uio.uio_td = curthread;
711		m = NULL;
712		rcvflag = MSG_DONTWAIT;
713		error = soreceive(so, NULL, &uio, &m, NULL, &rcvflag);
714
715		if (error == EWOULDBLOCK) {
716			/*
717			 * We must re-test for readability after
718			 * taking the lock to protect us in the case
719			 * where a new packet arrives on the socket
720			 * after our call to soreceive fails with
721			 * EWOULDBLOCK.
722			 */
723			SOCKBUF_LOCK(&so->so_rcv);
724			if (!soreadable(so))
725				xprt_inactive_self(xprt);
726			SOCKBUF_UNLOCK(&so->so_rcv);
727			sx_xunlock(&xprt->xp_lock);
728			return (FALSE);
729		}
730
731		if (error) {
732			SOCKBUF_LOCK(&so->so_rcv);
733			if (xprt->xp_upcallset) {
734				xprt->xp_upcallset = 0;
735				soupcall_clear(so, SO_RCV);
736			}
737			SOCKBUF_UNLOCK(&so->so_rcv);
738			xprt_inactive_self(xprt);
739			cd->strm_stat = XPRT_DIED;
740			sx_xunlock(&xprt->xp_lock);
741			return (FALSE);
742		}
743
744		if (!m) {
745			/*
746			 * EOF - the other end has closed the socket.
747			 */
748			xprt_inactive_self(xprt);
749			cd->strm_stat = XPRT_DIED;
750			sx_xunlock(&xprt->xp_lock);
751			return (FALSE);
752		}
753
754		if (cd->mpending)
755			m_last(cd->mpending)->m_next = m;
756		else
757			cd->mpending = m;
758	}
759}
760
761static bool_t
762svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
763    struct sockaddr **addrp, struct mbuf **mp)
764{
765	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
766	struct ct_data *ct;
767	struct mbuf *m;
768	XDR xdrs;
769
770	sx_xlock(&xprt->xp_lock);
771	ct = (struct ct_data *)xprt->xp_p2;
772	if (ct == NULL) {
773		sx_xunlock(&xprt->xp_lock);
774		return (FALSE);
775	}
776	mtx_lock(&ct->ct_lock);
777	m = cd->mreq;
778	if (m == NULL) {
779		xprt_inactive_self(xprt);
780		mtx_unlock(&ct->ct_lock);
781		sx_xunlock(&xprt->xp_lock);
782		return (FALSE);
783	}
784	cd->mreq = m->m_nextpkt;
785	mtx_unlock(&ct->ct_lock);
786	sx_xunlock(&xprt->xp_lock);
787
788	xdrmbuf_create(&xdrs, m, XDR_DECODE);
789	if (! xdr_callmsg(&xdrs, msg)) {
790		XDR_DESTROY(&xdrs);
791		return (FALSE);
792	}
793	*addrp = NULL;
794	*mp = xdrmbuf_getall(&xdrs);
795	XDR_DESTROY(&xdrs);
796	return (TRUE);
797}
798
799static bool_t
800svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
801    struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
802{
803	XDR xdrs;
804	struct mbuf *mrep;
805	bool_t stat = TRUE;
806	int error, len;
807
808	/*
809	 * Leave space for record mark.
810	 */
811	mrep = m_gethdr(M_WAITOK, MT_DATA);
812	mrep->m_data += sizeof(uint32_t);
813
814	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
815
816	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
817	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
818		if (!xdr_replymsg(&xdrs, msg))
819			stat = FALSE;
820		else
821			xdrmbuf_append(&xdrs, m);
822	} else {
823		stat = xdr_replymsg(&xdrs, msg);
824	}
825
826	if (stat) {
827		m_fixhdr(mrep);
828
829		/*
830		 * Prepend a record marker containing the reply length.
831		 */
832		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
833		len = mrep->m_pkthdr.len;
834		*mtod(mrep, uint32_t *) =
835			htonl(0x80000000 | (len - sizeof(uint32_t)));
836		atomic_add_acq_32(&xprt->xp_snd_cnt, len);
837		error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
838		    0, curthread);
839		if (!error) {
840			atomic_add_rel_32(&xprt->xp_snt_cnt, len);
841			if (seq)
842				*seq = xprt->xp_snd_cnt;
843			stat = TRUE;
844		} else
845			atomic_subtract_32(&xprt->xp_snd_cnt, len);
846	} else {
847		m_freem(mrep);
848	}
849
850	XDR_DESTROY(&xdrs);
851	xprt->xp_p2 = NULL;
852
853	return (stat);
854}
855
856static bool_t
857svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
858    struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
859{
860	struct ct_data *ct;
861	XDR xdrs;
862	struct mbuf *mrep;
863	bool_t stat = TRUE;
864	int error;
865
866	/*
867	 * Leave space for record mark.
868	 */
869	mrep = m_gethdr(M_WAITOK, MT_DATA);
870	mrep->m_data += sizeof(uint32_t);
871
872	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
873
874	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
875	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
876		if (!xdr_replymsg(&xdrs, msg))
877			stat = FALSE;
878		else
879			xdrmbuf_append(&xdrs, m);
880	} else {
881		stat = xdr_replymsg(&xdrs, msg);
882	}
883
884	if (stat) {
885		m_fixhdr(mrep);
886
887		/*
888		 * Prepend a record marker containing the reply length.
889		 */
890		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
891		*mtod(mrep, uint32_t *) =
892			htonl(0x80000000 | (mrep->m_pkthdr.len
893				- sizeof(uint32_t)));
894		sx_xlock(&xprt->xp_lock);
895		ct = (struct ct_data *)xprt->xp_p2;
896		if (ct != NULL)
897			error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
898			    0, curthread);
899		else
900			error = EPIPE;
901		sx_xunlock(&xprt->xp_lock);
902		if (!error) {
903			stat = TRUE;
904		}
905	} else {
906		m_freem(mrep);
907	}
908
909	XDR_DESTROY(&xdrs);
910
911	return (stat);
912}
913
914static bool_t
915svc_vc_null()
916{
917
918	return (FALSE);
919}
920
921static int
922svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
923{
924	SVCXPRT *xprt = (SVCXPRT *) arg;
925
926	if (soreadable(xprt->xp_socket))
927		xprt_active(xprt);
928	return (SU_OK);
929}
930
931#if 0
932/*
933 * Get the effective UID of the sending process. Used by rpcbind, keyserv
934 * and rpc.yppasswdd on AF_LOCAL.
935 */
936int
937__rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
938	int sock, ret;
939	gid_t egid;
940	uid_t euid;
941	struct sockaddr *sa;
942
943	sock = transp->xp_fd;
944	sa = (struct sockaddr *)transp->xp_rtaddr;
945	if (sa->sa_family == AF_LOCAL) {
946		ret = getpeereid(sock, &euid, &egid);
947		if (ret == 0)
948			*uid = euid;
949		return (ret);
950	} else
951		return (-1);
952}
953#endif
954