1/*	$NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $	*/
2
3/*-
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * Copyright (c) 2009, Sun Microsystems, Inc.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met:
11 * - Redistributions of source code must retain the above copyright notice,
12 *   this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright notice,
14 *   this list of conditions and the following disclaimer in the documentation
15 *   and/or other materials provided with the distribution.
16 * - Neither the name of Sun Microsystems, Inc. nor the names of its
17 *   contributors may be used to endorse or promote products derived
18 *   from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#if defined(LIBC_SCCS) && !defined(lint)
34static char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro";
35static char *sccsid = "@(#)svc_tcp.c	2.2 88/08/01 4.0 RPCSRC";
36#endif
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
/*
 * svc_vc.c, Server side for Connection Oriented based RPC.
 *
 * Actually implements two flavors of transporter -
 * a tcp rendezvouser (a listener and connection establisher)
 * and a record/tcp stream.
 */
47
48#include "opt_kern_tls.h"
49
50#include <sys/param.h>
51#include <sys/limits.h>
52#include <sys/lock.h>
53#include <sys/kernel.h>
54#include <sys/ktls.h>
55#include <sys/malloc.h>
56#include <sys/mbuf.h>
57#include <sys/mutex.h>
58#include <sys/proc.h>
59#include <sys/protosw.h>
60#include <sys/queue.h>
61#include <sys/socket.h>
62#include <sys/socketvar.h>
63#include <sys/sx.h>
64#include <sys/systm.h>
65#include <sys/uio.h>
66
67#include <net/vnet.h>
68
69#include <netinet/tcp.h>
70
71#include <rpc/rpc.h>
72#include <rpc/rpcsec_tls.h>
73
74#include <rpc/krpc.h>
75#include <rpc/rpc_com.h>
76
77#include <security/mac/mac_framework.h>
78
79static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
80    struct sockaddr **, struct mbuf **);
81static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
82static void svc_vc_rendezvous_destroy(SVCXPRT *);
83static bool_t svc_vc_null(void);
84static void svc_vc_destroy(SVCXPRT *);
85static enum xprt_stat svc_vc_stat(SVCXPRT *);
86static bool_t svc_vc_ack(SVCXPRT *, uint32_t *);
87static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
88    struct sockaddr **, struct mbuf **);
89static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
90    struct sockaddr *, struct mbuf *, uint32_t *seq);
91static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
92static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
93    void *in);
94static void svc_vc_backchannel_destroy(SVCXPRT *);
95static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
96static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
97    struct sockaddr **, struct mbuf **);
98static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
99    struct sockaddr *, struct mbuf *, uint32_t *);
100static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
101    void *in);
102static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
103    struct sockaddr *raddr);
104static int svc_vc_accept(struct socket *head, struct socket **sop);
105static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
106static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
107
108static struct xp_ops svc_vc_rendezvous_ops = {
109	.xp_recv =	svc_vc_rendezvous_recv,
110	.xp_stat =	svc_vc_rendezvous_stat,
111	.xp_reply =	(bool_t (*)(SVCXPRT *, struct rpc_msg *,
112		struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null,
113	.xp_destroy =	svc_vc_rendezvous_destroy,
114	.xp_control =	svc_vc_rendezvous_control
115};
116
117static struct xp_ops svc_vc_ops = {
118	.xp_recv =	svc_vc_recv,
119	.xp_stat =	svc_vc_stat,
120	.xp_ack =	svc_vc_ack,
121	.xp_reply =	svc_vc_reply,
122	.xp_destroy =	svc_vc_destroy,
123	.xp_control =	svc_vc_control
124};
125
126static struct xp_ops svc_vc_backchannel_ops = {
127	.xp_recv =	svc_vc_backchannel_recv,
128	.xp_stat =	svc_vc_backchannel_stat,
129	.xp_reply =	svc_vc_backchannel_reply,
130	.xp_destroy =	svc_vc_backchannel_destroy,
131	.xp_control =	svc_vc_backchannel_control
132};
133
134/*
135 * Usage:
136 *	xprt = svc_vc_create(sock, send_buf_size, recv_buf_size);
137 *
138 * Creates, registers, and returns a (rpc) tcp based transporter.
139 * Once *xprt is initialized, it is registered as a transporter
140 * see (svc.h, xprt_register).  This routine returns
141 * a NULL if a problem occurred.
142 *
143 * The filedescriptor passed in is expected to refer to a bound, but
144 * not yet connected socket.
145 *
146 * Since streams do buffered io similar to stdio, the caller can specify
147 * how big the send and receive buffers are via the second and third parms;
148 * 0 => use the system default.
149 */
150SVCXPRT *
151svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
152    size_t recvsize)
153{
154	SVCXPRT *xprt;
155	struct sockaddr* sa;
156	int error;
157
158	SOCK_LOCK(so);
159	if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
160		SOCK_UNLOCK(so);
161		CURVNET_SET(so->so_vnet);
162		error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
163		CURVNET_RESTORE();
164		if (error)
165			return (NULL);
166		xprt = svc_vc_create_conn(pool, so, sa);
167		free(sa, M_SONAME);
168		return (xprt);
169	}
170	SOCK_UNLOCK(so);
171
172	xprt = svc_xprt_alloc();
173	sx_init(&xprt->xp_lock, "xprt->xp_lock");
174	xprt->xp_pool = pool;
175	xprt->xp_socket = so;
176	xprt->xp_p1 = NULL;
177	xprt->xp_p2 = NULL;
178	xprt->xp_ops = &svc_vc_rendezvous_ops;
179
180	CURVNET_SET(so->so_vnet);
181	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
182	CURVNET_RESTORE();
183	if (error) {
184		goto cleanup_svc_vc_create;
185	}
186
187	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
188	free(sa, M_SONAME);
189
190	xprt_register(xprt);
191
192	solisten(so, -1, curthread);
193
194	SOLISTEN_LOCK(so);
195	xprt->xp_upcallset = 1;
196	solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
197	SOLISTEN_UNLOCK(so);
198
199	return (xprt);
200
201cleanup_svc_vc_create:
202	sx_destroy(&xprt->xp_lock);
203	svc_xprt_free(xprt);
204
205	return (NULL);
206}
207
208/*
209 * Create a new transport for a socket optained via soaccept().
210 */
211SVCXPRT *
212svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
213{
214	SVCXPRT *xprt;
215	struct cf_conn *cd;
216	struct sockaddr* sa = NULL;
217	struct sockopt opt;
218	int one = 1;
219	int error;
220
221	bzero(&opt, sizeof(struct sockopt));
222	opt.sopt_dir = SOPT_SET;
223	opt.sopt_level = SOL_SOCKET;
224	opt.sopt_name = SO_KEEPALIVE;
225	opt.sopt_val = &one;
226	opt.sopt_valsize = sizeof(one);
227	error = sosetopt(so, &opt);
228	if (error) {
229		return (NULL);
230	}
231
232	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
233		bzero(&opt, sizeof(struct sockopt));
234		opt.sopt_dir = SOPT_SET;
235		opt.sopt_level = IPPROTO_TCP;
236		opt.sopt_name = TCP_NODELAY;
237		opt.sopt_val = &one;
238		opt.sopt_valsize = sizeof(one);
239		error = sosetopt(so, &opt);
240		if (error) {
241			return (NULL);
242		}
243	}
244
245	cd = mem_alloc(sizeof(*cd));
246	cd->strm_stat = XPRT_IDLE;
247
248	xprt = svc_xprt_alloc();
249	sx_init(&xprt->xp_lock, "xprt->xp_lock");
250	xprt->xp_pool = pool;
251	xprt->xp_socket = so;
252	xprt->xp_p1 = cd;
253	xprt->xp_p2 = NULL;
254	xprt->xp_ops = &svc_vc_ops;
255
256	/*
257	 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
258	 * has a 5 minute timer, server has a 6 minute timer.
259	 */
260	xprt->xp_idletimeout = 6 * 60;
261
262	memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
263
264	CURVNET_SET(so->so_vnet);
265	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
266	CURVNET_RESTORE();
267	if (error)
268		goto cleanup_svc_vc_create;
269
270	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
271	free(sa, M_SONAME);
272
273	xprt_register(xprt);
274
275	SOCKBUF_LOCK(&so->so_rcv);
276	xprt->xp_upcallset = 1;
277	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
278	SOCKBUF_UNLOCK(&so->so_rcv);
279
280	/*
281	 * Throw the transport into the active list in case it already
282	 * has some data buffered.
283	 */
284	sx_xlock(&xprt->xp_lock);
285	xprt_active(xprt);
286	sx_xunlock(&xprt->xp_lock);
287
288	return (xprt);
289cleanup_svc_vc_create:
290	sx_destroy(&xprt->xp_lock);
291	svc_xprt_free(xprt);
292	mem_free(cd, sizeof(*cd));
293
294	return (NULL);
295}
296
297/*
298 * Create a new transport for a backchannel on a clnt_vc socket.
299 */
300SVCXPRT *
301svc_vc_create_backchannel(SVCPOOL *pool)
302{
303	SVCXPRT *xprt = NULL;
304	struct cf_conn *cd = NULL;
305
306	cd = mem_alloc(sizeof(*cd));
307	cd->strm_stat = XPRT_IDLE;
308
309	xprt = svc_xprt_alloc();
310	sx_init(&xprt->xp_lock, "xprt->xp_lock");
311	xprt->xp_pool = pool;
312	xprt->xp_socket = NULL;
313	xprt->xp_p1 = cd;
314	xprt->xp_p2 = NULL;
315	xprt->xp_ops = &svc_vc_backchannel_ops;
316	return (xprt);
317}
318
319/*
320 * This does all of the accept except the final call to soaccept. The
321 * caller will call soaccept after dropping its locks (soaccept may
322 * call malloc).
323 */
324int
325svc_vc_accept(struct socket *head, struct socket **sop)
326{
327	struct socket *so;
328	int error = 0;
329	short nbio;
330
331	/* XXXGL: shouldn't that be an assertion? */
332	if (!SOLISTENING(head)) {
333		error = EINVAL;
334		goto done;
335	}
336#ifdef MAC
337	error = mac_socket_check_accept(curthread->td_ucred, head);
338	if (error != 0)
339		goto done;
340#endif
341	/*
342	 * XXXGL: we want non-blocking semantics.  The socket could be a
343	 * socket created by kernel as well as socket shared with userland,
344	 * so we can't be sure about presense of SS_NBIO.  We also shall not
345	 * toggle it on the socket, since that may surprise userland.  So we
346	 * set SS_NBIO only temporarily.
347	 */
348	SOLISTEN_LOCK(head);
349	nbio = head->so_state & SS_NBIO;
350	head->so_state |= SS_NBIO;
351	error = solisten_dequeue(head, &so, 0);
352	head->so_state &= (nbio & ~SS_NBIO);
353	if (error)
354		goto done;
355
356	so->so_state |= nbio;
357	*sop = so;
358
359	/* connection has been removed from the listen queue */
360	KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
361done:
362	return (error);
363}
364
365/*ARGSUSED*/
366static bool_t
367svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
368    struct sockaddr **addrp, struct mbuf **mp)
369{
370	struct socket *so = NULL;
371	struct sockaddr *sa = NULL;
372	int error;
373	SVCXPRT *new_xprt;
374
375	/*
376	 * The socket upcall calls xprt_active() which will eventually
377	 * cause the server to call us here. We attempt to accept a
378	 * connection from the socket and turn it into a new
379	 * transport. If the accept fails, we have drained all pending
380	 * connections so we call xprt_inactive().
381	 */
382	sx_xlock(&xprt->xp_lock);
383
384	error = svc_vc_accept(xprt->xp_socket, &so);
385
386	if (error == EWOULDBLOCK) {
387		/*
388		 * We must re-test for new connections after taking
389		 * the lock to protect us in the case where a new
390		 * connection arrives after our call to accept fails
391		 * with EWOULDBLOCK.
392		 */
393		SOLISTEN_LOCK(xprt->xp_socket);
394		if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
395			xprt_inactive_self(xprt);
396		SOLISTEN_UNLOCK(xprt->xp_socket);
397		sx_xunlock(&xprt->xp_lock);
398		return (FALSE);
399	}
400
401	if (error) {
402		SOLISTEN_LOCK(xprt->xp_socket);
403		if (xprt->xp_upcallset) {
404			xprt->xp_upcallset = 0;
405			soupcall_clear(xprt->xp_socket, SO_RCV);
406		}
407		SOLISTEN_UNLOCK(xprt->xp_socket);
408		xprt_inactive_self(xprt);
409		sx_xunlock(&xprt->xp_lock);
410		return (FALSE);
411	}
412
413	sx_xunlock(&xprt->xp_lock);
414
415	sa = NULL;
416	error = soaccept(so, &sa);
417
418	if (error) {
419		/*
420		 * XXX not sure if I need to call sofree or soclose here.
421		 */
422		if (sa)
423			free(sa, M_SONAME);
424		return (FALSE);
425	}
426
427	/*
428	 * svc_vc_create_conn will call xprt_register - we don't need
429	 * to do anything with the new connection except derefence it.
430	 */
431	new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa);
432	if (!new_xprt) {
433		soclose(so);
434	} else {
435		SVC_RELEASE(new_xprt);
436	}
437
438	free(sa, M_SONAME);
439
440	return (FALSE); /* there is never an rpc msg to be processed */
441}
442
443/*ARGSUSED*/
444static enum xprt_stat
445svc_vc_rendezvous_stat(SVCXPRT *xprt)
446{
447
448	return (XPRT_IDLE);
449}
450
451static void
452svc_vc_destroy_common(SVCXPRT *xprt)
453{
454	enum clnt_stat stat;
455	uint32_t reterr;
456
457	if (xprt->xp_socket) {
458		if ((xprt->xp_tls & (RPCTLS_FLAGS_HANDSHAKE |
459		    RPCTLS_FLAGS_HANDSHFAIL)) != 0) {
460			if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
461				/*
462				 * If the upcall fails, the socket has
463				 * probably been closed via the rpctlssd
464				 * daemon having crashed or been
465				 * restarted, so just ignore returned stat.
466				 */
467				stat = rpctls_srv_disconnect(xprt->xp_sslsec,
468				    xprt->xp_sslusec, xprt->xp_sslrefno,
469				    &reterr);
470			}
471			/* Must sorele() to get rid of reference. */
472			CURVNET_SET(xprt->xp_socket->so_vnet);
473			SOCK_LOCK(xprt->xp_socket);
474			sorele(xprt->xp_socket);
475			CURVNET_RESTORE();
476		} else
477			(void)soclose(xprt->xp_socket);
478	}
479
480	if (xprt->xp_netid)
481		(void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
482	svc_xprt_free(xprt);
483}
484
485static void
486svc_vc_rendezvous_destroy(SVCXPRT *xprt)
487{
488
489	SOLISTEN_LOCK(xprt->xp_socket);
490	if (xprt->xp_upcallset) {
491		xprt->xp_upcallset = 0;
492		solisten_upcall_set(xprt->xp_socket, NULL, NULL);
493	}
494	SOLISTEN_UNLOCK(xprt->xp_socket);
495
496	svc_vc_destroy_common(xprt);
497}
498
499static void
500svc_vc_destroy(SVCXPRT *xprt)
501{
502	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
503
504	SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
505	if (xprt->xp_upcallset) {
506		xprt->xp_upcallset = 0;
507		if (xprt->xp_socket->so_rcv.sb_upcall != NULL)
508			soupcall_clear(xprt->xp_socket, SO_RCV);
509	}
510	SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
511
512	svc_vc_destroy_common(xprt);
513
514	if (cd->mreq)
515		m_freem(cd->mreq);
516	if (cd->mpending)
517		m_freem(cd->mpending);
518	mem_free(cd, sizeof(*cd));
519}
520
521static void
522svc_vc_backchannel_destroy(SVCXPRT *xprt)
523{
524	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
525	struct mbuf *m, *m2;
526
527	svc_xprt_free(xprt);
528	m = cd->mreq;
529	while (m != NULL) {
530		m2 = m;
531		m = m->m_nextpkt;
532		m_freem(m2);
533	}
534	mem_free(cd, sizeof(*cd));
535}
536
537/*ARGSUSED*/
538static bool_t
539svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
540{
541	return (FALSE);
542}
543
544static bool_t
545svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
546{
547
548	return (FALSE);
549}
550
551static bool_t
552svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
553{
554
555	return (FALSE);
556}
557
558static enum xprt_stat
559svc_vc_stat(SVCXPRT *xprt)
560{
561	struct cf_conn *cd;
562
563	cd = (struct cf_conn *)(xprt->xp_p1);
564
565	if (cd->strm_stat == XPRT_DIED)
566		return (XPRT_DIED);
567
568	if (cd->mreq != NULL && cd->resid == 0 && cd->eor)
569		return (XPRT_MOREREQS);
570
571	if (soreadable(xprt->xp_socket))
572		return (XPRT_MOREREQS);
573
574	return (XPRT_IDLE);
575}
576
577static bool_t
578svc_vc_ack(SVCXPRT *xprt, uint32_t *ack)
579{
580
581	*ack = atomic_load_acq_32(&xprt->xp_snt_cnt);
582	*ack -= sbused(&xprt->xp_socket->so_snd);
583	return (TRUE);
584}
585
586static enum xprt_stat
587svc_vc_backchannel_stat(SVCXPRT *xprt)
588{
589	struct cf_conn *cd;
590
591	cd = (struct cf_conn *)(xprt->xp_p1);
592
593	if (cd->mreq != NULL)
594		return (XPRT_MOREREQS);
595
596	return (XPRT_IDLE);
597}
598
599/*
600 * If we have an mbuf chain in cd->mpending, try to parse a record from it,
601 * leaving the result in cd->mreq. If we don't have a complete record, leave
602 * the partial result in cd->mreq and try to read more from the socket.
603 */
604static int
605svc_vc_process_pending(SVCXPRT *xprt)
606{
607	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
608	struct socket *so = xprt->xp_socket;
609	struct mbuf *m;
610
611	/*
612	 * If cd->resid is non-zero, we have part of the
613	 * record already, otherwise we are expecting a record
614	 * marker.
615	 */
616	if (!cd->resid && cd->mpending) {
617		/*
618		 * See if there is enough data buffered to
619		 * make up a record marker. Make sure we can
620		 * handle the case where the record marker is
621		 * split across more than one mbuf.
622		 */
623		size_t n = 0;
624		uint32_t header;
625
626		m = cd->mpending;
627		while (n < sizeof(uint32_t) && m) {
628			n += m->m_len;
629			m = m->m_next;
630		}
631		if (n < sizeof(uint32_t)) {
632			so->so_rcv.sb_lowat = sizeof(uint32_t) - n;
633			return (FALSE);
634		}
635		m_copydata(cd->mpending, 0, sizeof(header),
636		    (char *)&header);
637		header = ntohl(header);
638		cd->eor = (header & 0x80000000) != 0;
639		cd->resid = header & 0x7fffffff;
640		m_adj(cd->mpending, sizeof(uint32_t));
641	}
642
643	/*
644	 * Start pulling off mbufs from cd->mpending
645	 * until we either have a complete record or
646	 * we run out of data. We use m_split to pull
647	 * data - it will pull as much as possible and
648	 * split the last mbuf if necessary.
649	 */
650	while (cd->mpending && cd->resid) {
651		m = cd->mpending;
652		if (cd->mpending->m_next
653		    || cd->mpending->m_len > cd->resid)
654			cd->mpending = m_split(cd->mpending,
655			    cd->resid, M_WAITOK);
656		else
657			cd->mpending = NULL;
658		if (cd->mreq)
659			m_last(cd->mreq)->m_next = m;
660		else
661			cd->mreq = m;
662		while (m) {
663			cd->resid -= m->m_len;
664			m = m->m_next;
665		}
666	}
667
668	/*
669	 * Block receive upcalls if we have more data pending,
670	 * otherwise report our need.
671	 */
672	if (cd->mpending)
673		so->so_rcv.sb_lowat = INT_MAX;
674	else
675		so->so_rcv.sb_lowat =
676		    imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2));
677	return (TRUE);
678}
679
680static bool_t
681svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
682    struct sockaddr **addrp, struct mbuf **mp)
683{
684	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
685	struct uio uio;
686	struct mbuf *m, *ctrl;
687	struct socket* so = xprt->xp_socket;
688	XDR xdrs;
689	int error, rcvflag;
690	uint32_t reterr, xid_plus_direction[2];
691	struct cmsghdr *cmsg;
692	struct tls_get_record tgr;
693	enum clnt_stat ret;
694
695	/*
696	 * Serialise access to the socket and our own record parsing
697	 * state.
698	 */
699	sx_xlock(&xprt->xp_lock);
700
701	for (;;) {
702		/* If we have no request ready, check pending queue. */
703		while (cd->mpending &&
704		    (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) {
705			if (!svc_vc_process_pending(xprt))
706				break;
707		}
708
709		/* Process and return complete request in cd->mreq. */
710		if (cd->mreq != NULL && cd->resid == 0 && cd->eor) {
711
712			/*
713			 * Now, check for a backchannel reply.
714			 * The XID is in the first uint32_t of the reply
715			 * and the message direction is the second one.
716			 */
717			if ((cd->mreq->m_len >= sizeof(xid_plus_direction) ||
718			    m_length(cd->mreq, NULL) >=
719			    sizeof(xid_plus_direction)) &&
720			    xprt->xp_p2 != NULL) {
721				m_copydata(cd->mreq, 0,
722				    sizeof(xid_plus_direction),
723				    (char *)xid_plus_direction);
724				xid_plus_direction[0] =
725				    ntohl(xid_plus_direction[0]);
726				xid_plus_direction[1] =
727				    ntohl(xid_plus_direction[1]);
728				/* Check message direction. */
729				if (xid_plus_direction[1] == REPLY) {
730					clnt_bck_svccall(xprt->xp_p2,
731					    cd->mreq,
732					    xid_plus_direction[0]);
733					cd->mreq = NULL;
734					continue;
735				}
736			}
737
738			xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
739			cd->mreq = NULL;
740
741			/* Check for next request in a pending queue. */
742			svc_vc_process_pending(xprt);
743			if (cd->mreq == NULL || cd->resid != 0) {
744				SOCKBUF_LOCK(&so->so_rcv);
745				if (!soreadable(so))
746					xprt_inactive_self(xprt);
747				SOCKBUF_UNLOCK(&so->so_rcv);
748			}
749
750			sx_xunlock(&xprt->xp_lock);
751
752			if (! xdr_callmsg(&xdrs, msg)) {
753				XDR_DESTROY(&xdrs);
754				return (FALSE);
755			}
756
757			*addrp = NULL;
758			*mp = xdrmbuf_getall(&xdrs);
759			XDR_DESTROY(&xdrs);
760
761			return (TRUE);
762		}
763
764		/*
765		 * If receiving is disabled so that a TLS handshake can be
766		 * done by the rpctlssd daemon, return FALSE here.
767		 */
768		rcvflag = MSG_DONTWAIT;
769		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0)
770			rcvflag |= MSG_TLSAPPDATA;
771tryagain:
772		if (xprt->xp_dontrcv) {
773			sx_xunlock(&xprt->xp_lock);
774			return (FALSE);
775		}
776
777		/*
778		 * The socket upcall calls xprt_active() which will eventually
779		 * cause the server to call us here. We attempt to
780		 * read as much as possible from the socket and put
781		 * the result in cd->mpending. If the read fails,
782		 * we have drained both cd->mpending and the socket so
783		 * we can call xprt_inactive().
784		 */
785		uio.uio_resid = 1000000000;
786		uio.uio_td = curthread;
787		ctrl = m = NULL;
788		error = soreceive(so, NULL, &uio, &m, &ctrl, &rcvflag);
789
790		if (error == EWOULDBLOCK) {
791			/*
792			 * We must re-test for readability after
793			 * taking the lock to protect us in the case
794			 * where a new packet arrives on the socket
795			 * after our call to soreceive fails with
796			 * EWOULDBLOCK.
797			 */
798			SOCKBUF_LOCK(&so->so_rcv);
799			if (!soreadable(so))
800				xprt_inactive_self(xprt);
801			SOCKBUF_UNLOCK(&so->so_rcv);
802			sx_xunlock(&xprt->xp_lock);
803			return (FALSE);
804		}
805
806		/*
807		 * A return of ENXIO indicates that there is a
808		 * non-application data record at the head of the
809		 * socket's receive queue, for TLS connections.
810		 * This record needs to be handled in userland
811		 * via an SSL_read() call, so do an upcall to the daemon.
812		 */
813		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0 &&
814		    error == ENXIO) {
815			/* Disable reception. */
816			xprt->xp_dontrcv = TRUE;
817			sx_xunlock(&xprt->xp_lock);
818			ret = rpctls_srv_handlerecord(xprt->xp_sslsec,
819			    xprt->xp_sslusec, xprt->xp_sslrefno,
820			    &reterr);
821			sx_xlock(&xprt->xp_lock);
822			xprt->xp_dontrcv = FALSE;
823			if (ret != RPC_SUCCESS || reterr != RPCTLSERR_OK) {
824				/*
825				 * All we can do is soreceive() it and
826				 * then toss it.
827				 */
828				rcvflag = MSG_DONTWAIT;
829				goto tryagain;
830			}
831			sx_xunlock(&xprt->xp_lock);
832			xprt_active(xprt);   /* Harmless if already active. */
833			return (FALSE);
834		}
835
836		if (error) {
837			SOCKBUF_LOCK(&so->so_rcv);
838			if (xprt->xp_upcallset) {
839				xprt->xp_upcallset = 0;
840				soupcall_clear(so, SO_RCV);
841			}
842			SOCKBUF_UNLOCK(&so->so_rcv);
843			xprt_inactive_self(xprt);
844			cd->strm_stat = XPRT_DIED;
845			sx_xunlock(&xprt->xp_lock);
846			return (FALSE);
847		}
848
849		if (!m) {
850			/*
851			 * EOF - the other end has closed the socket.
852			 */
853			xprt_inactive_self(xprt);
854			cd->strm_stat = XPRT_DIED;
855			sx_xunlock(&xprt->xp_lock);
856			return (FALSE);
857		}
858
859		/* Process any record header(s). */
860		if (ctrl != NULL) {
861			cmsg = mtod(ctrl, struct cmsghdr *);
862			if (cmsg->cmsg_type == TLS_GET_RECORD &&
863			    cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) {
864				memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
865				/*
866				 * This should have been handled by
867				 * the rpctls_svc_handlerecord()
868				 * upcall.  If not, all we can do is
869				 * toss it away.
870				 */
871				if (tgr.tls_type != TLS_RLTYPE_APP) {
872					m_freem(m);
873					m_free(ctrl);
874					rcvflag = MSG_DONTWAIT | MSG_TLSAPPDATA;
875					goto tryagain;
876				}
877			}
878			m_free(ctrl);
879		}
880
881		if (cd->mpending)
882			m_last(cd->mpending)->m_next = m;
883		else
884			cd->mpending = m;
885	}
886}
887
888static bool_t
889svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
890    struct sockaddr **addrp, struct mbuf **mp)
891{
892	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
893	struct ct_data *ct;
894	struct mbuf *m;
895	XDR xdrs;
896
897	sx_xlock(&xprt->xp_lock);
898	ct = (struct ct_data *)xprt->xp_p2;
899	if (ct == NULL) {
900		sx_xunlock(&xprt->xp_lock);
901		return (FALSE);
902	}
903	mtx_lock(&ct->ct_lock);
904	m = cd->mreq;
905	if (m == NULL) {
906		xprt_inactive_self(xprt);
907		mtx_unlock(&ct->ct_lock);
908		sx_xunlock(&xprt->xp_lock);
909		return (FALSE);
910	}
911	cd->mreq = m->m_nextpkt;
912	mtx_unlock(&ct->ct_lock);
913	sx_xunlock(&xprt->xp_lock);
914
915	xdrmbuf_create(&xdrs, m, XDR_DECODE);
916	if (! xdr_callmsg(&xdrs, msg)) {
917		XDR_DESTROY(&xdrs);
918		return (FALSE);
919	}
920	*addrp = NULL;
921	*mp = xdrmbuf_getall(&xdrs);
922	XDR_DESTROY(&xdrs);
923	return (TRUE);
924}
925
926static bool_t
927svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
928    struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
929{
930	XDR xdrs;
931	struct mbuf *mrep;
932	bool_t stat = TRUE;
933	int error, len, maxextsiz;
934#ifdef KERN_TLS
935	u_int maxlen;
936#endif
937
938	/*
939	 * Leave space for record mark.
940	 */
941	mrep = m_gethdr(M_WAITOK, MT_DATA);
942	mrep->m_data += sizeof(uint32_t);
943
944	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
945
946	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
947	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
948		if (!xdr_replymsg(&xdrs, msg))
949			stat = FALSE;
950		else
951			xdrmbuf_append(&xdrs, m);
952	} else {
953		stat = xdr_replymsg(&xdrs, msg);
954	}
955
956	if (stat) {
957		m_fixhdr(mrep);
958
959		/*
960		 * Prepend a record marker containing the reply length.
961		 */
962		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
963		len = mrep->m_pkthdr.len;
964		*mtod(mrep, uint32_t *) =
965			htonl(0x80000000 | (len - sizeof(uint32_t)));
966
967		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
968		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
969			/*
970			 * Copy the mbuf chain to a chain of
971			 * ext_pgs mbuf(s) as required by KERN_TLS.
972			 */
973			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
974#ifdef KERN_TLS
975			if (rpctls_getinfo(&maxlen, false, false))
976				maxextsiz = min(maxextsiz, maxlen);
977#endif
978			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
979		}
980		atomic_add_32(&xprt->xp_snd_cnt, len);
981		/*
982		 * sosend consumes mreq.
983		 */
984		error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
985		    0, curthread);
986		if (!error) {
987			atomic_add_rel_32(&xprt->xp_snt_cnt, len);
988			if (seq)
989				*seq = xprt->xp_snd_cnt;
990			stat = TRUE;
991		} else
992			atomic_subtract_32(&xprt->xp_snd_cnt, len);
993	} else {
994		m_freem(mrep);
995	}
996
997	XDR_DESTROY(&xdrs);
998
999	return (stat);
1000}
1001
1002static bool_t
1003svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
1004    struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
1005{
1006	struct ct_data *ct;
1007	XDR xdrs;
1008	struct mbuf *mrep;
1009	bool_t stat = TRUE;
1010	int error, maxextsiz;
1011#ifdef KERN_TLS
1012	u_int maxlen;
1013#endif
1014
1015	/*
1016	 * Leave space for record mark.
1017	 */
1018	mrep = m_gethdr(M_WAITOK, MT_DATA);
1019	mrep->m_data += sizeof(uint32_t);
1020
1021	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
1022
1023	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
1024	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
1025		if (!xdr_replymsg(&xdrs, msg))
1026			stat = FALSE;
1027		else
1028			xdrmbuf_append(&xdrs, m);
1029	} else {
1030		stat = xdr_replymsg(&xdrs, msg);
1031	}
1032
1033	if (stat) {
1034		m_fixhdr(mrep);
1035
1036		/*
1037		 * Prepend a record marker containing the reply length.
1038		 */
1039		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
1040		*mtod(mrep, uint32_t *) =
1041			htonl(0x80000000 | (mrep->m_pkthdr.len
1042				- sizeof(uint32_t)));
1043
1044		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
1045		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
1046			/*
1047			 * Copy the mbuf chain to a chain of
1048			 * ext_pgs mbuf(s) as required by KERN_TLS.
1049			 */
1050			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
1051#ifdef KERN_TLS
1052			if (rpctls_getinfo(&maxlen, false, false))
1053				maxextsiz = min(maxextsiz, maxlen);
1054#endif
1055			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
1056		}
1057		sx_xlock(&xprt->xp_lock);
1058		ct = (struct ct_data *)xprt->xp_p2;
1059		if (ct != NULL)
1060			error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
1061			    0, curthread);
1062		else
1063			error = EPIPE;
1064		sx_xunlock(&xprt->xp_lock);
1065		if (!error) {
1066			stat = TRUE;
1067		}
1068	} else {
1069		m_freem(mrep);
1070	}
1071
1072	XDR_DESTROY(&xdrs);
1073
1074	return (stat);
1075}
1076
1077static bool_t
1078svc_vc_null()
1079{
1080
1081	return (FALSE);
1082}
1083
1084static int
1085svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
1086{
1087	SVCXPRT *xprt = (SVCXPRT *) arg;
1088
1089	if (soreadable(xprt->xp_socket))
1090		xprt_active(xprt);
1091	return (SU_OK);
1092}
1093
1094static int
1095svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
1096{
1097	SVCXPRT *xprt = (SVCXPRT *) arg;
1098
1099	if (!TAILQ_EMPTY(&head->sol_comp))
1100		xprt_active(xprt);
1101	return (SU_OK);
1102}
1103
#if 0
/*
 * Get the effective UID of the sending process. Used by rpcbind, keyserv
 * and rpc.yppasswdd on AF_LOCAL.
 *
 * Stores the peer's effective uid in *uid and returns 0 on success.
 * Returns -1 if the transport's remote address is not AF_LOCAL, or the
 * getpeereid() result if that call fails.  (Disabled code.)
 */
int
__rpc_get_local_uid(SVCXPRT *transp, uid_t *uid)
{
	struct sockaddr *sa;
	uid_t euid;
	gid_t egid;
	int ret;

	sa = (struct sockaddr *)transp->xp_rtaddr;
	if (sa->sa_family != AF_LOCAL)
		return (-1);

	ret = getpeereid(transp->xp_fd, &euid, &egid);
	if (ret == 0)
		*uid = euid;
	return (ret);
}
#endif
1127