1255570Strasz/*-
2255570Strasz * Copyright (c) 2012 The FreeBSD Foundation
3255570Strasz * All rights reserved.
4255570Strasz *
5255570Strasz * This software was developed by Edward Tomasz Napierala under sponsorship
6255570Strasz * from the FreeBSD Foundation.
7255570Strasz *
8255570Strasz * Redistribution and use in source and binary forms, with or without
9255570Strasz * modification, are permitted provided that the following conditions
10255570Strasz * are met:
11255570Strasz * 1. Redistributions of source code must retain the above copyright
12255570Strasz *    notice, this list of conditions and the following disclaimer.
13255570Strasz * 2. Redistributions in binary form must reproduce the above copyright
14255570Strasz *    notice, this list of conditions and the following disclaimer in the
15255570Strasz *    documentation and/or other materials provided with the distribution.
16255570Strasz *
17255570Strasz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18255570Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19255570Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20255570Strasz * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21255570Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22255570Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23255570Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24255570Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25255570Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26255570Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27255570Strasz * SUCH DAMAGE.
28255570Strasz *
29255570Strasz * $FreeBSD$
30255570Strasz */
31255570Strasz/*-
32255570Strasz * Copyright (c) 1982, 1986, 1989, 1990, 1993
33255570Strasz *	The Regents of the University of California.  All rights reserved.
34255570Strasz *
35255570Strasz * sendfile(2) and related extensions:
36255570Strasz * Copyright (c) 1998, David Greenman. All rights reserved.
37255570Strasz *
38255570Strasz * Redistribution and use in source and binary forms, with or without
39255570Strasz * modification, are permitted provided that the following conditions
40255570Strasz * are met:
41255570Strasz * 1. Redistributions of source code must retain the above copyright
42255570Strasz *    notice, this list of conditions and the following disclaimer.
43255570Strasz * 2. Redistributions in binary form must reproduce the above copyright
44255570Strasz *    notice, this list of conditions and the following disclaimer in the
45255570Strasz *    documentation and/or other materials provided with the distribution.
46255570Strasz * 4. Neither the name of the University nor the names of its contributors
47255570Strasz *    may be used to endorse or promote products derived from this software
48255570Strasz *    without specific prior written permission.
49255570Strasz *
50255570Strasz * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51255570Strasz * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52255570Strasz * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53255570Strasz * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54255570Strasz * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55255570Strasz * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56255570Strasz * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57255570Strasz * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58255570Strasz * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59255570Strasz * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60255570Strasz * SUCH DAMAGE.
61255570Strasz *
62255570Strasz *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
63255570Strasz */
64255570Strasz
65255570Strasz/*
66255570Strasz * iSCSI Common Layer, kernel proxy part.
67255570Strasz */
68255570Strasz
69255570Strasz#ifdef ICL_KERNEL_PROXY
70255570Strasz
71255570Strasz#include <sys/param.h>
72255570Strasz#include <sys/capability.h>
73255570Strasz#include <sys/condvar.h>
74255570Strasz#include <sys/conf.h>
75255570Strasz#include <sys/kernel.h>
76255570Strasz#include <sys/kthread.h>
77255570Strasz#include <sys/malloc.h>
78255570Strasz#include <sys/proc.h>
79255570Strasz#include <sys/socket.h>
80255570Strasz#include <sys/socketvar.h>
81255570Strasz#include <sys/sx.h>
82255570Strasz#include <sys/systm.h>
83255570Strasz#include <netinet/in.h>
84255570Strasz#include <netinet/tcp.h>
85255570Strasz#include <linux/types.h>
86255570Strasz#include <rdma/rdma_cm.h>
87255570Strasz
88255570Strasz#include "icl.h"
89255570Strasz
/*
 * Verbosity of this file's console output: 0 prints nothing, values above 0
 * enable ICL_WARN(), and values above 1 additionally enable ICL_DEBUG().
 */
static int debug = 1;

/*
 * Print a debugging message, prefixed with the name of the calling function,
 * when debug > 1.  X must be a string literal, as it is pasted into the
 * format string.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is exactly one statement and can safely be used as the
 * unbraced body of an if/else.
 */
#define	ICL_DEBUG(X, ...)					\
	do {							\
		if (debug > 1) {				\
			printf("%s: " X "\n",			\
			    __func__, ## __VA_ARGS__);		\
		}						\
	} while (0)

/*
 * Print a warning, prefixed with "WARNING: " and the name of the calling
 * function, when debug > 0.  Same usage rules as ICL_DEBUG().
 */
#define	ICL_WARN(X, ...)					\
	do {							\
		if (debug > 0) {				\
			printf("WARNING: %s: " X "\n",		\
			    __func__, ## __VA_ARGS__);		\
		}						\
	} while (0)
102255570Strasz
/* Malloc type under which this file allocates and frees its structures. */
static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy");

#ifdef ICL_RDMA
/*
 * RDMA counterparts of icl_conn_connect_tcp() and icl_listen_add_tcp().
 * They are only declared and called when ICL_RDMA is defined; their
 * definitions are not part of this portion of the file.
 */
static int	icl_conn_connect_rdma(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa);
static int	icl_listen_add_rdma(struct icl_listen *il, int domain, int socktype, int protocol,
    struct sockaddr *sa);
#endif /* ICL_RDMA */
111255570Strasz
112255570Straszstatic int
113255570Straszicl_conn_connect_tcp(struct icl_conn *ic, int domain, int socktype,
114255570Strasz    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
115255570Strasz{
116255570Strasz	struct socket *so;
117255570Strasz	int error;
118255570Strasz	int interrupted = 0;
119255570Strasz
120255570Strasz	error = socreate(domain, &so, socktype, protocol,
121255570Strasz	    curthread->td_ucred, curthread);
122255570Strasz	if (error != 0)
123255570Strasz		return (error);
124255570Strasz
125255570Strasz	if (from_sa != NULL) {
126255570Strasz		error = sobind(so, from_sa, curthread);
127255570Strasz		if (error != 0) {
128255570Strasz			soclose(so);
129255570Strasz			return (error);
130255570Strasz		}
131255570Strasz	}
132255570Strasz
133255570Strasz	error = soconnect(so, to_sa, curthread);
134255570Strasz	if (error != 0) {
135255570Strasz		soclose(so);
136255570Strasz		return (error);
137255570Strasz	}
138255570Strasz
139255570Strasz	SOCK_LOCK(so);
140255570Strasz	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
141255570Strasz		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
142255570Strasz		    "icl_connect", 0);
143255570Strasz		if (error) {
144255570Strasz			if (error == EINTR || error == ERESTART)
145255570Strasz				interrupted = 1;
146255570Strasz			break;
147255570Strasz		}
148255570Strasz	}
149255570Strasz	if (error == 0) {
150255570Strasz		error = so->so_error;
151255570Strasz		so->so_error = 0;
152255570Strasz	}
153255570Strasz	SOCK_UNLOCK(so);
154255570Strasz
155255570Strasz	if (error != 0) {
156255570Strasz		soclose(so);
157255570Strasz		return (error);
158255570Strasz	}
159255570Strasz
160255570Strasz	error = icl_conn_handoff_sock(ic, so);
161255570Strasz	if (error != 0)
162255570Strasz		soclose(so);
163255570Strasz
164255570Strasz	return (error);
165255570Strasz}
166255570Strasz
/*
 * Connect ic to to_sa, optionally binding to from_sa, over the transport
 * selected by the rdma flag.
 *
 * A false rdma selects icl_conn_connect_tcp().  A true rdma selects
 * icl_conn_connect_rdma() when the file is built with ICL_RDMA; otherwise
 * the request fails with EOPNOTSUPP.
 *
 * Returns 0 on success or an errno value.
 */
int
icl_conn_connect(struct icl_conn *ic, bool rdma, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	if (!rdma) {
		return (icl_conn_connect_tcp(ic, domain, socktype, protocol,
		    from_sa, to_sa));
	}

#ifdef ICL_RDMA
	return (icl_conn_connect_rdma(ic, domain, socktype, protocol,
	    from_sa, to_sa));
#else
	ICL_DEBUG("RDMA not supported");
	return (EOPNOTSUPP);
#endif
}
183255570Strasz
184255570Straszstruct icl_listen *
185255570Straszicl_listen_new(void (*accept_cb)(struct socket *))
186255570Strasz{
187255570Strasz	struct icl_listen *il;
188255570Strasz
189255570Strasz	il = malloc(sizeof(*il), M_ICL_PROXY, M_ZERO | M_WAITOK);
190255570Strasz	TAILQ_INIT(&il->il_sockets);
191255570Strasz	sx_init(&il->il_lock, "icl_listen");
192255570Strasz	il->il_accept = accept_cb;
193255570Strasz
194255570Strasz	return (il);
195255570Strasz}
196255570Strasz
197255570Straszvoid
198255570Straszicl_listen_free(struct icl_listen *il)
199255570Strasz{
200255570Strasz	struct icl_listen_sock *ils;
201255570Strasz
202255570Strasz	sx_xlock(&il->il_lock);
203255570Strasz	while (!TAILQ_EMPTY(&il->il_sockets)) {
204255570Strasz		ils = TAILQ_FIRST(&il->il_sockets);
205255570Strasz		while (ils->ils_running) {
206255570Strasz			ICL_DEBUG("waiting for accept thread to terminate");
207255570Strasz			sx_xunlock(&il->il_lock);
208255570Strasz			ils->ils_disconnecting = true;
209255570Strasz			wakeup(&ils->ils_socket->so_timeo);
210255570Strasz			pause("icl_unlisten", 1 * hz);
211255570Strasz			sx_xlock(&il->il_lock);
212255570Strasz		}
213255570Strasz
214255570Strasz		TAILQ_REMOVE(&il->il_sockets, ils, ils_next);
215255570Strasz		soclose(ils->ils_socket);
216255570Strasz		free(ils, M_ICL_PROXY);
217255570Strasz	}
218255570Strasz	sx_xunlock(&il->il_lock);
219255570Strasz
220255570Strasz	free(il, M_ICL_PROXY);
221255570Strasz}
222255570Strasz
223255570Strasz/*
224255570Strasz * XXX: Doing accept in a separate thread in each socket might not be the best way
225255570Strasz * 	to do stuff, but it's pretty clean and debuggable - and you probably won't
226255570Strasz * 	have hundreds of listening sockets anyway.
227255570Strasz */
228255570Straszstatic void
229255570Straszicl_accept_thread(void *arg)
230255570Strasz{
231255570Strasz	struct icl_listen_sock *ils;
232255570Strasz	struct socket *head, *so;
233255570Strasz	struct sockaddr *sa;
234255570Strasz	int error;
235255570Strasz
236255570Strasz	ils = arg;
237255570Strasz	head = ils->ils_socket;
238255570Strasz
239255570Strasz	ils->ils_running = true;
240255570Strasz
241255570Strasz	for (;;) {
242255570Strasz		ACCEPT_LOCK();
243255570Strasz		while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
244255570Strasz			if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
245255570Strasz				head->so_error = ECONNABORTED;
246255570Strasz				break;
247255570Strasz			}
248255570Strasz			error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
249255570Strasz			    "accept", 0);
250255570Strasz			if (error) {
251255570Strasz				ACCEPT_UNLOCK();
252255570Strasz				ICL_WARN("msleep failed with error %d", error);
253255570Strasz				continue;
254255570Strasz			}
255255570Strasz			if (ils->ils_disconnecting) {
256255570Strasz				ACCEPT_UNLOCK();
257255570Strasz				ICL_DEBUG("terminating");
258255570Strasz				ils->ils_running = false;
259255570Strasz				kthread_exit();
260255570Strasz				return;
261255570Strasz			}
262255570Strasz		}
263255570Strasz		if (head->so_error) {
264255570Strasz			error = head->so_error;
265255570Strasz			head->so_error = 0;
266255570Strasz			ACCEPT_UNLOCK();
267255570Strasz			ICL_WARN("socket error %d", error);
268255570Strasz			continue;
269255570Strasz		}
270255570Strasz		so = TAILQ_FIRST(&head->so_comp);
271255570Strasz		KASSERT(so != NULL, ("NULL so"));
272255570Strasz		KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
273255570Strasz		KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
274255570Strasz
275255570Strasz		/*
276255570Strasz		 * Before changing the flags on the socket, we have to bump the
277255570Strasz		 * reference count.  Otherwise, if the protocol calls sofree(),
278255570Strasz		 * the socket will be released due to a zero refcount.
279255570Strasz		 */
280255570Strasz		SOCK_LOCK(so);			/* soref() and so_state update */
281255570Strasz		soref(so);			/* file descriptor reference */
282255570Strasz
283255570Strasz		TAILQ_REMOVE(&head->so_comp, so, so_list);
284255570Strasz		head->so_qlen--;
285255570Strasz		so->so_state |= (head->so_state & SS_NBIO);
286255570Strasz		so->so_qstate &= ~SQ_COMP;
287255570Strasz		so->so_head = NULL;
288255570Strasz
289255570Strasz		SOCK_UNLOCK(so);
290255570Strasz		ACCEPT_UNLOCK();
291255570Strasz
292255570Strasz		sa = NULL;
293255570Strasz		error = soaccept(so, &sa);
294255570Strasz		if (error != 0) {
295255570Strasz			ICL_WARN("soaccept error %d", error);
296255570Strasz			if (sa != NULL)
297255570Strasz				free(sa, M_SONAME);
298255570Strasz			soclose(so);
299255570Strasz		}
300255570Strasz
301255570Strasz		(ils->ils_listen->il_accept)(so);
302255570Strasz	}
303255570Strasz}
304255570Strasz
305255570Straszstatic int
306255570Straszicl_listen_add_tcp(struct icl_listen *il, int domain, int socktype, int protocol,
307255570Strasz    struct sockaddr *sa)
308255570Strasz{
309255570Strasz	struct icl_listen_sock *ils;
310255570Strasz	struct socket *so;
311255570Strasz	struct sockopt sopt;
312255570Strasz	int error, one = 1;
313255570Strasz
314255570Strasz	error = socreate(domain, &so, socktype, protocol,
315255570Strasz	    curthread->td_ucred, curthread);
316255570Strasz	if (error != 0) {
317255570Strasz		ICL_WARN("socreate failed with error %d", error);
318255570Strasz		return (error);
319255570Strasz	}
320255570Strasz
321255570Strasz	sopt.sopt_dir = SOPT_SET;
322255570Strasz	sopt.sopt_level = SOL_SOCKET;
323255570Strasz	sopt.sopt_name = SO_REUSEADDR;
324255570Strasz	sopt.sopt_val = &one;
325255570Strasz	sopt.sopt_valsize = sizeof(one);
326255570Strasz	sopt.sopt_td = NULL;
327255570Strasz	error = sosetopt(so, &sopt);
328255570Strasz	if (error != 0) {
329255570Strasz		ICL_WARN("failed to set SO_REUSEADDR with error %d", error);
330255570Strasz		soclose(so);
331255570Strasz		return (error);
332255570Strasz	}
333255570Strasz
334255570Strasz	error = sobind(so, sa, curthread);
335255570Strasz	if (error != 0) {
336255570Strasz		ICL_WARN("sobind failed with error %d", error);
337255570Strasz		soclose(so);
338255570Strasz		return (error);
339255570Strasz	}
340255570Strasz
341255570Strasz	error = solisten(so, -1, curthread);
342255570Strasz	if (error != 0) {
343255570Strasz		ICL_WARN("solisten failed with error %d", error);
344255570Strasz		soclose(so);
345255570Strasz		return (error);
346255570Strasz	}
347255570Strasz
348255570Strasz	ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK);
349255570Strasz	ils->ils_listen = il;
350255570Strasz	ils->ils_socket = so;
351255570Strasz
352255570Strasz	error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc");
353255570Strasz	if (error != 0) {
354255570Strasz		ICL_WARN("kthread_add failed with error %d", error);
355255570Strasz		soclose(so);
356255570Strasz		free(ils, M_ICL_PROXY);
357255570Strasz
358255570Strasz		return (error);
359255570Strasz	}
360255570Strasz
361255570Strasz	sx_xlock(&il->il_lock);
362255570Strasz	TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next);
363255570Strasz	sx_xunlock(&il->il_lock);
364255570Strasz
365255570Strasz	return (0);
366255570Strasz}
367255570Strasz
/*
 * Make listener il start listening on address sa, over the transport
 * selected by the rdma flag.
 *
 * A false rdma selects icl_listen_add_tcp().  A true rdma selects
 * icl_listen_add_rdma() when the file is built with ICL_RDMA; otherwise
 * the request fails with EOPNOTSUPP.
 *
 * Returns 0 on success or an errno value.
 */
int
icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype, int protocol,
    struct sockaddr *sa)
{

	if (!rdma)
		return (icl_listen_add_tcp(il, domain, socktype, protocol, sa));

#ifdef ICL_RDMA
	return (icl_listen_add_rdma(il, domain, socktype, protocol, sa));
#else
	ICL_DEBUG("RDMA not supported");
	return (EOPNOTSUPP);
#endif
}
385255570Strasz
/*
 * Placeholder for removing a listening address from a listener (going by
 * the function's name and arguments).
 *
 * XXX: Not implemented.  Both arguments are ignored and the call always
 *	fails with EOPNOTSUPP.
 */
int
icl_listen_remove(struct icl_listen *il, struct sockaddr *sa)
{

	(void)il;
	(void)sa;

	return (EOPNOTSUPP);
}
396255570Strasz
397255570Strasz#endif /* ICL_KERNEL_PROXY */
398