/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*-
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
 */

/*
 * iSCSI Common Layer, kernel proxy part.
 */

#ifdef ICL_KERNEL_PROXY

#include <sys/param.h>
#include <sys/capability.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
#include <sys/systm.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/types.h>
#include <rdma/rdma_cm.h>

#include "icl.h"

/* Verbosity: > 0 enables ICL_WARN output, > 1 also enables ICL_DEBUG. */
static int debug = 1;

/*
 * Both logging macros are wrapped in do { } while (0) so that they expand
 * to exactly one statement and stay safe inside unbraced if/else bodies.
 */
#define	ICL_DEBUG(X, ...)						\
	do {								\
		if (debug > 1) {					\
			printf("%s: " X "\n",				\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

#define	ICL_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy");

#ifdef ICL_RDMA
static int	icl_conn_connect_rdma(struct icl_conn *ic, int domain,
		    int socktype, int protocol, struct sockaddr *from_sa,
		    struct sockaddr *to_sa);
static int	icl_listen_add_rdma(struct icl_listen *il, int domain,
		    int socktype, int protocol, struct sockaddr *sa);
#endif /* ICL_RDMA */

/*
 * Create a socket, optionally bind it to from_sa, connect it to to_sa,
 * wait for the connection attempt to finish and hand the socket over
 * to the ICL connection.
 *
 * Returns 0 on success or an errno value; on every failure path the
 * socket is closed here.
 */
static int
icl_conn_connect_tcp(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct socket *so;
	int error;

	error = socreate(domain, &so, socktype, protocol,
	    curthread->td_ucred, curthread);
	if (error != 0)
		return (error);

	if (from_sa != NULL) {
		error = sobind(so, from_sa, curthread);
		if (error != 0) {
			soclose(so);
			return (error);
		}
	}

	error = soconnect(so, to_sa, curthread);
	if (error != 0) {
		soclose(so);
		return (error);
	}

	/* Sleep until the connect completes, fails, or the sleep is broken. */
	SOCK_LOCK(so);
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "icl_connect", 0);
		if (error != 0)
			break;
	}
	if (error == 0) {
		/* Pick up and clear the pending socket error, if any. */
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);

	if (error != 0) {
		soclose(so);
		return (error);
	}

	error = icl_conn_handoff_sock(ic, so);
	if (error != 0)
		soclose(so);

	return (error);
}

/*
 * Establish an outgoing connection for ic, over RDMA when requested and
 * compiled in, over TCP otherwise.  Returns EOPNOTSUPP for an RDMA request
 * when ICL_RDMA is not defined.
 */
int
icl_conn_connect(struct icl_conn *ic, bool rdma, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	if (rdma) {
#ifdef ICL_RDMA
		return (icl_conn_connect_rdma(ic, domain, socktype,
		    protocol, from_sa, to_sa));
#else
		ICL_DEBUG("RDMA not supported");
		return (EOPNOTSUPP);
#endif
	}

	return (icl_conn_connect_tcp(ic, domain, socktype, protocol,
	    from_sa, to_sa));
}

/*
 * Allocate an empty listener.  accept_cb is invoked by the accept thread
 * for every accepted socket.
 */
struct icl_listen *
icl_listen_new(void (*accept_cb)(struct socket *))
{
	struct icl_listen *il;

	il = malloc(sizeof(*il), M_ICL_PROXY, M_ZERO | M_WAITOK);
	TAILQ_INIT(&il->il_sockets);
	sx_init(&il->il_lock, "icl_listen");
	il->il_accept = accept_cb;

	return (il);
}

/*
 * Tear down a listener: for every listening socket ask its accept thread
 * to terminate, wait until it has done so, then close the socket and
 * release the bookkeeping.  Finally destroy the lock and free the listener.
 */
void
icl_listen_free(struct icl_listen *il)
{
	struct icl_listen_sock *ils;

	sx_xlock(&il->il_lock);
	while (!TAILQ_EMPTY(&il->il_sockets)) {
		ils = TAILQ_FIRST(&il->il_sockets);
		while (ils->ils_running) {
			ICL_DEBUG("waiting for accept thread to terminate");
			sx_xunlock(&il->il_lock);
			ils->ils_disconnecting = true;
			wakeup(&ils->ils_socket->so_timeo);
			/* Poll once per second until the thread is gone. */
			pause("icl_unlisten", 1 * hz);
			sx_xlock(&il->il_lock);
		}

		TAILQ_REMOVE(&il->il_sockets, ils, ils_next);
		soclose(ils->ils_socket);
		free(ils, M_ICL_PROXY);
	}
	sx_xunlock(&il->il_lock);

	/* Pair the sx_init() done in icl_listen_new(). */
	sx_destroy(&il->il_lock);
	free(il, M_ICL_PROXY);
}

/*
 * XXX: Doing accept in a separate thread in each socket might not be the
 * best way to do stuff, but it's pretty clean and debuggable - and you
 * probably won't have hundreds of listening sockets anyway.
 *
 * Thread body: loops accepting connections on ils->ils_socket and passes
 * each accepted socket to the listener's il_accept callback.  Terminates
 * once ils->ils_disconnecting is set, clearing ils->ils_running on exit.
 */
static void
icl_accept_thread(void *arg)
{
	struct icl_listen_sock *ils;
	struct socket *head, *so;
	struct sockaddr *sa;
	int error;

	ils = arg;
	head = ils->ils_socket;

	ils->ils_running = true;

	for (;;) {
		ACCEPT_LOCK();
		while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 &&
		    ils->ils_disconnecting == false) {
			if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
				head->so_error = ECONNABORTED;
				break;
			}
			error = msleep(&head->so_timeo, &accept_mtx,
			    PSOCK | PCATCH, "accept", 0);
			if (error) {
				/*
				 * Drop the mutex only around the message;
				 * the loop condition and the next msleep()
				 * both require accept_mtx to be held.
				 */
				ACCEPT_UNLOCK();
				ICL_WARN("msleep failed with error %d", error);
				ACCEPT_LOCK();
				continue;
			}
		}

		/*
		 * Checked after the loop so that a disconnect request is
		 * honoured whether it arrived before or during the sleep;
		 * otherwise we could fall through with an empty queue.
		 */
		if (ils->ils_disconnecting) {
			ACCEPT_UNLOCK();
			ICL_DEBUG("terminating");
			ils->ils_running = false;
			kthread_exit();
			return;
		}

		if (head->so_error) {
			error = head->so_error;
			head->so_error = 0;
			ACCEPT_UNLOCK();
			ICL_WARN("socket error %d", error);
			continue;
		}

		so = TAILQ_FIRST(&head->so_comp);
		KASSERT(so != NULL, ("NULL so"));
		KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
		KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

		/*
		 * Before changing the flags on the socket, we have to bump the
		 * reference count.  Otherwise, if the protocol calls sofree(),
		 * the socket will be released due to a zero refcount.
		 */
		SOCK_LOCK(so);			/* soref() and so_state update */
		soref(so);			/* file descriptor reference */

		TAILQ_REMOVE(&head->so_comp, so, so_list);
		head->so_qlen--;
		so->so_state |= (head->so_state & SS_NBIO);
		so->so_qstate &= ~SQ_COMP;
		so->so_head = NULL;

		SOCK_UNLOCK(so);
		ACCEPT_UNLOCK();

		sa = NULL;
		error = soaccept(so, &sa);
		if (error != 0) {
			ICL_WARN("soaccept error %d", error);
			free(sa, M_SONAME);
			soclose(so);
			/* The socket is gone; do not hand it to the callback. */
			continue;
		}

		/*
		 * The callback takes only the socket, so the peer address
		 * returned by soaccept() is released here.
		 */
		free(sa, M_SONAME);

		(ils->ils_listen->il_accept)(so);
	}
}

/*
 * Create a TCP listening socket bound to sa (with SO_REUSEADDR set),
 * start an accept thread for it and register it with the listener.
 *
 * Returns 0 on success or an errno value; on failure the socket is closed.
 */
static int
icl_listen_add_tcp(struct icl_listen *il, int domain, int socktype,
    int protocol, struct sockaddr *sa)
{
	struct icl_listen_sock *ils;
	struct socket *so;
	struct sockopt sopt;
	int error, one = 1;

	error = socreate(domain, &so, socktype, protocol,
	    curthread->td_ucred, curthread);
	if (error != 0) {
		ICL_WARN("socreate failed with error %d", error);
		return (error);
	}

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_REUSEADDR;
	sopt.sopt_val = &one;
	sopt.sopt_valsize = sizeof(one);
	sopt.sopt_td = NULL;
	error = sosetopt(so, &sopt);
	if (error != 0) {
		ICL_WARN("failed to set SO_REUSEADDR with error %d", error);
		soclose(so);
		return (error);
	}

	error = sobind(so, sa, curthread);
	if (error != 0) {
		ICL_WARN("sobind failed with error %d", error);
		soclose(so);
		return (error);
	}

	error = solisten(so, -1, curthread);
	if (error != 0) {
		ICL_WARN("solisten failed with error %d", error);
		soclose(so);
		return (error);
	}

	ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK);
	ils->ils_listen = il;
	ils->ils_socket = so;
	/*
	 * Mark the thread as running before it is created, so that
	 * icl_listen_free() cannot observe ils_running == false and free
	 * ils before the new thread has had a chance to run.
	 */
	ils->ils_running = true;

	error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc");
	if (error != 0) {
		ICL_WARN("kthread_add failed with error %d", error);
		soclose(so);
		free(ils, M_ICL_PROXY);

		return (error);
	}

	sx_xlock(&il->il_lock);
	TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next);
	sx_xunlock(&il->il_lock);

	return (0);
}

/*
 * Add a listening endpoint to il, over RDMA when requested and compiled
 * in, over TCP otherwise.  Returns EOPNOTSUPP for an RDMA request when
 * ICL_RDMA is not defined.
 */
int
icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype,
    int protocol, struct sockaddr *sa)
{

	if (rdma) {
#ifndef ICL_RDMA
		ICL_DEBUG("RDMA not supported");
		return (EOPNOTSUPP);
#else
		return (icl_listen_add_rdma(il, domain, socktype,
		    protocol, sa));
#endif
	}

	return (icl_listen_add_tcp(il, domain, socktype, protocol, sa));
}

/*
 * Remove a single listening endpoint.  Not implemented; always returns
 * EOPNOTSUPP.
 */
int
icl_listen_remove(struct icl_listen *il, struct sockaddr *sa)
{

	/*
	 * XXX
	 */

	return (EOPNOTSUPP);
}

#endif /* ICL_KERNEL_PROXY */