icl_proxy.c revision 256281
/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/iscsi/icl_proxy.c 255570 2013-09-14 15:29:06Z trasz $
 */
/*-
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * sendfile(2) and related extensions:
 * Copyright (c) 1998, David Greenman. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
 */

/*
 * iSCSI Common Layer, kernel proxy part.
 */

#ifdef ICL_KERNEL_PROXY

#include <sys/param.h>
#include <sys/capability.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
#include <sys/systm.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/types.h>
#include <rdma/rdma_cm.h>

#include "icl.h"

/* Verbosity: > 0 enables ICL_WARN output, > 1 also enables ICL_DEBUG. */
static int debug = 1;

/*
 * Both logging macros are wrapped in a genuine do { } while (0) so that each
 * expands to exactly one statement and cannot capture a following "else".
 */
#define	ICL_DEBUG(X, ...)						\
	do {								\
		if (debug > 1) {					\
			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
		}							\
	} while (0)

#define	ICL_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy");

#ifdef ICL_RDMA
static int	icl_conn_connect_rdma(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa);
static int	icl_listen_add_rdma(struct icl_listen *il, int domain, int socktype, int protocol,
    struct sockaddr *sa);
#endif /* ICL_RDMA */

/*
 * Create a socket, optionally bind it to from_sa, connect it to to_sa, wait
 * for the connection attempt to finish and hand the socket over to the
 * connection via icl_conn_handoff_sock().
 *
 * Returns 0 on success or an errno value; on any failure the socket is
 * closed before returning.
 */
static int
icl_conn_connect_tcp(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct socket *so;
	int error;

	error = socreate(domain, &so, socktype, protocol,
	    curthread->td_ucred, curthread);
	if (error != 0)
		return (error);

	if (from_sa != NULL) {
		error = sobind(so, from_sa, curthread);
		if (error != 0) {
			soclose(so);
			return (error);
		}
	}

	error = soconnect(so, to_sa, curthread);
	if (error != 0) {
		soclose(so);
		return (error);
	}

	/* Sleep until the connect completes, fails, or the sleep is broken. */
	SOCK_LOCK(so);
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "icl_connect", 0);
		if (error != 0)
			break;
	}
	if (error == 0) {
		/* No sleep error; pick up (and clear) the socket's own error. */
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);

	if (error != 0) {
		soclose(so);
		return (error);
	}

	error = icl_conn_handoff_sock(ic, so);
	if (error != 0)
		soclose(so);

	return (error);
}

/*
 * Connect "ic" to to_sa, over RDMA when "rdma" is true and over TCP
 * otherwise.  Returns EOPNOTSUPP for RDMA when ICL_RDMA is not defined.
 */
int
icl_conn_connect(struct icl_conn *ic, bool rdma, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	if (rdma) {
#ifdef ICL_RDMA
		return (icl_conn_connect_rdma(ic, domain, socktype, protocol, from_sa, to_sa));
#else
		ICL_DEBUG("RDMA not supported");
		return (EOPNOTSUPP);
#endif
	}

	return (icl_conn_connect_tcp(ic, domain, socktype, protocol, from_sa, to_sa));
}

/*
 * Allocate an empty listener.  accept_cb is stored and later invoked by the
 * accept threads for each accepted socket.
 */
struct icl_listen *
icl_listen_new(void (*accept_cb)(struct socket *))
{
	struct icl_listen *il;

	il = malloc(sizeof(*il), M_ICL_PROXY, M_ZERO | M_WAITOK);
	TAILQ_INIT(&il->il_sockets);
	sx_init(&il->il_lock, "icl_listen");
	il->il_accept = accept_cb;

	return (il);
}

/*
 * Tear down a listener: for every listening socket, ask its accept thread to
 * terminate and wait until it has, then close the socket and free its
 * bookkeeping; finally destroy the lock and free the listener itself.
 */
void
icl_listen_free(struct icl_listen *il)
{
	struct icl_listen_sock *ils;

	sx_xlock(&il->il_lock);
	while (!TAILQ_EMPTY(&il->il_sockets)) {
		ils = TAILQ_FIRST(&il->il_sockets);
		while (ils->ils_running) {
			ICL_DEBUG("waiting for accept thread to terminate");
			sx_xunlock(&il->il_lock);
			ils->ils_disconnecting = true;
			/* The accept thread sleeps on the socket's so_timeo. */
			wakeup(&ils->ils_socket->so_timeo);
			pause("icl_unlisten", 1 * hz);
			sx_xlock(&il->il_lock);
		}

		TAILQ_REMOVE(&il->il_sockets, ils, ils_next);
		soclose(ils->ils_socket);
		free(ils, M_ICL_PROXY);
	}
	sx_xunlock(&il->il_lock);

	/* The lock lives inside "il"; destroy it before releasing the memory. */
	sx_destroy(&il->il_lock);
	free(il, M_ICL_PROXY);
}

/*
 * XXX: Doing accept in a separate thread in each socket might not be the best way
 * 	to do stuff, but it's pretty clean and debuggable - and you probably won't
 * 	have hundreds of listening sockets anyway.
 */
/*
 * Body of the per-socket accept thread.  "arg" is the struct icl_listen_sock
 * describing the listening socket.  Each completed connection is dequeued
 * from the listening socket and passed to the listener's il_accept callback.
 * The thread exits once ils_disconnecting is observed, clearing ils_running
 * on its way out.
 */
static void
icl_accept_thread(void *arg)
{
	struct icl_listen_sock *ils;
	struct socket *head, *so;
	struct sockaddr *sa;
	int error;

	ils = arg;
	head = ils->ils_socket;

	ils->ils_running = true;

	for (;;) {
		ACCEPT_LOCK();
		while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 &&
		    !ils->ils_disconnecting) {
			if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
				head->so_error = ECONNABORTED;
				break;
			}
			error = msleep(&head->so_timeo, &accept_mtx,
			    PSOCK | PCATCH, "accept", 0);
			if (error != 0) {
				/*
				 * Keep accept_mtx held: the loop condition and
				 * the next msleep() both require it.
				 */
				ICL_WARN("msleep failed with error %d", error);
			}
		}

		/*
		 * Checked after the wait loop rather than only after a wakeup,
		 * so that a termination request raised while this thread was
		 * not sleeping is honoured instead of falling through to an
		 * empty completion queue.
		 */
		if (ils->ils_disconnecting) {
			ACCEPT_UNLOCK();
			ICL_DEBUG("terminating");
			ils->ils_running = false;
			kthread_exit();
			return;
		}

		if (head->so_error) {
			error = head->so_error;
			head->so_error = 0;
			ACCEPT_UNLOCK();
			ICL_WARN("socket error %d", error);
			continue;
		}
		so = TAILQ_FIRST(&head->so_comp);
		KASSERT(so != NULL, ("NULL so"));
		KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
		KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

		/*
		 * Before changing the flags on the socket, we have to bump the
		 * reference count.  Otherwise, if the protocol calls sofree(),
		 * the socket will be released due to a zero refcount.
		 */
		SOCK_LOCK(so);			/* soref() and so_state update */
		soref(so);			/* file descriptor reference */

		TAILQ_REMOVE(&head->so_comp, so, so_list);
		head->so_qlen--;
		so->so_state |= (head->so_state & SS_NBIO);
		so->so_qstate &= ~SQ_COMP;
		so->so_head = NULL;

		SOCK_UNLOCK(so);
		ACCEPT_UNLOCK();

		sa = NULL;
		error = soaccept(so, &sa);
		/*
		 * The peer address is not passed on to the callback, so
		 * release it on success as well as on failure.
		 */
		free(sa, M_SONAME);
		if (error != 0) {
			ICL_WARN("soaccept error %d", error);
			soclose(so);
			/* The socket is gone; do not hand it to the callback. */
			continue;
		}

		(ils->ils_listen->il_accept)(so);
	}
}

/*
 * Create a socket with SO_REUSEADDR set, bind it to "sa", start listening on
 * it, start an accept thread for it and register it with listener "il".
 *
 * Returns 0 on success or an errno value; on failure the socket is closed.
 */
static int
icl_listen_add_tcp(struct icl_listen *il, int domain, int socktype, int protocol,
    struct sockaddr *sa)
{
	struct icl_listen_sock *ils;
	struct socket *so;
	struct sockopt sopt;
	int error, one = 1;

	error = socreate(domain, &so, socktype, protocol,
	    curthread->td_ucred, curthread);
	if (error != 0) {
		ICL_WARN("socreate failed with error %d", error);
		return (error);
	}

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_REUSEADDR;
	sopt.sopt_val = &one;
	sopt.sopt_valsize = sizeof(one);
	sopt.sopt_td = NULL;
	error = sosetopt(so, &sopt);
	if (error != 0) {
		ICL_WARN("failed to set SO_REUSEADDR with error %d", error);
		soclose(so);
		return (error);
	}

	error = sobind(so, sa, curthread);
	if (error != 0) {
		ICL_WARN("sobind failed with error %d", error);
		soclose(so);
		return (error);
	}

	error = solisten(so, -1, curthread);
	if (error != 0) {
		ICL_WARN("solisten failed with error %d", error);
		soclose(so);
		return (error);
	}

	ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK);
	ils->ils_listen = il;
	ils->ils_socket = so;
	/*
	 * Mark the accept thread as running before it is created, so that
	 * icl_listen_free() cannot free "ils" in the window between
	 * kthread_add() returning and the new thread first being scheduled.
	 */
	ils->ils_running = true;

	error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc");
	if (error != 0) {
		ICL_WARN("kthread_add failed with error %d", error);
		soclose(so);
		free(ils, M_ICL_PROXY);

		return (error);
	}

	sx_xlock(&il->il_lock);
	TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next);
	sx_xunlock(&il->il_lock);

	return (0);
}

/*
 * Add a listening endpoint for "sa" to listener "il", over RDMA when "rdma"
 * is true and over TCP otherwise.  Returns EOPNOTSUPP for RDMA when ICL_RDMA
 * is not defined.
 */
int
icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype, int protocol,
    struct sockaddr *sa)
{

	if (rdma) {
#ifdef ICL_RDMA
		return (icl_listen_add_rdma(il, domain, socktype, protocol, sa));
#else
		ICL_DEBUG("RDMA not supported");
		return (EOPNOTSUPP);
#endif
	}

	return (icl_listen_add_tcp(il, domain, socktype, protocol, sa));
}

/*
 * Remove a single listening endpoint.  Not implemented: always returns
 * EOPNOTSUPP and uses neither argument.
 */
int
icl_listen_remove(struct icl_listen *il, struct sockaddr *sa)
{

	/*
	 * XXX
	 */

	return (EOPNOTSUPP);
}

#endif /* ICL_KERNEL_PROXY */