iw_cxgb_cm.c revision 294610
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 294610 2016-01-22 23:33:34Z np $"); 31 32#include "opt_inet.h" 33 34#ifdef TCP_OFFLOAD 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/kernel.h> 38#include <sys/bus.h> 39#include <sys/pciio.h> 40#include <sys/conf.h> 41#include <machine/bus.h> 42#include <machine/resource.h> 43#include <sys/bus_dma.h> 44#include <sys/rman.h> 45#include <sys/ioccom.h> 46#include <sys/mbuf.h> 47#include <sys/rwlock.h> 48#include <sys/linker.h> 49#include <sys/firmware.h> 50#include <sys/socket.h> 51#include <sys/socketvar.h> 52#include <sys/sockio.h> 53#include <sys/smp.h> 54#include <sys/sysctl.h> 55#include <sys/syslog.h> 56#include <sys/queue.h> 57#include <sys/taskqueue.h> 58#include <sys/proc.h> 59#include <sys/uio.h> 60 61#include <net/route.h> 62#include <netinet/in_systm.h> 63#include <netinet/in.h> 64#include <netinet/in_fib.h> 65#include <netinet/in_pcb.h> 66#include <netinet/ip.h> 67#include <netinet/ip_var.h> 68#include <netinet/tcp_var.h> 69#include <netinet/tcp.h> 70#include <netinet/tcpip.h> 71 72#include <rdma/ib_verbs.h> 73#include <linux/idr.h> 74#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h> 75 76#include <cxgb_include.h> 77#include <ulp/tom/cxgb_tom.h> 78#include <ulp/tom/cxgb_toepcb.h> 79#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h> 80#include <rdma/ib_verbs.h> 81#include <linux/idr.h> 82 83#include <ulp/iw_cxgb/iw_cxgb_wr.h> 84#include <ulp/iw_cxgb/iw_cxgb_hal.h> 85#include <ulp/iw_cxgb/iw_cxgb_provider.h> 86#include <ulp/iw_cxgb/iw_cxgb_cm.h> 87#include <ulp/iw_cxgb/iw_cxgb.h> 88 89#ifdef KTR 90static char *states[] = { 91 "idle", 92 "listen", 93 "connecting", 94 "mpa_wait_req", 95 "mpa_req_sent", 96 "mpa_req_rcvd", 97 "mpa_rep_sent", 98 "fpdu_mode", 99 "aborting", 100 "closing", 101 "moribund", 102 "dead", 103 NULL, 104}; 105#endif 106 107SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, 
CTLFLAG_RD, 0, "iw_cxgb driver parameters"); 108 109static int ep_timeout_secs = 60; 110SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0, 111 "CM Endpoint operation timeout in seconds (default=60)"); 112 113static int mpa_rev = 1; 114SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0, 115 "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)"); 116 117static int markers_enabled = 0; 118SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0, 119 "Enable MPA MARKERS (default(0)=disabled)"); 120 121static int crc_enabled = 1; 122SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0, 123 "Enable MPA CRC (default(1)=enabled)"); 124 125static int rcv_win = 256 * 1024; 126SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0, 127 "TCP receive window in bytes (default=256KB)"); 128 129static int snd_win = 32 * 1024; 130SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0, 131 "TCP send window in bytes (default=32KB)"); 132 133static unsigned int nocong = 0; 134SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RWTUN, &nocong, 0, 135 "Turn off congestion control (default=0)"); 136 137static unsigned int cong_flavor = 1; 138SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RWTUN, &cong_flavor, 0, 139 "TCP Congestion control flavor (default=1)"); 140 141static void ep_timeout(void *arg); 142static void connect_reply_upcall(struct iwch_ep *ep, int status); 143static int iwch_so_upcall(struct socket *so, void *arg, int waitflag); 144 145/* 146 * Cruft to offload socket upcalls onto thread. 
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);

/*
 * Arm (or re-arm) the endpoint's MPA/close timer.  The first arm takes an
 * extra reference on the endpoint so it cannot be freed while the callout
 * is outstanding; ep_timeout()/stop_ep_timer() drop that reference.
 */
static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, 1);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

/*
 * Cancel the endpoint timer and release the reference taken by
 * start_ep_timer().  Traces and returns if the timer is not armed.
 */
static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (!callout_pending(&ep->timer)) {
		CTR3(KTR_IW_CXGB, "%s timer stopped when its not running! ep %p state %u\n",
			__func__, ep, ep->com.state);
		return;
	}
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

/*
 * Snapshot the offload state (hw tid, send/receive sequence numbers and
 * emss) from the connection's tcpcb/toepcb into the endpoint.
 * Returns 0 on success or EINVAL if the connection is not offloaded
 * (TF_TOE clear).  Clamps emss to a floor of 128 bytes.
 */
static int
set_tcpinfo(struct iwch_ep *ep)
{
	struct socket *so = ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	int rc = 0;

	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	if ((tp->t_flags & TF_TOE) == 0) {
		rc = EINVAL;
		printf("%s: connection NOT OFFLOADED!\n", __func__);
		goto done;
	}
	toep = tp->t_toe;

	ep->hwtid = toep->tp_tid;
	ep->snd_seq = tp->snd_nxt;
	ep->rcv_seq = tp->rcv_nxt;
	ep->emss = tp->t_maxseg;
	if (ep->emss < 128)
		ep->emss = 128;
done:
	INP_WUNLOCK(inp);
	return (rc);

}

/* Read the endpoint state under its lock. */
static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

/* Set the endpoint state; caller must hold epc->lock. */
static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

/* Set the endpoint state, taking and releasing epc->lock. */
static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

/*
 * Allocate and zero an endpoint of 'size' bytes (an iwch_ep_common or a
 * structure embedding one).  The caller receives the initial reference.
 * Returns NULL on allocation failure.
 */
static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

/*
 * Final free of an endpoint once its last reference is dropped.
 * Asserts the endpoint is no longer on the upcall request list.
 */
void __free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

/*
 * Look up the next hop toward peer_ip in the default FIB.
 * NOTE(review): only peer_ip is consulted; local_ip, the ports and tos are
 * ignored by this implementation.
 */
static int
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
	   __be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
{
	struct in_addr addr;

	addr.s_addr = peer_ip;
	return (fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4));
}

/*
 * Detach the receive upcall from the endpoint's socket and either close it
 * (close != 0) or shut down both directions, then clear epc->so.
 */
static void
close_socket(struct iwch_ep_common *epc, int close)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	soupcall_clear(epc->so, SO_RCV);
	SOCK_UNLOCK(epc->so);
	if (close)
		soclose(epc->so);
	else
		soshutdown(epc->so, SHUT_WR|SHUT_RD);
	epc->so = NULL;
}

/* Half-close the endpoint's socket (send direction only). */
static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

/*
 * Arrange for the connection to be reset: set SO_LINGER with a zero linger
 * time so the subsequent close sends an RST rather than a FIN.
 */
static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}

/*
 * Build and send the MPA start request (key, flags, revision and any
 * private data accumulated in ep->mpa_pkt), then arm the MPA timer and
 * move to MPA_REQ_SENT.  On allocation or send failure the connect is
 * failed upward with -ENOMEM.
 */
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	/*
	 * NOTE(review): m_gethdr(9) takes (how, type); this call passes
	 * (mpalen, M_NOWAIT) -- confirm against the mbuf API in use.
	 */
	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
		(markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		/*
		 * NOTE(review): sosend() normally consumes the mbuf chain
		 * even on error; this m_freem() looks like a double free --
		 * verify against sosend(9).
		 */
		m_freem(m);
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

/*
 * Send an MPA start reply carrying the MPA_REJECT flag plus optional
 * private data.  Returns 0 or -ENOMEM; panics on send failure.
 */
static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

/*
 * Send an accepting MPA start reply (CRC/marker flags negotiated earlier,
 * plus optional private data) and move to MPA_REP_SENT.  Returns the
 * sosend() result.
 */
static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
		(markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
		ep->com.thread);
}

/*
 * Deliver IW_CM_EVENT_CLOSE to the consumer (if a cm_id is still attached)
 * and drop the cm_id reference, detaching the QP from the endpoint.
 */
static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Hard-kill the connection: force an RST, tear down the socket, notify the
 * consumer of the close, mark the endpoint DEAD and drop a reference.
 */
static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com, 0);
	close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
	put_ep(&ep->com);
}

/* Deliver IW_CM_EVENT_DISCONNECT when the peer half-closes. */
static void
peer_close_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

/*
 * Deliver IW_CM_EVENT_CLOSE with status ECONNRESET when the peer aborts,
 * then release the cm_id reference and detach the QP.
 */
static void
peer_abort_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = ECONNRESET;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
		     ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Deliver IW_CM_EVENT_CONNECT_REPLY for an active connect.  Private data
 * is attached for success or ECONNREFUSED.  On a negative status the cm_id
 * reference is dropped and the QP detached.
 * NOTE(review): callers pass both negative (-ECONNRESET, -ETIMEDOUT) and
 * positive (ECONNREFUSED) statuses, and the "status < 0" branch
 * dereferences ep->com.cm_id without the NULL check applied just above --
 * confirm cm_id is always set on those paths.
 */
static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
		     ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
	if (status < 0) {
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Deliver IW_CM_EVENT_CONNECT_REQUEST to the listening (parent) endpoint's
 * consumer.  Takes a reference on the child endpoint unless the parent is
 * already DEAD; always drops the parent reference taken when the child was
 * created.
 */
static void
connect_request_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.private_data_len = ep->plen;
	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	event.provider_data = ep;
	event.so = ep->com.so;
	if (state_read(&ep->parent_ep->com) != DEAD) {
		get_ep(&ep->com);
		ep->parent_ep->com.cm_id->event_handler(
				ep->parent_ep->com.cm_id,
				&event);
	}
	put_ep(&ep->parent_ep->com);
}

static void
established_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

/*
 * Active side: drain the socket, accumulate and validate the peer's MPA
 * start reply in ep->mpa_pkt, and on success negotiate the MPA attributes,
 * bind the QP/TID via an INIT_WR and move the QP to RTS.  Any protocol
 * violation aborts the connection and fails the connect upcall.
 */
static void
process_mpa_reply(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	int err;
	struct mbuf *top, *m;
	int flags = MSG_DONTWAIT;
	struct uio uio;	/* NOTE(review): only uio_resid/uio_td initialized -- confirm soreceive() with non-NULL mp0 needs no more */
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			/* no data yet; re-arm the timer and wait for the next upcall */
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
			__FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * if we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.initiator = 1;
	/*
	 * NOTE(review): due to precedence this parses as
	 * ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0, i.e. CRC is on if
	 * EITHER side wants it -- confirm that is the intended negotiation.
	 */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		/*
		 * NOTE(review): err is still 0 from the successful soreceive
		 * here, so connect_reply_upcall() below reports success even
		 * though the connection is aborted -- verify.
		 */
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	       IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
	       IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (!err)
		goto out;
err:
	abort_connection(ep);
out:
	connect_reply_upcall(ep, err);
	return;
}

/*
 * Passive side: drain the socket, accumulate and validate the peer's MPA
 * start request in ep->mpa_pkt.  If the message is incomplete the timer is
 * re-armed and we wait for more data; a valid request moves the endpoint
 * to MPA_REQ_RCVD and drives the connect-request upcall; any protocol
 * violation aborts the connection.
 */
static void
process_mpa_request(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	int flags = MSG_DONTWAIT;
	struct mbuf *top, *m;
	int err;
	struct uio uio;	/* NOTE(review): only uio_resid/uio_td initialized, as above */
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	m = top;
	do {

		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
			     ep->mpa_pkt_len + m->m_len);
			goto err;
		}


		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;

		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
		     ep->mpa_pkt_len);
		return;
	}
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		goto err;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
		     ep->mpa_pkt_len);
		goto err;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
		     ep->mpa_pkt_len);
		return;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	/* NOTE(review): same OR-then-ternary precedence as in process_mpa_reply(). */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
err:
	abort_connection(ep);
	return;
}

/*
 * Handle a half-close (FIN) from the peer.  State-machine driven: may
 * notify the consumer, move the QP toward CLOSING/IDLE, tear down the
 * socket and/or schedule a local disconnect depending on current state.
 */
static void
process_peer_close(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = IWCH_QP_STATE_CLOSING;
		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
			       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		PANIC_IF(1);
	}
	mtx_unlock(&ep->com.lock);
	if (disconnect)
		iwch_ep_disconnect(ep, 0, M_NOWAIT);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * Handle a connection error (RST or socket error).  Depending on state:
 * stops the timer, fails any pending connect upcall, moves the QP to
 * ERROR, notifies the consumer of the abort, and unless already ABORTING
 * tears down the socket, marks the endpoint DEAD and drops a reference.
 */
static void
process_conn_error(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int ret;

	mtx_lock(&ep->com.lock);
	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_ERROR;
			ret = iwch_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
			if (ret)
				log(LOG_ERR,
				    "%s - qp <- error failed!\n",
				    __FUNCTION__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		mtx_unlock(&ep->com.lock);
		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
			ep->com.so->so_error);
		return;
	default:
		PANIC_IF(1);
		break;
	}

	if (ep->com.state != ABORTING) {
		close_socket(&ep->com, 0);
		__state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
	mtx_unlock(&ep->com.lock);
	return;
}

/*
 * Handle completion of a local close (our FIN acked / close done).
 * CLOSING -> MORIBUND; MORIBUND -> quiesce the QP, tear down the socket,
 * deliver the close upcall, mark DEAD and drop a reference.
 */
static void
process_close_complete(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	PANIC_IF(!ep);

	/* The cm_id may be null if we failed to connect */
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp,
					     ep->com.qp,
					     IWCH_QP_ATTR_NEXT_STATE,
					     &attrs, 1);
		}
		if (ep->parent_ep)
			close_socket(&ep->com, 1);
		else
			close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
		break;
	case DEAD:
	default:
		PANIC_IF(1);
		break;
	}
	mtx_unlock(&ep->com.lock);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcde cqe into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumer consumption.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	uint32_t hash = *((uint32_t *)r + 1);	/* tid is carried in the response descriptor */
	unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (state_read(&ep->com) != FPDU_MODE)
		goto done;

	m_adj(m, sizeof(struct cpl_rdma_terminate));

	CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
	    __func__, tid, ep, m->m_len);

	/* stash the TERM payload in the QP for the consumer to read later */
	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
	ep->com.qp->attr.terminate_msg_len = m->m_len;
	ep->com.qp->attr.is_terminate_local = 0;

done:
	m_freem(m);
	return (0);
}

/*
 * Handle CPL_RDMA_EC_STATUS: a non-zero status means the hardware failed
 * the graceful close, so move the QP to ERROR and abort the connection.
 */
static int
ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_rdma_ec_status *rep = mtod(m, void *);
	unsigned int tid = GET_TID(rep);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (rep->status) {
		struct iwch_qp_attributes attrs;

		CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
		stop_ep_timer(ep);
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			       ep->com.qp,
			       IWCH_QP_ATTR_NEXT_STATE,
			       &attrs, 1);
		abort_connection(ep);
	}

	m_freem(m);
	return (0);
}

/*
 * Endpoint timer expiry (armed by start_ep_timer()).  Times out a stalled
 * MPA exchange or close: fails a pending connect, moves the QP to ERROR if
 * needed, aborts the connection, and drops the timer's endpoint reference.
 */
static void
ep_timeout(void *arg)
{
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	int err = 0;
	int abort = 1;

	mtx_lock(&ep->com.lock);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp)
			err = 1;
		__state_set(&ep->com, ABORTING);
		break;
	default:
		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
			__func__, ep, ep->com.state);
		abort = 0;
	}
	mtx_unlock(&ep->com.lock);
	if (err){
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
	}
	if (abort)
		abort_connection(ep);
	put_ep(&ep->com);
}

/*
 * iw_cm reject entry point: refuse a pending connect request.  With
 * mpa_rev 0 the connection is simply aborted; otherwise an MPA reject
 * message is sent and the socket shut down (3 == SHUT_RDWR).
 * NOTE(review): the local 'err' results are discarded and 0 is always
 * returned (except for a DEAD endpoint).
 */
int
iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
	int err;
	struct iwch_ep *ep = to_ep(cm_id);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	if (state_read(&ep->com) == DEAD) {
		put_ep(&ep->com);
		return (-ECONNRESET);
	}
	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	if (mpa_rev == 0) {
		abort_connection(ep);
	} else {
		err = send_mpa_reject(ep, pdata, pdata_len);
		err = soshutdown(ep->com.so, 3);
	}
	put_ep(&ep->com);
	return 0;
}

/*
 * iw_cm accept entry point: validate ird/ord against device limits, attach
 * the cm_id and QP to the endpoint, move the QP to RTS (binding QP and TID
 * with an INIT_WR) and send the accepting MPA reply.
 */
int
iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err;
	struct iwch_qp_attributes attrs;
	enum
iwch_qp_attr_mask mask; 1181 struct iwch_ep *ep = to_ep(cm_id); 1182 struct iwch_dev *h = to_iwch_dev(cm_id->device); 1183 struct iwch_qp *qp = get_qhp(h, conn_param->qpn); 1184 1185 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1186 if (state_read(&ep->com) == DEAD) { 1187 err = -ECONNRESET; 1188 goto err; 1189 } 1190 1191 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1192 PANIC_IF(!qp); 1193 1194 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || 1195 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { 1196 abort_connection(ep); 1197 err = -EINVAL; 1198 goto err; 1199 } 1200 1201 cm_id->add_ref(cm_id); 1202 ep->com.cm_id = cm_id; 1203 ep->com.qp = qp; 1204 1205 ep->com.rpl_err = 0; 1206 ep->com.rpl_done = 0; 1207 ep->ird = conn_param->ird; 1208 ep->ord = conn_param->ord; 1209 CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); 1210 1211 /* bind QP to EP and move to RTS */ 1212 attrs.mpa_attr = ep->mpa_attr; 1213 attrs.max_ird = ep->ird; 1214 attrs.max_ord = ep->ord; 1215 attrs.llp_stream_handle = ep; 1216 attrs.next_state = IWCH_QP_STATE_RTS; 1217 1218 /* bind QP and TID with INIT_WR */ 1219 mask = IWCH_QP_ATTR_NEXT_STATE | 1220 IWCH_QP_ATTR_LLP_STREAM_HANDLE | 1221 IWCH_QP_ATTR_MPA_ATTR | 1222 IWCH_QP_ATTR_MAX_IRD | 1223 IWCH_QP_ATTR_MAX_ORD; 1224 1225 err = iwch_modify_qp(ep->com.qp->rhp, 1226 ep->com.qp, mask, &attrs, 1); 1227 1228 if (err) 1229 goto err1; 1230 1231 err = send_mpa_reply(ep, conn_param->private_data, 1232 conn_param->private_data_len); 1233 if (err) 1234 goto err1; 1235 state_set(&ep->com, FPDU_MODE); 1236 established_upcall(ep); 1237 put_ep(&ep->com); 1238 return 0; 1239err1: 1240 ep->com.cm_id = NULL; 1241 ep->com.qp = NULL; 1242 cm_id->rem_ref(cm_id); 1243err: 1244 put_ep(&ep->com); 1245 return err; 1246} 1247 1248static int init_sock(struct iwch_ep_common *epc) 1249{ 1250 int err; 1251 struct sockopt sopt; 1252 int on=1; 1253 1254 
SOCK_LOCK(epc->so); 1255 soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc); 1256 epc->so->so_state |= SS_NBIO; 1257 SOCK_UNLOCK(epc->so); 1258 sopt.sopt_dir = SOPT_SET; 1259 sopt.sopt_level = IPPROTO_TCP; 1260 sopt.sopt_name = TCP_NODELAY; 1261 sopt.sopt_val = (caddr_t)&on; 1262 sopt.sopt_valsize = sizeof on; 1263 sopt.sopt_td = NULL; 1264 err = sosetopt(epc->so, &sopt); 1265 if (err) 1266 printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err); 1267 1268 return 0; 1269} 1270 1271static int 1272is_loopback_dst(struct iw_cm_id *cm_id) 1273{ 1274 uint16_t port = cm_id->remote_addr.sin_port; 1275 int ifa_present; 1276 1277 cm_id->remote_addr.sin_port = 0; 1278 ifa_present = ifa_ifwithaddr_check( 1279 (struct sockaddr *)&cm_id->remote_addr); 1280 cm_id->remote_addr.sin_port = port; 1281 return (ifa_present); 1282} 1283 1284int 1285iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1286{ 1287 int err = 0; 1288 struct iwch_dev *h = to_iwch_dev(cm_id->device); 1289 struct iwch_ep *ep; 1290 struct nhop4_extended nh4; 1291 struct toedev *tdev; 1292 1293 if (is_loopback_dst(cm_id)) { 1294 err = -ENOSYS; 1295 goto out; 1296 } 1297 1298 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1299 if (!ep) { 1300 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1301 err = (-ENOMEM); 1302 goto out; 1303 } 1304 callout_init(&ep->timer, 1); 1305 ep->plen = conn_param->private_data_len; 1306 if (ep->plen) 1307 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 1308 conn_param->private_data, ep->plen); 1309 ep->ird = conn_param->ird; 1310 ep->ord = conn_param->ord; 1311 1312 cm_id->add_ref(cm_id); 1313 ep->com.cm_id = cm_id; 1314 ep->com.qp = get_qhp(h, conn_param->qpn); 1315 ep->com.thread = curthread; 1316 PANIC_IF(!ep->com.qp); 1317 CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn, 1318 ep->com.qp, cm_id); 1319 1320 ep->com.so = cm_id->so; 1321 err = init_sock(&ep->com); 1322 if (err) 1323 goto fail2; 1324 1325 /* find a route */ 1326 err = 
find_route(cm_id->local_addr.sin_addr.s_addr, 1327 cm_id->remote_addr.sin_addr.s_addr, 1328 cm_id->local_addr.sin_port, 1329 cm_id->remote_addr.sin_port, IPTOS_LOWDELAY, &nh4); 1330 if (err) { 1331 printf("%s - cannot find route.\n", __FUNCTION__); 1332 err = EHOSTUNREACH; 1333 goto fail2; 1334 } 1335 1336 if (!(nh4.nh_ifp->if_flags & IFCAP_TOE)) { 1337 printf("%s - interface not TOE capable.\n", __FUNCTION__); 1338 fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); 1339 goto fail2; 1340 } 1341 tdev = TOEDEV(nh4.nh_ifp); 1342 if (tdev == NULL) { 1343 printf("%s - No toedev for interface.\n", __FUNCTION__); 1344 fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); 1345 goto fail2; 1346 } 1347 fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); 1348 1349 state_set(&ep->com, CONNECTING); 1350 ep->com.local_addr = cm_id->local_addr; 1351 ep->com.remote_addr = cm_id->remote_addr; 1352 err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 1353 ep->com.thread); 1354 if (!err) 1355 goto out; 1356fail2: 1357 put_ep(&ep->com); 1358out: 1359 return err; 1360} 1361 1362int 1363iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog) 1364{ 1365 int err = 0; 1366 struct iwch_listen_ep *ep; 1367 1368 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1369 if (!ep) { 1370 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1371 err = ENOMEM; 1372 goto out; 1373 } 1374 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1375 cm_id->add_ref(cm_id); 1376 ep->com.cm_id = cm_id; 1377 ep->backlog = backlog; 1378 ep->com.local_addr = cm_id->local_addr; 1379 ep->com.thread = curthread; 1380 state_set(&ep->com, LISTEN); 1381 1382 ep->com.so = cm_id->so; 1383 cm_id->provider_data = ep; 1384out: 1385 return err; 1386} 1387 1388void 1389iwch_destroy_listen_ep(struct iw_cm_id *cm_id) 1390{ 1391 struct iwch_listen_ep *ep = to_listen_ep(cm_id); 1392 1393 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1394 1395 state_set(&ep->com, DEAD); 1396 cm_id->rem_ref(cm_id); 1397 put_ep(&ep->com); 1398 return; 1399} 1400 1401int 
1402iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags) 1403{ 1404 int close = 0; 1405 1406 mtx_lock(&ep->com.lock); 1407 1408 PANIC_IF(!ep); 1409 PANIC_IF(!ep->com.so); 1410 1411 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep, 1412 ep->com.so, states[ep->com.state], abrupt); 1413 1414 switch (ep->com.state) { 1415 case MPA_REQ_WAIT: 1416 case MPA_REQ_SENT: 1417 case MPA_REQ_RCVD: 1418 case MPA_REP_SENT: 1419 case FPDU_MODE: 1420 close = 1; 1421 if (abrupt) 1422 ep->com.state = ABORTING; 1423 else { 1424 ep->com.state = CLOSING; 1425 start_ep_timer(ep); 1426 } 1427 break; 1428 case CLOSING: 1429 close = 1; 1430 if (abrupt) { 1431 stop_ep_timer(ep); 1432 ep->com.state = ABORTING; 1433 } else 1434 ep->com.state = MORIBUND; 1435 break; 1436 case MORIBUND: 1437 case ABORTING: 1438 case DEAD: 1439 CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n", 1440 __func__, ep, ep->com.state); 1441 break; 1442 default: 1443 panic("unknown state: %d\n", ep->com.state); 1444 break; 1445 } 1446 1447 mtx_unlock(&ep->com.lock); 1448 if (close) { 1449 if (abrupt) 1450 abort_connection(ep); 1451 else { 1452 if (!ep->parent_ep) 1453 __state_set(&ep->com, MORIBUND); 1454 shutdown_socket(&ep->com); 1455 } 1456 } 1457 return 0; 1458} 1459 1460static void 1461process_data(struct iwch_ep *ep) 1462{ 1463 struct sockaddr_in *local, *remote; 1464 1465 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1466 1467 switch (state_read(&ep->com)) { 1468 case MPA_REQ_SENT: 1469 process_mpa_reply(ep); 1470 break; 1471 case MPA_REQ_WAIT: 1472 1473 /* 1474 * XXX 1475 * Set local and remote addrs here because when we 1476 * dequeue the newly accepted socket, they aren't set 1477 * yet in the pcb! 
1478 */ 1479 in_getsockaddr(ep->com.so, (struct sockaddr **)&local); 1480 in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); 1481 CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__, 1482 inet_ntoa(local->sin_addr), 1483 inet_ntoa(remote->sin_addr)); 1484 ep->com.local_addr = *local; 1485 ep->com.remote_addr = *remote; 1486 free(local, M_SONAME); 1487 free(remote, M_SONAME); 1488 process_mpa_request(ep); 1489 break; 1490 default: 1491 if (sbavail(&ep->com.so->so_rcv)) 1492 printf("%s Unexpected streaming data." 1493 " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n", 1494 __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, 1495 sbavail(&ep->com.so->so_rcv), ep->com.so->so_rcv.sb_mb); 1496 break; 1497 } 1498 return; 1499} 1500 1501static void 1502process_connected(struct iwch_ep *ep) 1503{ 1504 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1505 if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) { 1506 send_mpa_req(ep); 1507 } else { 1508 connect_reply_upcall(ep, -ep->com.so->so_error); 1509 close_socket(&ep->com, 0); 1510 state_set(&ep->com, DEAD); 1511 put_ep(&ep->com); 1512 } 1513} 1514 1515void 1516process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) 1517{ 1518 struct iwch_ep *child_ep; 1519 struct sockaddr_in *local; 1520 struct sockaddr_in *remote; 1521 struct iwch_ep *parent_ep = parent_cm_id->provider_data; 1522 1523 CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); 1524 if (!child_so) { 1525 log(LOG_ERR, "%s - invalid child socket!\n", __func__); 1526 return; 1527 } 1528 child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); 1529 if (!child_ep) { 1530 log(LOG_ERR, "%s - failed to allocate ep entry!\n", 1531 __FUNCTION__); 1532 return; 1533 } 1534 SOCKBUF_LOCK(&child_so->so_rcv); 1535 soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep); 1536 SOCKBUF_UNLOCK(&child_so->so_rcv); 1537 
1538 in_getsockaddr(child_so, (struct sockaddr **)&local); 1539 in_getpeeraddr(child_so, (struct sockaddr **)&remote); 1540 1541 CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 1542 inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); 1543 child_ep->com.tdev = parent_ep->com.tdev; 1544 child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family; 1545 child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port; 1546 child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr; 1547 child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len; 1548 child_ep->com.remote_addr.sin_family = remote->sin_family; 1549 child_ep->com.remote_addr.sin_port = remote->sin_port; 1550 child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr; 1551 child_ep->com.remote_addr.sin_len = remote->sin_len; 1552 child_ep->com.so = child_so; 1553 child_ep->com.cm_id = NULL; 1554 child_ep->com.thread = parent_ep->com.thread; 1555 child_ep->parent_ep = parent_ep; 1556 1557 free(local, M_SONAME); 1558 free(remote, M_SONAME); 1559 get_ep(&parent_ep->com); 1560 callout_init(&child_ep->timer, 1); 1561 state_set(&child_ep->com, MPA_REQ_WAIT); 1562 start_ep_timer(child_ep); 1563 1564 /* maybe the request has already been queued up on the socket... 
*/ 1565 process_mpa_request(child_ep); 1566} 1567 1568static int 1569iwch_so_upcall(struct socket *so, void *arg, int waitflag) 1570{ 1571 struct iwch_ep *ep = arg; 1572 1573 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1574 mtx_lock(&req_lock); 1575 if (ep && ep->com.so && !ep->com.entry.tqe_prev) { 1576 get_ep(&ep->com); 1577 TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); 1578 taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task); 1579 } 1580 mtx_unlock(&req_lock); 1581 return (SU_OK); 1582} 1583 1584static void 1585process_socket_event(struct iwch_ep *ep) 1586{ 1587 int state = state_read(&ep->com); 1588 struct socket *so = ep->com.so; 1589 1590 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1591 if (state == CONNECTING) { 1592 process_connected(ep); 1593 return; 1594 } 1595 1596 if (state == LISTEN) { 1597 /* socket listening events are handled at IWCM */ 1598 CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__, 1599 ep->com.state, ep); 1600 BUG(); 1601 return; 1602 } 1603 1604 /* connection error */ 1605 if (so->so_error) { 1606 process_conn_error(ep); 1607 return; 1608 } 1609 1610 /* peer close */ 1611 if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { 1612 process_peer_close(ep); 1613 return; 1614 } 1615 1616 /* close complete */ 1617 if (so->so_state & (SS_ISDISCONNECTED)) { 1618 process_close_complete(ep); 1619 return; 1620 } 1621 1622 /* rx data */ 1623 process_data(ep); 1624 return; 1625} 1626 1627static void 1628process_req(void *ctx, int pending) 1629{ 1630 struct iwch_ep_common *epc; 1631 1632 CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__); 1633 mtx_lock(&req_lock); 1634 while (!TAILQ_EMPTY(&req_list)) { 1635 epc = TAILQ_FIRST(&req_list); 1636 TAILQ_REMOVE(&req_list, epc, entry); 1637 epc->entry.tqe_prev = NULL; 1638 mtx_unlock(&req_lock); 1639 if 
(epc->so) 1640 process_socket_event((struct iwch_ep *)epc); 1641 put_ep(epc); 1642 mtx_lock(&req_lock); 1643 } 1644 mtx_unlock(&req_lock); 1645} 1646 1647int 1648iwch_cm_init(void) 1649{ 1650 TAILQ_INIT(&req_list); 1651 mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); 1652 iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, 1653 taskqueue_thread_enqueue, &iw_cxgb_taskq); 1654 if (iw_cxgb_taskq == NULL) { 1655 printf("failed to allocate iw_cxgb taskqueue\n"); 1656 return (ENOMEM); 1657 } 1658 taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); 1659 TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); 1660 return (0); 1661} 1662 1663void 1664iwch_cm_term(void) 1665{ 1666 1667 taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); 1668 taskqueue_free(iw_cxgb_taskq); 1669} 1670 1671void 1672iwch_cm_init_cpl(struct adapter *sc) 1673{ 1674 1675 t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate); 1676 t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status); 1677} 1678 1679void 1680iwch_cm_term_cpl(struct adapter *sc) 1681{ 1682 1683 t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL); 1684 t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL); 1685} 1686#endif 1687