iw_cxgb_cm.c revision 283291
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 283291 2015-05-22 17:05:21Z jkim $"); 31 32#include "opt_inet.h" 33 34#ifdef TCP_OFFLOAD 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/kernel.h> 38#include <sys/bus.h> 39#include <sys/pciio.h> 40#include <sys/conf.h> 41#include <machine/bus.h> 42#include <machine/resource.h> 43#include <sys/bus_dma.h> 44#include <sys/rman.h> 45#include <sys/ioccom.h> 46#include <sys/mbuf.h> 47#include <sys/rwlock.h> 48#include <sys/linker.h> 49#include <sys/firmware.h> 50#include <sys/socket.h> 51#include <sys/socketvar.h> 52#include <sys/sockio.h> 53#include <sys/smp.h> 54#include <sys/sysctl.h> 55#include <sys/syslog.h> 56#include <sys/queue.h> 57#include <sys/taskqueue.h> 58#include <sys/proc.h> 59#include <sys/uio.h> 60 61#include <net/route.h> 62#include <netinet/in_systm.h> 63#include <netinet/in.h> 64#include <netinet/in_pcb.h> 65#include <netinet/ip.h> 66#include <netinet/ip_var.h> 67#include <netinet/tcp_var.h> 68#include <netinet/tcp.h> 69#include <netinet/tcpip.h> 70 71#include <rdma/ib_verbs.h> 72#include <linux/idr.h> 73#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h> 74 75#include <cxgb_include.h> 76#include <ulp/tom/cxgb_tom.h> 77#include <ulp/tom/cxgb_toepcb.h> 78#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h> 79#include <rdma/ib_verbs.h> 80#include <linux/idr.h> 81 82#include <ulp/iw_cxgb/iw_cxgb_wr.h> 83#include <ulp/iw_cxgb/iw_cxgb_hal.h> 84#include <ulp/iw_cxgb/iw_cxgb_provider.h> 85#include <ulp/iw_cxgb/iw_cxgb_cm.h> 86#include <ulp/iw_cxgb/iw_cxgb.h> 87 88#ifdef KTR 89static char *states[] = { 90 "idle", 91 "listen", 92 "connecting", 93 "mpa_wait_req", 94 "mpa_req_sent", 95 "mpa_req_rcvd", 96 "mpa_rep_sent", 97 "fpdu_mode", 98 "aborting", 99 "closing", 100 "moribund", 101 "dead", 102 NULL, 103}; 104#endif 105 106SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver 
parameters"); 107 108static int ep_timeout_secs = 60; 109SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0, 110 "CM Endpoint operation timeout in seconds (default=60)"); 111 112static int mpa_rev = 1; 113SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0, 114 "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)"); 115 116static int markers_enabled = 0; 117SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0, 118 "Enable MPA MARKERS (default(0)=disabled)"); 119 120static int crc_enabled = 1; 121SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0, 122 "Enable MPA CRC (default(1)=enabled)"); 123 124static int rcv_win = 256 * 1024; 125SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0, 126 "TCP receive window in bytes (default=256KB)"); 127 128static int snd_win = 32 * 1024; 129SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0, 130 "TCP send window in bytes (default=32KB)"); 131 132static unsigned int nocong = 0; 133SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RWTUN, &nocong, 0, 134 "Turn off congestion control (default=0)"); 135 136static unsigned int cong_flavor = 1; 137SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RWTUN, &cong_flavor, 0, 138 "TCP Congestion control flavor (default=1)"); 139 140static void ep_timeout(void *arg); 141static void connect_reply_upcall(struct iwch_ep *ep, int status); 142static int iwch_so_upcall(struct socket *so, void *arg, int waitflag); 143 144/* 145 * Cruft to offload socket upcalls onto thread. 
146 */ 147static struct mtx req_lock; 148static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list; 149static struct task iw_cxgb_task; 150static struct taskqueue *iw_cxgb_taskq; 151static void process_req(void *ctx, int pending); 152 153static void 154start_ep_timer(struct iwch_ep *ep) 155{ 156 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 157 if (callout_pending(&ep->timer)) { 158 CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep); 159 callout_deactivate(&ep->timer); 160 callout_drain(&ep->timer); 161 } else { 162 /* 163 * XXX this looks racy 164 */ 165 get_ep(&ep->com); 166 callout_init(&ep->timer, 1); 167 } 168 callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep); 169} 170 171static void 172stop_ep_timer(struct iwch_ep *ep) 173{ 174 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 175 if (!callout_pending(&ep->timer)) { 176 CTR3(KTR_IW_CXGB, "%s timer stopped when its not running! ep %p state %u\n", 177 __func__, ep, ep->com.state); 178 return; 179 } 180 callout_drain(&ep->timer); 181 put_ep(&ep->com); 182} 183 184static int 185set_tcpinfo(struct iwch_ep *ep) 186{ 187 struct socket *so = ep->com.so; 188 struct inpcb *inp = sotoinpcb(so); 189 struct tcpcb *tp; 190 struct toepcb *toep; 191 int rc = 0; 192 193 INP_WLOCK(inp); 194 tp = intotcpcb(inp); 195 196 if ((tp->t_flags & TF_TOE) == 0) { 197 rc = EINVAL; 198 printf("%s: connection NOT OFFLOADED!\n", __func__); 199 goto done; 200 } 201 toep = tp->t_toe; 202 203 ep->hwtid = toep->tp_tid; 204 ep->snd_seq = tp->snd_nxt; 205 ep->rcv_seq = tp->rcv_nxt; 206 ep->emss = tp->t_maxseg; 207 if (ep->emss < 128) 208 ep->emss = 128; 209done: 210 INP_WUNLOCK(inp); 211 return (rc); 212 213} 214 215static enum iwch_ep_state 216state_read(struct iwch_ep_common *epc) 217{ 218 enum iwch_ep_state state; 219 220 mtx_lock(&epc->lock); 221 state = epc->state; 222 mtx_unlock(&epc->lock); 223 return state; 224} 225 226static void 227__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) 228{ 
229 epc->state = new; 230} 231 232static void 233state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) 234{ 235 236 mtx_lock(&epc->lock); 237 CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]); 238 __state_set(epc, new); 239 mtx_unlock(&epc->lock); 240 return; 241} 242 243static void * 244alloc_ep(int size, int flags) 245{ 246 struct iwch_ep_common *epc; 247 248 epc = malloc(size, M_DEVBUF, flags); 249 if (epc) { 250 memset(epc, 0, size); 251 refcount_init(&epc->refcount, 1); 252 mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK); 253 cv_init(&epc->waitq, "iwch_epc cv"); 254 } 255 CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc); 256 return epc; 257} 258 259void __free_ep(struct iwch_ep_common *epc) 260{ 261 CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]); 262 KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so)); 263 KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc)); 264 free(epc, M_DEVBUF); 265} 266 267static struct rtentry * 268find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, 269 __be16 peer_port, u8 tos) 270{ 271 struct route iproute; 272 struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; 273 274 bzero(&iproute, sizeof iproute); 275 dst->sin_family = AF_INET; 276 dst->sin_len = sizeof *dst; 277 dst->sin_addr.s_addr = peer_ip; 278 279 rtalloc(&iproute); 280 return iproute.ro_rt; 281} 282 283static void 284close_socket(struct iwch_ep_common *epc, int close) 285{ 286 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); 287 SOCK_LOCK(epc->so); 288 soupcall_clear(epc->so, SO_RCV); 289 SOCK_UNLOCK(epc->so); 290 if (close) 291 soclose(epc->so); 292 else 293 soshutdown(epc->so, SHUT_WR|SHUT_RD); 294 epc->so = NULL; 295} 296 297static void 298shutdown_socket(struct iwch_ep_common *epc) 299{ 300 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, 
epc->so, states[epc->state]); 301 soshutdown(epc->so, SHUT_WR); 302} 303 304static void 305abort_socket(struct iwch_ep *ep) 306{ 307 struct sockopt sopt; 308 int err; 309 struct linger l; 310 311 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 312 l.l_onoff = 1; 313 l.l_linger = 0; 314 315 /* linger_time of 0 forces RST to be sent */ 316 sopt.sopt_dir = SOPT_SET; 317 sopt.sopt_level = SOL_SOCKET; 318 sopt.sopt_name = SO_LINGER; 319 sopt.sopt_val = (caddr_t)&l; 320 sopt.sopt_valsize = sizeof l; 321 sopt.sopt_td = NULL; 322 err = sosetopt(ep->com.so, &sopt); 323 if (err) 324 printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err); 325} 326 327static void 328send_mpa_req(struct iwch_ep *ep) 329{ 330 int mpalen; 331 struct mpa_message *mpa; 332 struct mbuf *m; 333 int err; 334 335 CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen); 336 337 mpalen = sizeof(*mpa) + ep->plen; 338 m = m_gethdr(mpalen, M_NOWAIT); 339 if (m == NULL) { 340 connect_reply_upcall(ep, -ENOMEM); 341 return; 342 } 343 mpa = mtod(m, struct mpa_message *); 344 m->m_len = mpalen; 345 m->m_pkthdr.len = mpalen; 346 memset(mpa, 0, sizeof(*mpa)); 347 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 348 mpa->flags = (crc_enabled ? MPA_CRC : 0) | 349 (markers_enabled ? 
MPA_MARKERS : 0); 350 mpa->private_data_size = htons(ep->plen); 351 mpa->revision = mpa_rev; 352 if (ep->plen) 353 memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); 354 355 err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); 356 if (err) { 357 m_freem(m); 358 connect_reply_upcall(ep, -ENOMEM); 359 return; 360 } 361 362 start_ep_timer(ep); 363 state_set(&ep->com, MPA_REQ_SENT); 364 return; 365} 366 367static int 368send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) 369{ 370 int mpalen; 371 struct mpa_message *mpa; 372 struct mbuf *m; 373 int err; 374 375 CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen); 376 377 mpalen = sizeof(*mpa) + plen; 378 379 m = m_gethdr(mpalen, M_NOWAIT); 380 if (m == NULL) { 381 printf("%s - cannot alloc mbuf!\n", __FUNCTION__); 382 return (-ENOMEM); 383 } 384 mpa = mtod(m, struct mpa_message *); 385 m->m_len = mpalen; 386 m->m_pkthdr.len = mpalen; 387 memset(mpa, 0, sizeof(*mpa)); 388 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 389 mpa->flags = MPA_REJECT; 390 mpa->revision = mpa_rev; 391 mpa->private_data_size = htons(plen); 392 if (plen) 393 memcpy(mpa->private_data, pdata, plen); 394 err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); 395 PANIC_IF(err); 396 return 0; 397} 398 399static int 400send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) 401{ 402 int mpalen; 403 struct mpa_message *mpa; 404 struct mbuf *m; 405 406 CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen); 407 408 mpalen = sizeof(*mpa) + plen; 409 410 m = m_gethdr(mpalen, M_NOWAIT); 411 if (m == NULL) { 412 printf("%s - cannot alloc mbuf!\n", __FUNCTION__); 413 return (-ENOMEM); 414 } 415 mpa = mtod(m, struct mpa_message *); 416 m->m_len = mpalen; 417 m->m_pkthdr.len = mpalen; 418 memset(mpa, 0, sizeof(*mpa)); 419 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 420 mpa->flags = (ep->mpa_attr.crc_enabled ? 
MPA_CRC : 0) | 421 (markers_enabled ? MPA_MARKERS : 0); 422 mpa->revision = mpa_rev; 423 mpa->private_data_size = htons(plen); 424 if (plen) 425 memcpy(mpa->private_data, pdata, plen); 426 427 state_set(&ep->com, MPA_REP_SENT); 428 return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 429 ep->com.thread); 430} 431 432static void 433close_complete_upcall(struct iwch_ep *ep) 434{ 435 struct iw_cm_event event; 436 437 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 438 memset(&event, 0, sizeof(event)); 439 event.event = IW_CM_EVENT_CLOSE; 440 if (ep->com.cm_id) { 441 CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d", 442 ep, ep->com.cm_id, ep->hwtid); 443 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 444 ep->com.cm_id->rem_ref(ep->com.cm_id); 445 ep->com.cm_id = NULL; 446 ep->com.qp = NULL; 447 } 448} 449 450static void 451abort_connection(struct iwch_ep *ep) 452{ 453 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 454 state_set(&ep->com, ABORTING); 455 abort_socket(ep); 456 close_socket(&ep->com, 0); 457 close_complete_upcall(ep); 458 state_set(&ep->com, DEAD); 459 put_ep(&ep->com); 460} 461 462static void 463peer_close_upcall(struct iwch_ep *ep) 464{ 465 struct iw_cm_event event; 466 467 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 468 memset(&event, 0, sizeof(event)); 469 event.event = IW_CM_EVENT_DISCONNECT; 470 if (ep->com.cm_id) { 471 CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d", 472 ep, ep->com.cm_id, ep->hwtid); 473 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 474 } 475} 476 477static void 478peer_abort_upcall(struct iwch_ep *ep) 479{ 480 struct iw_cm_event event; 481 482 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 483 memset(&event, 0, sizeof(event)); 484 event.event = IW_CM_EVENT_CLOSE; 485 
event.status = ECONNRESET; 486 if (ep->com.cm_id) { 487 CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep, 488 ep->com.cm_id, ep->hwtid); 489 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 490 ep->com.cm_id->rem_ref(ep->com.cm_id); 491 ep->com.cm_id = NULL; 492 ep->com.qp = NULL; 493 } 494} 495 496static void 497connect_reply_upcall(struct iwch_ep *ep, int status) 498{ 499 struct iw_cm_event event; 500 501 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status); 502 memset(&event, 0, sizeof(event)); 503 event.event = IW_CM_EVENT_CONNECT_REPLY; 504 event.status = status; 505 event.local_addr = ep->com.local_addr; 506 event.remote_addr = ep->com.remote_addr; 507 508 if ((status == 0) || (status == ECONNREFUSED)) { 509 event.private_data_len = ep->plen; 510 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 511 } 512 if (ep->com.cm_id) { 513 CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep, 514 ep->hwtid, status); 515 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 516 } 517 if (status < 0) { 518 ep->com.cm_id->rem_ref(ep->com.cm_id); 519 ep->com.cm_id = NULL; 520 ep->com.qp = NULL; 521 } 522} 523 524static void 525connect_request_upcall(struct iwch_ep *ep) 526{ 527 struct iw_cm_event event; 528 529 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 530 memset(&event, 0, sizeof(event)); 531 event.event = IW_CM_EVENT_CONNECT_REQUEST; 532 event.local_addr = ep->com.local_addr; 533 event.remote_addr = ep->com.remote_addr; 534 event.private_data_len = ep->plen; 535 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 536 event.provider_data = ep; 537 event.so = ep->com.so; 538 if (state_read(&ep->parent_ep->com) != DEAD) { 539 get_ep(&ep->com); 540 ep->parent_ep->com.cm_id->event_handler( 541 ep->parent_ep->com.cm_id, 542 &event); 543 } 544 put_ep(&ep->parent_ep->com); 545} 546 547static void 
548established_upcall(struct iwch_ep *ep) 549{ 550 struct iw_cm_event event; 551 552 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 553 memset(&event, 0, sizeof(event)); 554 event.event = IW_CM_EVENT_ESTABLISHED; 555 if (ep->com.cm_id) { 556 CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid); 557 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 558 } 559} 560 561static void 562process_mpa_reply(struct iwch_ep *ep) 563{ 564 struct mpa_message *mpa; 565 u16 plen; 566 struct iwch_qp_attributes attrs; 567 enum iwch_qp_attr_mask mask; 568 int err; 569 struct mbuf *top, *m; 570 int flags = MSG_DONTWAIT; 571 struct uio uio; 572 int len; 573 574 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 575 576 /* 577 * Stop mpa timer. If it expired, then the state has 578 * changed and we bail since ep_timeout already aborted 579 * the connection. 580 */ 581 stop_ep_timer(ep); 582 if (state_read(&ep->com) != MPA_REQ_SENT) 583 return; 584 585 uio.uio_resid = len = 1000000; 586 uio.uio_td = ep->com.thread; 587 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 588 if (err) { 589 if (err == EWOULDBLOCK) { 590 start_ep_timer(ep); 591 return; 592 } 593 err = -err; 594 goto err; 595 } 596 597 if (ep->com.so->so_rcv.sb_mb) { 598 printf("%s data after soreceive called! so %p sb_mb %p top %p\n", 599 __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); 600 } 601 602 m = top; 603 do { 604 /* 605 * If we get more than the supported amount of private data 606 * then we must fail this connection. 607 */ 608 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 609 err = (-EINVAL); 610 goto err; 611 } 612 613 /* 614 * copy the new data into our accumulation buffer. 
615 */ 616 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 617 ep->mpa_pkt_len += m->m_len; 618 if (!m->m_next) 619 m = m->m_nextpkt; 620 else 621 m = m->m_next; 622 } while (m); 623 624 m_freem(top); 625 626 /* 627 * if we don't even have the mpa message, then bail. 628 */ 629 if (ep->mpa_pkt_len < sizeof(*mpa)) 630 return; 631 mpa = (struct mpa_message *)ep->mpa_pkt; 632 633 /* Validate MPA header. */ 634 if (mpa->revision != mpa_rev) { 635 CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); 636 err = EPROTO; 637 goto err; 638 } 639 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 640 CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); 641 err = EPROTO; 642 goto err; 643 } 644 645 plen = ntohs(mpa->private_data_size); 646 647 /* 648 * Fail if there's too much private data. 649 */ 650 if (plen > MPA_MAX_PRIVATE_DATA) { 651 CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); 652 err = EPROTO; 653 goto err; 654 } 655 656 /* 657 * If plen does not account for pkt size 658 */ 659 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 660 CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len); 661 err = EPROTO; 662 goto err; 663 } 664 665 ep->plen = (u8) plen; 666 667 /* 668 * If we don't have all the pdata yet, then bail. 669 * We'll continue process when more data arrives. 670 */ 671 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 672 return; 673 674 if (mpa->flags & MPA_REJECT) { 675 err = ECONNREFUSED; 676 goto err; 677 } 678 679 /* 680 * If we get here we have accumulated the entire mpa 681 * start reply message including private data. And 682 * the MPA header is valid. 683 */ 684 CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__); 685 state_set(&ep->com, FPDU_MODE); 686 ep->mpa_attr.initiator = 1; 687 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 688 ep->mpa_attr.recv_marker_enabled = markers_enabled; 689 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; 690 ep->mpa_attr.version = mpa_rev; 691 if (set_tcpinfo(ep)) { 692 printf("%s set_tcpinfo error\n", __FUNCTION__); 693 goto err; 694 } 695 CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " 696 "xmit_marker_enabled=%d, version=%d", __FUNCTION__, 697 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 698 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 699 700 attrs.mpa_attr = ep->mpa_attr; 701 attrs.max_ird = ep->ird; 702 attrs.max_ord = ep->ord; 703 attrs.llp_stream_handle = ep; 704 attrs.next_state = IWCH_QP_STATE_RTS; 705 706 mask = IWCH_QP_ATTR_NEXT_STATE | 707 IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | 708 IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; 709 710 /* bind QP and TID with INIT_WR */ 711 err = iwch_modify_qp(ep->com.qp->rhp, 712 ep->com.qp, mask, &attrs, 1); 713 if (!err) 714 goto out; 715err: 716 abort_connection(ep); 717out: 718 connect_reply_upcall(ep, err); 719 return; 720} 721 722static void 723process_mpa_request(struct iwch_ep *ep) 724{ 725 struct mpa_message *mpa; 726 u16 plen; 727 int flags = MSG_DONTWAIT; 728 struct mbuf *top, *m; 729 int err; 730 struct uio uio; 731 int len; 732 733 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 734 735 /* 736 * Stop mpa timer. If it expired, then the state has 737 * changed and we bail since ep_timeout already aborted 738 * the connection. 739 */ 740 stop_ep_timer(ep); 741 if (state_read(&ep->com) != MPA_REQ_WAIT) 742 return; 743 744 uio.uio_resid = len = 1000000; 745 uio.uio_td = ep->com.thread; 746 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 747 if (err) { 748 if (err == EWOULDBLOCK) { 749 start_ep_timer(ep); 750 return; 751 } 752 err = -err; 753 goto err; 754 } 755 756 m = top; 757 do { 758 759 /* 760 * If we get more than the supported amount of private data 761 * then we must fail this connection. 
762 */ 763 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 764 CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__, 765 ep->mpa_pkt_len + m->m_len); 766 goto err; 767 } 768 769 770 /* 771 * Copy the new data into our accumulation buffer. 772 */ 773 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 774 ep->mpa_pkt_len += m->m_len; 775 776 if (!m->m_next) 777 m = m->m_nextpkt; 778 else 779 m = m->m_next; 780 } while (m); 781 782 m_freem(top); 783 784 /* 785 * If we don't even have the mpa message, then bail. 786 * We'll continue process when more data arrives. 787 */ 788 if (ep->mpa_pkt_len < sizeof(*mpa)) { 789 start_ep_timer(ep); 790 CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__, 791 ep->mpa_pkt_len); 792 return; 793 } 794 mpa = (struct mpa_message *) ep->mpa_pkt; 795 796 /* 797 * Validate MPA Header. 798 */ 799 if (mpa->revision != mpa_rev) { 800 CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); 801 goto err; 802 } 803 804 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { 805 CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); 806 goto err; 807 } 808 809 plen = ntohs(mpa->private_data_size); 810 811 /* 812 * Fail if there's too much private data. 813 */ 814 if (plen > MPA_MAX_PRIVATE_DATA) { 815 CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); 816 goto err; 817 } 818 819 /* 820 * If plen does not account for pkt size 821 */ 822 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 823 CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__, 824 ep->mpa_pkt_len); 825 goto err; 826 } 827 ep->plen = (u8) plen; 828 829 /* 830 * If we don't have all the pdata yet, then bail. 
831 */ 832 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { 833 start_ep_timer(ep); 834 CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__, 835 ep->mpa_pkt_len); 836 return; 837 } 838 839 /* 840 * If we get here we have accumulated the entire mpa 841 * start reply message including private data. 842 */ 843 ep->mpa_attr.initiator = 0; 844 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 845 ep->mpa_attr.recv_marker_enabled = markers_enabled; 846 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 847 ep->mpa_attr.version = mpa_rev; 848 if (set_tcpinfo(ep)) { 849 printf("%s set_tcpinfo error\n", __FUNCTION__); 850 goto err; 851 } 852 CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " 853 "xmit_marker_enabled=%d, version=%d", __FUNCTION__, 854 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 855 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 856 857 state_set(&ep->com, MPA_REQ_RCVD); 858 859 /* drive upcall */ 860 connect_request_upcall(ep); 861 return; 862err: 863 abort_connection(ep); 864 return; 865} 866 867static void 868process_peer_close(struct iwch_ep *ep) 869{ 870 struct iwch_qp_attributes attrs; 871 int disconnect = 1; 872 int release = 0; 873 874 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 875 876 mtx_lock(&ep->com.lock); 877 switch (ep->com.state) { 878 case MPA_REQ_WAIT: 879 __state_set(&ep->com, CLOSING); 880 break; 881 case MPA_REQ_SENT: 882 __state_set(&ep->com, CLOSING); 883 connect_reply_upcall(ep, -ECONNRESET); 884 break; 885 case MPA_REQ_RCVD: 886 887 /* 888 * We're gonna mark this puppy DEAD, but keep 889 * the reference on it until the ULP accepts or 890 * rejects the CR. 
891 */ 892 __state_set(&ep->com, CLOSING); 893 break; 894 case MPA_REP_SENT: 895 __state_set(&ep->com, CLOSING); 896 break; 897 case FPDU_MODE: 898 start_ep_timer(ep); 899 __state_set(&ep->com, CLOSING); 900 attrs.next_state = IWCH_QP_STATE_CLOSING; 901 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, 902 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); 903 peer_close_upcall(ep); 904 break; 905 case ABORTING: 906 disconnect = 0; 907 break; 908 case CLOSING: 909 __state_set(&ep->com, MORIBUND); 910 disconnect = 0; 911 break; 912 case MORIBUND: 913 stop_ep_timer(ep); 914 if (ep->com.cm_id && ep->com.qp) { 915 attrs.next_state = IWCH_QP_STATE_IDLE; 916 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, 917 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); 918 } 919 close_socket(&ep->com, 0); 920 close_complete_upcall(ep); 921 __state_set(&ep->com, DEAD); 922 release = 1; 923 disconnect = 0; 924 break; 925 case DEAD: 926 disconnect = 0; 927 break; 928 default: 929 PANIC_IF(1); 930 } 931 mtx_unlock(&ep->com.lock); 932 if (disconnect) 933 iwch_ep_disconnect(ep, 0, M_NOWAIT); 934 if (release) 935 put_ep(&ep->com); 936 return; 937} 938 939static void 940process_conn_error(struct iwch_ep *ep) 941{ 942 struct iwch_qp_attributes attrs; 943 int ret; 944 945 mtx_lock(&ep->com.lock); 946 CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state); 947 switch (ep->com.state) { 948 case MPA_REQ_WAIT: 949 stop_ep_timer(ep); 950 break; 951 case MPA_REQ_SENT: 952 stop_ep_timer(ep); 953 connect_reply_upcall(ep, -ECONNRESET); 954 break; 955 case MPA_REP_SENT: 956 ep->com.rpl_err = ECONNRESET; 957 CTR1(KTR_IW_CXGB, "waking up ep %p", ep); 958 break; 959 case MPA_REQ_RCVD: 960 961 /* 962 * We're gonna mark this puppy DEAD, but keep 963 * the reference on it until the ULP accepts or 964 * rejects the CR. 
965 */ 966 break; 967 case MORIBUND: 968 case CLOSING: 969 stop_ep_timer(ep); 970 /*FALLTHROUGH*/ 971 case FPDU_MODE: 972 if (ep->com.cm_id && ep->com.qp) { 973 attrs.next_state = IWCH_QP_STATE_ERROR; 974 ret = iwch_modify_qp(ep->com.qp->rhp, 975 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 976 &attrs, 1); 977 if (ret) 978 log(LOG_ERR, 979 "%s - qp <- error failed!\n", 980 __FUNCTION__); 981 } 982 peer_abort_upcall(ep); 983 break; 984 case ABORTING: 985 break; 986 case DEAD: 987 mtx_unlock(&ep->com.lock); 988 CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, 989 ep->com.so->so_error); 990 return; 991 default: 992 PANIC_IF(1); 993 break; 994 } 995 996 if (ep->com.state != ABORTING) { 997 close_socket(&ep->com, 0); 998 __state_set(&ep->com, DEAD); 999 put_ep(&ep->com); 1000 } 1001 mtx_unlock(&ep->com.lock); 1002 return; 1003} 1004 1005static void 1006process_close_complete(struct iwch_ep *ep) 1007{ 1008 struct iwch_qp_attributes attrs; 1009 int release = 0; 1010 1011 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1012 PANIC_IF(!ep); 1013 1014 /* The cm_id may be null if we failed to connect */ 1015 mtx_lock(&ep->com.lock); 1016 switch (ep->com.state) { 1017 case CLOSING: 1018 __state_set(&ep->com, MORIBUND); 1019 break; 1020 case MORIBUND: 1021 stop_ep_timer(ep); 1022 if ((ep->com.cm_id) && (ep->com.qp)) { 1023 attrs.next_state = IWCH_QP_STATE_IDLE; 1024 iwch_modify_qp(ep->com.qp->rhp, 1025 ep->com.qp, 1026 IWCH_QP_ATTR_NEXT_STATE, 1027 &attrs, 1); 1028 } 1029 if (ep->parent_ep) 1030 close_socket(&ep->com, 1); 1031 else 1032 close_socket(&ep->com, 0); 1033 close_complete_upcall(ep); 1034 __state_set(&ep->com, DEAD); 1035 release = 1; 1036 break; 1037 case ABORTING: 1038 break; 1039 case DEAD: 1040 default: 1041 PANIC_IF(1); 1042 break; 1043 } 1044 mtx_unlock(&ep->com.lock); 1045 if (release) 1046 put_ep(&ep->com); 1047 return; 1048} 1049 1050/* 1051 * T3A does 3 things when a TERM is received: 1052 * 1) 
send up a CPL_RDMA_TERMINATE message with the TERM packet 1053 * 2) generate an async event on the QP with the TERMINATE opcode 1054 * 3) post a TERMINATE opcde cqe into the associated CQ. 1055 * 1056 * For (1), we save the message in the qp for later consumer consumption. 1057 * For (2), we move the QP into TERMINATE, post a QP event and disconnect. 1058 * For (3), we toss the CQE in cxio_poll_cq(). 1059 * 1060 * terminate() handles case (1)... 1061 */ 1062static int 1063terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1064{ 1065 struct adapter *sc = qs->adap; 1066 struct tom_data *td = sc->tom_softc; 1067 uint32_t hash = *((uint32_t *)r + 1); 1068 unsigned int tid = ntohl(hash) >> 8 & 0xfffff; 1069 struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1070 struct socket *so = toep->tp_inp->inp_socket; 1071 struct iwch_ep *ep = so->so_rcv.sb_upcallarg; 1072 1073 if (state_read(&ep->com) != FPDU_MODE) 1074 goto done; 1075 1076 m_adj(m, sizeof(struct cpl_rdma_terminate)); 1077 1078 CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes", 1079 __func__, tid, ep, m->m_len); 1080 1081 m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer); 1082 ep->com.qp->attr.terminate_msg_len = m->m_len; 1083 ep->com.qp->attr.is_terminate_local = 0; 1084 1085done: 1086 m_freem(m); 1087 return (0); 1088} 1089 1090static int 1091ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1092{ 1093 struct adapter *sc = qs->adap; 1094 struct tom_data *td = sc->tom_softc; 1095 struct cpl_rdma_ec_status *rep = mtod(m, void *); 1096 unsigned int tid = GET_TID(rep); 1097 struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1098 struct socket *so = toep->tp_inp->inp_socket; 1099 struct iwch_ep *ep = so->so_rcv.sb_upcallarg; 1100 1101 if (rep->status) { 1102 struct iwch_qp_attributes attrs; 1103 1104 CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__); 1105 stop_ep_timer(ep); 1106 attrs.next_state = IWCH_QP_STATE_ERROR; 1107 
iwch_modify_qp(ep->com.qp->rhp, 1108 ep->com.qp, 1109 IWCH_QP_ATTR_NEXT_STATE, 1110 &attrs, 1); 1111 abort_connection(ep); 1112 } 1113 1114 m_freem(m); 1115 return (0); 1116} 1117 1118static void 1119ep_timeout(void *arg) 1120{ 1121 struct iwch_ep *ep = (struct iwch_ep *)arg; 1122 struct iwch_qp_attributes attrs; 1123 int err = 0; 1124 int abort = 1; 1125 1126 mtx_lock(&ep->com.lock); 1127 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1128 switch (ep->com.state) { 1129 case MPA_REQ_SENT: 1130 __state_set(&ep->com, ABORTING); 1131 connect_reply_upcall(ep, -ETIMEDOUT); 1132 break; 1133 case MPA_REQ_WAIT: 1134 __state_set(&ep->com, ABORTING); 1135 break; 1136 case CLOSING: 1137 case MORIBUND: 1138 if (ep->com.cm_id && ep->com.qp) 1139 err = 1; 1140 __state_set(&ep->com, ABORTING); 1141 break; 1142 default: 1143 CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n", 1144 __func__, ep, ep->com.state); 1145 abort = 0; 1146 } 1147 mtx_unlock(&ep->com.lock); 1148 if (err){ 1149 attrs.next_state = IWCH_QP_STATE_ERROR; 1150 iwch_modify_qp(ep->com.qp->rhp, 1151 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1152 &attrs, 1); 1153 } 1154 if (abort) 1155 abort_connection(ep); 1156 put_ep(&ep->com); 1157} 1158 1159int 1160iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 1161{ 1162 int err; 1163 struct iwch_ep *ep = to_ep(cm_id); 1164 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1165 1166 if (state_read(&ep->com) == DEAD) { 1167 put_ep(&ep->com); 1168 return (-ECONNRESET); 1169 } 1170 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1171 if (mpa_rev == 0) { 1172 abort_connection(ep); 1173 } else { 1174 err = send_mpa_reject(ep, pdata, pdata_len); 1175 err = soshutdown(ep->com.so, 3); 1176 } 1177 put_ep(&ep->com); 1178 return 0; 1179} 1180 1181int 1182iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1183{ 1184 int err; 1185 struct 
iwch_qp_attributes attrs; 1186 enum iwch_qp_attr_mask mask; 1187 struct iwch_ep *ep = to_ep(cm_id); 1188 struct iwch_dev *h = to_iwch_dev(cm_id->device); 1189 struct iwch_qp *qp = get_qhp(h, conn_param->qpn); 1190 1191 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1192 if (state_read(&ep->com) == DEAD) { 1193 err = -ECONNRESET; 1194 goto err; 1195 } 1196 1197 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1198 PANIC_IF(!qp); 1199 1200 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || 1201 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { 1202 abort_connection(ep); 1203 err = -EINVAL; 1204 goto err; 1205 } 1206 1207 cm_id->add_ref(cm_id); 1208 ep->com.cm_id = cm_id; 1209 ep->com.qp = qp; 1210 1211 ep->com.rpl_err = 0; 1212 ep->com.rpl_done = 0; 1213 ep->ird = conn_param->ird; 1214 ep->ord = conn_param->ord; 1215 CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); 1216 1217 /* bind QP to EP and move to RTS */ 1218 attrs.mpa_attr = ep->mpa_attr; 1219 attrs.max_ird = ep->ird; 1220 attrs.max_ord = ep->ord; 1221 attrs.llp_stream_handle = ep; 1222 attrs.next_state = IWCH_QP_STATE_RTS; 1223 1224 /* bind QP and TID with INIT_WR */ 1225 mask = IWCH_QP_ATTR_NEXT_STATE | 1226 IWCH_QP_ATTR_LLP_STREAM_HANDLE | 1227 IWCH_QP_ATTR_MPA_ATTR | 1228 IWCH_QP_ATTR_MAX_IRD | 1229 IWCH_QP_ATTR_MAX_ORD; 1230 1231 err = iwch_modify_qp(ep->com.qp->rhp, 1232 ep->com.qp, mask, &attrs, 1); 1233 1234 if (err) 1235 goto err1; 1236 1237 err = send_mpa_reply(ep, conn_param->private_data, 1238 conn_param->private_data_len); 1239 if (err) 1240 goto err1; 1241 state_set(&ep->com, FPDU_MODE); 1242 established_upcall(ep); 1243 put_ep(&ep->com); 1244 return 0; 1245err1: 1246 ep->com.cm_id = NULL; 1247 ep->com.qp = NULL; 1248 cm_id->rem_ref(cm_id); 1249err: 1250 put_ep(&ep->com); 1251 return err; 1252} 1253 1254static int init_sock(struct iwch_ep_common *epc) 1255{ 1256 int err; 1257 struct sockopt sopt; 1258 
int on=1;

	SOCK_LOCK(epc->so);
	soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
	epc->so->so_state |= SS_NBIO;
	SOCK_UNLOCK(epc->so);
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_NODELAY;
	sopt.sopt_val = (caddr_t)&on;
	sopt.sopt_valsize = sizeof on;
	sopt.sopt_td = NULL;
	err = sosetopt(epc->so, &sopt);
	if (err)
		printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);

	return 0;
}

/*
 * Return non-zero when the destination address belongs to a local
 * interface.  The port is zeroed for the lookup (the check matches on
 * address only) and restored afterwards.
 */
static int
is_loopback_dst(struct iw_cm_id *cm_id)
{
	uint16_t port = cm_id->remote_addr.sin_port;
	int ifa_present;

	cm_id->remote_addr.sin_port = 0;
	ifa_present = ifa_ifwithaddr_check(
	    (struct sockaddr *)&cm_id->remote_addr);
	cm_id->remote_addr.sin_port = port;
	return (ifa_present);
}

/*
 * iw_cm active-connect entry point: allocate an endpoint, take over the
 * cm_id's socket, verify the route goes out a TOE-capable interface, and
 * start a non-blocking TCP connect.  Loopback destinations are rejected.
 */
int
iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err = 0;
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_ep *ep;
	struct rtentry *rt;
	struct toedev *tdev;

	if (is_loopback_dst(cm_id)) {
		err = -ENOSYS;
		goto out;
	}

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = (-ENOMEM);
		goto out;
	}
	callout_init(&ep->timer, 1);
	/* Stash private data after the MPA header for the later MPA request. */
	ep->plen = conn_param->private_data_len;
	if (ep->plen)
		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
		       conn_param->private_data, ep->plen);
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = get_qhp(h, conn_param->qpn);
	ep->com.thread = curthread;
	PANIC_IF(!ep->com.qp);
	CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
	    ep->com.qp, cm_id);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail2;

	/* find a route
*/
	rt = find_route(cm_id->local_addr.sin_addr.s_addr,
			cm_id->remote_addr.sin_addr.s_addr,
			cm_id->local_addr.sin_port,
			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
	if (!rt) {
		printf("%s - cannot find route.\n", __FUNCTION__);
		/*
		 * NOTE(review): positive errno here, but negative errnos on
		 * the other failure paths of this function -- confirm which
		 * convention the iw_cm caller expects.
		 */
		err = EHOSTUNREACH;
		goto fail2;
	}

	if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) {
		/*
		 * NOTE(review): IFCAP_TOE is a capability bit; testing it
		 * against if_flags (rather than if_capenable) looks wrong.
		 * Also err is still 0 on this path, so the function reports
		 * success even though the ep is released below -- likely a
		 * missing err assignment.
		 */
		printf("%s - interface not TOE capable.\n", __FUNCTION__);
		RTFREE(rt);
		goto fail2;
	}
	tdev = TOEDEV(rt->rt_ifp);
	if (tdev == NULL) {
		/* NOTE(review): err is also left at 0 on this path. */
		printf("%s - No toedev for interface.\n", __FUNCTION__);
		RTFREE(rt);
		goto fail2;
	}
	RTFREE(rt);

	state_set(&ep->com, CONNECTING);
	ep->com.local_addr = cm_id->local_addr;
	ep->com.remote_addr = cm_id->remote_addr;
	/* Non-blocking connect; completion arrives via the socket upcall. */
	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
	    ep->com.thread);
	if (!err)
		goto out;
fail2:
	put_ep(&ep->com);
out:
	return err;
}

/*
 * iw_cm listen entry point: wrap the cm_id's already-bound socket in a
 * listening endpoint and call solisten() on it.
 */
int
iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
{
	int err = 0;
	struct iwch_listen_ep *ep;

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = ENOMEM;
		goto out;
	}
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->backlog = backlog;
	ep->com.local_addr = cm_id->local_addr;
	ep->com.thread = curthread;
	state_set(&ep->com, LISTEN);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail;

	err = solisten(ep->com.so, ep->backlog, ep->com.thread);
	if (!err) {
		cm_id->provider_data = ep;
		goto out;
	}
	close_socket(&ep->com, 0);
fail:
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
out:
	return err;
}

/*
 * iw_cm destroy-listen entry point: tear down a listening endpoint
 * created by iwch_create_listen().
 */
int
iwch_destroy_listen(struct iw_cm_id *cm_id)
{
	struct iwch_listen_ep *ep = to_listen_ep(cm_id);

CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);

	state_set(&ep->com, DEAD);
	close_socket(&ep->com, 0);
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
	return 0;
}

/*
 * Initiate a close -- abortive (abrupt != 0) or graceful -- of the
 * endpoint's connection.  Takes ep->com.lock to transition the state
 * machine, then performs the socket action outside the lock.
 */
int
iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
{
	int close = 0;

	mtx_lock(&ep->com.lock);

	/*
	 * NOTE(review): ep was already dereferenced by mtx_lock() above,
	 * so this null check can never catch anything.
	 */
	PANIC_IF(!ep);
	PANIC_IF(!ep->com.so);

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
	    ep->com.so, states[ep->com.state], abrupt);

	switch (ep->com.state) {
	case MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case FPDU_MODE:
		/* Connection is live: either abort or begin a timed close. */
		close = 1;
		if (abrupt)
			ep->com.state = ABORTING;
		else {
			ep->com.state = CLOSING;
			start_ep_timer(ep);
		}
		break;
	case CLOSING:
		/* A close is already in progress; escalate or finish it. */
		close = 1;
		if (abrupt) {
			stop_ep_timer(ep);
			ep->com.state = ABORTING;
		} else
			ep->com.state = MORIBUND;
		break;
	case MORIBUND:
	case ABORTING:
	case DEAD:
		CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
		    __func__, ep, ep->com.state);
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
		break;
	}

	mtx_unlock(&ep->com.lock);
	if (close) {
		if (abrupt)
			abort_connection(ep);
		else {
			/*
			 * NOTE(review): __state_set() is used here without
			 * ep->com.lock held -- confirm that is safe.
			 */
			if (!ep->parent_ep)
				__state_set(&ep->com, MORIBUND);
			shutdown_socket(&ep->com);
		}
	}
	return 0;
}

/*
 * Handle readable data on the endpoint's socket according to the MPA
 * state machine.
 */
static void
process_data(struct iwch_ep *ep)
{
	struct sockaddr_in *local, *remote;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	switch (state_read(&ep->com)) {
	case MPA_REQ_SENT:
		process_mpa_reply(ep);
		break;
	case MPA_REQ_WAIT:

		/*
		 * XXX
		 * Set local and remote addrs here because when we
		 * dequeue the newly accepted socket, they aren't set
		 * yet in the pcb!
*/
		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
		CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
		    inet_ntoa(local->sin_addr),
		    inet_ntoa(remote->sin_addr));
		ep->com.local_addr = *local;
		ep->com.remote_addr = *remote;
		/* in_getsockaddr/in_getpeeraddr allocate; free with M_SONAME. */
		free(local, M_SONAME);
		free(remote, M_SONAME);
		process_mpa_request(ep);
		break;
	default:
		if (sbavail(&ep->com.so->so_rcv))
			printf("%s Unexpected streaming data."
			       " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
			       __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
			       sbavail(&ep->com.so->so_rcv), ep->com.so->so_rcv.sb_mb);
		break;
	}
	return;
}

/*
 * Active-open completion: the non-blocking connect finished.  On success
 * start the MPA exchange; on failure report the socket error upward and
 * kill the endpoint.
 */
static void
process_connected(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
		send_mpa_req(ep);
	} else {
		connect_reply_upcall(ep, -ep->com.so->so_error);
		close_socket(&ep->com, 0);
		state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
}

/*
 * Pull the first completed connection off a listening socket's accept
 * queue, install the iw_cxgb upcall on it and soaccept() it.  Returns
 * the new socket (referenced via soref()) or NULL if the queue is empty;
 * *remote is set to the peer address (caller frees with M_SONAME).
 */
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
{
	struct socket *so;

	ACCEPT_LOCK();
	so = TAILQ_FIRST(&head->so_comp);
	if (!so) {
		ACCEPT_UNLOCK();
		return NULL;
	}
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	SOCK_LOCK(so);
	/* Detach from the listener, as an in-kernel accept would. */
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;
	soref(so);
	soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
	so->so_state |= SS_NBIO;
	PANIC_IF(!(so->so_state & SS_ISCONNECTED));
	PANIC_IF(so->so_error);
	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();
	soaccept(so, (struct sockaddr **)remote);
	return so;
}

/*
 * Passive-open completion: a new connection is ready on the listening
 * endpoint's socket.
 */
static void
process_newconn(struct
iwch_ep *parent_ep)
{
	struct socket *child_so;
	struct iwch_ep *child_ep;
	struct sockaddr_in *remote;

	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
	if (!child_ep) {
		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
		    __FUNCTION__);
		return;
	}
	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
	if (!child_so) {
		log(LOG_ERR, "%s - failed to dequeue child socket!\n",
		    __FUNCTION__);
		__free_ep(&child_ep->com);
		return;
	}
	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
	    inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
	/*
	 * The child inherits the local side from the listener and takes the
	 * remote side from the accepted socket's peer address.
	 */
	child_ep->com.tdev = parent_ep->com.tdev;
	child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
	child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
	child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
	child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
	child_ep->com.remote_addr.sin_family = remote->sin_family;
	child_ep->com.remote_addr.sin_port = remote->sin_port;
	child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
	child_ep->com.remote_addr.sin_len = remote->sin_len;
	child_ep->com.so = child_so;
	child_ep->com.cm_id = NULL;
	child_ep->com.thread = parent_ep->com.thread;
	child_ep->parent_ep = parent_ep;

	free(remote, M_SONAME);
	get_ep(&parent_ep->com);	/* child holds a reference on its parent */
	/*
	 * NOTE(review): duplicate assignment -- child_ep->parent_ep was
	 * already set a few lines above.
	 */
	child_ep->parent_ep = parent_ep;
	callout_init(&child_ep->timer, 1);
	state_set(&child_ep->com, MPA_REQ_WAIT);
	start_ep_timer(child_ep);

	/* maybe the request has already been queued up on the socket...
*/
	process_mpa_request(child_ep);
}

/*
 * Socket receive upcall, run from the socket layer.  All real work is
 * deferred to the iw_cxgb taskqueue: queue the endpoint (taking a
 * reference) on req_list unless it is already queued, which is detected
 * via entry.tqe_prev being non-NULL.
 */
static int
iwch_so_upcall(struct socket *so, void *arg, int waitflag)
{
	struct iwch_ep *ep = arg;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	mtx_lock(&req_lock);
	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
		get_ep(&ep->com);
		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
		taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
	}
	mtx_unlock(&req_lock);
	return (SU_OK);
}

/*
 * Dispatch one deferred socket event for an endpoint, in priority order:
 * connect completion, new connection, socket error, peer close, full
 * close, and finally plain received data.
 */
static void
process_socket_event(struct iwch_ep *ep)
{
	int state = state_read(&ep->com);
	struct socket *so = ep->com.so;

	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
	if (state == CONNECTING) {
		process_connected(ep);
		return;
	}

	if (state == LISTEN) {
		process_newconn(ep);
		return;
	}

	/* connection error */
	if (so->so_error) {
		process_conn_error(ep);
		return;
	}

	/* peer close */
	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
		process_peer_close(ep);
		return;
	}

	/* close complete */
	if (so->so_state & (SS_ISDISCONNECTED)) {
		process_close_complete(ep);
		return;
	}

	/* rx data */
	process_data(ep);
	return;
}

/*
 * Taskqueue handler: drain req_list, processing each queued endpoint's
 * socket event and dropping the reference taken in iwch_so_upcall().
 * req_lock is released around the event processing itself.
 */
static void
process_req(void *ctx, int pending)
{
	struct iwch_ep_common *epc;

	CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
	mtx_lock(&req_lock);
	while (!TAILQ_EMPTY(&req_list)) {
		epc = TAILQ_FIRST(&req_list);
		TAILQ_REMOVE(&req_list, epc, entry);
		epc->entry.tqe_prev = NULL;	/* mark as no longer queued */
		mtx_unlock(&req_lock);
		if (epc->so)
			process_socket_event((struct iwch_ep *)epc);
		put_ep(epc);
		mtx_lock(&req_lock);
	}
	mtx_unlock(&req_lock);
}

/*
 * Module init: set up the deferred-work list, its lock, and the
 * taskqueue thread that runs process_req().
 */
int
iwch_cm_init(void)
{
	TAILQ_INIT(&req_list);
	mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
	iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &iw_cxgb_taskq);
	if (iw_cxgb_taskq == NULL) {
		printf("failed to allocate iw_cxgb taskqueue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
	TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
	return (0);
}

/*
 * Module teardown: wait for pending deferred work, then free the
 * taskqueue.  NOTE(review): req_lock initialized in iwch_cm_init() is
 * never mtx_destroy()ed here -- confirm whether that is intentional.
 */
void
iwch_cm_term(void)
{

	taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
	taskqueue_free(iw_cxgb_taskq);
}

/* Register the RDMA CPL message handlers with the adapter. */
void
iwch_cm_init_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
}

/* Unregister the RDMA CPL message handlers. */
void
iwch_cm_term_cpl(struct adapter *sc)
{

	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
}
#endif