iw_cxgb_cm.c revision 178786
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 178786 2008-05-05 18:46:18Z kmacy $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/bus.h> 36#include <sys/module.h> 37#include <sys/pciio.h> 38#include <sys/conf.h> 39#include <machine/bus.h> 40#include <machine/resource.h> 41#include <sys/bus_dma.h> 42#include <sys/rman.h> 43#include <sys/ioccom.h> 44#include <sys/mbuf.h> 45#include <sys/rwlock.h> 46#include <sys/linker.h> 47#include <sys/firmware.h> 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/sockio.h> 51#include <sys/smp.h> 52#include <sys/sysctl.h> 53#include <sys/syslog.h> 54#include <sys/queue.h> 55#include <sys/taskqueue.h> 56#include <sys/proc.h> 57#include <sys/uio.h> 58 59#include <net/route.h> 60#include <netinet/in_systm.h> 61#include <netinet/in.h> 62#include <netinet/in_pcb.h> 63#include <netinet/ip.h> 64#include <netinet/ip_var.h> 65#include <netinet/tcp_var.h> 66#include <netinet/tcp.h> 67#include <netinet/tcpip.h> 68 69#include <contrib/rdma/ib_verbs.h> 70 71 72#ifdef CONFIG_DEFINED 73#include <cxgb_include.h> 74#include <ulp/tom/cxgb_tom.h> 75#include <ulp/tom/cxgb_t3_ddp.h> 76#include <ulp/tom/cxgb_defs.h> 77#include <ulp/tom/cxgb_toepcb.h> 78#include <ulp/iw_cxgb/iw_cxgb_wr.h> 79#include <ulp/iw_cxgb/iw_cxgb_hal.h> 80#include <ulp/iw_cxgb/iw_cxgb_provider.h> 81#include <ulp/iw_cxgb/iw_cxgb_cm.h> 82#include <ulp/iw_cxgb/iw_cxgb.h> 83#else 84#include <dev/cxgb/cxgb_include.h> 85#include <dev/cxgb/ulp/tom/cxgb_tom.h> 86#include <dev/ulp/tom/cxgb_t3_ddp.h> 87#include <dev/cxgb/ulp/tom/cxgb_defs.h> 88#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 89#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> 90#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> 91#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> 92#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> 93#include 
<dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
#endif

#ifdef KTR
/*
 * Printable names for enum iwch_ep_state, indexed by state value.
 * Used only by the CTRn() trace macros in this file.
 */
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif

/*
 * NOTE(review): this creates sysctl node "hw.cxgb", but the TUNABLE_INT()
 * knobs below register under "hw.iw_cxgb.*" while the SYSCTL_UINT() entries
 * attach to _hw_cxgb (i.e. "hw.cxgb.*").  The tunable and sysctl namespaces
 * therefore do not line up -- confirm which one is intended before relying
 * on loader.conf tunables taking effect.
 * NOTE(review): the SYSCTL_UINT() entries below expose variables that are
 * declared as (signed) int -- verify this mismatch is intentional.
 */
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");

/* CM endpoint operation timeout; consumed by start_ep_timer()/ep_timeout(). */
static int ep_timeout_secs = 10;
TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default=10)");

/* MPA protocol revision advertised in requests/replies. */
static int mpa_rev = 1;
TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0,
    "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");

/* Non-zero to request MPA markers from the peer / enable them locally. */
static int markers_enabled = 0;
TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0,
    "Enable MPA MARKERS (default(0)=disabled)");

/* Non-zero to offer/accept MPA CRC. */
static int crc_enabled = 1;
TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0,
    "Enable MPA CRC (default(1)=enabled)");

/* TCP receive window used for offloaded iWARP connections. */
static int rcv_win = 256 * 1024;
TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0,
    "TCP receive window in bytes (default=256KB)");

/* TCP send window used for offloaded iWARP connections. */
static int snd_win = 32 * 1024;
TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0,
    "TCP send window in bytes (default=32KB)");

/* Non-zero disables congestion control on offloaded connections. */
static unsigned int nocong = 0;
TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0,
    "Turn off congestion control (default=0)");

/* Selects the TCP congestion control algorithm flavor (hardware-defined). */
static unsigned int cong_flavor = 1;
TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0,
    "TCP Congestion control flavor (default=1)");

static void ep_timeout(void *arg);
static void connect_reply_upcall(struct iwch_ep *ep, int status);
static void iwch_so_upcall(struct socket *so, void *arg, int waitflag);

/*
 * Cruft to offload socket upcalls onto thread.
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);

/*
 * Arm (or re-arm) the endpoint's CM timer for ep_timeout_secs.  A fresh
 * arm takes an extra reference on the ep that is dropped either by
 * stop_ep_timer() or by the ep_timeout() handler path.
 */
static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, TRUE);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

/*
 * Cancel the CM timer and drop the reference taken by start_ep_timer().
 */
static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

/*
 * Snapshot TCP state (sequence numbers, MSS, hardware tid) from the
 * offloaded socket into the endpoint.  Fails with -EINVAL if the
 * connection is not actually offloaded to the TOE.
 */
static int set_tcpinfo(struct iwch_ep *ep)
{
	struct tcp_info ti;
	struct sockopt sopt;
	int err;

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_INFO;
	sopt.sopt_val = (caddr_t)&ti;
	sopt.sopt_valsize = sizeof ti;
	sopt.sopt_td = NULL;

	err = sogetopt(ep->com.so, &sopt);
	if (err) {
		printf("%s can't get tcpinfo\n", __FUNCTION__);
		return -err;
	}
	if (!(ti.tcpi_options & TCPI_OPT_TOE)) {
		printf("%s connection NOT OFFLOADED!\n", __FUNCTION__);
		return -EINVAL;
	}

	ep->snd_seq = ti.tcpi_snd_nxt;
	ep->rcv_seq = ti.tcpi_rcv_nxt;
	/* emss = advertised send MSS less TCP/IP header overhead. */
	ep->emss = ti.__tcpi_snd_mss - sizeof(struct tcpiphdr);
	ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */
	if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS)
		ep->emss -= 12;	/* room for the TCP timestamp option */
	if (ep->emss < 128)
		ep->emss = 128;	/* clamp to a sane minimum */
	return 0;
}

/* Read the endpoint state under its lock. */
static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

/* Set the endpoint state; caller must hold epc->lock. */
static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

/* Locked wrapper around __state_set() with a state-transition trace. */
static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

/*
 * Allocate and zero an endpoint of 'size' bytes, initializing the common
 * refcount, lock and condvar.  Returns NULL on allocation failure.
 * Caller owns the initial reference.
 */
static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

/*
 * Final free of an endpoint once its refcount has dropped to zero.
 * The socket must already be detached and the ep off the upcall req list.
 */
void __free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

/*
 * Quiesce RX on the hardware tid via CPL_SET_TCB_FIELD.
 * Entire body is compiled out ("notyet"); currently a no-op returning 0.
 */
int
iwch_quiesce_tid(struct iwch_ep *ep)
{
#ifdef notyet
	struct cpl_set_tcb_field *req;
	struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);

	if (m == NULL)
		return (-ENOMEM);
	req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
	req->reply = 0;
	req->cpu_idx = 0;
	req->word = htons(W_TCB_RX_QUIESCE);
	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
	req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);

	m_set_priority(m, CPL_PRIORITY_DATA);
	cxgb_ofld_send(ep->com.tdev, m);
#endif
	return 0;
}

/*
 * Resume RX on the hardware tid (clears the quiesce bit).
 * Entire body is compiled out ("notyet"); currently a no-op returning 0.
 */
int
iwch_resume_tid(struct iwch_ep *ep)
{
#ifdef notyet
	struct cpl_set_tcb_field *req;
	struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);

	if (m == NULL)
		return (-ENOMEM);
	req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
	req->reply = 0;
	req->cpu_idx = 0;
	req->word = htons(W_TCB_RX_QUIESCE);
	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
	req->val = 0;

	m_set_priority(m, CPL_PRIORITY_DATA);
	cxgb_ofld_send(ep->com.tdev, m);
#endif
	return 0;
}

/*
 * Route lookup toward peer_ip.  Only the destination address is consulted;
 * local_ip/ports/tos are accepted for interface symmetry but unused here.
 * Returns a referenced rtentry or NULL; caller is responsible for release.
 */
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
	__be16 peer_port, u8 tos)
{
	struct route iproute;
	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;

	bzero(&iproute, sizeof iproute);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof *dst;
	dst->sin_addr.s_addr = peer_ip;

	rtalloc(&iproute);
	return iproute.ro_rt;
}

/*
 * Tear the ep off its socket: remove the receive upcall, shut the socket
 * down in both directions, and clear epc->so.  The socket itself is not
 * soclose()d here.
 */
static void
close_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	epc->so->so_upcall = NULL;
	epc->so->so_upcallarg = NULL;
	epc->so->so_rcv.sb_flags &= ~SB_UPCALL;
	SOCK_UNLOCK(epc->so);
	soshutdown(epc->so, SHUT_WR|SHUT_RD);
	epc->so = NULL;
}

/* Half-close: shut down the send side only (sends FIN, keeps receiving). */
static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

/*
 * Force an abortive close: set SO_LINGER with linger time 0 so that the
 * subsequent close/shutdown emits an RST rather than a graceful FIN.
 */
static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}

/*
 * Build and send the MPA start request (key, flags, revision plus any
 * private data staged in ep->mpa_pkt), then arm the MPA timer and move
 * to MPA_REQ_SENT.  On failure the connect is failed via
 * connect_reply_upcall(-ENOMEM).
 */
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
	    (markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		/*
		 * NOTE(review): sosend() normally frees the mbuf chain
		 * itself on error; this m_freem() may be a double-free --
		 * verify against the sosend() contract.
		 */
		m_freem(m);
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

/*
 * Send an MPA reply carrying the MPA_REJECT flag and optional private
 * data, refusing the peer's connection request.  Returns 0 or -ENOMEM.
 */
static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

/*
 * Send the accepting MPA reply (negotiated CRC/marker flags plus private
 * data) and move to MPA_REP_SENT.  Returns the sosend() error code.
 */
static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
	    (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
		ep->com.thread);
}

/*
 * Deliver IW_CM_EVENT_CLOSE to the consumer and drop the cm_id
 * reference, detaching the cm_id/qp from this endpoint.
 */
static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Abortive teardown: RST the connection, detach the socket, notify the
 * consumer, mark the ep DEAD and drop a reference.
 */
static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com);
	close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
	put_ep(&ep->com);
}

/* Deliver IW_CM_EVENT_DISCONNECT (peer sent FIN) to the consumer. */
static void
peer_close_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

/*
 * Deliver IW_CM_EVENT_CLOSE with status ECONNRESET (peer aborted) and
 * drop the cm_id reference, detaching cm_id/qp.
 */
static void
peer_abort_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = ECONNRESET;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
		     ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Deliver IW_CM_EVENT_CONNECT_REPLY with the given status.  On success
 * or ECONNREFUSED the accumulated private data is included; on any
 * negative status the cm_id reference is dropped.
 */
static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
		     ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
	if (status < 0) {
		/*
		 * NOTE(review): this branch dereferences cm_id outside the
		 * NULL guard above -- confirm cm_id is always non-NULL when
		 * a negative status reaches here.
		 */
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Deliver IW_CM_EVENT_CONNECT_REQUEST to the listening endpoint's
 * consumer (unless the listener is DEAD), then drop and clear the
 * parent-ep reference held since the connection arrived.
 */
static void
connect_request_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.private_data_len = ep->plen;
	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	event.provider_data = ep;
	event.so = ep->com.so;
	if (state_read(&ep->parent_ep->com) != DEAD)
		ep->parent_ep->com.cm_id->event_handler(
				ep->parent_ep->com.cm_id,
				&event);
	put_ep(&ep->parent_ep->com);
	ep->parent_ep = NULL;
}

static void
established_upcall(struct iwch_ep *ep)
{
	/* Deliver IW_CM_EVENT_ESTABLISHED to the consumer, if attached. */
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

/*
 * Active side: drain the MPA start reply from the socket into
 * ep->mpa_pkt, validate it, and on success negotiate CRC/markers,
 * snapshot TCP state and move the QP to RTS; on any failure abort the
 * connection.  Returns early (leaving the timer re-armed) if the full
 * reply has not yet arrived.
 */
static void
process_mpa_reply(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	int err;
	struct mbuf *top, *m;
	int flags = MSG_DONTWAIT;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			/* nothing queued yet; re-arm and wait for more */
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
			__FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * if we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	/*
	 * NOTE(review): '|' binds tighter than '?:', so this evaluates as
	 * ((mpa->flags & MPA_CRC) | crc_enabled) ? 1 : 0, i.e. CRC on if
	 * either side wants it.  This matches the Linux iw_cxgb3 driver,
	 * so it is presumably intentional -- left as-is.
	 */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		/*
		 * NOTE(review): 'err' is still 0 here (soreceive succeeded),
		 * so the connect_reply_upcall() after the abort below will
		 * report status 0 (success) -- looks like this should set a
		 * real error code first; verify.
		 */
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	    "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (!err)
		goto out;
err:
	abort_connection(ep);
out:
	connect_reply_upcall(ep, err);
	return;
}

/*
 * Passive side: drain the peer's MPA start request from the socket into
 * ep->mpa_pkt, validate it, negotiate CRC/markers, snapshot TCP state,
 * move to MPA_REQ_RCVD and deliver the connect-request upcall.  Returns
 * early (timer re-armed) while the request is still incomplete; aborts
 * the connection on any protocol violation.
 */
static void
process_mpa_request(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	int flags = MSG_DONTWAIT;
	struct mbuf *top, *m;
	int err;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	m = top;
	do {

		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
			    ep->mpa_pkt_len + m->m_len);
			goto err;
		}


		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;

		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
		    ep->mpa_pkt_len);
		return;
	}
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		goto err;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
		    ep->mpa_pkt_len);
		goto err;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
		    ep->mpa_pkt_len);
		return;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data.
	 */
	/* see NOTE in process_mpa_reply() about '|' vs '?:' precedence here */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	    "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
err:
	abort_connection(ep);
	return;
}

/*
 * Handle the peer's FIN.  Drives the endpoint state machine: may notify
 * the consumer, move the QP toward CLOSING/IDLE, and on final close
 * release the socket and a reference.
 */
static void
process_peer_close(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		__state_set(&ep->com, CLOSING);
		get_ep(&ep->com);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = IWCH_QP_STATE_CLOSING;
		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
			IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
				IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		PANIC_IF(1);
	}
	mtx_unlock(&ep->com.lock);
	if (disconnect)
		iwch_ep_disconnect(ep, 0, M_NOWAIT);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * Handle a connection error (socket so_error / RST).  Moves the QP to
 * ERROR where applicable, notifies the consumer, and unless already
 * ABORTING tears down the socket, marks the ep DEAD and drops a ref.
 */
static void
process_conn_error(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int ret;
	int state;

	state = state_read(&ep->com);
	CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]);
	switch (state) {
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		get_ep(&ep->com);
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_ERROR;
			ret = iwch_modify_qp(ep->com.qp->rhp,
				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
				     &attrs, 1);
			if (ret)
				log(LOG_ERR,
				       "%s - qp <- error failed!\n",
				       __FUNCTION__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
			ep->com.so->so_error);
		return;
	default:
		PANIC_IF(1);
		break;
	}

	if (state != ABORTING) {
		close_socket(&ep->com);
		state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
	return;
}

/*
 * Handle completion of our own close (FIN acked).  In MORIBUND this
 * finishes teardown: QP to IDLE, socket detached, consumer notified,
 * ep marked DEAD and a reference dropped.
 */
static void
process_close_complete(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	PANIC_IF(!ep);

	/* The cm_id may be null if we failed to connect */
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp,
					     ep->com.qp,
					     IWCH_QP_ATTR_NEXT_STATE,
					     &attrs, 1);
		}
		close_socket(&ep->com);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
		break;
	case DEAD:
	default:
		PANIC_IF(1);
		break;
	}
	mtx_unlock(&ep->com.lock);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcode cqe into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumer consumption.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);
	struct iwch_ep *ep = so->so_upcallarg;

	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	/* strip the CPL header; the remainder is the TERM payload */
	m_adj(m, sizeof(struct cpl_rdma_terminate));
	CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len);
	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
	ep->com.qp->attr.terminate_msg_len = m->m_len;
	ep->com.qp->attr.is_terminate_local = 0;
	return CPL_RET_BUF_DONE;
}

/*
 * Handle CPL_RDMA_EC_STATUS: completion status of a close/quiesce
 * operation.  A zero status advances the close state machine; a non-zero
 * status forces the QP into ERROR.
 */
static int
ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);
	struct cpl_rdma_ec_status *rep = cplhdr(m);
	struct iwch_ep *ep;
	struct iwch_qp_attributes attrs;
	int release = 0;

	ep = so->so_upcallarg;
	/*
	 * NOTE(review): this trace dereferences 'ep' (and the !so/!ep panic
	 * check below comes after it) -- the sanity check is ineffective if
	 * ep is NULL here; consider whether it should precede the CTR5.
	 */
	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status);
	if (!so || !ep) {
		panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? so->so_state : -1);
	}
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		if (!rep->status)
			__state_set(&ep->com, MORIBUND);
		else
			__state_set(&ep->com, ABORTING);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (!rep->status) {
			if ((ep->com.cm_id) && (ep->com.qp)) {
				attrs.next_state = IWCH_QP_STATE_IDLE;
				iwch_modify_qp(ep->com.qp->rhp,
					     ep->com.qp,
					     IWCH_QP_ATTR_NEXT_STATE,
					     &attrs, 1);
			}
			close_socket(&ep->com);
			close_complete_upcall(ep);
			__state_set(&ep->com, DEAD);
			release = 1;
		}
		break;
	case DEAD:
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
	}
	mtx_unlock(&ep->com.lock);
	if (rep->status) {
		log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n",
		       __FUNCTION__, ep->hwtid);
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			       ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
			       &attrs, 1);
	}
	if (release)
		put_ep(&ep->com);
	return CPL_RET_BUF_DONE;
}

/*
 * CM timer expiry (armed by start_ep_timer()).  Fails a pending connect,
 * forces a stuck CLOSING/MORIBUND QP into ERROR, then aborts the
 * connection.  The trailing put_ep() drops the reference taken when the
 * timer was armed.
 */
static void
ep_timeout(void *arg)
{
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	int err = 0;

	mtx_lock(&ep->com.lock);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp)
			err = 1;
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
	}
	__state_set(&ep->com, ABORTING);
	mtx_unlock(&ep->com.lock);
	if (err){
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
			     &attrs, 1);
	}
	/*
	 * NOTE(review): abort_connection() itself ends with a put_ep();
	 * the additional put_ep() below presumably balances the reference
	 * taken by start_ep_timer() -- verify the refcount accounting.
	 */
	abort_connection(ep);
	put_ep(&ep->com);
}
1229int 1230iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 1231{ 1232 int err; 1233 struct iwch_ep *ep = to_ep(cm_id); 1234 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1235 1236 if (state_read(&ep->com) == DEAD) { 1237 put_ep(&ep->com); 1238 return (-ECONNRESET); 1239 } 1240 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1241 if (mpa_rev == 0) { 1242 abort_connection(ep); 1243 } else { 1244 err = send_mpa_reject(ep, pdata, pdata_len); 1245 err = soshutdown(ep->com.so, 3); 1246 } 1247 return 0; 1248} 1249 1250int 1251iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1252{ 1253 int err; 1254 struct iwch_qp_attributes attrs; 1255 enum iwch_qp_attr_mask mask; 1256 struct iwch_ep *ep = to_ep(cm_id); 1257 struct iwch_dev *h = to_iwch_dev(cm_id->device); 1258 struct iwch_qp *qp = get_qhp(h, conn_param->qpn); 1259 1260 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1261 if (state_read(&ep->com) == DEAD) 1262 return (-ECONNRESET); 1263 1264 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1265 PANIC_IF(!qp); 1266 1267 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || 1268 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { 1269 abort_connection(ep); 1270 return (-EINVAL); 1271 } 1272 1273 cm_id->add_ref(cm_id); 1274 ep->com.cm_id = cm_id; 1275 ep->com.qp = qp; 1276 1277 ep->com.rpl_err = 0; 1278 ep->com.rpl_done = 0; 1279 ep->ird = conn_param->ird; 1280 ep->ord = conn_param->ord; 1281 CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); 1282 get_ep(&ep->com); 1283 1284 /* bind QP to EP and move to RTS */ 1285 attrs.mpa_attr = ep->mpa_attr; 1286 attrs.max_ird = ep->ord; 1287 attrs.max_ord = ep->ord; 1288 attrs.llp_stream_handle = ep; 1289 attrs.next_state = IWCH_QP_STATE_RTS; 1290 1291 /* bind QP and TID with INIT_WR */ 1292 mask = IWCH_QP_ATTR_NEXT_STATE | 1293 
IWCH_QP_ATTR_LLP_STREAM_HANDLE | 1294 IWCH_QP_ATTR_MPA_ATTR | 1295 IWCH_QP_ATTR_MAX_IRD | 1296 IWCH_QP_ATTR_MAX_ORD; 1297 1298 err = iwch_modify_qp(ep->com.qp->rhp, 1299 ep->com.qp, mask, &attrs, 1); 1300 1301 if (err) 1302 goto err; 1303 1304 err = send_mpa_reply(ep, conn_param->private_data, 1305 conn_param->private_data_len); 1306 if (err) 1307 goto err; 1308 state_set(&ep->com, FPDU_MODE); 1309 established_upcall(ep); 1310 put_ep(&ep->com); 1311 return 0; 1312err: 1313 ep->com.cm_id = NULL; 1314 ep->com.qp = NULL; 1315 cm_id->rem_ref(cm_id); 1316 put_ep(&ep->com); 1317 return err; 1318} 1319 1320static int init_sock(struct iwch_ep_common *epc) 1321{ 1322 int err; 1323 struct sockopt sopt; 1324 int on=1; 1325 1326 epc->so->so_upcall = iwch_so_upcall; 1327 epc->so->so_upcallarg = epc; 1328 epc->so->so_rcv.sb_flags |= SB_UPCALL; 1329 epc->so->so_state |= SS_NBIO; 1330 sopt.sopt_dir = SOPT_SET; 1331 sopt.sopt_level = SOL_SOCKET; 1332 sopt.sopt_name = SO_NO_DDP; 1333 sopt.sopt_val = (caddr_t)&on; 1334 sopt.sopt_valsize = sizeof on; 1335 sopt.sopt_td = NULL; 1336 err = sosetopt(epc->so, &sopt); 1337 if (err) 1338 printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err); 1339 sopt.sopt_dir = SOPT_SET; 1340 sopt.sopt_level = IPPROTO_TCP; 1341 sopt.sopt_name = TCP_NODELAY; 1342 sopt.sopt_val = (caddr_t)&on; 1343 sopt.sopt_valsize = sizeof on; 1344 sopt.sopt_td = NULL; 1345 err = sosetopt(epc->so, &sopt); 1346 if (err) 1347 printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err); 1348 1349 return 0; 1350} 1351 1352static int 1353is_loopback_dst(struct iw_cm_id *cm_id) 1354{ 1355 uint16_t port = cm_id->remote_addr.sin_port; 1356 struct ifaddr *ifa; 1357 1358 cm_id->remote_addr.sin_port = 0; 1359 ifa = ifa_ifwithaddr((struct sockaddr *)&cm_id->remote_addr); 1360 cm_id->remote_addr.sin_port = port; 1361 return (ifa != NULL); 1362} 1363 1364int 1365iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1366{ 1367 int err = 0; 1368 struct 
iwch_dev *h = to_iwch_dev(cm_id->device); 1369 struct iwch_ep *ep; 1370 struct rtentry *rt; 1371 struct toedev *tdev; 1372 1373 if (is_loopback_dst(cm_id)) { 1374 err = -ENOSYS; 1375 goto out; 1376 } 1377 1378 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1379 if (!ep) { 1380 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1381 err = (-ENOMEM); 1382 goto out; 1383 } 1384 callout_init(&ep->timer, TRUE); 1385 ep->plen = conn_param->private_data_len; 1386 if (ep->plen) 1387 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 1388 conn_param->private_data, ep->plen); 1389 ep->ird = conn_param->ird; 1390 ep->ord = conn_param->ord; 1391 1392 cm_id->add_ref(cm_id); 1393 ep->com.cm_id = cm_id; 1394 ep->com.qp = get_qhp(h, conn_param->qpn); 1395 ep->com.thread = curthread; 1396 PANIC_IF(!ep->com.qp); 1397 CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn, 1398 ep->com.qp, cm_id); 1399 1400 ep->com.so = cm_id->so; 1401 err = init_sock(&ep->com); 1402 if (err) 1403 goto fail2; 1404 1405 /* find a route */ 1406 rt = find_route(cm_id->local_addr.sin_addr.s_addr, 1407 cm_id->remote_addr.sin_addr.s_addr, 1408 cm_id->local_addr.sin_port, 1409 cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); 1410 if (!rt) { 1411 printf("%s - cannot find route.\n", __FUNCTION__); 1412 err = EHOSTUNREACH; 1413 goto fail2; 1414 } 1415 1416 if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) { 1417 printf("%s - interface not TOE capable.\n", __FUNCTION__); 1418 goto fail3; 1419 } 1420 tdev = TOEDEV(rt->rt_ifp); 1421 if (tdev == NULL) { 1422 printf("%s - No toedev for interface.\n", __FUNCTION__); 1423 goto fail3; 1424 } 1425 if (!tdev->tod_can_offload(tdev, ep->com.so)) { 1426 printf("%s - interface cannot offload!.\n", __FUNCTION__); 1427 goto fail3; 1428 } 1429 RTFREE(rt); 1430 1431 state_set(&ep->com, CONNECTING); 1432 ep->com.local_addr = cm_id->local_addr; 1433 ep->com.remote_addr = cm_id->remote_addr; 1434 err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 1435 
ep->com.thread); 1436 if (!err) 1437 goto out; 1438fail3: 1439 RTFREE(ep->dst); 1440fail2: 1441 put_ep(&ep->com); 1442out: 1443 return err; 1444} 1445 1446int 1447iwch_create_listen(struct iw_cm_id *cm_id, int backlog) 1448{ 1449 int err = 0; 1450 struct iwch_listen_ep *ep; 1451 1452 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1453 if (!ep) { 1454 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1455 err = ENOMEM; 1456 goto out; 1457 } 1458 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1459 cm_id->add_ref(cm_id); 1460 ep->com.cm_id = cm_id; 1461 ep->backlog = backlog; 1462 ep->com.local_addr = cm_id->local_addr; 1463 ep->com.thread = curthread; 1464 state_set(&ep->com, LISTEN); 1465 1466 ep->com.so = cm_id->so; 1467 err = init_sock(&ep->com); 1468 if (err) 1469 goto fail; 1470 1471 err = solisten(ep->com.so, ep->backlog, ep->com.thread); 1472 if (!err) { 1473 cm_id->provider_data = ep; 1474 goto out; 1475 } 1476 close_socket(&ep->com); 1477fail: 1478 cm_id->rem_ref(cm_id); 1479 put_ep(&ep->com); 1480out: 1481 return err; 1482} 1483 1484int 1485iwch_destroy_listen(struct iw_cm_id *cm_id) 1486{ 1487 struct iwch_listen_ep *ep = to_listen_ep(cm_id); 1488 1489 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1490 1491 state_set(&ep->com, DEAD); 1492 close_socket(&ep->com); 1493 cm_id->rem_ref(cm_id); 1494 put_ep(&ep->com); 1495 return 0; 1496} 1497 1498int 1499iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags) 1500{ 1501 int close = 0; 1502 1503 mtx_lock(&ep->com.lock); 1504 1505 PANIC_IF(!ep); 1506 PANIC_IF(!ep->com.so); 1507 1508 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep, 1509 ep->com.so, states[ep->com.state], abrupt); 1510 1511 if (ep->com.state == DEAD) { 1512 CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep); 1513 goto out; 1514 } 1515 1516 if (abrupt) { 1517 if (ep->com.state != ABORTING) { 1518 ep->com.state = ABORTING; 1519 close = 1; 1520 } 1521 goto out; 1522 } 1523 1524 switch (ep->com.state) { 1525 case 
MPA_REQ_WAIT: 1526 case MPA_REQ_SENT: 1527 case MPA_REQ_RCVD: 1528 case MPA_REP_SENT: 1529 case FPDU_MODE: 1530 start_ep_timer(ep); 1531 ep->com.state = CLOSING; 1532 close = 1; 1533 break; 1534 case CLOSING: 1535 ep->com.state = MORIBUND; 1536 close = 1; 1537 break; 1538 case MORIBUND: 1539 case ABORTING: 1540 break; 1541 default: 1542 panic("unknown state: %d\n", ep->com.state); 1543 break; 1544 } 1545out: 1546 mtx_unlock(&ep->com.lock); 1547 if (close) { 1548 if (abrupt) 1549 abort_connection(ep); 1550 else 1551 shutdown_socket(&ep->com); 1552 } 1553 return 0; 1554} 1555 1556static void 1557process_data(struct iwch_ep *ep) 1558{ 1559 struct sockaddr_in *local, *remote; 1560 1561 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1562 1563 switch (state_read(&ep->com)) { 1564 case MPA_REQ_SENT: 1565 process_mpa_reply(ep); 1566 break; 1567 case MPA_REQ_WAIT: 1568 1569 /* 1570 * XXX 1571 * Set local and remote addrs here because when we 1572 * dequeue the newly accepted socket, they aren't set 1573 * yet in the pcb! 1574 */ 1575 in_getsockaddr(ep->com.so, (struct sockaddr **)&local); 1576 in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); 1577 CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__, 1578 inet_ntoa(local->sin_addr), 1579 inet_ntoa(remote->sin_addr)); 1580 ep->com.local_addr = *local; 1581 ep->com.remote_addr = *remote; 1582 free(local, M_SONAME); 1583 free(remote, M_SONAME); 1584 process_mpa_request(ep); 1585 break; 1586 default: 1587 if (ep->com.so->so_rcv.sb_cc) 1588 printf("%s Unexpected streaming data." 
1589 " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n", 1590 __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, 1591 ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb); 1592 break; 1593 } 1594 return; 1595} 1596 1597static void 1598process_connected(struct iwch_ep *ep) 1599{ 1600 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1601 if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) { 1602 send_mpa_req(ep); 1603 } else { 1604 connect_reply_upcall(ep, -ep->com.so->so_error); 1605 close_socket(&ep->com); 1606 state_set(&ep->com, DEAD); 1607 put_ep(&ep->com); 1608 } 1609} 1610 1611static struct socket * 1612dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep) 1613{ 1614 struct socket *so; 1615 1616 ACCEPT_LOCK(); 1617 so = TAILQ_FIRST(&head->so_comp); 1618 if (!so) { 1619 ACCEPT_UNLOCK(); 1620 return NULL; 1621 } 1622 TAILQ_REMOVE(&head->so_comp, so, so_list); 1623 head->so_qlen--; 1624 SOCK_LOCK(so); 1625 so->so_qstate &= ~SQ_COMP; 1626 so->so_head = NULL; 1627 soref(so); 1628 so->so_rcv.sb_flags |= SB_UPCALL; 1629 so->so_state |= SS_NBIO; 1630 so->so_upcall = iwch_so_upcall; 1631 so->so_upcallarg = child_ep; 1632 PANIC_IF(!(so->so_state & SS_ISCONNECTED)); 1633 PANIC_IF(so->so_error); 1634 SOCK_UNLOCK(so); 1635 ACCEPT_UNLOCK(); 1636 soaccept(so, (struct sockaddr **)remote); 1637 return so; 1638} 1639 1640static void 1641process_newconn(struct iwch_ep *parent_ep) 1642{ 1643 struct socket *child_so; 1644 struct iwch_ep *child_ep; 1645 struct sockaddr_in *remote; 1646 1647 CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); 1648 child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); 1649 if (!child_ep) { 1650 log(LOG_ERR, "%s - failed to allocate ep entry!\n", 1651 __FUNCTION__); 1652 return; 1653 } 1654 child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); 1655 if (!child_so) { 
1656 log(LOG_ERR, "%s - failed to dequeue child socket!\n", 1657 __FUNCTION__); 1658 __free_ep(&child_ep->com); 1659 return; 1660 } 1661 CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 1662 inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); 1663 child_ep->com.so = child_so; 1664 child_ep->com.cm_id = NULL; 1665 child_ep->com.thread = parent_ep->com.thread; 1666 child_ep->parent_ep = parent_ep; 1667 free(remote, M_SONAME); 1668 get_ep(&parent_ep->com); 1669 child_ep->parent_ep = parent_ep; 1670 callout_init(&child_ep->timer, TRUE); 1671 state_set(&child_ep->com, MPA_REQ_WAIT); 1672 start_ep_timer(child_ep); 1673 1674 /* maybe the request has already been queued up on the socket... */ 1675 process_mpa_request(child_ep); 1676} 1677 1678static void 1679iwch_so_upcall(struct socket *so, void *arg, int waitflag) 1680{ 1681 struct iwch_ep *ep = arg; 1682 1683 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1684 mtx_lock(&req_lock); 1685 if (ep && ep->com.so && !ep->com.entry.tqe_prev) { 1686 get_ep(&ep->com); 1687 TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); 1688 taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task); 1689 } 1690 mtx_unlock(&req_lock); 1691} 1692 1693static void 1694process_socket_event(struct iwch_ep *ep) 1695{ 1696 int state = state_read(&ep->com); 1697 struct socket *so = ep->com.so; 1698 1699 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1700 if (state == CONNECTING) { 1701 process_connected(ep); 1702 return; 1703 } 1704 1705 if (state == LISTEN) { 1706 process_newconn(ep); 1707 return; 1708 } 1709 1710 /* connection error */ 1711 if (so->so_error) { 1712 process_conn_error(ep); 1713 return; 1714 } 1715 1716 /* peer close */ 1717 if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { 1718 process_peer_close(ep); 1719 return; 1720 } 1721 1722 /* 
close complete */ 1723 if (so->so_state & (SS_ISDISCONNECTED)) { 1724 process_close_complete(ep); 1725 return; 1726 } 1727 1728 /* rx data */ 1729 process_data(ep); 1730 return; 1731} 1732 1733static void 1734process_req(void *ctx, int pending) 1735{ 1736 struct iwch_ep_common *epc; 1737 1738 CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__); 1739 mtx_lock(&req_lock); 1740 while (!TAILQ_EMPTY(&req_list)) { 1741 epc = TAILQ_FIRST(&req_list); 1742 TAILQ_REMOVE(&req_list, epc, entry); 1743 epc->entry.tqe_prev = NULL; 1744 mtx_unlock(&req_lock); 1745 if (epc->so) 1746 process_socket_event((struct iwch_ep *)epc); 1747 put_ep(epc); 1748 mtx_lock(&req_lock); 1749 } 1750 mtx_unlock(&req_lock); 1751} 1752 1753int 1754iwch_cm_init(void) 1755{ 1756 TAILQ_INIT(&req_list); 1757 mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); 1758 iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, 1759 taskqueue_thread_enqueue, &iw_cxgb_taskq); 1760 if (iw_cxgb_taskq == NULL) { 1761 printf("failed to allocate iw_cxgb taskqueue\n"); 1762 return (ENOMEM); 1763 } 1764 taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); 1765 TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); 1766 t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate); 1767 t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status); 1768 return 0; 1769} 1770 1771void 1772iwch_cm_term(void) 1773{ 1774 t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL); 1775 t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL); 1776 taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); 1777 taskqueue_free(iw_cxgb_taskq); 1778} 1779 1780