iw_cxgb_cm.c revision 193272
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 193272 2009-06-01 21:17:03Z jhb $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35#include <sys/bus.h> 36#include <sys/module.h> 37#include <sys/pciio.h> 38#include <sys/conf.h> 39#include <machine/bus.h> 40#include <machine/resource.h> 41#include <sys/bus_dma.h> 42#include <sys/rman.h> 43#include <sys/ioccom.h> 44#include <sys/mbuf.h> 45#include <sys/rwlock.h> 46#include <sys/linker.h> 47#include <sys/firmware.h> 48#include <sys/socket.h> 49#include <sys/socketvar.h> 50#include <sys/sockio.h> 51#include <sys/smp.h> 52#include <sys/sysctl.h> 53#include <sys/syslog.h> 54#include <sys/queue.h> 55#include <sys/taskqueue.h> 56#include <sys/proc.h> 57#include <sys/uio.h> 58 59#include <net/route.h> 60#include <netinet/in_systm.h> 61#include <netinet/in.h> 62#include <netinet/in_pcb.h> 63#include <netinet/ip.h> 64#include <netinet/ip_var.h> 65#include <netinet/tcp_var.h> 66#include <netinet/tcp.h> 67#include <netinet/tcpip.h> 68 69#include <contrib/rdma/ib_verbs.h> 70 71#include <cxgb_include.h> 72#include <ulp/tom/cxgb_tom.h> 73#include <ulp/tom/cxgb_t3_ddp.h> 74#include <ulp/tom/cxgb_defs.h> 75#include <ulp/tom/cxgb_toepcb.h> 76#include <ulp/iw_cxgb/iw_cxgb_wr.h> 77#include <ulp/iw_cxgb/iw_cxgb_hal.h> 78#include <ulp/iw_cxgb/iw_cxgb_provider.h> 79#include <ulp/iw_cxgb/iw_cxgb_cm.h> 80#include <ulp/iw_cxgb/iw_cxgb.h> 81 82#ifdef KTR 83static char *states[] = { 84 "idle", 85 "listen", 86 "connecting", 87 "mpa_wait_req", 88 "mpa_req_sent", 89 "mpa_req_rcvd", 90 "mpa_rep_sent", 91 "fpdu_mode", 92 "aborting", 93 "closing", 94 "moribund", 95 "dead", 96 NULL, 97}; 98#endif 99 100SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters"); 101 102static int ep_timeout_secs = 10; 103TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs); 
104SYSCTL_UINT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0, 105 "CM Endpoint operation timeout in seconds (default=10)"); 106 107static int mpa_rev = 1; 108TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev); 109SYSCTL_UINT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0, 110 "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)"); 111 112static int markers_enabled = 0; 113TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled); 114SYSCTL_UINT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0, 115 "Enable MPA MARKERS (default(0)=disabled)"); 116 117static int crc_enabled = 1; 118TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled); 119SYSCTL_UINT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0, 120 "Enable MPA CRC (default(1)=enabled)"); 121 122static int rcv_win = 256 * 1024; 123TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win); 124SYSCTL_UINT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0, 125 "TCP receive window in bytes (default=256KB)"); 126 127static int snd_win = 32 * 1024; 128TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win); 129SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0, 130 "TCP send window in bytes (default=32KB)"); 131 132static unsigned int nocong = 0; 133TUNABLE_INT("hw.iw_cxgb.nocong", &nocong); 134SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0, 135 "Turn off congestion control (default=0)"); 136 137static unsigned int cong_flavor = 1; 138TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor); 139SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0, 140 "TCP Congestion control flavor (default=1)"); 141 142static void ep_timeout(void *arg); 143static void connect_reply_upcall(struct iwch_ep *ep, int status); 144static int iwch_so_upcall(struct socket *so, void *arg, int waitflag); 145 146/* 147 * Cruft to offload socket upcalls onto thread. 
*/
/* Serializes access to req_list (deferred socket-upcall work queue). */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);

/*
 * Arm (or re-arm) the endpoint's MPA/close timeout.  The first arming
 * takes an extra reference on the ep that stop_ep_timer()/ep_timeout()
 * releases.
 */
static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, TRUE);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

/* Cancel the timeout and drop the reference taken by start_ep_timer(). */
static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

/*
 * Snapshot TCP state (sequence numbers, emss, hw tid) from the offloaded
 * socket via TCP_INFO.  Returns 0 on success, -errno/-EINVAL if the
 * sockopt fails or the connection is not TOE-offloaded.
 */
static int set_tcpinfo(struct iwch_ep *ep)
{
	struct tcp_info ti;
	struct sockopt sopt;
	int err;

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_INFO;
	sopt.sopt_val = (caddr_t)&ti;
	sopt.sopt_valsize = sizeof ti;
	sopt.sopt_td = NULL;

	err = sogetopt(ep->com.so, &sopt);
	if (err) {
		printf("%s can't get tcpinfo\n", __FUNCTION__);
		return -err;
	}
	if (!(ti.tcpi_options & TCPI_OPT_TOE)) {
		printf("%s connection NOT OFFLOADED!\n", __FUNCTION__);
		return -EINVAL;
	}

	ep->snd_seq = ti.tcpi_snd_nxt;
	ep->rcv_seq = ti.tcpi_rcv_nxt;
	ep->emss = ti.__tcpi_snd_mss - sizeof(struct tcpiphdr);
	ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */
	if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS)
		ep->emss -= 12;	/* timestamps consume 12 bytes of payload space */
	if (ep->emss < 128)
		ep->emss = 128;
	return 0;
}

/* Read the ep state under its lock. */
static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

/* Set state; caller must hold epc->lock. */
static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

/* Locked state transition with KTR trace of old -> new. */
static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

/*
 * Allocate and zero an endpoint of 'size' bytes, initializing its
 * refcount (to 1), lock and condvar.  Returns NULL on allocation failure.
 */
static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

/* Final free of an ep; asserts the socket is gone and it is off req_list. */
void __free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

/* Quiesce RX on the hw tid (stubbed out: body is under #ifdef notyet). */
int
iwch_quiesce_tid(struct iwch_ep *ep)
{
#ifdef notyet
	struct cpl_set_tcb_field *req;
	struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);

	if (m == NULL)
		return (-ENOMEM);
	req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
	req->reply = 0;
	req->cpu_idx = 0;
	req->word = htons(W_TCB_RX_QUIESCE);
	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
	req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);

	m_set_priority(m, CPL_PRIORITY_DATA);
	cxgb_ofld_send(ep->com.tdev, m);
#endif
	return 0;
}

/* Resume RX on the hw tid (stubbed out: body is under #ifdef notyet). */
int
iwch_resume_tid(struct iwch_ep *ep)
{
#ifdef notyet
	struct
cpl_set_tcb_field *req;
	struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);

	if (m == NULL)
		return (-ENOMEM);
	req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
	req->reply = 0;
	req->cpu_idx = 0;
	req->word = htons(W_TCB_RX_QUIESCE);
	req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
	req->val = 0;	/* clearing the quiesce bit resumes RX */

	m_set_priority(m, CPL_PRIORITY_DATA);
	cxgb_ofld_send(ep->com.tdev, m);
#endif
	return 0;
}

/*
 * Look up a route to peer_ip.  Only the destination address is used;
 * local_ip/ports/tos are currently ignored by the lookup.  Returns the
 * rtentry (caller owns the reference) or NULL.
 */
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
	   __be16 peer_port, u8 tos)
{
	struct route iproute;
	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;

	bzero(&iproute, sizeof iproute);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof *dst;
	dst->sin_addr.s_addr = peer_ip;

	rtalloc(&iproute);
	return iproute.ro_rt;
}

/*
 * Detach the ep from its socket: clear the receive upcall, shut the
 * socket down in both directions and drop our pointer to it.
 */
static void
close_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	soupcall_clear(epc->so, SO_RCV);
	SOCK_UNLOCK(epc->so);
	soshutdown(epc->so, SHUT_WR|SHUT_RD);
	epc->so = NULL;
}

/* Half-close: stop sending but keep receiving (graceful FIN). */
static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

/*
 * Force an abortive close: set SO_LINGER with linger time 0 so the
 * subsequent close sends an RST instead of a FIN.
 */
static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}

/*
 * Build and send the MPA start request (key, flags, revision, optional
 * private data) on the connection's socket, then arm the MPA timer and
 * move to MPA_REQ_SENT.  Failures are reported via connect_reply_upcall.
 *
 * NOTE(review): m_gethdr(9) takes (how, type); passing (mpalen, M_NOWAIT)
 * here (and in the other send_mpa_* functions) looks like swapped
 * arguments -- confirm against the mbuf API in this branch.
 */
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		m_freem(m);
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

/*
 * Send an MPA reply with the REJECT flag and optional private data.
 * Returns 0 or -ENOMEM; a sosend failure panics (PANIC_IF).
 */
static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

/*
 * Send an accepting MPA reply (CRC flag from the negotiated attrs,
 * markers from local config) and move to MPA_REP_SENT.  Returns the
 * sosend error, if any.
 */
static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
		     (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
		ep->com.thread);
}

/*
 * Deliver IW_CM_EVENT_CLOSE to the consumer and drop the cm_id
 * reference; clears cm_id/qp so no further upcalls are made.
 */
static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		     ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Abortively tear the connection down: RST the socket, detach it,
 * notify the consumer, mark the ep DEAD and drop a reference.
 */
static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com);
close_complete_upcall(ep); 505 state_set(&ep->com, DEAD); 506 put_ep(&ep->com); 507} 508 509static void 510peer_close_upcall(struct iwch_ep *ep) 511{ 512 struct iw_cm_event event; 513 514 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 515 memset(&event, 0, sizeof(event)); 516 event.event = IW_CM_EVENT_DISCONNECT; 517 if (ep->com.cm_id) { 518 CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d", 519 ep, ep->com.cm_id, ep->hwtid); 520 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 521 } 522} 523 524static void 525peer_abort_upcall(struct iwch_ep *ep) 526{ 527 struct iw_cm_event event; 528 529 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 530 memset(&event, 0, sizeof(event)); 531 event.event = IW_CM_EVENT_CLOSE; 532 event.status = ECONNRESET; 533 if (ep->com.cm_id) { 534 CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep, 535 ep->com.cm_id, ep->hwtid); 536 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 537 ep->com.cm_id->rem_ref(ep->com.cm_id); 538 ep->com.cm_id = NULL; 539 ep->com.qp = NULL; 540 } 541} 542 543static void 544connect_reply_upcall(struct iwch_ep *ep, int status) 545{ 546 struct iw_cm_event event; 547 548 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status); 549 memset(&event, 0, sizeof(event)); 550 event.event = IW_CM_EVENT_CONNECT_REPLY; 551 event.status = status; 552 event.local_addr = ep->com.local_addr; 553 event.remote_addr = ep->com.remote_addr; 554 555 if ((status == 0) || (status == ECONNREFUSED)) { 556 event.private_data_len = ep->plen; 557 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 558 } 559 if (ep->com.cm_id) { 560 CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep, 561 ep->hwtid, status); 562 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 563 } 564 if (status < 0) { 565 
ep->com.cm_id->rem_ref(ep->com.cm_id); 566 ep->com.cm_id = NULL; 567 ep->com.qp = NULL; 568 } 569} 570 571static void 572connect_request_upcall(struct iwch_ep *ep) 573{ 574 struct iw_cm_event event; 575 576 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 577 memset(&event, 0, sizeof(event)); 578 event.event = IW_CM_EVENT_CONNECT_REQUEST; 579 event.local_addr = ep->com.local_addr; 580 event.remote_addr = ep->com.remote_addr; 581 event.private_data_len = ep->plen; 582 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 583 event.provider_data = ep; 584 event.so = ep->com.so; 585 if (state_read(&ep->parent_ep->com) != DEAD) 586 ep->parent_ep->com.cm_id->event_handler( 587 ep->parent_ep->com.cm_id, 588 &event); 589 put_ep(&ep->parent_ep->com); 590 ep->parent_ep = NULL; 591} 592 593static void 594established_upcall(struct iwch_ep *ep) 595{ 596 struct iw_cm_event event; 597 598 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 599 memset(&event, 0, sizeof(event)); 600 event.event = IW_CM_EVENT_ESTABLISHED; 601 if (ep->com.cm_id) { 602 CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid); 603 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 604 } 605} 606 607static void 608process_mpa_reply(struct iwch_ep *ep) 609{ 610 struct mpa_message *mpa; 611 u16 plen; 612 struct iwch_qp_attributes attrs; 613 enum iwch_qp_attr_mask mask; 614 int err; 615 struct mbuf *top, *m; 616 int flags = MSG_DONTWAIT; 617 struct uio uio; 618 int len; 619 620 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 621 622 /* 623 * Stop mpa timer. If it expired, then the state has 624 * changed and we bail since ep_timeout already aborted 625 * the connection. 
*/
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	/* Non-blocking drain of whatever has arrived on the socket. */
	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			/* nothing there yet -- re-arm and wait for the upcall */
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
			__FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	/* Append every mbuf (following both m_next and m_nextpkt chains). */
	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * if we don't even have the mpa message, then bail.
	 * NOTE(review): unlike process_mpa_request, this partial-data
	 * return leaves the ep timer stopped -- confirm another receive
	 * upcall is guaranteed to re-drive this path.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	/* note: '|' (not '&&') -- CRC is on if either side requested it */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ?
1 : 0; 735 ep->mpa_attr.version = mpa_rev; 736 if (set_tcpinfo(ep)) { 737 printf("%s set_tcpinfo error\n", __FUNCTION__); 738 goto err; 739 } 740 CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " 741 "xmit_marker_enabled=%d, version=%d", __FUNCTION__, 742 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 743 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 744 745 attrs.mpa_attr = ep->mpa_attr; 746 attrs.max_ird = ep->ird; 747 attrs.max_ord = ep->ord; 748 attrs.llp_stream_handle = ep; 749 attrs.next_state = IWCH_QP_STATE_RTS; 750 751 mask = IWCH_QP_ATTR_NEXT_STATE | 752 IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | 753 IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; 754 755 /* bind QP and TID with INIT_WR */ 756 err = iwch_modify_qp(ep->com.qp->rhp, 757 ep->com.qp, mask, &attrs, 1); 758 if (!err) 759 goto out; 760err: 761 abort_connection(ep); 762out: 763 connect_reply_upcall(ep, err); 764 return; 765} 766 767static void 768process_mpa_request(struct iwch_ep *ep) 769{ 770 struct mpa_message *mpa; 771 u16 plen; 772 int flags = MSG_DONTWAIT; 773 struct mbuf *top, *m; 774 int err; 775 struct uio uio; 776 int len; 777 778 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 779 780 /* 781 * Stop mpa timer. If it expired, then the state has 782 * changed and we bail since ep_timeout already aborted 783 * the connection. 784 */ 785 stop_ep_timer(ep); 786 if (state_read(&ep->com) != MPA_REQ_WAIT) 787 return; 788 789 uio.uio_resid = len = 1000000; 790 uio.uio_td = ep->com.thread; 791 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 792 if (err) { 793 if (err == EWOULDBLOCK) { 794 start_ep_timer(ep); 795 return; 796 } 797 err = -err; 798 goto err; 799 } 800 801 m = top; 802 do { 803 804 /* 805 * If we get more than the supported amount of private data 806 * then we must fail this connection. 
807 */ 808 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 809 CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__, 810 ep->mpa_pkt_len + m->m_len); 811 goto err; 812 } 813 814 815 /* 816 * Copy the new data into our accumulation buffer. 817 */ 818 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 819 ep->mpa_pkt_len += m->m_len; 820 821 if (!m->m_next) 822 m = m->m_nextpkt; 823 else 824 m = m->m_next; 825 } while (m); 826 827 m_freem(top); 828 829 /* 830 * If we don't even have the mpa message, then bail. 831 * We'll continue process when more data arrives. 832 */ 833 if (ep->mpa_pkt_len < sizeof(*mpa)) { 834 start_ep_timer(ep); 835 CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__, 836 ep->mpa_pkt_len); 837 return; 838 } 839 mpa = (struct mpa_message *) ep->mpa_pkt; 840 841 /* 842 * Validate MPA Header. 843 */ 844 if (mpa->revision != mpa_rev) { 845 CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); 846 goto err; 847 } 848 849 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { 850 CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); 851 goto err; 852 } 853 854 plen = ntohs(mpa->private_data_size); 855 856 /* 857 * Fail if there's too much private data. 858 */ 859 if (plen > MPA_MAX_PRIVATE_DATA) { 860 CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); 861 goto err; 862 } 863 864 /* 865 * If plen does not account for pkt size 866 */ 867 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 868 CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__, 869 ep->mpa_pkt_len); 870 goto err; 871 } 872 ep->plen = (u8) plen; 873 874 /* 875 * If we don't have all the pdata yet, then bail. 
876 */ 877 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { 878 start_ep_timer(ep); 879 CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__, 880 ep->mpa_pkt_len); 881 return; 882 } 883 884 /* 885 * If we get here we have accumulated the entire mpa 886 * start reply message including private data. 887 */ 888 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 889 ep->mpa_attr.recv_marker_enabled = markers_enabled; 890 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 891 ep->mpa_attr.version = mpa_rev; 892 if (set_tcpinfo(ep)) { 893 printf("%s set_tcpinfo error\n", __FUNCTION__); 894 goto err; 895 } 896 CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " 897 "xmit_marker_enabled=%d, version=%d", __FUNCTION__, 898 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 899 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 900 901 state_set(&ep->com, MPA_REQ_RCVD); 902 903 /* drive upcall */ 904 connect_request_upcall(ep); 905 return; 906err: 907 abort_connection(ep); 908 return; 909} 910 911static void 912process_peer_close(struct iwch_ep *ep) 913{ 914 struct iwch_qp_attributes attrs; 915 int disconnect = 1; 916 int release = 0; 917 918 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 919 920 mtx_lock(&ep->com.lock); 921 switch (ep->com.state) { 922 case MPA_REQ_WAIT: 923 __state_set(&ep->com, CLOSING); 924 break; 925 case MPA_REQ_SENT: 926 __state_set(&ep->com, CLOSING); 927 connect_reply_upcall(ep, -ECONNRESET); 928 break; 929 case MPA_REQ_RCVD: 930 931 /* 932 * We're gonna mark this puppy DEAD, but keep 933 * the reference on it until the ULP accepts or 934 * rejects the CR. 
935 */ 936 __state_set(&ep->com, CLOSING); 937 get_ep(&ep->com); 938 break; 939 case MPA_REP_SENT: 940 __state_set(&ep->com, CLOSING); 941 break; 942 case FPDU_MODE: 943 start_ep_timer(ep); 944 __state_set(&ep->com, CLOSING); 945 attrs.next_state = IWCH_QP_STATE_CLOSING; 946 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, 947 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); 948 peer_close_upcall(ep); 949 break; 950 case ABORTING: 951 disconnect = 0; 952 break; 953 case CLOSING: 954 __state_set(&ep->com, MORIBUND); 955 disconnect = 0; 956 break; 957 case MORIBUND: 958 stop_ep_timer(ep); 959 if (ep->com.cm_id && ep->com.qp) { 960 attrs.next_state = IWCH_QP_STATE_IDLE; 961 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, 962 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); 963 } 964 close_socket(&ep->com); 965 close_complete_upcall(ep); 966 __state_set(&ep->com, DEAD); 967 release = 1; 968 disconnect = 0; 969 break; 970 case DEAD: 971 disconnect = 0; 972 break; 973 default: 974 PANIC_IF(1); 975 } 976 mtx_unlock(&ep->com.lock); 977 if (disconnect) 978 iwch_ep_disconnect(ep, 0, M_NOWAIT); 979 if (release) 980 put_ep(&ep->com); 981 return; 982} 983 984static void 985process_conn_error(struct iwch_ep *ep) 986{ 987 struct iwch_qp_attributes attrs; 988 int ret; 989 int state; 990 991 state = state_read(&ep->com); 992 CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]); 993 switch (state) { 994 case MPA_REQ_WAIT: 995 stop_ep_timer(ep); 996 break; 997 case MPA_REQ_SENT: 998 stop_ep_timer(ep); 999 connect_reply_upcall(ep, -ECONNRESET); 1000 break; 1001 case MPA_REP_SENT: 1002 ep->com.rpl_err = ECONNRESET; 1003 CTR1(KTR_IW_CXGB, "waking up ep %p", ep); 1004 break; 1005 case MPA_REQ_RCVD: 1006 1007 /* 1008 * We're gonna mark this puppy DEAD, but keep 1009 * the reference on it until the ULP accepts or 1010 * rejects the CR. 
1011 */ 1012 get_ep(&ep->com); 1013 break; 1014 case MORIBUND: 1015 case CLOSING: 1016 stop_ep_timer(ep); 1017 /*FALLTHROUGH*/ 1018 case FPDU_MODE: 1019 if (ep->com.cm_id && ep->com.qp) { 1020 attrs.next_state = IWCH_QP_STATE_ERROR; 1021 ret = iwch_modify_qp(ep->com.qp->rhp, 1022 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1023 &attrs, 1); 1024 if (ret) 1025 log(LOG_ERR, 1026 "%s - qp <- error failed!\n", 1027 __FUNCTION__); 1028 } 1029 peer_abort_upcall(ep); 1030 break; 1031 case ABORTING: 1032 break; 1033 case DEAD: 1034 CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, 1035 ep->com.so->so_error); 1036 return; 1037 default: 1038 PANIC_IF(1); 1039 break; 1040 } 1041 1042 if (state != ABORTING) { 1043 close_socket(&ep->com); 1044 state_set(&ep->com, DEAD); 1045 put_ep(&ep->com); 1046 } 1047 return; 1048} 1049 1050static void 1051process_close_complete(struct iwch_ep *ep) 1052{ 1053 struct iwch_qp_attributes attrs; 1054 int release = 0; 1055 1056 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1057 PANIC_IF(!ep); 1058 1059 /* The cm_id may be null if we failed to connect */ 1060 mtx_lock(&ep->com.lock); 1061 switch (ep->com.state) { 1062 case CLOSING: 1063 __state_set(&ep->com, MORIBUND); 1064 break; 1065 case MORIBUND: 1066 stop_ep_timer(ep); 1067 if ((ep->com.cm_id) && (ep->com.qp)) { 1068 attrs.next_state = IWCH_QP_STATE_IDLE; 1069 iwch_modify_qp(ep->com.qp->rhp, 1070 ep->com.qp, 1071 IWCH_QP_ATTR_NEXT_STATE, 1072 &attrs, 1); 1073 } 1074 close_socket(&ep->com); 1075 close_complete_upcall(ep); 1076 __state_set(&ep->com, DEAD); 1077 release = 1; 1078 break; 1079 case ABORTING: 1080 break; 1081 case DEAD: 1082 default: 1083 PANIC_IF(1); 1084 break; 1085 } 1086 mtx_unlock(&ep->com.lock); 1087 if (release) 1088 put_ep(&ep->com); 1089 return; 1090} 1091 1092/* 1093 * T3A does 3 things when a TERM is received: 1094 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet 1095 * 2) generate an async 
event on the QP with the TERMINATE opcode 1096 * 3) post a TERMINATE opcde cqe into the associated CQ. 1097 * 1098 * For (1), we save the message in the qp for later consumer consumption. 1099 * For (2), we move the QP into TERMINATE, post a QP event and disconnect. 1100 * For (3), we toss the CQE in cxio_poll_cq(). 1101 * 1102 * terminate() handles case (1)... 1103 */ 1104static int 1105terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx) 1106{ 1107 struct toepcb *toep = (struct toepcb *)ctx; 1108 struct socket *so = toeptoso(toep); 1109 struct iwch_ep *ep = so->so_rcv.sb_upcallarg; 1110 1111 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1112 m_adj(m, sizeof(struct cpl_rdma_terminate)); 1113 CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len); 1114 m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer); 1115 ep->com.qp->attr.terminate_msg_len = m->m_len; 1116 ep->com.qp->attr.is_terminate_local = 0; 1117 return CPL_RET_BUF_DONE; 1118} 1119 1120static int 1121ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx) 1122{ 1123 struct toepcb *toep = (struct toepcb *)ctx; 1124 struct socket *so = toeptoso(toep); 1125 struct cpl_rdma_ec_status *rep = cplhdr(m); 1126 struct iwch_ep *ep; 1127 struct iwch_qp_attributes attrs; 1128 int release = 0; 1129 1130 ep = so->so_rcv.sb_upcallarg; 1131 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status); 1132 if (!so || !ep) { 1133 panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? 
so->so_state : -1); 1134 } 1135 mtx_lock(&ep->com.lock); 1136 switch (ep->com.state) { 1137 case CLOSING: 1138 if (!rep->status) 1139 __state_set(&ep->com, MORIBUND); 1140 else 1141 __state_set(&ep->com, ABORTING); 1142 break; 1143 case MORIBUND: 1144 stop_ep_timer(ep); 1145 if (!rep->status) { 1146 if ((ep->com.cm_id) && (ep->com.qp)) { 1147 attrs.next_state = IWCH_QP_STATE_IDLE; 1148 iwch_modify_qp(ep->com.qp->rhp, 1149 ep->com.qp, 1150 IWCH_QP_ATTR_NEXT_STATE, 1151 &attrs, 1); 1152 } 1153 close_socket(&ep->com); 1154 close_complete_upcall(ep); 1155 __state_set(&ep->com, DEAD); 1156 release = 1; 1157 } 1158 break; 1159 case DEAD: 1160 break; 1161 default: 1162 panic("unknown state: %d\n", ep->com.state); 1163 } 1164 mtx_unlock(&ep->com.lock); 1165 if (rep->status) { 1166 log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n", 1167 __FUNCTION__, ep->hwtid); 1168 attrs.next_state = IWCH_QP_STATE_ERROR; 1169 iwch_modify_qp(ep->com.qp->rhp, 1170 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1171 &attrs, 1); 1172 } 1173 if (release) 1174 put_ep(&ep->com); 1175 return CPL_RET_BUF_DONE; 1176} 1177 1178static void 1179ep_timeout(void *arg) 1180{ 1181 struct iwch_ep *ep = (struct iwch_ep *)arg; 1182 struct iwch_qp_attributes attrs; 1183 int err = 0; 1184 1185 mtx_lock(&ep->com.lock); 1186 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1187 switch (ep->com.state) { 1188 case MPA_REQ_SENT: 1189 connect_reply_upcall(ep, -ETIMEDOUT); 1190 break; 1191 case MPA_REQ_WAIT: 1192 break; 1193 case CLOSING: 1194 case MORIBUND: 1195 if (ep->com.cm_id && ep->com.qp) 1196 err = 1; 1197 break; 1198 default: 1199 panic("unknown state: %d\n", ep->com.state); 1200 } 1201 __state_set(&ep->com, ABORTING); 1202 mtx_unlock(&ep->com.lock); 1203 if (err){ 1204 attrs.next_state = IWCH_QP_STATE_ERROR; 1205 iwch_modify_qp(ep->com.qp->rhp, 1206 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1207 &attrs, 1); 1208 } 1209 abort_connection(ep); 1210 put_ep(&ep->com); 1211} 1212 
1213int 1214iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 1215{ 1216 int err; 1217 struct iwch_ep *ep = to_ep(cm_id); 1218 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1219 1220 if (state_read(&ep->com) == DEAD) { 1221 put_ep(&ep->com); 1222 return (-ECONNRESET); 1223 } 1224 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1225 if (mpa_rev == 0) { 1226 abort_connection(ep); 1227 } else { 1228 err = send_mpa_reject(ep, pdata, pdata_len); 1229 err = soshutdown(ep->com.so, 3); 1230 } 1231 return 0; 1232} 1233 1234int 1235iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1236{ 1237 int err; 1238 struct iwch_qp_attributes attrs; 1239 enum iwch_qp_attr_mask mask; 1240 struct iwch_ep *ep = to_ep(cm_id); 1241 struct iwch_dev *h = to_iwch_dev(cm_id->device); 1242 struct iwch_qp *qp = get_qhp(h, conn_param->qpn); 1243 1244 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1245 if (state_read(&ep->com) == DEAD) 1246 return (-ECONNRESET); 1247 1248 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1249 PANIC_IF(!qp); 1250 1251 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || 1252 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { 1253 abort_connection(ep); 1254 return (-EINVAL); 1255 } 1256 1257 cm_id->add_ref(cm_id); 1258 ep->com.cm_id = cm_id; 1259 ep->com.qp = qp; 1260 1261 ep->com.rpl_err = 0; 1262 ep->com.rpl_done = 0; 1263 ep->ird = conn_param->ird; 1264 ep->ord = conn_param->ord; 1265 CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); 1266 get_ep(&ep->com); 1267 1268 /* bind QP to EP and move to RTS */ 1269 attrs.mpa_attr = ep->mpa_attr; 1270 attrs.max_ird = ep->ord; 1271 attrs.max_ord = ep->ord; 1272 attrs.llp_stream_handle = ep; 1273 attrs.next_state = IWCH_QP_STATE_RTS; 1274 1275 /* bind QP and TID with INIT_WR */ 1276 mask = IWCH_QP_ATTR_NEXT_STATE | 1277 
IWCH_QP_ATTR_LLP_STREAM_HANDLE | 1278 IWCH_QP_ATTR_MPA_ATTR | 1279 IWCH_QP_ATTR_MAX_IRD | 1280 IWCH_QP_ATTR_MAX_ORD; 1281 1282 err = iwch_modify_qp(ep->com.qp->rhp, 1283 ep->com.qp, mask, &attrs, 1); 1284 1285 if (err) 1286 goto err; 1287 1288 err = send_mpa_reply(ep, conn_param->private_data, 1289 conn_param->private_data_len); 1290 if (err) 1291 goto err; 1292 state_set(&ep->com, FPDU_MODE); 1293 established_upcall(ep); 1294 put_ep(&ep->com); 1295 return 0; 1296err: 1297 ep->com.cm_id = NULL; 1298 ep->com.qp = NULL; 1299 cm_id->rem_ref(cm_id); 1300 put_ep(&ep->com); 1301 return err; 1302} 1303 1304static int init_sock(struct iwch_ep_common *epc) 1305{ 1306 int err; 1307 struct sockopt sopt; 1308 int on=1; 1309 1310 SOCK_LOCK(epc->so); 1311 soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc); 1312 epc->so->so_state |= SS_NBIO; 1313 SOCK_UNLOCK(epc->so); 1314 sopt.sopt_dir = SOPT_SET; 1315 sopt.sopt_level = SOL_SOCKET; 1316 sopt.sopt_name = SO_NO_DDP; 1317 sopt.sopt_val = (caddr_t)&on; 1318 sopt.sopt_valsize = sizeof on; 1319 sopt.sopt_td = NULL; 1320 err = sosetopt(epc->so, &sopt); 1321 if (err) 1322 printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err); 1323 sopt.sopt_dir = SOPT_SET; 1324 sopt.sopt_level = IPPROTO_TCP; 1325 sopt.sopt_name = TCP_NODELAY; 1326 sopt.sopt_val = (caddr_t)&on; 1327 sopt.sopt_valsize = sizeof on; 1328 sopt.sopt_td = NULL; 1329 err = sosetopt(epc->so, &sopt); 1330 if (err) 1331 printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err); 1332 1333 return 0; 1334} 1335 1336static int 1337is_loopback_dst(struct iw_cm_id *cm_id) 1338{ 1339 uint16_t port = cm_id->remote_addr.sin_port; 1340 struct ifaddr *ifa; 1341 1342 cm_id->remote_addr.sin_port = 0; 1343 ifa = ifa_ifwithaddr((struct sockaddr *)&cm_id->remote_addr); 1344 cm_id->remote_addr.sin_port = port; 1345 return (ifa != NULL); 1346} 1347 1348int 1349iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1350{ 1351 int err = 0; 1352 struct iwch_dev *h = 
to_iwch_dev(cm_id->device); 1353 struct iwch_ep *ep; 1354 struct rtentry *rt; 1355 struct toedev *tdev; 1356 1357 if (is_loopback_dst(cm_id)) { 1358 err = -ENOSYS; 1359 goto out; 1360 } 1361 1362 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1363 if (!ep) { 1364 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1365 err = (-ENOMEM); 1366 goto out; 1367 } 1368 callout_init(&ep->timer, TRUE); 1369 ep->plen = conn_param->private_data_len; 1370 if (ep->plen) 1371 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 1372 conn_param->private_data, ep->plen); 1373 ep->ird = conn_param->ird; 1374 ep->ord = conn_param->ord; 1375 1376 cm_id->add_ref(cm_id); 1377 ep->com.cm_id = cm_id; 1378 ep->com.qp = get_qhp(h, conn_param->qpn); 1379 ep->com.thread = curthread; 1380 PANIC_IF(!ep->com.qp); 1381 CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn, 1382 ep->com.qp, cm_id); 1383 1384 ep->com.so = cm_id->so; 1385 err = init_sock(&ep->com); 1386 if (err) 1387 goto fail2; 1388 1389 /* find a route */ 1390 rt = find_route(cm_id->local_addr.sin_addr.s_addr, 1391 cm_id->remote_addr.sin_addr.s_addr, 1392 cm_id->local_addr.sin_port, 1393 cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); 1394 if (!rt) { 1395 printf("%s - cannot find route.\n", __FUNCTION__); 1396 err = EHOSTUNREACH; 1397 goto fail2; 1398 } 1399 1400 if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) { 1401 printf("%s - interface not TOE capable.\n", __FUNCTION__); 1402 goto fail3; 1403 } 1404 tdev = TOEDEV(rt->rt_ifp); 1405 if (tdev == NULL) { 1406 printf("%s - No toedev for interface.\n", __FUNCTION__); 1407 goto fail3; 1408 } 1409 if (!tdev->tod_can_offload(tdev, ep->com.so)) { 1410 printf("%s - interface cannot offload!.\n", __FUNCTION__); 1411 goto fail3; 1412 } 1413 RTFREE(rt); 1414 1415 state_set(&ep->com, CONNECTING); 1416 ep->com.local_addr = cm_id->local_addr; 1417 ep->com.remote_addr = cm_id->remote_addr; 1418 err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 1419 ep->com.thread); 1420 if 
(!err) 1421 goto out; 1422fail3: 1423 RTFREE(ep->dst); 1424fail2: 1425 put_ep(&ep->com); 1426out: 1427 return err; 1428} 1429 1430int 1431iwch_create_listen(struct iw_cm_id *cm_id, int backlog) 1432{ 1433 int err = 0; 1434 struct iwch_listen_ep *ep; 1435 1436 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1437 if (!ep) { 1438 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1439 err = ENOMEM; 1440 goto out; 1441 } 1442 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1443 cm_id->add_ref(cm_id); 1444 ep->com.cm_id = cm_id; 1445 ep->backlog = backlog; 1446 ep->com.local_addr = cm_id->local_addr; 1447 ep->com.thread = curthread; 1448 state_set(&ep->com, LISTEN); 1449 1450 ep->com.so = cm_id->so; 1451 err = init_sock(&ep->com); 1452 if (err) 1453 goto fail; 1454 1455 err = solisten(ep->com.so, ep->backlog, ep->com.thread); 1456 if (!err) { 1457 cm_id->provider_data = ep; 1458 goto out; 1459 } 1460 close_socket(&ep->com); 1461fail: 1462 cm_id->rem_ref(cm_id); 1463 put_ep(&ep->com); 1464out: 1465 return err; 1466} 1467 1468int 1469iwch_destroy_listen(struct iw_cm_id *cm_id) 1470{ 1471 struct iwch_listen_ep *ep = to_listen_ep(cm_id); 1472 1473 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1474 1475 state_set(&ep->com, DEAD); 1476 close_socket(&ep->com); 1477 cm_id->rem_ref(cm_id); 1478 put_ep(&ep->com); 1479 return 0; 1480} 1481 1482int 1483iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags) 1484{ 1485 int close = 0; 1486 1487 mtx_lock(&ep->com.lock); 1488 1489 PANIC_IF(!ep); 1490 PANIC_IF(!ep->com.so); 1491 1492 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep, 1493 ep->com.so, states[ep->com.state], abrupt); 1494 1495 if (ep->com.state == DEAD) { 1496 CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep); 1497 goto out; 1498 } 1499 1500 if (abrupt) { 1501 if (ep->com.state != ABORTING) { 1502 ep->com.state = ABORTING; 1503 close = 1; 1504 } 1505 goto out; 1506 } 1507 1508 switch (ep->com.state) { 1509 case MPA_REQ_WAIT: 1510 case 
MPA_REQ_SENT: 1511 case MPA_REQ_RCVD: 1512 case MPA_REP_SENT: 1513 case FPDU_MODE: 1514 start_ep_timer(ep); 1515 ep->com.state = CLOSING; 1516 close = 1; 1517 break; 1518 case CLOSING: 1519 ep->com.state = MORIBUND; 1520 close = 1; 1521 break; 1522 case MORIBUND: 1523 case ABORTING: 1524 break; 1525 default: 1526 panic("unknown state: %d\n", ep->com.state); 1527 break; 1528 } 1529out: 1530 mtx_unlock(&ep->com.lock); 1531 if (close) { 1532 if (abrupt) 1533 abort_connection(ep); 1534 else 1535 shutdown_socket(&ep->com); 1536 } 1537 return 0; 1538} 1539 1540static void 1541process_data(struct iwch_ep *ep) 1542{ 1543 struct sockaddr_in *local, *remote; 1544 1545 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1546 1547 switch (state_read(&ep->com)) { 1548 case MPA_REQ_SENT: 1549 process_mpa_reply(ep); 1550 break; 1551 case MPA_REQ_WAIT: 1552 1553 /* 1554 * XXX 1555 * Set local and remote addrs here because when we 1556 * dequeue the newly accepted socket, they aren't set 1557 * yet in the pcb! 1558 */ 1559 in_getsockaddr(ep->com.so, (struct sockaddr **)&local); 1560 in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); 1561 CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__, 1562 inet_ntoa(local->sin_addr), 1563 inet_ntoa(remote->sin_addr)); 1564 ep->com.local_addr = *local; 1565 ep->com.remote_addr = *remote; 1566 free(local, M_SONAME); 1567 free(remote, M_SONAME); 1568 process_mpa_request(ep); 1569 break; 1570 default: 1571 if (ep->com.so->so_rcv.sb_cc) 1572 printf("%s Unexpected streaming data." 
1573 " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n", 1574 __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, 1575 ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb); 1576 break; 1577 } 1578 return; 1579} 1580 1581static void 1582process_connected(struct iwch_ep *ep) 1583{ 1584 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1585 if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) { 1586 send_mpa_req(ep); 1587 } else { 1588 connect_reply_upcall(ep, -ep->com.so->so_error); 1589 close_socket(&ep->com); 1590 state_set(&ep->com, DEAD); 1591 put_ep(&ep->com); 1592 } 1593} 1594 1595static struct socket * 1596dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep) 1597{ 1598 struct socket *so; 1599 1600 ACCEPT_LOCK(); 1601 so = TAILQ_FIRST(&head->so_comp); 1602 if (!so) { 1603 ACCEPT_UNLOCK(); 1604 return NULL; 1605 } 1606 TAILQ_REMOVE(&head->so_comp, so, so_list); 1607 head->so_qlen--; 1608 SOCK_LOCK(so); 1609 so->so_qstate &= ~SQ_COMP; 1610 so->so_head = NULL; 1611 soref(so); 1612 soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep); 1613 so->so_state |= SS_NBIO; 1614 PANIC_IF(!(so->so_state & SS_ISCONNECTED)); 1615 PANIC_IF(so->so_error); 1616 SOCK_UNLOCK(so); 1617 ACCEPT_UNLOCK(); 1618 soaccept(so, (struct sockaddr **)remote); 1619 return so; 1620} 1621 1622static void 1623process_newconn(struct iwch_ep *parent_ep) 1624{ 1625 struct socket *child_so; 1626 struct iwch_ep *child_ep; 1627 struct sockaddr_in *remote; 1628 1629 CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); 1630 child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); 1631 if (!child_ep) { 1632 log(LOG_ERR, "%s - failed to allocate ep entry!\n", 1633 __FUNCTION__); 1634 return; 1635 } 1636 child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); 1637 if (!child_so) { 1638 log(LOG_ERR, "%s - failed to dequeue child 
socket!\n", 1639 __FUNCTION__); 1640 __free_ep(&child_ep->com); 1641 return; 1642 } 1643 CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 1644 inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); 1645 child_ep->com.so = child_so; 1646 child_ep->com.cm_id = NULL; 1647 child_ep->com.thread = parent_ep->com.thread; 1648 child_ep->parent_ep = parent_ep; 1649 free(remote, M_SONAME); 1650 get_ep(&parent_ep->com); 1651 child_ep->parent_ep = parent_ep; 1652 callout_init(&child_ep->timer, TRUE); 1653 state_set(&child_ep->com, MPA_REQ_WAIT); 1654 start_ep_timer(child_ep); 1655 1656 /* maybe the request has already been queued up on the socket... */ 1657 process_mpa_request(child_ep); 1658} 1659 1660static int 1661iwch_so_upcall(struct socket *so, void *arg, int waitflag) 1662{ 1663 struct iwch_ep *ep = arg; 1664 1665 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1666 mtx_lock(&req_lock); 1667 if (ep && ep->com.so && !ep->com.entry.tqe_prev) { 1668 get_ep(&ep->com); 1669 TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); 1670 taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task); 1671 } 1672 mtx_unlock(&req_lock); 1673 return (SU_OK); 1674} 1675 1676static void 1677process_socket_event(struct iwch_ep *ep) 1678{ 1679 int state = state_read(&ep->com); 1680 struct socket *so = ep->com.so; 1681 1682 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1683 if (state == CONNECTING) { 1684 process_connected(ep); 1685 return; 1686 } 1687 1688 if (state == LISTEN) { 1689 process_newconn(ep); 1690 return; 1691 } 1692 1693 /* connection error */ 1694 if (so->so_error) { 1695 process_conn_error(ep); 1696 return; 1697 } 1698 1699 /* peer close */ 1700 if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { 1701 process_peer_close(ep); 1702 return; 1703 } 1704 1705 /* close complete */ 1706 if 
(so->so_state & (SS_ISDISCONNECTED)) { 1707 process_close_complete(ep); 1708 return; 1709 } 1710 1711 /* rx data */ 1712 process_data(ep); 1713 return; 1714} 1715 1716static void 1717process_req(void *ctx, int pending) 1718{ 1719 struct iwch_ep_common *epc; 1720 1721 CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__); 1722 mtx_lock(&req_lock); 1723 while (!TAILQ_EMPTY(&req_list)) { 1724 epc = TAILQ_FIRST(&req_list); 1725 TAILQ_REMOVE(&req_list, epc, entry); 1726 epc->entry.tqe_prev = NULL; 1727 mtx_unlock(&req_lock); 1728 if (epc->so) 1729 process_socket_event((struct iwch_ep *)epc); 1730 put_ep(epc); 1731 mtx_lock(&req_lock); 1732 } 1733 mtx_unlock(&req_lock); 1734} 1735 1736int 1737iwch_cm_init(void) 1738{ 1739 TAILQ_INIT(&req_list); 1740 mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); 1741 iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, 1742 taskqueue_thread_enqueue, &iw_cxgb_taskq); 1743 if (iw_cxgb_taskq == NULL) { 1744 printf("failed to allocate iw_cxgb taskqueue\n"); 1745 return (ENOMEM); 1746 } 1747 taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); 1748 TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); 1749 t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate); 1750 t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status); 1751 return 0; 1752} 1753 1754void 1755iwch_cm_term(void) 1756{ 1757 t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL); 1758 t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL); 1759 taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); 1760 taskqueue_free(iw_cxgb_taskq); 1761} 1762 1763