iw_cxgb_cm.c revision 194622
1240116Smarcel/************************************************************************** 2240116Smarcel 3240116SmarcelCopyright (c) 2007, Chelsio Inc. 4240116SmarcelAll rights reserved. 5240116Smarcel 6240116SmarcelRedistribution and use in source and binary forms, with or without 7240116Smarcelmodification, are permitted provided that the following conditions are met: 8240116Smarcel 9240116Smarcel 1. Redistributions of source code must retain the above copyright notice, 10240116Smarcel this list of conditions and the following disclaimer. 11240116Smarcel 12240116Smarcel 2. Neither the name of the Chelsio Corporation nor the names of its 13240116Smarcel contributors may be used to endorse or promote products derived from 14240116Smarcel this software without specific prior written permission. 15240116Smarcel 16240116SmarcelTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17240116SmarcelAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18240116SmarcelIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19240116SmarcelARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20240116SmarcelLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21240116SmarcelCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22240116SmarcelSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23240116SmarcelINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24240116SmarcelCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25240116SmarcelARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26240116SmarcelPOSSIBILITY OF SUCH DAMAGE. 
27240116Smarcel 28240116Smarcel***************************************************************************/ 29240116Smarcel#include <sys/cdefs.h> 30240116Smarcel__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c 194622 2009-06-22 10:59:34Z rwatson $"); 31240116Smarcel 32240116Smarcel#include <sys/param.h> 33240116Smarcel#include <sys/systm.h> 34240116Smarcel#include <sys/kernel.h> 35240116Smarcel#include <sys/bus.h> 36240116Smarcel#include <sys/module.h> 37240116Smarcel#include <sys/pciio.h> 38240116Smarcel#include <sys/conf.h> 39240116Smarcel#include <machine/bus.h> 40240116Smarcel#include <machine/resource.h> 41240116Smarcel#include <sys/bus_dma.h> 42240116Smarcel#include <sys/rman.h> 43240116Smarcel#include <sys/ioccom.h> 44240116Smarcel#include <sys/mbuf.h> 45240116Smarcel#include <sys/rwlock.h> 46240116Smarcel#include <sys/linker.h> 47240116Smarcel#include <sys/firmware.h> 48240116Smarcel#include <sys/socket.h> 49240116Smarcel#include <sys/socketvar.h> 50240116Smarcel#include <sys/sockio.h> 51240116Smarcel#include <sys/smp.h> 52240116Smarcel#include <sys/sysctl.h> 53240116Smarcel#include <sys/syslog.h> 54240116Smarcel#include <sys/queue.h> 55240116Smarcel#include <sys/taskqueue.h> 56240116Smarcel#include <sys/proc.h> 57240116Smarcel#include <sys/uio.h> 58240116Smarcel 59240116Smarcel#include <net/route.h> 60240116Smarcel#include <netinet/in_systm.h> 61240116Smarcel#include <netinet/in.h> 62240116Smarcel#include <netinet/in_pcb.h> 63240116Smarcel#include <netinet/ip.h> 64240116Smarcel#include <netinet/ip_var.h> 65240116Smarcel#include <netinet/tcp_var.h> 66240116Smarcel#include <netinet/tcp.h> 67240116Smarcel#include <netinet/tcpip.h> 68240116Smarcel 69240116Smarcel#include <contrib/rdma/ib_verbs.h> 70240116Smarcel 71240116Smarcel#include <cxgb_include.h> 72240116Smarcel#include <ulp/tom/cxgb_tom.h> 73240116Smarcel#include <ulp/tom/cxgb_t3_ddp.h> 74240116Smarcel#include <ulp/tom/cxgb_defs.h> 75240116Smarcel#include <ulp/tom/cxgb_toepcb.h> 
76240116Smarcel#include <ulp/iw_cxgb/iw_cxgb_wr.h> 77240116Smarcel#include <ulp/iw_cxgb/iw_cxgb_hal.h> 78240116Smarcel#include <ulp/iw_cxgb/iw_cxgb_provider.h> 79240116Smarcel#include <ulp/iw_cxgb/iw_cxgb_cm.h> 80240116Smarcel#include <ulp/iw_cxgb/iw_cxgb.h> 81240116Smarcel 82240116Smarcel#ifdef KTR 83240116Smarcelstatic char *states[] = { 84240116Smarcel "idle", 85240116Smarcel "listen", 86240116Smarcel "connecting", 87240116Smarcel "mpa_wait_req", 88240116Smarcel "mpa_req_sent", 89240116Smarcel "mpa_req_rcvd", 90240116Smarcel "mpa_rep_sent", 91240116Smarcel "fpdu_mode", 92 "aborting", 93 "closing", 94 "moribund", 95 "dead", 96 NULL, 97}; 98#endif 99 100SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters"); 101 102static int ep_timeout_secs = 10; 103TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs); 104SYSCTL_UINT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0, 105 "CM Endpoint operation timeout in seconds (default=10)"); 106 107static int mpa_rev = 1; 108TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev); 109SYSCTL_UINT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0, 110 "MPA Revision, 0 supports amso1100, 1 is spec compliant. 
(default=1)"); 111 112static int markers_enabled = 0; 113TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled); 114SYSCTL_UINT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0, 115 "Enable MPA MARKERS (default(0)=disabled)"); 116 117static int crc_enabled = 1; 118TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled); 119SYSCTL_UINT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0, 120 "Enable MPA CRC (default(1)=enabled)"); 121 122static int rcv_win = 256 * 1024; 123TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win); 124SYSCTL_UINT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0, 125 "TCP receive window in bytes (default=256KB)"); 126 127static int snd_win = 32 * 1024; 128TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win); 129SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0, 130 "TCP send window in bytes (default=32KB)"); 131 132static unsigned int nocong = 0; 133TUNABLE_INT("hw.iw_cxgb.nocong", &nocong); 134SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0, 135 "Turn off congestion control (default=0)"); 136 137static unsigned int cong_flavor = 1; 138TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor); 139SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0, 140 "TCP Congestion control flavor (default=1)"); 141 142static void ep_timeout(void *arg); 143static void connect_reply_upcall(struct iwch_ep *ep, int status); 144static int iwch_so_upcall(struct socket *so, void *arg, int waitflag); 145 146/* 147 * Cruft to offload socket upcalls onto thread. 
148 */ 149static struct mtx req_lock; 150static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list; 151static struct task iw_cxgb_task; 152static struct taskqueue *iw_cxgb_taskq; 153static void process_req(void *ctx, int pending); 154 155static void 156start_ep_timer(struct iwch_ep *ep) 157{ 158 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 159 if (callout_pending(&ep->timer)) { 160 CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep); 161 callout_deactivate(&ep->timer); 162 callout_drain(&ep->timer); 163 } else { 164 /* 165 * XXX this looks racy 166 */ 167 get_ep(&ep->com); 168 callout_init(&ep->timer, TRUE); 169 } 170 callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep); 171} 172 173static void 174stop_ep_timer(struct iwch_ep *ep) 175{ 176 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 177 callout_drain(&ep->timer); 178 put_ep(&ep->com); 179} 180 181static int set_tcpinfo(struct iwch_ep *ep) 182{ 183 struct tcp_info ti; 184 struct sockopt sopt; 185 int err; 186 187 sopt.sopt_dir = SOPT_GET; 188 sopt.sopt_level = IPPROTO_TCP; 189 sopt.sopt_name = TCP_INFO; 190 sopt.sopt_val = (caddr_t)&ti; 191 sopt.sopt_valsize = sizeof ti; 192 sopt.sopt_td = NULL; 193 194 err = sogetopt(ep->com.so, &sopt); 195 if (err) { 196 printf("%s can't get tcpinfo\n", __FUNCTION__); 197 return -err; 198 } 199 if (!(ti.tcpi_options & TCPI_OPT_TOE)) { 200 printf("%s connection NOT OFFLOADED!\n", __FUNCTION__); 201 return -EINVAL; 202 } 203 204 ep->snd_seq = ti.tcpi_snd_nxt; 205 ep->rcv_seq = ti.tcpi_rcv_nxt; 206 ep->emss = ti.__tcpi_snd_mss - sizeof(struct tcpiphdr); 207 ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */ 208 if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS) 209 ep->emss -= 12; 210 if (ep->emss < 128) 211 ep->emss = 128; 212 return 0; 213} 214 215static enum iwch_ep_state 216state_read(struct iwch_ep_common *epc) 217{ 218 enum iwch_ep_state state; 219 220 mtx_lock(&epc->lock); 221 state = epc->state; 222 mtx_unlock(&epc->lock); 223 return state; 
224} 225 226static void 227__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) 228{ 229 epc->state = new; 230} 231 232static void 233state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) 234{ 235 236 mtx_lock(&epc->lock); 237 CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]); 238 __state_set(epc, new); 239 mtx_unlock(&epc->lock); 240 return; 241} 242 243static void * 244alloc_ep(int size, int flags) 245{ 246 struct iwch_ep_common *epc; 247 248 epc = malloc(size, M_DEVBUF, flags); 249 if (epc) { 250 memset(epc, 0, size); 251 refcount_init(&epc->refcount, 1); 252 mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK); 253 cv_init(&epc->waitq, "iwch_epc cv"); 254 } 255 CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc); 256 return epc; 257} 258 259void __free_ep(struct iwch_ep_common *epc) 260{ 261 CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]); 262 KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so)); 263 KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc)); 264 free(epc, M_DEVBUF); 265} 266 267int 268iwch_quiesce_tid(struct iwch_ep *ep) 269{ 270#ifdef notyet 271 struct cpl_set_tcb_field *req; 272 struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT); 273 274 if (m == NULL) 275 return (-ENOMEM); 276 req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req)); 277 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 278 req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); 279 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); 280 req->reply = 0; 281 req->cpu_idx = 0; 282 req->word = htons(W_TCB_RX_QUIESCE); 283 req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); 284 req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); 285 286 m_set_priority(m, CPL_PRIORITY_DATA); 287 cxgb_ofld_send(ep->com.tdev, m); 288#endif 289 return 0; 290} 291 292int 293iwch_resume_tid(struct iwch_ep *ep) 294{ 295#ifdef notyet 296 struct 
cpl_set_tcb_field *req; 297 struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT); 298 299 if (m == NULL) 300 return (-ENOMEM); 301 req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req)); 302 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 303 req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); 304 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); 305 req->reply = 0; 306 req->cpu_idx = 0; 307 req->word = htons(W_TCB_RX_QUIESCE); 308 req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); 309 req->val = 0; 310 311 m_set_priority(m, CPL_PRIORITY_DATA); 312 cxgb_ofld_send(ep->com.tdev, m); 313#endif 314 return 0; 315} 316 317static struct rtentry * 318find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, 319 __be16 peer_port, u8 tos) 320{ 321 struct route iproute; 322 struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; 323 324 bzero(&iproute, sizeof iproute); 325 dst->sin_family = AF_INET; 326 dst->sin_len = sizeof *dst; 327 dst->sin_addr.s_addr = peer_ip; 328 329 rtalloc(&iproute); 330 return iproute.ro_rt; 331} 332 333static void 334close_socket(struct iwch_ep_common *epc) 335{ 336 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); 337 SOCK_LOCK(epc->so); 338 soupcall_clear(epc->so, SO_RCV); 339 SOCK_UNLOCK(epc->so); 340 soshutdown(epc->so, SHUT_WR|SHUT_RD); 341 epc->so = NULL; 342} 343 344static void 345shutdown_socket(struct iwch_ep_common *epc) 346{ 347 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); 348 soshutdown(epc->so, SHUT_WR); 349} 350 351static void 352abort_socket(struct iwch_ep *ep) 353{ 354 struct sockopt sopt; 355 int err; 356 struct linger l; 357 358 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 359 l.l_onoff = 1; 360 l.l_linger = 0; 361 362 /* linger_time of 0 forces RST to be sent */ 363 sopt.sopt_dir = SOPT_SET; 364 sopt.sopt_level = SOL_SOCKET; 365 sopt.sopt_name = 
SO_LINGER; 366 sopt.sopt_val = (caddr_t)&l; 367 sopt.sopt_valsize = sizeof l; 368 sopt.sopt_td = NULL; 369 err = sosetopt(ep->com.so, &sopt); 370 if (err) 371 printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err); 372} 373 374static void 375send_mpa_req(struct iwch_ep *ep) 376{ 377 int mpalen; 378 struct mpa_message *mpa; 379 struct mbuf *m; 380 int err; 381 382 CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen); 383 384 mpalen = sizeof(*mpa) + ep->plen; 385 m = m_gethdr(mpalen, M_NOWAIT); 386 if (m == NULL) { 387 connect_reply_upcall(ep, -ENOMEM); 388 return; 389 } 390 mpa = mtod(m, struct mpa_message *); 391 m->m_len = mpalen; 392 m->m_pkthdr.len = mpalen; 393 memset(mpa, 0, sizeof(*mpa)); 394 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 395 mpa->flags = (crc_enabled ? MPA_CRC : 0) | 396 (markers_enabled ? MPA_MARKERS : 0); 397 mpa->private_data_size = htons(ep->plen); 398 mpa->revision = mpa_rev; 399 if (ep->plen) 400 memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); 401 402 err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); 403 if (err) { 404 m_freem(m); 405 connect_reply_upcall(ep, -ENOMEM); 406 return; 407 } 408 409 start_ep_timer(ep); 410 state_set(&ep->com, MPA_REQ_SENT); 411 return; 412} 413 414static int 415send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) 416{ 417 int mpalen; 418 struct mpa_message *mpa; 419 struct mbuf *m; 420 int err; 421 422 CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen); 423 424 mpalen = sizeof(*mpa) + plen; 425 426 m = m_gethdr(mpalen, M_NOWAIT); 427 if (m == NULL) { 428 printf("%s - cannot alloc mbuf!\n", __FUNCTION__); 429 return (-ENOMEM); 430 } 431 mpa = mtod(m, struct mpa_message *); 432 m->m_len = mpalen; 433 m->m_pkthdr.len = mpalen; 434 memset(mpa, 0, sizeof(*mpa)); 435 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 436 mpa->flags = MPA_REJECT; 437 mpa->revision = mpa_rev; 438 mpa->private_data_size = htons(plen); 
439 if (plen) 440 memcpy(mpa->private_data, pdata, plen); 441 err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); 442 PANIC_IF(err); 443 return 0; 444} 445 446static int 447send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) 448{ 449 int mpalen; 450 struct mpa_message *mpa; 451 struct mbuf *m; 452 453 CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen); 454 455 mpalen = sizeof(*mpa) + plen; 456 457 m = m_gethdr(mpalen, M_NOWAIT); 458 if (m == NULL) { 459 printf("%s - cannot alloc mbuf!\n", __FUNCTION__); 460 return (-ENOMEM); 461 } 462 mpa = mtod(m, struct mpa_message *); 463 m->m_len = mpalen; 464 m->m_pkthdr.len = mpalen; 465 memset(mpa, 0, sizeof(*mpa)); 466 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 467 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | 468 (markers_enabled ? MPA_MARKERS : 0); 469 mpa->revision = mpa_rev; 470 mpa->private_data_size = htons(plen); 471 if (plen) 472 memcpy(mpa->private_data, pdata, plen); 473 474 state_set(&ep->com, MPA_REP_SENT); 475 return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 476 ep->com.thread); 477} 478 479static void 480close_complete_upcall(struct iwch_ep *ep) 481{ 482 struct iw_cm_event event; 483 484 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 485 memset(&event, 0, sizeof(event)); 486 event.event = IW_CM_EVENT_CLOSE; 487 if (ep->com.cm_id) { 488 CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d", 489 ep, ep->com.cm_id, ep->hwtid); 490 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 491 ep->com.cm_id->rem_ref(ep->com.cm_id); 492 ep->com.cm_id = NULL; 493 ep->com.qp = NULL; 494 } 495} 496 497static void 498abort_connection(struct iwch_ep *ep) 499{ 500 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 501 state_set(&ep->com, ABORTING); 502 abort_socket(ep); 503 close_socket(&ep->com); 504 
close_complete_upcall(ep); 505 state_set(&ep->com, DEAD); 506 put_ep(&ep->com); 507} 508 509static void 510peer_close_upcall(struct iwch_ep *ep) 511{ 512 struct iw_cm_event event; 513 514 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 515 memset(&event, 0, sizeof(event)); 516 event.event = IW_CM_EVENT_DISCONNECT; 517 if (ep->com.cm_id) { 518 CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d", 519 ep, ep->com.cm_id, ep->hwtid); 520 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 521 } 522} 523 524static void 525peer_abort_upcall(struct iwch_ep *ep) 526{ 527 struct iw_cm_event event; 528 529 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 530 memset(&event, 0, sizeof(event)); 531 event.event = IW_CM_EVENT_CLOSE; 532 event.status = ECONNRESET; 533 if (ep->com.cm_id) { 534 CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep, 535 ep->com.cm_id, ep->hwtid); 536 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 537 ep->com.cm_id->rem_ref(ep->com.cm_id); 538 ep->com.cm_id = NULL; 539 ep->com.qp = NULL; 540 } 541} 542 543static void 544connect_reply_upcall(struct iwch_ep *ep, int status) 545{ 546 struct iw_cm_event event; 547 548 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status); 549 memset(&event, 0, sizeof(event)); 550 event.event = IW_CM_EVENT_CONNECT_REPLY; 551 event.status = status; 552 event.local_addr = ep->com.local_addr; 553 event.remote_addr = ep->com.remote_addr; 554 555 if ((status == 0) || (status == ECONNREFUSED)) { 556 event.private_data_len = ep->plen; 557 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 558 } 559 if (ep->com.cm_id) { 560 CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep, 561 ep->hwtid, status); 562 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 563 } 564 if (status < 0) { 565 
ep->com.cm_id->rem_ref(ep->com.cm_id); 566 ep->com.cm_id = NULL; 567 ep->com.qp = NULL; 568 } 569} 570 571static void 572connect_request_upcall(struct iwch_ep *ep) 573{ 574 struct iw_cm_event event; 575 576 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 577 memset(&event, 0, sizeof(event)); 578 event.event = IW_CM_EVENT_CONNECT_REQUEST; 579 event.local_addr = ep->com.local_addr; 580 event.remote_addr = ep->com.remote_addr; 581 event.private_data_len = ep->plen; 582 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 583 event.provider_data = ep; 584 event.so = ep->com.so; 585 if (state_read(&ep->parent_ep->com) != DEAD) 586 ep->parent_ep->com.cm_id->event_handler( 587 ep->parent_ep->com.cm_id, 588 &event); 589 put_ep(&ep->parent_ep->com); 590 ep->parent_ep = NULL; 591} 592 593static void 594established_upcall(struct iwch_ep *ep) 595{ 596 struct iw_cm_event event; 597 598 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 599 memset(&event, 0, sizeof(event)); 600 event.event = IW_CM_EVENT_ESTABLISHED; 601 if (ep->com.cm_id) { 602 CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid); 603 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 604 } 605} 606 607static void 608process_mpa_reply(struct iwch_ep *ep) 609{ 610 struct mpa_message *mpa; 611 u16 plen; 612 struct iwch_qp_attributes attrs; 613 enum iwch_qp_attr_mask mask; 614 int err; 615 struct mbuf *top, *m; 616 int flags = MSG_DONTWAIT; 617 struct uio uio; 618 int len; 619 620 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 621 622 /* 623 * Stop mpa timer. If it expired, then the state has 624 * changed and we bail since ep_timeout already aborted 625 * the connection. 
626 */ 627 stop_ep_timer(ep); 628 if (state_read(&ep->com) != MPA_REQ_SENT) 629 return; 630 631 uio.uio_resid = len = 1000000; 632 uio.uio_td = ep->com.thread; 633 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 634 if (err) { 635 if (err == EWOULDBLOCK) { 636 start_ep_timer(ep); 637 return; 638 } 639 err = -err; 640 goto err; 641 } 642 643 if (ep->com.so->so_rcv.sb_mb) { 644 printf("%s data after soreceive called! so %p sb_mb %p top %p\n", 645 __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); 646 } 647 648 m = top; 649 do { 650 /* 651 * If we get more than the supported amount of private data 652 * then we must fail this connection. 653 */ 654 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 655 err = (-EINVAL); 656 goto err; 657 } 658 659 /* 660 * copy the new data into our accumulation buffer. 661 */ 662 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 663 ep->mpa_pkt_len += m->m_len; 664 if (!m->m_next) 665 m = m->m_nextpkt; 666 else 667 m = m->m_next; 668 } while (m); 669 670 m_freem(top); 671 672 /* 673 * if we don't even have the mpa message, then bail. 674 */ 675 if (ep->mpa_pkt_len < sizeof(*mpa)) 676 return; 677 mpa = (struct mpa_message *)ep->mpa_pkt; 678 679 /* Validate MPA header. */ 680 if (mpa->revision != mpa_rev) { 681 CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); 682 err = EPROTO; 683 goto err; 684 } 685 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 686 CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); 687 err = EPROTO; 688 goto err; 689 } 690 691 plen = ntohs(mpa->private_data_size); 692 693 /* 694 * Fail if there's too much private data. 
695 */ 696 if (plen > MPA_MAX_PRIVATE_DATA) { 697 CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); 698 err = EPROTO; 699 goto err; 700 } 701 702 /* 703 * If plen does not account for pkt size 704 */ 705 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 706 CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len); 707 err = EPROTO; 708 goto err; 709 } 710 711 ep->plen = (u8) plen; 712 713 /* 714 * If we don't have all the pdata yet, then bail. 715 * We'll continue process when more data arrives. 716 */ 717 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 718 return; 719 720 if (mpa->flags & MPA_REJECT) { 721 err = ECONNREFUSED; 722 goto err; 723 } 724 725 /* 726 * If we get here we have accumulated the entire mpa 727 * start reply message including private data. And 728 * the MPA header is valid. 729 */ 730 CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__); 731 state_set(&ep->com, FPDU_MODE); 732 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 733 ep->mpa_attr.recv_marker_enabled = markers_enabled; 734 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; 735 ep->mpa_attr.version = mpa_rev; 736 if (set_tcpinfo(ep)) { 737 printf("%s set_tcpinfo error\n", __FUNCTION__); 738 goto err; 739 } 740 CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " 741 "xmit_marker_enabled=%d, version=%d", __FUNCTION__, 742 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 743 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 744 745 attrs.mpa_attr = ep->mpa_attr; 746 attrs.max_ird = ep->ird; 747 attrs.max_ord = ep->ord; 748 attrs.llp_stream_handle = ep; 749 attrs.next_state = IWCH_QP_STATE_RTS; 750 751 mask = IWCH_QP_ATTR_NEXT_STATE | 752 IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | 753 IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; 754 755 /* bind QP and TID with INIT_WR */ 756 err = iwch_modify_qp(ep->com.qp->rhp, 757 ep->com.qp, mask, &attrs, 1); 758 if (!err) 759 goto out; 760err: 761 abort_connection(ep); 762out: 763 connect_reply_upcall(ep, err); 764 return; 765} 766 767static void 768process_mpa_request(struct iwch_ep *ep) 769{ 770 struct mpa_message *mpa; 771 u16 plen; 772 int flags = MSG_DONTWAIT; 773 struct mbuf *top, *m; 774 int err; 775 struct uio uio; 776 int len; 777 778 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 779 780 /* 781 * Stop mpa timer. If it expired, then the state has 782 * changed and we bail since ep_timeout already aborted 783 * the connection. 784 */ 785 stop_ep_timer(ep); 786 if (state_read(&ep->com) != MPA_REQ_WAIT) 787 return; 788 789 uio.uio_resid = len = 1000000; 790 uio.uio_td = ep->com.thread; 791 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 792 if (err) { 793 if (err == EWOULDBLOCK) { 794 start_ep_timer(ep); 795 return; 796 } 797 err = -err; 798 goto err; 799 } 800 801 m = top; 802 do { 803 804 /* 805 * If we get more than the supported amount of private data 806 * then we must fail this connection. 
807 */ 808 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 809 CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__, 810 ep->mpa_pkt_len + m->m_len); 811 goto err; 812 } 813 814 815 /* 816 * Copy the new data into our accumulation buffer. 817 */ 818 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 819 ep->mpa_pkt_len += m->m_len; 820 821 if (!m->m_next) 822 m = m->m_nextpkt; 823 else 824 m = m->m_next; 825 } while (m); 826 827 m_freem(top); 828 829 /* 830 * If we don't even have the mpa message, then bail. 831 * We'll continue process when more data arrives. 832 */ 833 if (ep->mpa_pkt_len < sizeof(*mpa)) { 834 start_ep_timer(ep); 835 CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__, 836 ep->mpa_pkt_len); 837 return; 838 } 839 mpa = (struct mpa_message *) ep->mpa_pkt; 840 841 /* 842 * Validate MPA Header. 843 */ 844 if (mpa->revision != mpa_rev) { 845 CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); 846 goto err; 847 } 848 849 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { 850 CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); 851 goto err; 852 } 853 854 plen = ntohs(mpa->private_data_size); 855 856 /* 857 * Fail if there's too much private data. 858 */ 859 if (plen > MPA_MAX_PRIVATE_DATA) { 860 CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); 861 goto err; 862 } 863 864 /* 865 * If plen does not account for pkt size 866 */ 867 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 868 CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__, 869 ep->mpa_pkt_len); 870 goto err; 871 } 872 ep->plen = (u8) plen; 873 874 /* 875 * If we don't have all the pdata yet, then bail. 
876 */ 877 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { 878 start_ep_timer(ep); 879 CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__, 880 ep->mpa_pkt_len); 881 return; 882 } 883 884 /* 885 * If we get here we have accumulated the entire mpa 886 * start reply message including private data. 887 */ 888 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 889 ep->mpa_attr.recv_marker_enabled = markers_enabled; 890 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 891 ep->mpa_attr.version = mpa_rev; 892 if (set_tcpinfo(ep)) { 893 printf("%s set_tcpinfo error\n", __FUNCTION__); 894 goto err; 895 } 896 CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " 897 "xmit_marker_enabled=%d, version=%d", __FUNCTION__, 898 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 899 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 900 901 state_set(&ep->com, MPA_REQ_RCVD); 902 903 /* drive upcall */ 904 connect_request_upcall(ep); 905 return; 906err: 907 abort_connection(ep); 908 return; 909} 910 911static void 912process_peer_close(struct iwch_ep *ep) 913{ 914 struct iwch_qp_attributes attrs; 915 int disconnect = 1; 916 int release = 0; 917 918 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 919 920 mtx_lock(&ep->com.lock); 921 switch (ep->com.state) { 922 case MPA_REQ_WAIT: 923 __state_set(&ep->com, CLOSING); 924 break; 925 case MPA_REQ_SENT: 926 __state_set(&ep->com, CLOSING); 927 connect_reply_upcall(ep, -ECONNRESET); 928 break; 929 case MPA_REQ_RCVD: 930 931 /* 932 * We're gonna mark this puppy DEAD, but keep 933 * the reference on it until the ULP accepts or 934 * rejects the CR. 
935 */ 936 __state_set(&ep->com, CLOSING); 937 get_ep(&ep->com); 938 break; 939 case MPA_REP_SENT: 940 __state_set(&ep->com, CLOSING); 941 break; 942 case FPDU_MODE: 943 start_ep_timer(ep); 944 __state_set(&ep->com, CLOSING); 945 attrs.next_state = IWCH_QP_STATE_CLOSING; 946 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, 947 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); 948 peer_close_upcall(ep); 949 break; 950 case ABORTING: 951 disconnect = 0; 952 break; 953 case CLOSING: 954 __state_set(&ep->com, MORIBUND); 955 disconnect = 0; 956 break; 957 case MORIBUND: 958 stop_ep_timer(ep); 959 if (ep->com.cm_id && ep->com.qp) { 960 attrs.next_state = IWCH_QP_STATE_IDLE; 961 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, 962 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); 963 } 964 close_socket(&ep->com); 965 close_complete_upcall(ep); 966 __state_set(&ep->com, DEAD); 967 release = 1; 968 disconnect = 0; 969 break; 970 case DEAD: 971 disconnect = 0; 972 break; 973 default: 974 PANIC_IF(1); 975 } 976 mtx_unlock(&ep->com.lock); 977 if (disconnect) 978 iwch_ep_disconnect(ep, 0, M_NOWAIT); 979 if (release) 980 put_ep(&ep->com); 981 return; 982} 983 984static void 985process_conn_error(struct iwch_ep *ep) 986{ 987 struct iwch_qp_attributes attrs; 988 int ret; 989 int state; 990 991 state = state_read(&ep->com); 992 CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]); 993 switch (state) { 994 case MPA_REQ_WAIT: 995 stop_ep_timer(ep); 996 break; 997 case MPA_REQ_SENT: 998 stop_ep_timer(ep); 999 connect_reply_upcall(ep, -ECONNRESET); 1000 break; 1001 case MPA_REP_SENT: 1002 ep->com.rpl_err = ECONNRESET; 1003 CTR1(KTR_IW_CXGB, "waking up ep %p", ep); 1004 break; 1005 case MPA_REQ_RCVD: 1006 1007 /* 1008 * We're gonna mark this puppy DEAD, but keep 1009 * the reference on it until the ULP accepts or 1010 * rejects the CR. 
1011 */ 1012 get_ep(&ep->com); 1013 break; 1014 case MORIBUND: 1015 case CLOSING: 1016 stop_ep_timer(ep); 1017 /*FALLTHROUGH*/ 1018 case FPDU_MODE: 1019 if (ep->com.cm_id && ep->com.qp) { 1020 attrs.next_state = IWCH_QP_STATE_ERROR; 1021 ret = iwch_modify_qp(ep->com.qp->rhp, 1022 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1023 &attrs, 1); 1024 if (ret) 1025 log(LOG_ERR, 1026 "%s - qp <- error failed!\n", 1027 __FUNCTION__); 1028 } 1029 peer_abort_upcall(ep); 1030 break; 1031 case ABORTING: 1032 break; 1033 case DEAD: 1034 CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, 1035 ep->com.so->so_error); 1036 return; 1037 default: 1038 PANIC_IF(1); 1039 break; 1040 } 1041 1042 if (state != ABORTING) { 1043 close_socket(&ep->com); 1044 state_set(&ep->com, DEAD); 1045 put_ep(&ep->com); 1046 } 1047 return; 1048} 1049 1050static void 1051process_close_complete(struct iwch_ep *ep) 1052{ 1053 struct iwch_qp_attributes attrs; 1054 int release = 0; 1055 1056 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1057 PANIC_IF(!ep); 1058 1059 /* The cm_id may be null if we failed to connect */ 1060 mtx_lock(&ep->com.lock); 1061 switch (ep->com.state) { 1062 case CLOSING: 1063 __state_set(&ep->com, MORIBUND); 1064 break; 1065 case MORIBUND: 1066 stop_ep_timer(ep); 1067 if ((ep->com.cm_id) && (ep->com.qp)) { 1068 attrs.next_state = IWCH_QP_STATE_IDLE; 1069 iwch_modify_qp(ep->com.qp->rhp, 1070 ep->com.qp, 1071 IWCH_QP_ATTR_NEXT_STATE, 1072 &attrs, 1); 1073 } 1074 close_socket(&ep->com); 1075 close_complete_upcall(ep); 1076 __state_set(&ep->com, DEAD); 1077 release = 1; 1078 break; 1079 case ABORTING: 1080 break; 1081 case DEAD: 1082 default: 1083 PANIC_IF(1); 1084 break; 1085 } 1086 mtx_unlock(&ep->com.lock); 1087 if (release) 1088 put_ep(&ep->com); 1089 return; 1090} 1091 1092/* 1093 * T3A does 3 things when a TERM is received: 1094 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet 1095 * 2) generate an async 
event on the QP with the TERMINATE opcode 1096 * 3) post a TERMINATE opcde cqe into the associated CQ. 1097 * 1098 * For (1), we save the message in the qp for later consumer consumption. 1099 * For (2), we move the QP into TERMINATE, post a QP event and disconnect. 1100 * For (3), we toss the CQE in cxio_poll_cq(). 1101 * 1102 * terminate() handles case (1)... 1103 */ 1104static int 1105terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx) 1106{ 1107 struct toepcb *toep = (struct toepcb *)ctx; 1108 struct socket *so = toeptoso(toep); 1109 struct iwch_ep *ep = so->so_rcv.sb_upcallarg; 1110 1111 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1112 m_adj(m, sizeof(struct cpl_rdma_terminate)); 1113 CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len); 1114 m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer); 1115 ep->com.qp->attr.terminate_msg_len = m->m_len; 1116 ep->com.qp->attr.is_terminate_local = 0; 1117 return CPL_RET_BUF_DONE; 1118} 1119 1120static int 1121ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx) 1122{ 1123 struct toepcb *toep = (struct toepcb *)ctx; 1124 struct socket *so = toeptoso(toep); 1125 struct cpl_rdma_ec_status *rep = cplhdr(m); 1126 struct iwch_ep *ep; 1127 struct iwch_qp_attributes attrs; 1128 int release = 0; 1129 1130 ep = so->so_rcv.sb_upcallarg; 1131 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status); 1132 if (!so || !ep) { 1133 panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? 
so->so_state : -1); 1134 } 1135 mtx_lock(&ep->com.lock); 1136 switch (ep->com.state) { 1137 case CLOSING: 1138 if (!rep->status) 1139 __state_set(&ep->com, MORIBUND); 1140 else 1141 __state_set(&ep->com, ABORTING); 1142 break; 1143 case MORIBUND: 1144 stop_ep_timer(ep); 1145 if (!rep->status) { 1146 if ((ep->com.cm_id) && (ep->com.qp)) { 1147 attrs.next_state = IWCH_QP_STATE_IDLE; 1148 iwch_modify_qp(ep->com.qp->rhp, 1149 ep->com.qp, 1150 IWCH_QP_ATTR_NEXT_STATE, 1151 &attrs, 1); 1152 } 1153 close_socket(&ep->com); 1154 close_complete_upcall(ep); 1155 __state_set(&ep->com, DEAD); 1156 release = 1; 1157 } 1158 break; 1159 case DEAD: 1160 break; 1161 default: 1162 panic("unknown state: %d\n", ep->com.state); 1163 } 1164 mtx_unlock(&ep->com.lock); 1165 if (rep->status) { 1166 log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n", 1167 __FUNCTION__, ep->hwtid); 1168 attrs.next_state = IWCH_QP_STATE_ERROR; 1169 iwch_modify_qp(ep->com.qp->rhp, 1170 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1171 &attrs, 1); 1172 } 1173 if (release) 1174 put_ep(&ep->com); 1175 return CPL_RET_BUF_DONE; 1176} 1177 1178static void 1179ep_timeout(void *arg) 1180{ 1181 struct iwch_ep *ep = (struct iwch_ep *)arg; 1182 struct iwch_qp_attributes attrs; 1183 int err = 0; 1184 1185 mtx_lock(&ep->com.lock); 1186 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1187 switch (ep->com.state) { 1188 case MPA_REQ_SENT: 1189 connect_reply_upcall(ep, -ETIMEDOUT); 1190 break; 1191 case MPA_REQ_WAIT: 1192 break; 1193 case CLOSING: 1194 case MORIBUND: 1195 if (ep->com.cm_id && ep->com.qp) 1196 err = 1; 1197 break; 1198 default: 1199 panic("unknown state: %d\n", ep->com.state); 1200 } 1201 __state_set(&ep->com, ABORTING); 1202 mtx_unlock(&ep->com.lock); 1203 if (err){ 1204 attrs.next_state = IWCH_QP_STATE_ERROR; 1205 iwch_modify_qp(ep->com.qp->rhp, 1206 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1207 &attrs, 1); 1208 } 1209 abort_connection(ep); 1210 put_ep(&ep->com); 1211} 1212 
1213int 1214iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 1215{ 1216 int err; 1217 struct iwch_ep *ep = to_ep(cm_id); 1218 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1219 1220 if (state_read(&ep->com) == DEAD) { 1221 put_ep(&ep->com); 1222 return (-ECONNRESET); 1223 } 1224 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1225 if (mpa_rev == 0) { 1226 abort_connection(ep); 1227 } else { 1228 err = send_mpa_reject(ep, pdata, pdata_len); 1229 err = soshutdown(ep->com.so, 3); 1230 } 1231 return 0; 1232} 1233 1234int 1235iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1236{ 1237 int err; 1238 struct iwch_qp_attributes attrs; 1239 enum iwch_qp_attr_mask mask; 1240 struct iwch_ep *ep = to_ep(cm_id); 1241 struct iwch_dev *h = to_iwch_dev(cm_id->device); 1242 struct iwch_qp *qp = get_qhp(h, conn_param->qpn); 1243 1244 CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); 1245 if (state_read(&ep->com) == DEAD) 1246 return (-ECONNRESET); 1247 1248 PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); 1249 PANIC_IF(!qp); 1250 1251 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || 1252 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { 1253 abort_connection(ep); 1254 return (-EINVAL); 1255 } 1256 1257 cm_id->add_ref(cm_id); 1258 ep->com.cm_id = cm_id; 1259 ep->com.qp = qp; 1260 1261 ep->com.rpl_err = 0; 1262 ep->com.rpl_done = 0; 1263 ep->ird = conn_param->ird; 1264 ep->ord = conn_param->ord; 1265 CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); 1266 get_ep(&ep->com); 1267 1268 /* bind QP to EP and move to RTS */ 1269 attrs.mpa_attr = ep->mpa_attr; 1270 attrs.max_ird = ep->ord; 1271 attrs.max_ord = ep->ord; 1272 attrs.llp_stream_handle = ep; 1273 attrs.next_state = IWCH_QP_STATE_RTS; 1274 1275 /* bind QP and TID with INIT_WR */ 1276 mask = IWCH_QP_ATTR_NEXT_STATE | 1277 
IWCH_QP_ATTR_LLP_STREAM_HANDLE | 1278 IWCH_QP_ATTR_MPA_ATTR | 1279 IWCH_QP_ATTR_MAX_IRD | 1280 IWCH_QP_ATTR_MAX_ORD; 1281 1282 err = iwch_modify_qp(ep->com.qp->rhp, 1283 ep->com.qp, mask, &attrs, 1); 1284 1285 if (err) 1286 goto err; 1287 1288 err = send_mpa_reply(ep, conn_param->private_data, 1289 conn_param->private_data_len); 1290 if (err) 1291 goto err; 1292 state_set(&ep->com, FPDU_MODE); 1293 established_upcall(ep); 1294 put_ep(&ep->com); 1295 return 0; 1296err: 1297 ep->com.cm_id = NULL; 1298 ep->com.qp = NULL; 1299 cm_id->rem_ref(cm_id); 1300 put_ep(&ep->com); 1301 return err; 1302} 1303 1304static int init_sock(struct iwch_ep_common *epc) 1305{ 1306 int err; 1307 struct sockopt sopt; 1308 int on=1; 1309 1310 SOCK_LOCK(epc->so); 1311 soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc); 1312 epc->so->so_state |= SS_NBIO; 1313 SOCK_UNLOCK(epc->so); 1314 sopt.sopt_dir = SOPT_SET; 1315 sopt.sopt_level = SOL_SOCKET; 1316 sopt.sopt_name = SO_NO_DDP; 1317 sopt.sopt_val = (caddr_t)&on; 1318 sopt.sopt_valsize = sizeof on; 1319 sopt.sopt_td = NULL; 1320 err = sosetopt(epc->so, &sopt); 1321 if (err) 1322 printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err); 1323 sopt.sopt_dir = SOPT_SET; 1324 sopt.sopt_level = IPPROTO_TCP; 1325 sopt.sopt_name = TCP_NODELAY; 1326 sopt.sopt_val = (caddr_t)&on; 1327 sopt.sopt_valsize = sizeof on; 1328 sopt.sopt_td = NULL; 1329 err = sosetopt(epc->so, &sopt); 1330 if (err) 1331 printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err); 1332 1333 return 0; 1334} 1335 1336static int 1337is_loopback_dst(struct iw_cm_id *cm_id) 1338{ 1339 uint16_t port = cm_id->remote_addr.sin_port; 1340 int ifa_present; 1341 1342 cm_id->remote_addr.sin_port = 0; 1343 ifa_present = ifa_ifwithaddr_check( 1344 (struct sockaddr *)&cm_id->remote_addr); 1345 cm_id->remote_addr.sin_port = port; 1346 return (ifa_present); 1347} 1348 1349int 1350iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1351{ 1352 int err = 0; 1353 struct 
iwch_dev *h = to_iwch_dev(cm_id->device); 1354 struct iwch_ep *ep; 1355 struct rtentry *rt; 1356 struct toedev *tdev; 1357 1358 if (is_loopback_dst(cm_id)) { 1359 err = -ENOSYS; 1360 goto out; 1361 } 1362 1363 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1364 if (!ep) { 1365 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1366 err = (-ENOMEM); 1367 goto out; 1368 } 1369 callout_init(&ep->timer, TRUE); 1370 ep->plen = conn_param->private_data_len; 1371 if (ep->plen) 1372 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 1373 conn_param->private_data, ep->plen); 1374 ep->ird = conn_param->ird; 1375 ep->ord = conn_param->ord; 1376 1377 cm_id->add_ref(cm_id); 1378 ep->com.cm_id = cm_id; 1379 ep->com.qp = get_qhp(h, conn_param->qpn); 1380 ep->com.thread = curthread; 1381 PANIC_IF(!ep->com.qp); 1382 CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn, 1383 ep->com.qp, cm_id); 1384 1385 ep->com.so = cm_id->so; 1386 err = init_sock(&ep->com); 1387 if (err) 1388 goto fail2; 1389 1390 /* find a route */ 1391 rt = find_route(cm_id->local_addr.sin_addr.s_addr, 1392 cm_id->remote_addr.sin_addr.s_addr, 1393 cm_id->local_addr.sin_port, 1394 cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); 1395 if (!rt) { 1396 printf("%s - cannot find route.\n", __FUNCTION__); 1397 err = EHOSTUNREACH; 1398 goto fail2; 1399 } 1400 1401 if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) { 1402 printf("%s - interface not TOE capable.\n", __FUNCTION__); 1403 goto fail3; 1404 } 1405 tdev = TOEDEV(rt->rt_ifp); 1406 if (tdev == NULL) { 1407 printf("%s - No toedev for interface.\n", __FUNCTION__); 1408 goto fail3; 1409 } 1410 if (!tdev->tod_can_offload(tdev, ep->com.so)) { 1411 printf("%s - interface cannot offload!.\n", __FUNCTION__); 1412 goto fail3; 1413 } 1414 RTFREE(rt); 1415 1416 state_set(&ep->com, CONNECTING); 1417 ep->com.local_addr = cm_id->local_addr; 1418 ep->com.remote_addr = cm_id->remote_addr; 1419 err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 1420 
ep->com.thread); 1421 if (!err) 1422 goto out; 1423fail3: 1424 RTFREE(ep->dst); 1425fail2: 1426 put_ep(&ep->com); 1427out: 1428 return err; 1429} 1430 1431int 1432iwch_create_listen(struct iw_cm_id *cm_id, int backlog) 1433{ 1434 int err = 0; 1435 struct iwch_listen_ep *ep; 1436 1437 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 1438 if (!ep) { 1439 printf("%s - cannot alloc ep.\n", __FUNCTION__); 1440 err = ENOMEM; 1441 goto out; 1442 } 1443 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1444 cm_id->add_ref(cm_id); 1445 ep->com.cm_id = cm_id; 1446 ep->backlog = backlog; 1447 ep->com.local_addr = cm_id->local_addr; 1448 ep->com.thread = curthread; 1449 state_set(&ep->com, LISTEN); 1450 1451 ep->com.so = cm_id->so; 1452 err = init_sock(&ep->com); 1453 if (err) 1454 goto fail; 1455 1456 err = solisten(ep->com.so, ep->backlog, ep->com.thread); 1457 if (!err) { 1458 cm_id->provider_data = ep; 1459 goto out; 1460 } 1461 close_socket(&ep->com); 1462fail: 1463 cm_id->rem_ref(cm_id); 1464 put_ep(&ep->com); 1465out: 1466 return err; 1467} 1468 1469int 1470iwch_destroy_listen(struct iw_cm_id *cm_id) 1471{ 1472 struct iwch_listen_ep *ep = to_listen_ep(cm_id); 1473 1474 CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); 1475 1476 state_set(&ep->com, DEAD); 1477 close_socket(&ep->com); 1478 cm_id->rem_ref(cm_id); 1479 put_ep(&ep->com); 1480 return 0; 1481} 1482 1483int 1484iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags) 1485{ 1486 int close = 0; 1487 1488 mtx_lock(&ep->com.lock); 1489 1490 PANIC_IF(!ep); 1491 PANIC_IF(!ep->com.so); 1492 1493 CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep, 1494 ep->com.so, states[ep->com.state], abrupt); 1495 1496 if (ep->com.state == DEAD) { 1497 CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep); 1498 goto out; 1499 } 1500 1501 if (abrupt) { 1502 if (ep->com.state != ABORTING) { 1503 ep->com.state = ABORTING; 1504 close = 1; 1505 } 1506 goto out; 1507 } 1508 1509 switch (ep->com.state) { 1510 case 
MPA_REQ_WAIT:
	case MPA_REQ_SENT:
	case MPA_REQ_RCVD:
	case MPA_REP_SENT:
	case FPDU_MODE:
		/* Start a graceful close: arm the timer and shut down. */
		start_ep_timer(ep);
		ep->com.state = CLOSING;
		close = 1;
		break;
	case CLOSING:
		ep->com.state = MORIBUND;
		close = 1;
		break;
	case MORIBUND:
	case ABORTING:
		/* Close/abort already in progress. */
		break;
	default:
		panic("unknown state: %d\n", ep->com.state);
		break;
	}
out:
	mtx_unlock(&ep->com.lock);
	/* Perform the socket operation only after dropping the ep lock. */
	if (close) {
		if (abrupt)
			abort_connection(ep);
		else
			shutdown_socket(&ep->com);
	}
	return 0;
}

/*
 * Data has arrived on the connection's socket.  Depending on the MPA
 * state, parse it as an MPA reply (active side) or an MPA request
 * (passive side); anything else at this point is unexpected streaming
 * data and is only logged.
 */
static void
process_data(struct iwch_ep *ep)
{
	struct sockaddr_in *local, *remote;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	switch (state_read(&ep->com)) {
	case MPA_REQ_SENT:
		process_mpa_reply(ep);
		break;
	case MPA_REQ_WAIT:

		/*
		 * XXX
		 * Set local and remote addrs here because when we
		 * dequeue the newly accepted socket, they aren't set
		 * yet in the pcb!
		 */
		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
		CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
			inet_ntoa(local->sin_addr),
			inet_ntoa(remote->sin_addr));
		ep->com.local_addr = *local;
		ep->com.remote_addr = *remote;
		/* in_getsockaddr/in_getpeeraddr allocate; free both. */
		free(local, M_SONAME);
		free(remote, M_SONAME);
		process_mpa_request(ep);
		break;
	default:
		if (ep->com.so->so_rcv.sb_cc)
			printf("%s Unexpected streaming data."
			       " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
			       __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
			       ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb);
		break;
	}
	return;
}

/*
 * The non-blocking connect started in iwch_connect() has completed:
 * on success begin the MPA exchange, otherwise fail the connect
 * request upcall and kill the ep.
 */
static void
process_connected(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
		send_mpa_req(ep);
	} else {
		connect_reply_upcall(ep, -ep->com.so->so_error);
		close_socket(&ep->com);
		state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
}

/*
 * Pull the first completed connection off a listening socket's accept
 * queue, wiring up the iWARP receive upcall for the child ep before
 * the socket can deliver events.  Returns NULL if the queue is empty.
 * On success *remote is allocated by soaccept() (caller frees with
 * M_SONAME).
 */
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
{
	struct socket *so;

	ACCEPT_LOCK();
	so = TAILQ_FIRST(&head->so_comp);
	if (!so) {
		ACCEPT_UNLOCK();
		return NULL;
	}
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	SOCK_LOCK(so);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;
	soref(so);
	/* Install the upcall before anyone can use the socket. */
	soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
	so->so_state |= SS_NBIO;
	PANIC_IF(!(so->so_state & SS_ISCONNECTED));
	PANIC_IF(so->so_error);
	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();
	soaccept(so, (struct sockaddr **)remote);
	return so;
}

/*
 * A listening ep has a completed connection pending: allocate a child
 * ep, accept the socket onto it and start waiting for the peer's MPA
 * request.
 */
static void
process_newconn(struct iwch_ep *parent_ep)
{
	struct socket *child_so;
	struct iwch_ep *child_ep;
	struct sockaddr_in *remote;

	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
	if (!child_ep) {
		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
		    __FUNCTION__);
		return;
	}
	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
	if (!child_so) {
		log(LOG_ERR, "%s - failed to dequeue child
socket!\n", 1640 __FUNCTION__); 1641 __free_ep(&child_ep->com); 1642 return; 1643 } 1644 CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 1645 inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); 1646 child_ep->com.so = child_so; 1647 child_ep->com.cm_id = NULL; 1648 child_ep->com.thread = parent_ep->com.thread; 1649 child_ep->parent_ep = parent_ep; 1650 free(remote, M_SONAME); 1651 get_ep(&parent_ep->com); 1652 child_ep->parent_ep = parent_ep; 1653 callout_init(&child_ep->timer, TRUE); 1654 state_set(&child_ep->com, MPA_REQ_WAIT); 1655 start_ep_timer(child_ep); 1656 1657 /* maybe the request has already been queued up on the socket... */ 1658 process_mpa_request(child_ep); 1659} 1660 1661static int 1662iwch_so_upcall(struct socket *so, void *arg, int waitflag) 1663{ 1664 struct iwch_ep *ep = arg; 1665 1666 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1667 mtx_lock(&req_lock); 1668 if (ep && ep->com.so && !ep->com.entry.tqe_prev) { 1669 get_ep(&ep->com); 1670 TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); 1671 taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task); 1672 } 1673 mtx_unlock(&req_lock); 1674 return (SU_OK); 1675} 1676 1677static void 1678process_socket_event(struct iwch_ep *ep) 1679{ 1680 int state = state_read(&ep->com); 1681 struct socket *so = ep->com.so; 1682 1683 CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); 1684 if (state == CONNECTING) { 1685 process_connected(ep); 1686 return; 1687 } 1688 1689 if (state == LISTEN) { 1690 process_newconn(ep); 1691 return; 1692 } 1693 1694 /* connection error */ 1695 if (so->so_error) { 1696 process_conn_error(ep); 1697 return; 1698 } 1699 1700 /* peer close */ 1701 if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { 1702 process_peer_close(ep); 1703 return; 1704 } 1705 1706 /* close complete */ 1707 if 
(so->so_state & (SS_ISDISCONNECTED)) { 1708 process_close_complete(ep); 1709 return; 1710 } 1711 1712 /* rx data */ 1713 process_data(ep); 1714 return; 1715} 1716 1717static void 1718process_req(void *ctx, int pending) 1719{ 1720 struct iwch_ep_common *epc; 1721 1722 CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__); 1723 mtx_lock(&req_lock); 1724 while (!TAILQ_EMPTY(&req_list)) { 1725 epc = TAILQ_FIRST(&req_list); 1726 TAILQ_REMOVE(&req_list, epc, entry); 1727 epc->entry.tqe_prev = NULL; 1728 mtx_unlock(&req_lock); 1729 if (epc->so) 1730 process_socket_event((struct iwch_ep *)epc); 1731 put_ep(epc); 1732 mtx_lock(&req_lock); 1733 } 1734 mtx_unlock(&req_lock); 1735} 1736 1737int 1738iwch_cm_init(void) 1739{ 1740 TAILQ_INIT(&req_list); 1741 mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); 1742 iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, 1743 taskqueue_thread_enqueue, &iw_cxgb_taskq); 1744 if (iw_cxgb_taskq == NULL) { 1745 printf("failed to allocate iw_cxgb taskqueue\n"); 1746 return (ENOMEM); 1747 } 1748 taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); 1749 TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); 1750 t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate); 1751 t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status); 1752 return 0; 1753} 1754 1755void 1756iwch_cm_term(void) 1757{ 1758 t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL); 1759 t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL); 1760 taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); 1761 taskqueue_free(iw_cxgb_taskq); 1762} 1763 1764