32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/sockstate.h> 43#include <sys/sockopt.h> 44#include <sys/socket.h> 45#include <sys/sockbuf.h> 46#include <sys/sysctl.h> 47#include <sys/syslog.h> 48#include <sys/protosw.h> 49#include <sys/priv.h> 50 51#if __FreeBSD_version < 800044 52#define V_tcp_do_autosndbuf tcp_do_autosndbuf 53#define V_tcp_autosndbuf_max tcp_autosndbuf_max 54#define V_tcp_do_rfc1323 tcp_do_rfc1323 55#define V_tcp_do_autorcvbuf tcp_do_autorcvbuf 56#define V_tcp_autorcvbuf_max tcp_autorcvbuf_max 57#define V_tcpstat tcpstat 58#endif 59 60#include <net/if.h> 61#include <net/route.h> 62 63#include <netinet/in.h> 64#include <netinet/in_pcb.h> 65#include <netinet/in_systm.h> 66#include <netinet/in_var.h> 67 68 69#include <cxgb_osdep.h> 70#include <sys/mbufq.h> 71 72#include <netinet/ip.h> 73#include <netinet/tcp_var.h> 74#include <netinet/tcp_fsm.h> 75#include <netinet/tcp_offload.h> 76#include <netinet/tcp_seq.h> 77#include <netinet/tcp_syncache.h> 78#include <netinet/tcp_timer.h> 79#include <net/route.h> 80 81#include <t3cdev.h> 82#include <common/cxgb_firmware_exports.h> 83#include <common/cxgb_t3_cpl.h> 84#include <common/cxgb_tcb.h> 85#include <common/cxgb_ctl_defs.h> 86#include <cxgb_offload.h> 87#include <vm/vm.h> 88#include <vm/pmap.h> 89#include <machine/bus.h> 90#include <sys/mvec.h> 91#include <ulp/toecore/cxgb_toedev.h> 92#include <ulp/tom/cxgb_l2t.h> 93#include <ulp/tom/cxgb_defs.h> 94#include <ulp/tom/cxgb_tom.h> 95#include <ulp/tom/cxgb_t3_ddp.h> 96#include <ulp/tom/cxgb_toepcb.h> 97#include <ulp/tom/cxgb_tcp.h> 98#include <ulp/tom/cxgb_tcp_offload.h> 99 100/* 101 * For ULP connections HW may add headers, e.g., for digests, that aren't part 102 * of the messages sent by the host but that are part of the TCP payload and 103 * therefore consume TCP 
sequence space. Tx connection parameters that
 * operate in TCP sequence space are affected by the HW additions and need to
 * compensate for them to accurately track TCP sequence numbers. This array
 * contains the compensating extra lengths for ULP packets.  It is indexed by
 * a packet's ULP submode.
 */
const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};

#ifdef notyet
/*
 * This sk_buff holds a fake header-only TCP segment that we use whenever we
 * need to exploit SW TCP functionality that expects TCP headers, such as
 * tcp_create_openreq_child().  It's a RO buffer that may be used by multiple
 * CPUs without locking.
 */
static struct mbuf *tcphdr_mbuf __read_mostly;
#endif

/*
 * Size of WRs in bytes.  Note that we assume all devices we are handling have
 * the same WR size.
 */
static unsigned int wrlen __read_mostly;

/*
 * The number of WRs needed for an skb depends on the number of page fragments
 * in the skb and whether it has any payload in its main body.  This maps the
 * length of the gather list represented by an skb into the # of necessary WRs.
 */
static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;

/*
 * Max receive window supported by HW in bytes.  Only a small part of it can
 * be set through option0, the rest needs to be set through RX_DATA_ACK.
 */
#define MAX_RCV_WND ((1U << 27) - 1)

/*
 * Min receive window.  We want it to be large enough to accommodate receive
 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
 */
#define MIN_RCV_WND (24 * 1024U)
/* Extract the IP TOS bits from an inpcb in the TCB's M_TOS field position. */
#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)

#define VALIDATE_SEQ 0
#define VALIDATE_SOCK(so)
#define DEBUG_WR 0

#define TCP_TIMEWAIT	1
#define TCP_CLOSE	2
#define TCP_DROP	3

static void t3_send_reset(struct toepcb *toep);
static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
static void handle_syncache_event(int event, void *arg);

/*
 * Debug wrapper around sbappendstream_locked().  Walks the sockbuf's mbuf
 * chain before and after the append, asserting that every mbuf is either a
 * plain mbuf or an EXT_EXTREF external mbuf, and that no m_next pointer holds
 * the 0xffffffff poison value.  Also asserts SB_NOCOALESCE is set, since the
 * offload TX path depends on mbuf boundaries being preserved.
 */
static inline void
SBAPPEND(struct sockbuf *sb, struct mbuf *n)
{
    struct mbuf *m;

    /* Pre-append sanity walk of the existing chain. */
    m = sb->sb_mb;
    while (m) {
        KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
            !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
            !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
        KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
            m->m_next, m->m_nextpkt, m->m_flags));
        m = m->m_next;
    }
    /* Same sanity walk over the chain being appended. */
    m = n;
    while (m) {
        KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
            !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
            !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
        KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
            m->m_next, m->m_nextpkt, m->m_flags));
        m = m->m_next;
    }
    KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
    sbappendstream_locked(sb, n);
    /* Post-append walk: verify the merged chain is still sane. */
    m = sb->sb_mb;

    while (m) {
        KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
            m->m_next, m->m_nextpkt, m->m_flags));
        m = m->m_next;
    }
}

/* True if the TOE device is a (rev A) Chelsio T3. */
static inline int
is_t3a(const struct toedev *dev)
{
    return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
}

/* Debug dump of the interesting fields of an offload PCB. */
static void
dump_toepcb(struct toepcb *toep)
{
    DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
        toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
        toep->tp_mtu_idx, toep->tp_tid);

    DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
        toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
        toep->tp_mss_clamp, toep->tp_flags);
}

#ifndef RTALLOC2_DEFINED
/* rtalloc1() wrapper that returns the route unlocked (may be NULL). */
static struct rtentry *
rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
{
    struct rtentry *rt = NULL;

    if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
        RT_UNLOCK(rt);

    return (rt);
}
#endif

/*
 * Determine whether to send a CPL message now or defer it.  A message is
 * deferred if the connection is in SYN_SENT since we don't know the TID yet.
 * For connections in other states the message is sent immediately.
 * If through_l2t is set the message is subject to ARP processing, otherwise
 * it is sent directly.
 */
static inline void
send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
{
    struct tcpcb *tp = toep->tp_tp;

    if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
        inp_wlock(tp->t_inpcb);
        mbufq_tail(&toep->out_of_order_queue, m);  // defer
        inp_wunlock(tp->t_inpcb);
    } else if (through_l2t)
        l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);  // send through L2T
    else
        cxgb_ofld_send(TOEP_T3C_DEV(toep), m);  // send directly
}

/* Compute the mbuf priority for a CPL; currently just the control value. */
static inline unsigned int
mkprio(unsigned int cntrl, const struct toepcb *toep)
{
    return (cntrl);
}

/*
 * Populate a TID_RELEASE WR.  The mbuf must be already properly sized.
 */
static inline void
mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
{
    struct cpl_tid_release *req;

    m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
    m->m_pkthdr.len = m->m_len = sizeof(*req);
    req = mtod(m, struct cpl_tid_release *);
    req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
    req->wr.wr_lo = 0;
    OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
}

/*
 * Fill in a TX_DATA work request header at the front of mbuf m for a send of
 * 'len' payload bytes (len includes any HW ULP additions).  'tail' is the
 * remainder of the send queue; when it is NULL this is the last WR of the
 * burst and TX_SHOVE is set so HW pushes the data immediately.
 */
static inline void
make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
{
    struct tcpcb *tp = so_sototcpcb(so);
    struct toepcb *toep = tp->t_toe;
    struct tx_data_wr *req;
    struct sockbuf *snd;

    inp_lock_assert(tp->t_inpcb);
    snd = so_sockbuf_snd(so);

    req = mtod(m, struct tx_data_wr *);
    m->m_len = sizeof(*req);
    req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
    req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
    /* len includes the length of any HW ULP additions */
    req->len = htonl(len);
    req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
    /* V_TX_ULP_SUBMODE sets both the mode and submode */
    req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
        V_TX_URG(/* skb_urgent(skb) */ 0 ) |
        V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
            (tail ? 0 : 1))));
    req->sndseq = htonl(tp->snd_nxt);
    /* First WR on this connection carries extra init parameters. */
    if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
        req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
            V_TX_CPU_IDX(toep->tp_qset));

        /* Sendbuffer is in units of 32KB.
         */
        if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
            req->param |= htonl(V_TX_SNDBUF(V_tcp_autosndbuf_max >> 15));
        else {
            req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
        }

        toep->tp_flags |= TP_DATASENT;
    }
}

#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */

/*
 * Push pending send-socket-buffer data to the HW as TX_DATA work requests.
 * Consumes WR credits (toep->tp_wr_avail); small mbufs are copied in-line
 * into the WR, larger chains are described by a gather list of VAs.  The
 * sockbuf sndptr/sndptroff are advanced past what was handed to HW.
 * Returns the number of payload bytes queued; req_completion requests a
 * WR completion from HW for the first WR of the burst.
 */
int
t3_push_frames(struct socket *so, int req_completion)
{
    struct tcpcb *tp = so_sototcpcb(so);
    struct toepcb *toep = tp->t_toe;

    struct mbuf *tail, *m0, *last;
    struct t3cdev *cdev;
    struct tom_data *d;
    int state, bytes, count, total_bytes;
    bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
    struct sockbuf *snd;

    /* No TID yet (SYN_SENT) or connection gone: nothing to push. */
    if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
        DPRINTF("tcp state=%d\n", tp->t_state);
        return (0);
    }

    state = so_state_get(so);

    if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
        DPRINTF("disconnecting\n");

        return (0);
    }

    inp_lock_assert(tp->t_inpcb);

    snd = so_sockbuf_snd(so);
    sockbuf_lock(snd);

    d = TOM_DATA(toep->tp_toedev);
    cdev = d->cdev;

    /* Resume from the saved send pointer, or the head of the sockbuf. */
    last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;

    total_bytes = 0;
    DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
        toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);

    /* tp_m_last marks an mbuf already fully sent; skip past it. */
    if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
        KASSERT(tail, ("sbdrop error"));
        last = tail = tail->m_next;
    }

    if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
        DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
        sockbuf_unlock(snd);

        return (0);
    }

    toep->tp_m_last = NULL;
    while (toep->tp_wr_avail && (tail != NULL)) {
        count = bytes = 0;
        segp = segs;
        if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
            sockbuf_unlock(snd);
            return (0);
        }
        /*
         * If the data in tail fits as in-line, then
         * make an immediate data wr.
         */
        if (tail->m_len <= IMM_LEN) {
            count = 1;
            bytes = tail->m_len;
            last = tail;
            tail = tail->m_next;
            m_set_sgl(m0, NULL);
            m_set_sgllen(m0, 0);
            make_tx_data_wr(so, m0, bytes, tail);
            m_append(m0, bytes, mtod(last, caddr_t));
            KASSERT(!m0->m_next, ("bad append"));
        } else {
            /* Build a gather list, bounded by WR credits and segment max. */
            while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
                && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
                bytes += tail->m_len;
                last = tail;
                count++;
                /*
                 * technically an abuse to be using this for a VA
                 * but less gross than defining my own structure
                 * or calling pmap_kextract from here :-|
                 */
                segp->ds_addr = (bus_addr_t)tail->m_data;
                segp->ds_len = tail->m_len;
                DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
                    count, mbuf_wrs[count], tail->m_data, tail->m_len);
                segp++;
                tail = tail->m_next;
            }
            DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
                toep->tp_wr_avail, count, mbuf_wrs[count], tail);

            m_set_sgl(m0, segs);
            m_set_sgllen(m0, count);
            make_tx_data_wr(so, m0, bytes, tail);
        }
        m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));

        /* Advance the sockbuf send pointer past what we just queued. */
        if (tail) {
            snd->sb_sndptr = tail;
            toep->tp_m_last = NULL;
        } else
            toep->tp_m_last = snd->sb_sndptr = last;


        DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);

        snd->sb_sndptroff += bytes;
        total_bytes += bytes;
        toep->tp_write_seq += bytes;
        CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
            " tail=%p sndptr=%p sndptroff=%d",
            toep->tp_wr_avail, count, mbuf_wrs[count],
            tail, snd->sb_sndptr, snd->sb_sndptroff);
        if (tail)
            CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
                " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
                total_bytes, toep->tp_m_last, tail->m_data,
                tp->snd_una);
        else
            CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
                " tp_m_last=%p snd_una=0x%08x",
                total_bytes, toep->tp_m_last, tp->snd_una);


#ifdef KTR
{
    int i;

    i = 0;
    while (i < count && m_get_sgllen(m0)) {
        if ((count - i) >= 3) {
            CTR6(KTR_TOM,
                "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
                " len=%d pa=0x%zx len=%d",
                segs[i].ds_addr, segs[i].ds_len,
                segs[i + 1].ds_addr, segs[i + 1].ds_len,
                segs[i + 2].ds_addr, segs[i + 2].ds_len);
            i += 3;
        } else if ((count - i) == 2) {
            CTR4(KTR_TOM,
                "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
                " len=%d",
                segs[i].ds_addr, segs[i].ds_len,
                segs[i + 1].ds_addr, segs[i + 1].ds_len);
            i += 2;
        } else {
            CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
                segs[i].ds_addr, segs[i].ds_len);
            i++;
        }

    }
}
#endif
        /*
         * remember credits used
         */
        m0->m_pkthdr.csum_data = mbuf_wrs[count];
        m0->m_pkthdr.len = bytes;
        toep->tp_wr_avail -= mbuf_wrs[count];
        toep->tp_wr_unacked += mbuf_wrs[count];

        /* Ask HW for a completion when half the WR credits are in flight. */
        if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
            toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
            struct work_request_hdr *wr = cplhdr(m0);

            wr->wr_hi |= htonl(F_WR_COMPL);
            toep->tp_wr_unacked = 0;
        }
        KASSERT((m0->m_pkthdr.csum_data > 0) &&
            (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
            m0->m_pkthdr.csum_data));
        m0->m_type = MT_DONTFREE;
        enqueue_wr(toep, m0);
        DPRINTF("sending offload tx with %d bytes in %d segments\n",
            bytes, count);
        l2t_send(cdev, m0, toep->tp_l2t);
    }
    sockbuf_unlock(snd);
    return (total_bytes);
}

/*
 * Close a connection by sending a CPL_CLOSE_CON_REQ message.  Cannot fail
 * under any circumstances.  We take the easy way out and always queue the
 * message to the write_queue.  We can optimize the case where the queue is
 * already empty though the optimization is probably not worth it.
 */
static void
close_conn(struct socket *so)
{
    struct mbuf *m;
    struct cpl_close_con_req *req;
    struct tom_data *d;
    struct inpcb *inp = so_sotoinpcb(so);
    struct tcpcb *tp;
    struct toepcb *toep;
    unsigned int tid;


    inp_wlock(inp);
    tp = so_sototcpcb(so);
    toep = tp->t_toe;

    /* Flush any pending TX data before sending the FIN request. */
    if (tp->t_state != TCPS_SYN_SENT)
        t3_push_frames(so, 1);

    /* Only ever send one CLOSE_CON_REQ per connection. */
    if (toep->tp_flags & TP_FIN_SENT) {
        inp_wunlock(inp);
        return;
    }

    tid = toep->tp_tid;

    d = TOM_DATA(toep->tp_toedev);

    m = m_gethdr_nofail(sizeof(*req));
    m_set_priority(m, CPL_PRIORITY_DATA);
    m_set_sgl(m, NULL);
    m_set_sgllen(m, 0);

    toep->tp_flags |= TP_FIN_SENT;
    req = mtod(m, struct cpl_close_con_req *);

    req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
    req->wr.wr_lo = htonl(V_WR_TID(tid));
    OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
    req->rsvd = 0;
    inp_wunlock(inp);
    /*
     * XXX - need to defer shutdown while there is still data in the queue
     *
     */
    CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
    cxgb_ofld_send(d->cdev, m);

}

/*
 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 * and send it along.
 */
static void
abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{
    struct cpl_abort_req *req = cplhdr(m);

    req->cmd = CPL_ABORT_NO_RST;
    cxgb_ofld_send(cdev, m);
}

/*
 * Send RX credits through an RX_DATA_ACK CPL message.  If nofail is 0 we are
 * permitted to return without sending the message in case we cannot allocate
 * an sk_buff.  Returns the number of credits sent.
 */
uint32_t
t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
{
    struct mbuf *m;
    struct cpl_rx_data_ack *req;
    struct toepcb *toep = tp->t_toe;
    struct toedev *tdev = toep->tp_toedev;

    m = m_gethdr_nofail(sizeof(*req));

    DPRINTF("returning %u credits to HW\n", credits);

    req = mtod(m, struct cpl_rx_data_ack *);
    req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
    req->wr.wr_lo = 0;
    OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
    req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
    m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
    cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
    return (credits);
}

/*
 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
 * This is only used in DDP mode, so we take the opportunity to also set the
 * DACK mode and flush any Rx credits.
 */
void
t3_send_rx_modulate(struct toepcb *toep)
{
    struct mbuf *m;
    struct cpl_rx_data_ack *req;

    m = m_gethdr_nofail(sizeof(*req));

    req = mtod(m, struct cpl_rx_data_ack *);
    req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
    req->wr.wr_lo = 0;
    m->m_pkthdr.len = m->m_len = sizeof(*req);

    OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
    req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
        V_RX_DACK_MODE(1) |
        V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
    m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
    cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
    /* All outstanding credits were just returned with the ACK. */
    toep->tp_rcv_wup = toep->tp_copied_seq;
}

/*
 * Handle receipt of an urgent pointer.
 */
static void
handle_urg_ptr(struct socket *so, uint32_t urg_seq)
{
#ifdef URGENT_DATA_SUPPORTED
    /* NOTE(review): this body is legacy Linux-style code (sk_buff, sk) kept
     * under an #ifdef that is never defined; it does not compile as-is. */
    struct tcpcb *tp = so_sototcpcb(so);

    urg_seq--; /* initially points past the urgent data, per BSD */

    if (tp->urg_data && !after(urg_seq, tp->urg_seq))
        return; /* duplicate pointer */
    sk_send_sigurg(sk);
    if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
        !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
        struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

        tp->copied_seq++;
        if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
            tom_eat_skb(sk, skb, 0);
    }
    tp->urg_data = TCP_URG_NOTYET;
    tp->urg_seq = urg_seq;
#endif
}

/*
 * Returns true if a socket cannot accept new Rx data.
 */
static inline int
so_no_receive(const struct socket *so)
{
    return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
}

/*
 * Process an urgent data notification.
 */
static void
rx_urg_notify(struct toepcb *toep, struct mbuf *m)
{
    struct cpl_rx_urg_notify *hdr = cplhdr(m);
    struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);

    VALIDATE_SOCK(so);

    if (!so_no_receive(so))
        handle_urg_ptr(so, ntohl(hdr->seq));

    m_freem(m);
}

/*
 * Handler for RX_URG_NOTIFY CPL messages.
 */
static int
do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
    struct toepcb *toep = (struct toepcb *)ctx;

    rx_urg_notify(toep, m);
    return (0);
}

/*
 * True if it is OK to change the delayed-ACK mode for this connection.
 *
 * NOTE(review): because the first || operand is just tp_ulp_mode, this
 * returns true for ANY non-zero ULP mode and the TCPDDP/T3 clause is dead
 * code (when tp_ulp_mode == ULP_MODE_TCPDDP the first operand is already
 * non-zero).  Possibly the first operand was meant to be negated or removed
 * — confirm intent before changing.
 */
static __inline int
is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
{
    return (toep->tp_ulp_mode ||
        (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
            dev->tod_ttid >= TOE_ID_CHELSIO_T3));
}

/*
 * Set of states for which we should return RX credits.
695 */ 696#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 697 698/* 699 * Called after some received data has been read. It returns RX credits 700 * to the HW for the amount of data processed. 701 */ 702void 703t3_cleanup_rbuf(struct tcpcb *tp, int copied) 704{ 705 struct toepcb *toep = tp->t_toe; 706 struct socket *so; 707 struct toedev *dev; 708 int dack_mode, must_send, read; 709 u32 thres, credits, dack = 0; 710 struct sockbuf *rcv; 711 712 so = inp_inpcbtosocket(tp->t_inpcb); 713 rcv = so_sockbuf_rcv(so); 714 715 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 716 (tp->t_state == TCPS_FIN_WAIT_2))) { 717 if (copied) { 718 sockbuf_lock(rcv); 719 toep->tp_copied_seq += copied; 720 sockbuf_unlock(rcv); 721 } 722 723 return; 724 } 725 726 inp_lock_assert(tp->t_inpcb); 727 728 sockbuf_lock(rcv); 729 if (copied) 730 toep->tp_copied_seq += copied; 731 else { 732 read = toep->tp_enqueued_bytes - rcv->sb_cc; 733 toep->tp_copied_seq += read; 734 } 735 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 736 toep->tp_enqueued_bytes = rcv->sb_cc; 737 sockbuf_unlock(rcv); 738 739 if (credits > rcv->sb_mbmax) { 740 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n", 741 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 742 credits = rcv->sb_mbmax; 743 } 744 745 746 /* 747 * XXX this won't accurately reflect credit return - we need 748 * to look at the difference between the amount that has been 749 * put in the recv sockbuf and what is there now 750 */ 751 752 if (__predict_false(!credits)) 753 return; 754 755 dev = toep->tp_toedev; 756 thres = TOM_TUNABLE(dev, rx_credit_thres); 757 758 if (__predict_false(thres == 0)) 759 return; 760 761 if (is_delack_mode_valid(dev, toep)) { 762 dack_mode = TOM_TUNABLE(dev, delack); 763 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 764 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 765 766 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 767 dack = F_RX_DACK_CHANGE 
| 768 V_RX_DACK_MODE(dack_mode); 769 } 770 } else 771 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 772 773 /* 774 * For coalescing to work effectively ensure the receive window has 775 * at least 16KB left. 776 */ 777 must_send = credits + 16384 >= tp->rcv_wnd; 778 779 if (must_send || credits >= thres) 780 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 781} 782 783static int 784cxgb_toe_disconnect(struct tcpcb *tp) 785{ 786 struct socket *so; 787 788 DPRINTF("cxgb_toe_disconnect\n"); 789 790 so = inp_inpcbtosocket(tp->t_inpcb); 791 close_conn(so); 792 return (0); 793} 794 795static int 796cxgb_toe_reset(struct tcpcb *tp) 797{ 798 struct toepcb *toep = tp->t_toe; 799 800 t3_send_reset(toep); 801 802 /* 803 * unhook from socket 804 */ 805 tp->t_flags &= ~TF_TOE; 806 toep->tp_tp = NULL; 807 tp->t_toe = NULL; 808 return (0); 809} 810 811static int 812cxgb_toe_send(struct tcpcb *tp) 813{ 814 struct socket *so; 815 816 DPRINTF("cxgb_toe_send\n"); 817 dump_toepcb(tp->t_toe); 818 819 so = inp_inpcbtosocket(tp->t_inpcb); 820 t3_push_frames(so, 1); 821 return (0); 822} 823 824static int 825cxgb_toe_rcvd(struct tcpcb *tp) 826{ 827 828 inp_lock_assert(tp->t_inpcb); 829 830 t3_cleanup_rbuf(tp, 0); 831 832 return (0); 833} 834 835static void 836cxgb_toe_detach(struct tcpcb *tp) 837{ 838 struct toepcb *toep; 839 840 /* 841 * XXX how do we handle teardown in the SYN_SENT state? 
842 * 843 */ 844 inp_lock_assert(tp->t_inpcb); 845 toep = tp->t_toe; 846 toep->tp_tp = NULL; 847 848 /* 849 * unhook from socket 850 */ 851 tp->t_flags &= ~TF_TOE; 852 tp->t_toe = NULL; 853} 854 855 856static struct toe_usrreqs cxgb_toe_usrreqs = { 857 .tu_disconnect = cxgb_toe_disconnect, 858 .tu_reset = cxgb_toe_reset, 859 .tu_send = cxgb_toe_send, 860 .tu_rcvd = cxgb_toe_rcvd, 861 .tu_detach = cxgb_toe_detach, 862 .tu_detach = cxgb_toe_detach, 863 .tu_syncache_event = handle_syncache_event, 864}; 865 866 867static void 868__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 869 uint64_t mask, uint64_t val, int no_reply) 870{ 871 struct cpl_set_tcb_field *req; 872 873 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 874 toep->tp_tid, word, mask, val); 875 876 req = mtod(m, struct cpl_set_tcb_field *); 877 m->m_pkthdr.len = m->m_len = sizeof(*req); 878 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 879 req->wr.wr_lo = 0; 880 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 881 req->reply = V_NO_REPLY(no_reply); 882 req->cpu_idx = 0; 883 req->word = htons(word); 884 req->mask = htobe64(mask); 885 req->val = htobe64(val); 886 887 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 888 send_or_defer(toep, m, 0); 889} 890 891static void 892t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) 893{ 894 struct mbuf *m; 895 struct tcpcb *tp = toep->tp_tp; 896 897 if (toep == NULL) 898 return; 899 900 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 901 printf("not seting field\n"); 902 return; 903 } 904 905 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 906 907 __set_tcb_field(toep, m, word, mask, val, 1); 908} 909 910/* 911 * Set one of the t_flags bits in the TCB. 
 */
static void
set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
{

    t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
 */
static void
t3_set_nagle(struct toepcb *toep)
{
    struct tcpcb *tp = toep->tp_tp;

    /* Nagle is enabled in HW iff TF_NODELAY is clear on the tcpcb. */
    set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
 */
void
t3_set_keepalive(struct toepcb *toep, int on_off)
{

    set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
}

/* Enable/disable HW receive coalescing for the connection. */
void
t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
{
    set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
}

/* Enable/disable MSS-based delayed-ACK behavior for the connection. */
void
t3_set_dack_mss(struct toepcb *toep, int on_off)
{

    set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
 */
static void
t3_set_tos(struct toepcb *toep)
{
    int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);

    t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
        V_TCB_TOS(tos));
}


/*
 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
 * DDP is disabled (data is delivered to freelist). [Note that, the peer should
 * set the PSH bit in the last segment, which would trigger delivery.]
 * We work around the issue by setting a DDP buffer in a partial placed state,
 * which guarantees that TP will schedule a timer.
 */
#define TP_DDP_TIMER_WORKAROUND_MASK\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
       V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
#define TP_DDP_TIMER_WORKAROUND_VAL\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
      32))

/*
 * Turn DDP on or off for a connection.  Disabling also applies the partial
 * placed buffer workaround described above so TP keeps scheduling its timer.
 */
static void
t3_enable_ddp(struct toepcb *toep, int on)
{
    if (on) {

        t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
            V_TF_DDP_OFF(0));
    } else
        t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
            V_TF_DDP_OFF(1) |
            TP_DDP_TIMER_WORKAROUND_MASK,
            V_TF_DDP_OFF(1) |
            TP_DDP_TIMER_WORKAROUND_VAL);

}

/* Program the DDP tag/color for one of the two DDP buffers. */
void
t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
{
    t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
        V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
        tag_color);
}

/*
 * Program the offset and length of DDP buffer 0 or 1 in the TCB.
 *
 * NOTE(review): for buf_idx == 1 the mask shifts M_TCB_RX_DDP_BUF1_LEN
 * inside the V_ macro while the value shifts outside it — asymmetric with
 * the BUF0 case.  Whether both produce the intended 64-bit mask depends on
 * the S_/M_ macro definitions in cxgb_tcb.h; confirm against those.
 */
void
t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
    unsigned int len)
{
    if (buf_idx == 0)
        t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
            V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
            V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
            V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
            V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
    else
        t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
            V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
            V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
            V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
            V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
}

/*
 * Select a congestion control algorithm by name.  Currently a stub unless
 * CONGESTION_CONTROL_SUPPORTED is defined.
 */
static int
t3_set_cong_control(struct socket *so, const char *name)
{
#ifdef CONGESTION_CONTROL_SUPPORTED
    int cong_algo;

    for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
        if (!strcmp(name, t3_cong_ops[cong_algo].name))
            break;

    if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
        return -EINVAL;
#endif
    return 0;
}

/*
 * Request a snapshot of a connection's TCB via a CPL_GET_TCB message.
 * Deferred while in SYN_SENT (no TID yet).  Returns 0 or ENOMEM.
 */
int
t3_get_tcb(struct toepcb *toep)
{
    struct cpl_get_tcb *req;
    struct tcpcb *tp = toep->tp_tp;
    struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);

    if (!m)
        return (ENOMEM);

    inp_lock_assert(tp->t_inpcb);
    m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
    req = mtod(m, struct cpl_get_tcb *);
    m->m_pkthdr.len = m->m_len = sizeof(*req);
    req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
    req->wr.wr_lo = 0;
    OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
    req->cpuno = htons(toep->tp_qset);
    req->rsvd = 0;
    if (tp->t_state == TCPS_SYN_SENT)
        mbufq_tail(&toep->out_of_order_queue, m);  // defer
    else
        cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
    return 0;
}

/*
 * Register the toepcb under its TID; takes an extra reference on the toepcb
 * that is dropped when the TID is removed.
 */
static inline void
so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
{

    toepcb_hold(toep);

    cxgb_insert_tid(d->cdev, d->client, toep, tid);
}

/**
 * find_best_mtu - find the entry in the MTU table closest to an MTU
 * @d: TOM state
 * @mtu: the target MTU
 *
 * Returns the index of the value in the MTU table that is closest to but
 * does not exceed the target MTU.
1083 */ 1084static unsigned int 1085find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1086{ 1087 int i = 0; 1088 1089 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1090 ++i; 1091 return (i); 1092} 1093 1094static unsigned int 1095select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1096{ 1097 unsigned int idx; 1098 1099#ifdef notyet 1100 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt; 1101#endif 1102 if (tp) { 1103 tp->t_maxseg = pmtu - 40; 1104 if (tp->t_maxseg < td->mtus[0] - 40) 1105 tp->t_maxseg = td->mtus[0] - 40; 1106 idx = find_best_mtu(td, tp->t_maxseg + 40); 1107 1108 tp->t_maxseg = td->mtus[idx] - 40; 1109 } else 1110 idx = find_best_mtu(td, pmtu); 1111 1112 return (idx); 1113} 1114 1115static inline void 1116free_atid(struct t3cdev *cdev, unsigned int tid) 1117{ 1118 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1119 1120 if (toep) 1121 toepcb_release(toep); 1122} 1123 1124/* 1125 * Release resources held by an offload connection (TID, L2T entry, etc.) 
 */
static void
t3_release_offload_resources(struct toepcb *toep)
{
    struct tcpcb *tp = toep->tp_tp;
    struct toedev *tdev = toep->tp_toedev;
    struct t3cdev *cdev;
    struct socket *so;
    unsigned int tid = toep->tp_tid;
    struct sockbuf *rcv;

    CTR0(KTR_TOM, "t3_release_offload_resources");

    if (!tdev)
        return;

    cdev = TOEP_T3C_DEV(toep);
    if (!cdev)
        return;

    toep->tp_qset = 0;
    t3_release_ddp_resources(toep);

#ifdef CTRL_SKB_CACHE
    kfree_skb(CTRL_SKB_CACHE(tp));
    CTRL_SKB_CACHE(tp) = NULL;
#endif

    /* Outstanding (unacked) WRs: purge and reset the WR list. */
    if (toep->tp_wr_avail != toep->tp_wr_max) {
        purge_wr_queue(toep);
        reset_wr_list(toep);
    }

    if (toep->tp_l2t) {
        l2t_release(L2DATA(cdev), toep->tp_l2t);
        toep->tp_l2t = NULL;
    }
    toep->tp_tp = NULL;
    if (tp) {
        inp_lock_assert(tp->t_inpcb);
        so = inp_inpcbtosocket(tp->t_inpcb);
        rcv = so_sockbuf_rcv(so);
        /*
         * cancel any offloaded reads
         *
         */
        sockbuf_lock(rcv);
        tp->t_toe = NULL;
        tp->t_flags &= ~TF_TOE;
        if (toep->tp_ddp_state.user_ddp_pending) {
            t3_cancel_ubuf(toep, rcv);
            toep->tp_ddp_state.user_ddp_pending = 0;
        }
        so_sorwakeup_locked(so);

    }

    if (toep->tp_state == TCPS_SYN_SENT) {
        /* Connection never got past SYN_SENT: only an atid was allocated. */
        free_atid(cdev, tid);
#ifdef notyet
        __skb_queue_purge(&tp->out_of_order_queue);
#endif
    } else {  // we have TID
        cxgb_remove_tid(cdev, toep, tid);
        toepcb_release(toep);
    }
#if 0
    log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
}

/*
 * Hook the TOE usrreqs into a socket so TCP calls are routed to this driver.
 */
static void
install_offload_ops(struct socket *so)
{
    struct tcpcb *tp = so_sototcpcb(so);

    KASSERT(tp->t_toe != NULL, ("toepcb not set"));

    t3_install_socket_ops(so);
    tp->t_flags |= TF_TOE;
    tp->t_tu = &cxgb_toe_usrreqs;
}

/*
 * Determine the receive window scaling factor given a target max
 * receive window.
 */
static __inline int
select_rcv_wscale(int space, struct vnet *vnet)
{
	int wscale = 0;

	/* Clamp to what the hardware can represent (see MAX_RCV_WND). */
	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	/*
	 * Only scale when RFC 1323 is enabled; find the smallest shift
	 * that fits the window into 16 bits, capped at 14 per RFC 1323.
	 * NOTE(review): the vnet parameter is not referenced directly here;
	 * presumably the V_ macro expands using it under VIMAGE — confirm.
	 */
	if (V_tcp_do_rfc1323)
		for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;

	return (wscale);
}

/*
 * Determine the receive window size for a socket.
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
	struct tom_data *d = TOM_DATA(dev);
	unsigned int wnd;
	unsigned int max_rcv_wnd;
	struct sockbuf *rcv;

	rcv = so_sockbuf_rcv(so);

	/* Autosizing uses the global cap; otherwise honor the socket's hiwat. */
	if (V_tcp_do_autorcvbuf)
		wnd = V_tcp_autorcvbuf_max;
	else
		wnd = rcv->sb_hiwat;



	/* XXX
	 * For receive coalescing to work effectively we need a receive window
	 * that can accomodate a coalesced segment.
	 */
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	/* PR 5138 */
	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
				    (uint32_t)d->rx_page_size * 23 :
				    MAX_RCV_WND);

	return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields.  This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
	struct sockbuf *snd, *rcv;

#ifdef notyet
	SOCK_LOCK_ASSERT(so);
#endif

	snd = so_sockbuf_snd(so);
	rcv = so_sockbuf_rcv(so);

	log(LOG_INFO, "initializing offload socket\n");
	/*
	 * We either need to fix push frames to work with sbcompress
	 * or we need to add this
	 */
	snd->sb_flags |= SB_NOCOALESCE;
	rcv->sb_flags |= SB_NOCOALESCE;

	/* Cross-link the tcpcb and toepcb and record the TOE device. */
	tp->t_toe = toep;
	toep->tp_tp = tp;
	toep->tp_toedev = dev;

	toep->tp_tid = tid;
	toep->tp_l2t = e;
	/* Full WR credit budget is available initially. */
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_delack_mode = 0;

	/* Pick an MSS index from the route's interface MTU. */
	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
	/*
	 * XXX broken
	 *
	 */
	tp->rcv_wnd = select_rcv_wnd(dev, so);

	/*
	 * DDP (direct data placement) is used only when enabled by tunable,
	 * not refused by the socket, and the window is large enough.
	 */
	toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
	toep->tp_qset_idx = 0;

	reset_wr_list(toep);
	DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
	struct tcpcb *tp = so_sototcpcb(so);
	/*
	 * NOTE(review): tp->rcv_wnd is passed into an int parameter; on
	 * LP64 a window above INT_MAX would truncate — confirm rcv_wnd is
	 * always <= MAX_RCV_WND here.
	 */
	int wscale = select_rcv_wscale(tp->rcv_wnd, so->so_vnet);

	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
	    V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
	struct tcpcb *tp = so_sototcpcb(so);
	unsigned int val;

	/* Receive buffer size is expressed to HW in KB, capped by the field width. */
	val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
	       V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
	return (val);
}

/*
 * Compute the option 2 value: congestion-control flavor, when one is
 * configured via the cong_alg tunable (-1 means "not set").
 */
static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
	int flv_valid;

	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

	return (V_FLAVORS_VALID(flv_valid) |
	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
/*
 * Debug helper: sum the WR credits (stashed in csum_data) of all mbufs
 * still on the pending-WR queue.
 */
static int
count_pending_wrs(const struct toepcb *toep)
{
	const struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)
		n += m->m_pkthdr.csum_data;
	return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

/*
 * Build a CPL_ACT_OPEN_REQ in mbuf m for an active open on ATID atid,
 * using L2 entry e.  The mbuf length is set to exactly the request size.
 */
static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
	struct cpl_act_open_req *req;
	struct inpcb *inp = so_sotoinpcb(so);
	struct tcpcb *tp = inp_inpcbtotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = toep->tp_toedev;

	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

	req = mtod(m, struct cpl_act_open_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	/* Fill local/peer IP and port directly from the inpcb's 4-tuple. */
	inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
#if 0
	req->local_port = inp->inp_lport;
	req->peer_port = inp->inp_fport;
	memcpy(&req->local_ip, &inp->inp_laddr, 4);
	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
#endif
	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
			   V_TX_CHANNEL(e->smt_idx));
	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
	req->params = 0;
	req->opt2 = htonl(calc_opt2(so, tdev));
}


/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return (ECONNREFUSED);
	case CPL_ERR_ARP_MISS:
		return (EHOSTUNREACH);
	case CPL_ERR_CONN_TIMEDOUT:
		return (ETIMEDOUT);
	case CPL_ERR_TCAM_FULL:
		return (ENOMEM);
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return (EADDRINUSE);
	default:
		return (EIO);
	}
}

/*
 * Tear down an active open that failed with the given errno.  The caller
 * must hold the inpcb write lock when tp is set; this function drops it
 * (via inp_wunlock) before notifying the TCP stack.
 */
static void
fail_act_open(struct toepcb *toep, int errno)
{
	struct tcpcb *tp = toep->tp_tp;

	t3_release_offload_resources(toep);
	if (tp) {
		inp_wunlock(tp->t_inpcb);
		tcp_offload_drop(tp, errno);
	}

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	struct inpcb *inp;

	/* The connection may already be detached from its tcpcb. */
	if (toep->tp_tp == NULL)
		goto done;

	inp = toep->tp_tp->t_inpcb;

/*
 * Don't handle connection retry for now
 */
#ifdef notyet
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
			       jiffies + HZ / 2);
	} else
#endif
	{
		inp_wlock(inp);
		/*
		 * drops the inpcb lock
		 */
		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
	}

	done:
	m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int
act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	       status != CPL_ERR_ARP_MISS;
}

/*
 * Process an ACT_OPEN_RPL CPL message.
 */
static int
do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct cpl_act_open_rpl *rpl = cplhdr(m);

	/* On non-T3A parts a failed open may still hold a TID; queue its release. */
	if (cdev->type != T3A && act_open_has_tid(rpl->status))
		cxgb_queue_tid_release(cdev, GET_TID(rpl));

	active_open_failed(toep, m);
	return (0);
}

/*
 * Handle an ARP failure for an active open.   XXX purge ofo queue
 *
 * XXX badly broken for crossed SYNs as the ATID is no longer valid.
 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
 * check SOCK_DEAD or sk->sk_sock.  Or maybe generate the error here but don't
 * free the atid.  Hmm.
 */
#ifdef notyet
static void
act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
{
	struct toepcb *toep = m_get_toep(m);
	struct tcpcb *tp = toep->tp_tp;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so;

	inp_wlock(inp);
	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
		/*
		 * drops the inpcb lock
		 */
		fail_act_open(so, EHOSTUNREACH);
		printf("freeing %p\n", m);

		m_free(m);
	} else
		inp_wunlock(inp);
}
#endif
/*
 * Send an active open request.
 */
int
t3_connect(struct toedev *tdev, struct socket *so,
    struct rtentry *rt, struct sockaddr *nam)
{
	struct mbuf *m;
	struct l2t_entry *e;
	struct tom_data *d = TOM_DATA(tdev);
	struct inpcb *inp = so_sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep; /* allocated by init_offload_socket */

	int atid;

	toep = toepcb_alloc();
	if (toep == NULL)
		goto out_err;

	/*
	 * NOTE(review): on the free_tid/out_err paths below the freshly
	 * allocated toep appears to be leaked — confirm whether
	 * toepcb_release() should be called on failure.
	 */
	if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
		goto out_err;

	/* Resolve the L2 (ARP) entry for the destination. */
	e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
	if (!e)
		goto free_tid;

	inp_lock_assert(inp);
	m = m_gethdr(MT_DATA, M_WAITOK);

#if 0
	m->m_toe.mt_toepcb = tp->t_toe;
	set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
#endif
	so_lock(so);

	/* Attach offload state to the socket and build the open request. */
	init_offload_socket(so, tdev, atid, e, rt, toep);

	install_offload_ops(so);

	mk_act_open_req(so, m, atid, e);
	so_unlock(so);

	soisconnecting(so);
	toep = tp->t_toe;
	m_set_toep(m, tp->t_toe);

	toep->tp_state = TCPS_SYN_SENT;
	l2t_send(d->cdev, (struct mbuf *)m, e);

	if (toep->tp_ulp_mode)
		t3_enable_ddp(toep, 0);
	return 	(0);

free_tid:
	printf("failing connect - free atid\n");

	free_atid(d->cdev, atid);
out_err:
	printf("return ENOMEM\n");
	return (ENOMEM);
}

/*
 * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do
 * not send multiple ABORT_REQs for the same connection and also that we do
 * not try to send a message after the connection has closed.  Returns 1 if
 * an ABORT_REQ wasn't generated after all, 0 otherwise.
1604 */ 1605static void 1606t3_send_reset(struct toepcb *toep) 1607{ 1608 1609 struct cpl_abort_req *req; 1610 unsigned int tid = toep->tp_tid; 1611 int mode = CPL_ABORT_SEND_RST; 1612 struct tcpcb *tp = toep->tp_tp; 1613 struct toedev *tdev = toep->tp_toedev; 1614 struct socket *so = NULL; 1615 struct mbuf *m; 1616 struct sockbuf *snd; 1617 1618 if (tp) { 1619 inp_lock_assert(tp->t_inpcb); 1620 so = inp_inpcbtosocket(tp->t_inpcb); 1621 } 1622 1623 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) || 1624 tdev == NULL)) 1625 return; 1626 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN); 1627 1628 snd = so_sockbuf_snd(so); 1629 /* Purge the send queue so we don't send anything after an abort. */ 1630 if (so) 1631 sbflush(snd); 1632 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev)) 1633 mode |= CPL_ABORT_POST_CLOSE_REQ; 1634 1635 m = m_gethdr_nofail(sizeof(*req)); 1636 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep)); 1637 set_arp_failure_handler(m, abort_arp_failure); 1638 1639 req = mtod(m, struct cpl_abort_req *); 1640 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); 1641 req->wr.wr_lo = htonl(V_WR_TID(tid)); 1642 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); 1643 req->rsvd0 = tp ? 
htonl(tp->snd_nxt) : 0; 1644 req->rsvd1 = !(toep->tp_flags & TP_DATASENT); 1645 req->cmd = mode; 1646 if (tp && (tp->t_state == TCPS_SYN_SENT)) 1647 mbufq_tail(&toep->out_of_order_queue, m); // defer 1648 else 1649 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); 1650} 1651 1652static int 1653t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) 1654{ 1655 struct inpcb *inp; 1656 int error, optval; 1657 1658 if (sopt->sopt_name == IP_OPTIONS) 1659 return (ENOPROTOOPT); 1660 1661 if (sopt->sopt_name != IP_TOS) 1662 return (EOPNOTSUPP); 1663 1664 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 1665 1666 if (error) 1667 return (error); 1668 1669 if (optval > IPTOS_PREC_CRITIC_ECP) 1670 return (EINVAL); 1671 1672 inp = so_sotoinpcb(so); 1673 inp_wlock(inp); 1674 inp_ip_tos_set(inp, optval); 1675#if 0 1676 inp->inp_ip_tos = optval; 1677#endif 1678 t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe); 1679 inp_wunlock(inp); 1680 1681 return (0); 1682} 1683 1684static int 1685t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1686{ 1687 int err = 0; 1688 size_t copied; 1689 1690 if (sopt->sopt_name != TCP_CONGESTION && 1691 sopt->sopt_name != TCP_NODELAY) 1692 return (EOPNOTSUPP); 1693 1694 if (sopt->sopt_name == TCP_CONGESTION) { 1695 char name[TCP_CA_NAME_MAX]; 1696 int optlen = sopt->sopt_valsize; 1697 struct tcpcb *tp; 1698 1699 if (sopt->sopt_dir == SOPT_GET) { 1700 KASSERT(0, ("unimplemented")); 1701 return (EOPNOTSUPP); 1702 } 1703 1704 if (optlen < 1) 1705 return (EINVAL); 1706 1707 err = copyinstr(sopt->sopt_val, name, 1708 min(TCP_CA_NAME_MAX - 1, optlen), &copied); 1709 if (err) 1710 return (err); 1711 if (copied < 1) 1712 return (EINVAL); 1713 1714 tp = so_sototcpcb(so); 1715 /* 1716 * XXX I need to revisit this 1717 */ 1718 if ((err = t3_set_cong_control(so, name)) == 0) { 1719#ifdef CONGESTION_CONTROL_SUPPORTED 1720 tp->t_cong_control = strdup(name, M_CXGB); 1721#endif 1722 } else 1723 return (err); 1724 } else { 1725 int optval, oldval; 1726 
struct inpcb *inp; 1727 struct tcpcb *tp; 1728 1729 if (sopt->sopt_dir == SOPT_GET) 1730 return (EOPNOTSUPP); 1731 1732 err = sooptcopyin(sopt, &optval, sizeof optval, 1733 sizeof optval); 1734 1735 if (err) 1736 return (err); 1737 1738 inp = so_sotoinpcb(so); 1739 inp_wlock(inp); 1740 tp = inp_inpcbtotcpcb(inp); 1741 1742 oldval = tp->t_flags; 1743 if (optval) 1744 tp->t_flags |= TF_NODELAY; 1745 else 1746 tp->t_flags &= ~TF_NODELAY; 1747 inp_wunlock(inp); 1748 1749 1750 if (oldval != tp->t_flags && (tp->t_toe != NULL)) 1751 t3_set_nagle(tp->t_toe); 1752 1753 } 1754 1755 return (0); 1756} 1757 1758int 1759t3_ctloutput(struct socket *so, struct sockopt *sopt) 1760{ 1761 int err; 1762 1763 if (sopt->sopt_level != IPPROTO_TCP) 1764 err = t3_ip_ctloutput(so, sopt); 1765 else 1766 err = t3_tcp_ctloutput(so, sopt); 1767 1768 if (err != EOPNOTSUPP) 1769 return (err); 1770 1771 return (tcp_ctloutput(so, sopt)); 1772} 1773 1774/* 1775 * Returns true if we need to explicitly request RST when we receive new data 1776 * on an RX-closed connection. 1777 */ 1778static inline int 1779need_rst_on_excess_rx(const struct toepcb *toep) 1780{ 1781 return (1); 1782} 1783 1784/* 1785 * Handles Rx data that arrives in a state where the socket isn't accepting 1786 * new data. 1787 */ 1788static void 1789handle_excess_rx(struct toepcb *toep, struct mbuf *m) 1790{ 1791 1792 if (need_rst_on_excess_rx(toep) && 1793 !(toep->tp_flags & TP_ABORT_SHUTDOWN)) 1794 t3_send_reset(toep); 1795 m_freem(m); 1796} 1797 1798/* 1799 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE) 1800 * by getting the DDP offset from the TCB. 
 */
static void
tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct ddp_state *q = &toep->tp_ddp_state;
	struct ddp_buf_state *bsp;
	struct cpl_get_tcb_rpl *hdr;
	unsigned int ddp_offset;
	struct socket *so;
	struct tcpcb *tp;
	struct sockbuf *rcv;
	int state;

	uint64_t t;
	__be64 *tcb;

	tp = toep->tp_tp;
	so = inp_inpcbtosocket(tp->t_inpcb);

	inp_lock_assert(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	/* Note that we only accout for CPL_GET_TCB issued by the DDP code.
	 * We really need a cookie in order to dispatch the RPLs.
	 */
	q->get_tcb_count--;

	/* It is a possible that a previous CPL already invalidated UBUF DDP
	 * and moved the cur_buf idx and hence no further processing of this
	 * skb is required. However, the app might be sleeping on
	 * !q->get_tcb_count and we need to wake it up.
	 */
	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
		int state = so_state_get(so);

		m_freem(m);
		/* so_sorwakeup_locked() consumes the sockbuf lock. */
		if (__predict_true((state & SS_NOFDREF) == 0))
			so_sorwakeup_locked(so);
		else
			sockbuf_unlock(rcv);

		return;
	}

	/*
	 * Extract the current DDP placement offset for the active buffer
	 * from the raw TCB words returned in the reply.
	 */
	bsp = &q->buf_state[q->cur_buf];
	hdr = cplhdr(m);
	tcb = (__be64 *)(hdr + 1);
	if (q->cur_buf == 0) {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
	} else {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
	}
	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
	/* The delta since the last known offset is the amount newly placed. */
	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset = ddp_offset;
	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;

	CTR5(KTR_TOM,
	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
	KASSERT(ddp_offset >= m->m_cur_offset,
	    ("ddp_offset=%u less than cur_offset=%u",
		ddp_offset, m->m_cur_offset));

#if 0
{
	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;

	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;

	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
	rcv_nxt = t >> S_TCB_RCV_NXT;
	rcv_nxt &= M_TCB_RCV_NXT;

	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;

	T3_TRACE2(TIDTB(sk),
		  "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
		  ddp_flags, rcv_nxt - rx_hdr_offset);
	T3_TRACE4(TB(q),
		  "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
		  tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
	T3_TRACE3(TB(q),
		  "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
		  rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
	T3_TRACE2(TB(q),
		  "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
		  q->buf_state[0].flags, q->buf_state[1].flags);

}
#endif
	/*
	 * NOTE(review): this early return goes through handle_excess_rx()
	 * while the rcv sockbuf lock is still held; nothing visible here
	 * releases it on that path — confirm against handle_excess_rx().
	 */
	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
		handle_excess_rx(toep, m);
		return;
	}

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
	}
#endif
	if (bsp->flags & DDP_BF_NOCOPY) {
#ifdef T3_TRACE
		T3_TRACE0(TB(q),
			  "tcb_rpl_as_ddp_complete: CANCEL UBUF");

		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
			printk("!cancel_ubuf");
			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
		}
#endif
		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
		q->cur_buf ^= 1;
	} else if (bsp->flags & DDP_BF_NOFLIP) {

		m->m_ddp_flags = 1;    /* always a kernel buffer */

		/* now HW buffer carries a user buffer */
		bsp->flags &= ~DDP_BF_NOFLIP;
		bsp->flags |= DDP_BF_NOCOPY;

		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
		 * any new data in which case we're done. If in addition the
		 * offset is 0, then there wasn't a completion for the kbuf
		 * and we need to decrement the posted count.
		 */
		if (m->m_pkthdr.len == 0) {
			if (ddp_offset == 0) {
				q->kbuf_posted--;
				bsp->flags |= DDP_BF_NODATA;
			}
			sockbuf_unlock(rcv);
			m_free(m);
			return;
		}
	} else {
		sockbuf_unlock(rcv);

		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
		 * but it got here way late and nobody cares anymore.
		 */
		m_free(m);
		return;
	}

	/* Account the placed bytes against the TCP sequence space. */
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
	if (m->m_pkthdr.len == 0) {
		q->user_ddp_pending = 0;
		m_free(m);
	} else
		SBAPPEND(rcv, m);

	state = so_state_get(so);
	/* so_sorwakeup_locked() consumes the sockbuf lock. */
	if (__predict_true((state & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
 * in that case they are similar to DDP completions.
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* OK if socket doesn't exist */
	if (toep == NULL) {
		printf("null toep in do_get_tcb_rpl\n");
		return (CPL_RET_BUF_DONE);
	}

	inp_wlock(toep->tp_tp->t_inpcb);
	tcb_rpl_as_ddp_complete(toep, m);
	inp_wunlock(toep->tp_tp->t_inpcb);

	return (0);
}

/*
 * Account for data the hardware placed directly (DDP) ahead of this
 * RX_DATA message: hdr->seq beyond tp->rcv_nxt means bytes already landed
 * in the current DDP buffer.  Tag m as a DDP record covering that range.
 */
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data *hdr = cplhdr(m);
	unsigned int rcv_nxt = ntohl(hdr->seq);
	struct sockbuf *rcv;

	/* No gap: nothing was direct-placed before this segment. */
	if (tp->rcv_nxt == rcv_nxt)
		return;

	inp_lock_assert(tp->t_inpcb);
	so  = inp_inpcbtosocket(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	q = &toep->tp_ddp_state;
	bsp = &q->buf_state[q->cur_buf];
	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
		rcv_nxt, tp->rcv_nxt));
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "handle_ddp_data: neg len");
	}
#endif
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	/*
	 * For now, don't re-enable DDP after a connection fell out of  DDP
	 * mode.
	 */
	q->ubuf_ddp_ready = 0;
	sockbuf_unlock(rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_data *hdr = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct sockbuf *rcv;
	int state;
	int len = be16toh(hdr->len);

	inp_wlock(tp->t_inpcb);

	so  = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		TRACE_EXIT;
		return;
	}

	/* Fold in any bytes the HW already direct-placed before this segment. */
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
		handle_ddp_data(toep, m);

	m->m_seq = ntohl(hdr->seq);
	m->m_ulp_mode = 0;                    /* for iSCSI */

#if VALIDATE_SEQ
	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
		log(LOG_ERR,
		       "%s: TID %u: Bad sequence number %u, expected %u\n",
		    toep->tp_toedev->name, toep->tp_tid, m->m_seq,
		       tp->rcv_nxt);
		m_freem(m);
		inp_wunlock(tp->t_inpcb);
		return;
	}
#endif
	/* Strip the CPL header; only payload goes to the socket buffer. */
	m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
	/*
	 * We don't handle urgent data yet
	 */
	if (__predict_false(hdr->urg))
		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
		     tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
							 tp->rcv_nxt];
#endif
	/* Track delayed-ACK mode changes signalled by the hardware. */
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
		toep->tp_delack_mode = hdr->dack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

	/* Trust the CPL's length field over the mbuf accounting. */
	if (len < m->m_pkthdr.len)
		m->m_pkthdr.len = m->m_len = len;

	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;
	CTR2(KTR_TOM,
	    "new_rx_data: seq 0x%x len %u",
	    m->m_seq, m->m_pkthdr.len);
	inp_wunlock(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);
#if 0
	if (sb_notify(rcv))
		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
#endif
	SBAPPEND(rcv, m);

#ifdef notyet
	/*
	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
	 *
	 */
	KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),

	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
		so, rcv->sb_cc, rcv->sb_mbmax));
#endif


	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
	    rcv->sb_cc, rcv->sb_mbcnt);

	state = so_state_get(so);
	/* so_sorwakeup_locked() consumes the sockbuf lock. */
	if (__predict_true((state & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DATA CPL messages.
 */
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);

	new_rx_data(toep, m);

	return (0);
}

/*
 * Process an RX_DATA_DDP message: data was placed directly into a DDP
 * buffer by the hardware; account for it in sequence space and append a
 * zero-copy record to the receive socket buffer.
 */
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	struct socket *so;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	int nomoredata = 0;
	unsigned int delack_mode;
	struct sockbuf *rcv;

	tp = toep->tp_tp;
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {

		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	/* Which of the two DDP buffers the HW used for this placement. */
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len.  We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	inp_wunlock(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
	    m->m_len, rcv_nxt, m->m_seq);

	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
	 * account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
			m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;                     /* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
#endif
	SBAPPEND(rcv, m);

	/* Wake the reader on PSH or completed/kernel buffers; wakeup drops the lock. */
	if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
		   (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
		       || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
		 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
		 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
		 F_DDP_INVALID_PPOD)

/*
 * Handler for RX_DATA_DDP CPL messages.
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	/* Drop placements the hardware flagged with any DDP error bit. */
	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		       GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

/*
 * Process an RX_DDP_COMPLETE message: a DDP buffer filled up.  Compute how
 * many new bytes were placed since the last update and append a zero-copy
 * record covering them to the receive socket buffer.
 */
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;
	struct sockbuf *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = so_sotoinpcb(so);

		handle_excess_rx(toep, m);
		inp_wunlock(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	/* New bytes = reported end offset minus the last recorded offset. */
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	inp_wunlock(tp->t_inpcb);

	KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR5(KTR_TOM,
		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
		  "ddp_report 0x%x offset %u, len %u",
		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
		   G_DDP_OFFSET(ddp_report), m->m_len);

	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset += m->m_len;

	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;                     /* flip buffers */
		/* A completion short of the kbuf length means the flow stopped. */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata=1;
	}

	CTR4(KTR_TOM,
		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
		  "ddp_report %u offset %u",
		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
		   G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

	SBAPPEND(rcv, m);
	/* so_sorwakeup_locked() consumes the sockbuf lock. */
	if ((so_state_get(so) & SS_NOFDREF) == 0)
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct tcpcb *tp)
{
	/*
	 * Bump rcv_nxt for the peer FIN.
 We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	inp_wlock(tp->t_inpcb);
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	     /* defeat recycling */
	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
	inp_wunlock(tp->t_inpcb);
	tcp_offload_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
	struct sockbuf *rcv;

	if (tp->rcv_nxt == rcv_nxt)			/* no data */
		return (0);

	CTR0(KTR_TOM, "handle_peer_close_data");
	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
		 */
		return (1);
	}

	inp_lock_assert(tp->t_inpcb);
	q = &toep->tp_ddp_state;
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[q->cur_buf];
	/* Bytes the FIN's sequence number says were placed but not reported. */
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	/* Low bit = DDP-buffer-complete flag (bit-0 convention). */
	m->m_ddp_flags =
	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	/* Presumably the starting sequence number of this chunk — confirm. */
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	tp->t_rcvtime = ticks;
	SBAPPEND(rcv, m);
	/* so_sorwakeup_locked() drops the sockbuf lock for us. */
	if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);

	return (1);
}

/*
 * Handle a peer FIN.
 */
static void
do_peer_fin(struct toepcb *toep, struct mbuf *m)
{
	struct socket *so;
	struct tcpcb *tp = toep->tp_tp;
	int keep, action;

	action = keep = 0;
	CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		printf("abort_pending set\n");

		goto out;
	}
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
		keep = handle_peer_close_data(so, m);
		if (keep < 0) {
			inp_wunlock(tp->t_inpcb);
			return;
		}
	}
	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		CTR1(KTR_TOM,
		    "waking up waiters for cantrcvmore on %p ", so);
		socantrcvmore(so);

		/*
		 * If connection is half-synchronized
		 * (ie NEEDSYN flag on) then delay ACK,
		 * so it may be piggybacked when SYN is sent.
		 * Otherwise, since we received a FIN then no
		 * more input can be expected, send ACK now.
		 */
		if (tp->t_flags & TF_NEEDSYN)
			tp->t_flags |= TF_DELACK;
		else
			tp->t_flags |= TF_ACKNOW;
		tp->rcv_nxt++;
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
	/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;
	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;
	case TCPS_FIN_WAIT_2:
		/*
		 * If we've sent an abort_req we must have sent it too late,
		 * HW will send us a reply telling us so, and this peer_close
		 * is really the last message for this connection and needs to
		 * be treated as an abort_rpl, i.e., transition the connection
		 * to TCP_CLOSE (note that the host stack does this at the
		 * time of generating the RST but we must wait for HW).
		 * Otherwise we enter TIME_WAIT.
		 */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;
		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
		    toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);

	/* The chosen action runs only after the inpcb lock is dropped. */
	if (action == TCP_TIMEWAIT) {
		enter_timewait(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}

#ifdef notyet
	/* Do not send POLL_HUP for half duplex close. */
	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
	    sk->sk_state == TCP_CLOSE)
		sk_wake_async(so, 1, POLL_HUP);
	else
		sk_wake_async(so, 1, POLL_IN);
#endif

out:
	/* keep != 0 means handle_peer_close_data() consumed/kept the mbuf. */
	if (!keep)
		m_free(m);
}

/*
 * Handler for PEER_CLOSE CPL messages.
 */
static int
do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	VALIDATE_SOCK(so);

	do_peer_fin(toep, m);
	return (0);
}

/*
 * Process a CLOSE_CON_RPL CPL: HW acknowledges that our FIN has gone out.
 * rpl->snd_nxt - 1 (the FIN itself excluded) becomes the new snd_una and
 * the connection advances through the usual FIN_WAIT/CLOSING transitions.
 */
static void
process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_close_con_rpl *rpl = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int action = 0;
	struct sockbuf *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */

	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		inp_wunlock(tp->t_inpcb);
		goto out;
	}

	CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
	    tp->t_state, !!(so_state_get(so) & SS_NOFDREF));

	switch (tp->t_state) {
	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;

		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	case TCPS_LAST_ACK:
		/*
		 * In this state we don't care about pending abort_rpl.
		 * If we've sent abort_req it was post-close and was sent too
		 * late, this close_con_rpl is the actual last message.
		 */
		t3_release_offload_resources(toep);
		action = TCP_CLOSE;
		break;
	case TCPS_FIN_WAIT_1:
		/*
		 * If we can't receive any more
		 * data, then closing user can proceed.
		 * Starting the timer is contrary to the
		 * specification, but if we don't get a FIN
		 * we'll hang forever.
		 *
		 * XXXjl:
		 * we should release the tp also, and use a
		 * compressed state.
		 */
		/*
		 * NOTE(review): 'so' was already dereferenced above via
		 * so_state_get(); the NULL guards here look redundant —
		 * confirm whether so can legitimately be NULL on this path.
		 */
		if (so)
			rcv = so_sockbuf_rcv(so);
		else
			break;

		if (rcv->sb_state & SBS_CANTRCVMORE) {
			int timeout;

			if (so)
				soisdisconnected(so);
			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : tcp_maxidle;
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
		tp->t_state = TCPS_FIN_WAIT_2;
		if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
			action = TCP_DROP;
		}

		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		    toep->tp_toedev->tod_name, toep->tp_tid,
		    tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);


	/* The chosen action runs only after the inpcb lock is dropped. */
	if (action == TCP_TIMEWAIT) {
		enter_timewait(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}
out:
	m_freem(m);
}

/*
 * Handler for CLOSE_CON_RPL CPL messages.
 */
static int
do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
			    void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	process_close_con_rpl(toep, m);
	return (0);
}

/*
 * Process abort replies.  We only process these messages if we anticipate
 * them as the coordination between SW and HW in this area is somewhat lacking
 * and sometimes we get ABORT_RPLs after we are done with the connection that
 * originated the ABORT_REQ.
 */
static void
process_abort_rpl(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int needclose = 0;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(sk),
		  "process_abort_rpl: GTS rpl pending %d",
		  sock_flag(sk, ABORT_RPL_PENDING));
#endif

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		/*
		 * XXX panic on tcpdrop
		 */
		/*
		 * On non-T3A the first RPL is only recorded; resources are
		 * released when the second arrives — presumably to tolerate
		 * the duplicated ABORT_RPLs mentioned above.  Confirm.
		 */
		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
			toep->tp_flags |= TP_ABORT_RPL_RCVD;
		else {
			toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
			if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
			    !is_t3a(toep->tp_toedev)) {
				if (toep->tp_flags & TP_ABORT_REQ_RCVD)
					panic("TP_ABORT_REQ_RCVD set");
				t3_release_offload_resources(toep);
				needclose = 1;
			}
		}
	}
	inp_wunlock(tp->t_inpcb);

	if (needclose)
		tcp_offload_close(tp);

	m_free(m);
}

/*
 * Handle an ABORT_RPL_RSS CPL message.
 */
static int
do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_abort_rpl_rss *rpl = cplhdr(m);
	struct toepcb *toep;

	/*
	 * Ignore replies to post-close aborts indicating that the abort was
	 * requested too late.  These connections are terminated when we get
	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
	 * arrives the TID is either no longer used or it has been recycled.
	 */
	if (rpl->status == CPL_ERR_ABORT_FAILED) {
discard:
		m_free(m);
		return (0);
	}

	toep = (struct toepcb *)ctx;

	/*
	 * Sometimes we've already closed the socket, e.g., a post-close
	 * abort races with ABORT_REQ_RSS, the latter frees the socket
	 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
	 * but FW turns the ABORT_REQ into a regular one and so we get
	 * ABORT_RPL_RSS with status 0 and no socket.  Only on T3A.
	 */
	if (!toep)
		goto discard;

	if (toep->tp_tp == NULL) {
		log(LOG_NOTICE, "removing tid for abort\n");
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		toepcb_release(toep);
		goto discard;
	}

	log(LOG_NOTICE, "toep=%p\n", toep);
	log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);

	/* Hold the toepcb across processing; it may release resources. */
	toepcb_hold(toep);
	process_abort_rpl(toep, m);
	toepcb_release(toep);
	return (0);
}

/*
 * Convert the status code of an ABORT_REQ into a FreeBSD error code.  Also
 * indicate whether RST should be sent in response.
 *
 * NOTE(review): *need_rst is never written in this function, so the
 * caller's rst_status always keeps its default — confirm that is intended.
 */
static int
abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
{
	struct tcpcb *tp = so_sototcpcb(so);

	switch (abort_reason) {
	case CPL_ERR_BAD_SYN:
#if 0
		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);	// fall through
#endif
	/* FALLTHROUGH: a bad SYN is reported like a connection reset */
	case CPL_ERR_CONN_RESET:
		// XXX need to handle SYN_RECV due to crossed SYNs
		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
	case CPL_ERR_XMIT_TIMEDOUT:
	case CPL_ERR_PERSIST_TIMEDOUT:
	case CPL_ERR_FINWAIT2_TIMEDOUT:
	case CPL_ERR_KEEPALIVE_TIMEDOUT:
#if 0
		NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
#endif
		return (ETIMEDOUT);
	default:
		return (EIO);
	}
}

/*
 * Fill in a CPL_ABORT_RPL work request in mbuf m for the given tid/cmd.
 */
static inline void
set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
{
	struct cpl_abort_rpl *rpl = cplhdr(m);

	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
	rpl->wr.wr_lo = htonl(V_WR_TID(tid));
	m->m_len = m->m_pkthdr.len = sizeof(*rpl);

	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
	rpl->cmd = cmd;
}

/*
 * Deferred-work handler that sends the abort reply we could not allocate
 * an mbuf for in send_abort_rpl(); the deferred status was stashed in
 * req->status by the caller.
 */
static void
send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
{
	struct mbuf *reply_mbuf;
	struct cpl_abort_req_rss *req = cplhdr(m);

	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
	/*
	 * NOTE(review): priority and length are set on the request mbuf 'm'
	 * here, not on reply_mbuf, which is what actually gets sent.  The
	 * length is harmless (set_abort_rpl_wr() sets reply_mbuf's length),
	 * but the priority looks like it was meant for reply_mbuf — confirm.
	 */
	m_set_priority(m, CPL_PRIORITY_DATA);
	m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
	set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
	m_free(m);
}

/*
 * Returns whether an ABORT_REQ_RSS message is a negative advice.
 */
static inline int
is_neg_adv_abort(unsigned int status)
{
	return status == CPL_ERR_RTX_NEG_ADVICE ||
	    status == CPL_ERR_PERSIST_NEG_ADVICE;
}

/*
 * Reply to an ABORT_REQ (consuming the request mbuf m); falls back to the
 * deferred path when no mbuf can be allocated right now.
 */
static void
send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
{
	struct mbuf *reply_mbuf;
	struct cpl_abort_req_rss *req = cplhdr(m);

	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);

	if (!reply_mbuf) {
		/* Defer the reply.  Stick rst_status into req->cmd.
 */
		req->status = rst_status;
		t3_defer_reply(m, tdev, send_deferred_abort_rpl);
		return;
	}

	m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
	set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
	m_free(m);

	/*
	 * XXX need to sync with ARP as for SYN_RECV connections we can send
	 * these messages while ARP is pending.  For other connection states
	 * it's not a problem.
	 */
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
}

#ifdef notyet
static void
cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
{
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct request_sock *req = child->sk_user_data;

	inet_csk_reqsk_queue_removed(parent, req);
	synq_remove(tcp_sk(child));
	__reqsk_free(req);
	child->sk_user_data = NULL;
#endif
}


/*
 * Performs the actual work to abort a SYN_RECV connection.
 */
static void
do_abort_syn_rcv(struct socket *child, struct socket *parent)
{
	struct tcpcb *parenttp = so_sototcpcb(parent);
	struct tcpcb *childtp = so_sototcpcb(child);

	/*
	 * If the server is still open we clean up the child connection,
	 * otherwise the server already did the clean up as it was purging
	 * its SYN queue and the skb was just sitting in its backlog.
	 */
	if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
		cleanup_syn_rcv_conn(child, parent);
		inp_wlock(childtp->t_inpcb);
		t3_release_offload_resources(childtp->t_toe);
		inp_wunlock(childtp->t_inpcb);
		tcp_offload_close(childtp);
	}
}
#endif

/*
 * Handle abort requests for a SYN_RECV connection.  These need extra work
 * because the socket is on its parent's SYN queue.
 */
static int
abort_syn_rcv(struct socket *so, struct mbuf *m)
{
	/* Unported Linux path below; currently always returns 0. */
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct socket *parent;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
	struct socket *oreq = so->so_incomp;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;

	if (!oreq)
		return -1;        /* somehow we are not on the SYN queue */

	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, oreq->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	do_abort_syn_rcv(so, parent);
	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
	so_unlock(parent);
#endif
	return (0);
}

/*
 * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
 * request except that we need to reply to it.
 */
static void
process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
{
	int rst_status = CPL_ABORT_NO_RST;
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int needclose = 0;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	/*
	 * First ABORT_REQ for this connection: record it, free the mbuf and
	 * send no reply yet — NOTE(review): this appears to rely on the
	 * abort being delivered again before it is acted upon; confirm.
	 */
	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
		m_free(m);
		goto skip;
	}

	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
	/*
	 * Three cases to consider:
	 * a) We haven't sent an abort_req; close the connection.
	 * b) We have sent a post-close abort_req that will get to TP too late
	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
	 *    be ignored and the connection should be closed now.
	 * c) We have sent a regular abort_req that will get to TP too late.
	 *    That will generate an abort_rpl with status 0, wait for it.
	 */
	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
	    (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
		int error;

		error = abort_status_to_errno(so, req->status,
		    &rst_status);
		so_error_set(so, error);

		if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
			so_sorwakeup(so);
		/*
		 * SYN_RECV needs special processing.  If abort_syn_rcv()
		 * returns 0 is has taken care of the abort.
		 */
		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
			goto skip;

		t3_release_offload_resources(toep);
		needclose = 1;
	}
	inp_wunlock(tp->t_inpcb);

	if (needclose)
		tcp_offload_close(tp);

	/* send_abort_rpl() consumes m. */
	send_abort_rpl(m, tdev, rst_status);
	return;
skip:
	inp_wunlock(tp->t_inpcb);
}

/*
 * Handle an ABORT_REQ_RSS CPL message.
 */
static int
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;

	/* Negative advice (retransmit/persist) is not a real abort. */
	if (is_neg_adv_abort(req->status)) {
		m_free(m);
		return (0);
	}

	log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);

	/* Connection aborted while still on the parent's SYN queue. */
	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		toep->tp_flags |= TP_ABORT_REQ_RCVD;

		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		/*
		 *  Unhook
		 */
		toep->tp_tp->t_toe = NULL;
		toep->tp_tp->t_flags &= ~TF_TOE;
		toep->tp_tp = NULL;
		/*
		 * XXX need to call syncache_chkrst - but we don't
		 * have a way of doing that yet
		 */
		toepcb_release(toep);
		log(LOG_ERR, "abort for unestablished connection :-(\n");
		return (0);
	}
	if (toep->tp_tp == NULL) {
		log(LOG_NOTICE, "disconnected toepcb\n");
		/* should be freed momentarily */
		return (0);
	}


	/* Hold the toepcb across processing; it may release resources. */
	toepcb_hold(toep);
	process_abort_req(toep, m, toep->tp_toedev);
	toepcb_release(toep);
	return (0);
}
#ifdef notyet
static void
pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
{
	struct toedev *tdev = TOE_DEV(parent);

	do_abort_syn_rcv(child, parent);
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
		struct cpl_pass_accept_rpl *rpl = cplhdr(m);

		rpl->opt0h = htonl(F_TCAM_BYPASS);
		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	} else
		m_free(m);
}
#endif
static void
handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
{
	/* Unported Linux path below; currently a no-op. */
	CXGB_UNIMPLEMENTED();

#ifdef notyet
	struct t3cdev *cdev;
	struct socket *parent;
	struct socket *oreq;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;
	struct tcpcb *otp, *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		m_free(m);
		return;
	}

	oreq = so->so_incomp;
	otp = so_sototcpcb(oreq);

	cdev = T3C_DEV(so);
	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, otp->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	pass_open_abort(so, parent, m);
	so_unlock(parent);
#endif
}

/*
 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
 * connection.
 */
static void
pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
#endif
	handle_pass_open_arp_failure(m_get_socket(m), m);
}

/*
 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
 */
static void
mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
{
	struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
	struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
	unsigned int tid = GET_TID(req);

	m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->peer_ip = req->peer_ip;	// req->peer_ip not overwritten yet
	rpl->opt0h = htonl(F_TCAM_BYPASS);
	rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
	rpl->opt2 = 0;
	rpl->rsvd = rpl->opt2;		/* workaround for HW bug */
}

/*
 * Send a deferred reject to an accept request.
 */
static void
reject_pass_request(struct toedev *tdev, struct mbuf *m)
{
	struct mbuf *reply_mbuf;

	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
	mk_pass_accept_rpl(reply_mbuf, m);
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
	m_free(m);
}

/*
 * Syncache event callback.  Both events drop a toepcb reference —
 * presumably the one taken via toepcb_hold() just before
 * syncache_add_accept_req() ("workaround for lack of syncache drop");
 * confirm the pairing.
 */
static void
handle_syncache_event(int event, void *arg)
{
	struct toepcb *toep = arg;

	switch (event) {
	case TOE_SC_ENTRY_PRESENT:
		/*
		 * entry already exists - free toepcb
		 * and l2t
		 */
		printf("syncache entry present\n");
		toepcb_release(toep);
		break;
	case TOE_SC_DROP:
		/*
		 * The syncache has given up on this entry
		 * either it timed out, or it was evicted
		 * we need to explicitly release the tid
		 */
		printf("syncache entry dropped\n");
		toepcb_release(toep);
		break;
	default:
		log(LOG_ERR, "unknown syncache event %d\n", event);
		break;
	}
}

/*
 * Enter an incoming CPL_PASS_ACCEPT_REQ into the host TCP syncache on
 * behalf of the listening socket lso.
 */
static void
syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
{
	struct in_conninfo inc;
	struct toeopt toeo;
	struct tcphdr th;
	struct inpcb *inp;
	int mss, wsf, sack, ts;
	uint32_t rcv_isn = ntohl(req->rcv_isn);

	bzero(&toeo, sizeof(struct toeopt));
	inp = so_sotoinpcb(lso);

	/*
	 * Fill out information for entering us into the syncache
	 */
	/*
	 * NOTE(review): only th_sport/th_dport/th_seq/th_flags of the stack
	 * tcphdr are initialized; the remaining fields are left
	 * indeterminate — presumably unused by tcp_offload_syncache_add.
	 * Confirm.
	 */
	bzero(&inc, sizeof(inc));
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_SYN;

	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;

	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	DPRINTF("syncache add of %d:%d %d:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip),
 ntohs(req->peer_port));

	mss = req->tcp_options.mss;
	wsf  = req->tcp_options.wsf;
	ts = req->tcp_options.tstamp;
	sack = req->tcp_options.sack;
	toeo.to_mss = mss;
	toeo.to_wscale = wsf;
	toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
	tcp_offload_syncache_add(&inc, &toeo, &th, inp, &lso, &cxgb_toe_usrreqs,
toep);
}


/*
 * Process a CPL_PASS_ACCEPT_REQ message.  Does the part that needs the socket
 * lock held.  Note that the sock here is a listening socket that is not owned
 * by the TOE.
 */
static void
process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
    struct listen_ctx *lctx)
{
	int rt_flags;
	struct l2t_entry *e;
	struct iff_mac tim;
	struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
	struct cpl_pass_accept_rpl *rpl;
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tom_data *d = TOM_DATA(tdev);
	struct t3cdev *cdev = d->cdev;
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *newtoep;
	struct rtentry *dst;
	struct sockaddr_in nam;
	struct t3c_data *td = T3C_DATA(cdev);

	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
	if (__predict_false(reply_mbuf == NULL)) {
		/* No memory for a reply: defer (T3) or release the tid. */
		if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
			t3_defer_reply(m, tdev, reject_pass_request);
		else {
			cxgb_queue_tid_release(cdev, tid);
			m_free(m);
		}
		DPRINTF("failed to get reply_mbuf\n");

		goto out;
	}

	/*
	 * NOTE(review): jumps to 'reject' from here until toepcb_alloc()
	 * below leave newtoep uninitialized, yet the reject path passes it
	 * to mk_tid_release() on non-T3 devices — confirm.
	 */
	if (tp->t_state != TCPS_LISTEN) {
		DPRINTF("socket not in listen state\n");

		goto reject;
	}

	tim.mac_addr = req->dst_mac;
	tim.vlan_tag = ntohs(req->vlan_tag);
	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
		DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
		goto reject;
	}

#ifdef notyet
	/*
	 * XXX do route lookup to confirm that we're still listening on this
	 * address
	 */
	if (ip_route_input(skb, req->local_ip, req->peer_ip,
			   G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
		goto reject;
	rt_flags = ((struct rtable *)skb->dst)->rt_flags &
		(RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
	dst_release(skb->dst);	// done with the input route, release it
	skb->dst = NULL;

	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;
#endif
	/*
	 * XXX
	 */
	rt_flags = RTF_LOCAL;
	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;

	/*
	 * Calculate values and add to syncache
	 */

	newtoep = toepcb_alloc();
	if (newtoep == NULL)
		goto reject;

	bzero(&nam, sizeof(struct sockaddr_in));

	nam.sin_len = sizeof(struct sockaddr_in);
	nam.sin_family = AF_INET;
	nam.sin_addr.s_addr =req->peer_ip;
	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);

	if (dst == NULL) {
		printf("failed to find route\n");
		goto reject;
	}
	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
	    (struct sockaddr *)&nam);
	/*
	 * NOTE(review): an L2T allocation failure is only logged here, but
	 * e->idx / e->smt_idx are dereferenced below — NULL dereference if
	 * t3_l2t_get() ever fails.  Confirm.
	 */
	if (e == NULL) {
		DPRINTF("failed to get l2t\n");
	}
	/*
	 * Point to our listen socket until accept
	 */
	newtoep->tp_tp = tp;
	newtoep->tp_flags = TP_SYN_RCVD;
	newtoep->tp_tid = tid;
	newtoep->tp_toedev = tdev;
	tp->rcv_wnd = select_rcv_wnd(tdev, so);

	cxgb_insert_tid(cdev, d->client, newtoep, tid);
	so_lock(so);
	LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
	so_unlock(so);

	/* DDP only if enabled, allowed by the socket, and window is large. */
	newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;

	if (newtoep->tp_ulp_mode) {
		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);

		if (ddp_mbuf == NULL)
			newtoep->tp_ulp_mode = 0;
	}

	CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
	    TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
	/*
	 * XXX workaround for lack of syncache drop
	 */
	toepcb_hold(newtoep);
	syncache_add_accept_req(req, so, newtoep);

	rpl = cplhdr(reply_mbuf);
	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	rpl->wr.wr_lo = 0;
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->opt2 = htonl(calc_opt2(so, tdev));
	rpl->rsvd = rpl->opt2;                /* workaround for HW bug */
	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten

	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
	    V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
	rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
				  CPL_PASS_OPEN_ACCEPT);

	DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);

	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));

	l2t_send(cdev, reply_mbuf, e);
	m_free(m);
	if (newtoep->tp_ulp_mode) {
		__set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
				V_TF_DDP_OFF(1) |
				TP_DDP_TIMER_WORKAROUND_MASK,
				V_TF_DDP_OFF(1) |
				TP_DDP_TIMER_WORKAROUND_VAL, 1);
	} else
		DPRINTF("no DDP\n");

	return;
reject:
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
		mk_pass_accept_rpl(reply_mbuf, m);
	else
		mk_tid_release(reply_mbuf, newtoep, tid);
	cxgb_ofld_send(cdev, reply_mbuf);
	m_free(m);
out:
#if 0
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#else
	return;
#endif
}

/*
 * Handle
 * a CPL_PASS_ACCEPT_REQ message: the HW has received a SYN for one of our
 * listening server TIDs and asks the host whether to accept it.
 */
static int
do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
	struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
	struct tom_data *d = listen_ctx->tom_data;

#if VALIDATE_TID
	/*
	 * NOTE(review): this validation block is inherited Linux code
	 * (printk, "lsk", cdev->name) and cannot compile on FreeBSD as-is;
	 * it is dead unless VALIDATE_TID is defined.  Kept for reference.
	 */
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	if (unlikely(!lsk)) {
		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
		       cdev->name,
		       (unsigned long)((union listen_entry *)ctx -
					t->stid_tab));
		return CPL_RET_BUF_DONE;
	}
	if (unlikely(tid >= t->ntids)) {
		printk(KERN_ERR "%s: passive open TID %u too large\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
	/*
	 * For T3A the current user of the TID may have closed but its last
	 * message(s) may have been backlogged so the TID appears to be still
	 * in use.  Just take the TID away, the connection can close at its
	 * own leisure.  For T3B this situation is a bug.
	 */
	if (!valid_new_tid(t, tid) &&
	    cdev->type != T3A) {
		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
#endif

	/* All of the real work (option calculation, reply CPL) is here. */
	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
	return (0);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.  Runs with the inpcb lock held
 * (asserted below); only sets flag bits, never clears them.
 */
static void
assign_rxopt(struct socket *so, unsigned int opt)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));

	inp_lock_assert(tp->t_inpcb);

	/* MSS = negotiated MTU minus 40 bytes of fixed IPv4 + TCP headers. */
	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
	tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
	tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
	/* Window scaling is in effect only if both sides requested it. */
	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE|TF_REQ_SCALE))
		tp->rcv_scale = tp->request_r_scale;
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void
make_established(struct socket *so, u32 snd_isn, unsigned int opt)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/* Seed every send-side sequence variable from the post-SYN ISN. */
	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
	assign_rxopt(so, opt);

	/*
	 *XXXXXXXXXXX
	 *
	 */
#ifdef notyet
	so->so_proto->pr_ctloutput = t3_ctloutput;
#endif

#if 0
	inet_sk(sk)->id = tp->write_seq ^ jiffies;
#endif
	/*
	 * XXX not clear what rcv_wup maps to
	 */
	/*
	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
	 * pass through opt0.  opt0 can only carry M_RCV_BUFSIZ KB of window;
	 * back up rcv_wup by the excess so the credit return makes it up.
	 */
	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);

	dump_toepcb(toep);

#ifdef notyet
/*
 * no clean interface for marking ARP up to date
 */
	dst_confirm(sk->sk_dst_cache);
#endif
	tp->t_starttime = ticks;
	tp->t_state = TCPS_ESTABLISHED;
	soisconnected(so);
}

/*
 * Expand the syncache entry for a CPL_PASS_ESTABLISH into a full socket by
 * synthesizing the ACK's TCP header/options from the CPL fields and handing
 * them to tcp_offload_syncache_expand().  Returns the expand result; *so is
 * set to the new socket on success.
 */
static int
syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
{

	struct in_conninfo inc;
	struct toeopt toeo;
	struct tcphdr th;
	int mss, wsf, sack, ts;
	struct mbuf *m = NULL;
	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
	unsigned int opt;

#ifdef MAC
#error	"no MAC support"
#endif

	opt = ntohs(req->tcp_opt);

	bzero(&toeo, sizeof(struct toeopt));

	/*
	 * Fill out information for entering us into the syncache.
	 *
	 * NOTE(review): "th" is only partially initialized (sport, dport,
	 * seq, flags); the remaining fields are stack garbage.  Confirm that
	 * tcp_offload_syncache_expand() reads nothing else, or bzero th.
	 */
	bzero(&inc, sizeof(inc));
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_ACK;

	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	/* Decode the HW-reported TCP options (see assign_rxopt). */
	mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	wsf = G_TCPOPT_WSCALE_OK(opt);
	ts = G_TCPOPT_TSTAMP(opt);
	sack = G_TCPOPT_SACK(opt);

	toeo.to_mss = mss;
	toeo.to_wscale = G_TCPOPT_SND_WSCALE(opt);
	toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);

	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port),
	    mss, wsf, ts, sack);
	return tcp_offload_syncache_expand(&inc, &toeo, &th, so, m);
}


/*
 * Process a CPL_PASS_ESTABLISH message.
 * XXX a lot of the locking doesn't work
 * if we are in TCP_SYN_RECV due to crossed SYNs
 *
 * A passively opened connection has completed its handshake: expand the
 * syncache entry into a real socket, wire the toepcb to the new tcpcb,
 * program the WR/queue parameters, and move the connection to ESTABLISHED.
 */
static int
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_pass_establish *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so, *lso;
	struct t3c_data *td = T3C_DATA(cdev);
	struct sockbuf *snd, *rcv;

	// Complete socket initialization now that we have the SND_ISN

	struct toedev *tdev;


	tdev = toep->tp_toedev;

	inp_wlock(tp->t_inpcb);

	/*
	 *
	 * XXX need to add reference while we're manipulating
	 */
	/* "so" is the listen socket until the syncache expand below. */
	so = lso = inp_inpcbtosocket(tp->t_inpcb);

	inp_wunlock(tp->t_inpcb);

	/* Take ourselves off the listener's SYN queue. */
	so_lock(so);
	LIST_REMOVE(toep, synq_entry);
	so_unlock(so);

	if (!syncache_expand_establish_req(req, &so, toep)) {
		/*
		 * No entry
		 */
		CXGB_UNIMPLEMENTED();
	}
	if (so == NULL) {
		/*
		 * Couldn't create the socket
		 */
		CXGB_UNIMPLEMENTED();
	}

	/* From here on, "so"/"tp" refer to the newly created child socket. */
	tp = so_sototcpcb(so);
	inp_wlock(tp->t_inpcb);

	snd = so_sockbuf_snd(so);
	rcv = so_sockbuf_rcv(so);

	/* DDP and the HW Tx path want whole mbufs; prevent sockbuf merging. */
	snd->sb_flags |= SB_NOCOALESCE;
	rcv->sb_flags |= SB_NOCOALESCE;

	/* Cross-link the toepcb and the new tcpcb. */
	toep->tp_tp = tp;
	toep->tp_flags = 0;
	tp->t_toe = toep;
	reset_wr_list(toep);
	tp->rcv_wnd = select_rcv_wnd(tdev, so);
	tp->rcv_nxt = toep->tp_copied_seq;
	install_offload_ops(so);

	/* Per-connection work-request budget and egress queue selection. */
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
	toep->tp_qset_idx = 0;
	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);

	/*
	 * XXX Cancel any keep alive timer
	 */

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * XXX workaround for lack of syncache drop
	 * (drops the extra hold taken when the entry was queued)
	 */
	toepcb_release(toep);
	inp_wunlock(tp->t_inpcb);

	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
#ifdef notyet
	/*
	 * XXX not sure how these checks map to us
	 */
	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
		sk->sk_state_change(sk);
		sk_wake_async(so, 0, POLL_OUT);
	}
	/*
	 * The state for the new connection is now up to date.
	 * Next check if we should add the connection to the parent's
	 * accept queue.  When the parent closes it resets connections
	 * on its SYN queue, so check if we are being reset.  If so we
	 * don't need to do anything more, the coming ABORT_RPL will
	 * destroy this socket.  Otherwise move the connection to the
	 * accept queue.
	 *
	 * Note that we reset the synq before closing the server so if
	 * we are not being reset the stid is still open.
	 */
	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
		__kfree_skb(skb);
		goto unlock;
	}
#endif
	m_free(m);

	return (0);
}

/*
 * Fill in the right TID for CPL messages waiting in the out-of-order queue
 * and send them to the TOE.  These were queued before the connection had a
 * TID assigned (see socket_act_establish).
 */
static void
fixup_and_send_ofo(struct toepcb *toep)
{
	struct mbuf *m;
	struct toedev *tdev = toep->tp_toedev;
	struct tcpcb *tp = toep->tp_tp;
	unsigned int tid = toep->tp_tid;

	log(LOG_NOTICE, "fixup_and_send_ofo\n");

	inp_lock_assert(tp->t_inpcb);
	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
		/*
		 * A variety of messages can be waiting but the fields we'll
		 * be touching are common to all so any message type will do.
		 */
		struct cpl_close_con_req *p = cplhdr(m);

		p->wr.wr_lo = htonl(V_WR_TID(tid));
		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	}
}

/*
 * Updates socket state from an active establish CPL message.  Runs with the
 * socket lock held.  Consumes the mbuf.
 */
static void
socket_act_establish(struct socket *so, struct mbuf *m)
{
	struct cpl_act_establish *req = cplhdr(m);
	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
		    toep->tp_tid, tp->t_state);

	tp->ts_recent_age = ticks;
	/*
	 * NOTE(review): seeding tp->rcv_wnd with the peer's ISN looks
	 * dubious (a *window* set to a sequence number) — confirm whether
	 * only irs/rcv_nxt were meant to be initialized here.
	 */
	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * Now that we finally have a TID send any CPL messages that we had to
	 * defer for lack of a TID.
	 */
	if (mbufq_len(&toep->out_of_order_queue))
		fixup_and_send_ofo(toep);

	if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
		/*
		 * XXX does this even make sense?
		 */
		so_sorwakeup(so);
	}
	m_free(m);
#ifdef notyet
/*
 * XXX assume no write requests permitted while socket connection is
 * incomplete
 */
	/*
	 * Currently the send queue must be empty at this point because the
	 * socket layer does not send anything before a connection is
	 * established.  To be future proof though we handle the possibility
	 * that there are pending buffers to send (either TX_DATA or
	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
	 * buffers according to the just learned write_seq, and then we send
	 * them on their way.
	 */
	fixup_pending_writeq_buffers(sk);
	if (t3_push_frames(so, 1))
		sk->sk_write_space(sk);
#endif

	toep->tp_state = tp->t_state;
}

/*
 * Process a CPL_ACT_ESTABLISH message: an actively opened connection has
 * completed its handshake.  Swaps the temporary atid for the permanent tid
 * and finishes socket initialization.
 */
static int
do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_act_establish *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct toedev *tdev;
	struct tom_data *d;

	/* Connection already torn down: just return the atid. */
	if (tp == NULL) {
		free_atid(cdev, atid);
		return (0);
	}
	inp_wlock(tp->t_inpcb);

	/*
	 * XXX
	 */
	so = inp_inpcbtosocket(tp->t_inpcb);
	tdev = toep->tp_toedev; /* blow up here if link was down */
	d = TOM_DATA(tdev);

	/*
	 * It's OK if the TID is currently in use, the owning socket may have
	 * backlogged its last CPL message(s).  Just take it away.
	 */
	toep->tp_tid = tid;
	toep->tp_tp = tp;
	so_insert_tid(d, toep, tid);
	free_atid(cdev, atid);
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));

	socket_act_establish(so, m);	/* consumes m */
	inp_wunlock(tp->t_inpcb);
	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);

	return (0);
}

/*
 * Process an acknowledgment of WR completion.  Advance snd_una and send the
 * next batch of work requests from the write queue.
 */
static void
wr_ack(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct cpl_wr_ack *hdr = cplhdr(m);
	struct socket *so;
	unsigned int credits = ntohs(hdr->credits);
	u32 snd_una = ntohl(hdr->snd_una);
	int bytes = 0;		/* payload bytes freed from the send buffer */
	struct sockbuf *snd;

	CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);
	toep->tp_wr_avail += credits;
	/* Clamp so that avail + unacked never exceeds the per-conn budget. */
	if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
		toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;

	/*
	 * Walk the pending-WR list retiring fully acknowledged WRs.  Each
	 * queued WR stores its credit count in m_pkthdr.csum_data and its
	 * payload length in m_pkthdr.len.
	 */
	while (credits) {
		struct mbuf *p = peek_wr(toep);

		if (__predict_false(!p)) {
			log(LOG_ERR, "%u WR_ACK credits for TID %u with "
			    "nothing pending, state %u wr_avail=%u\n",
			    credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
			break;
		}
		CTR2(KTR_TOM,
		    "wr_ack: p->credits=%d p->bytes=%d",
		    p->m_pkthdr.csum_data, p->m_pkthdr.len);
		KASSERT(p->m_pkthdr.csum_data != 0,
		    ("empty request still on list"));

		if (__predict_false(credits < p->m_pkthdr.csum_data)) {

#if DEBUG_WR > 1
			/*
			 * NOTE(review): this diagnostic is leftover Linux
			 * code (p->csum, skb_shinfo) and will not compile
			 * with DEBUG_WR > 1.
			 */
			struct tx_data_wr *w = cplhdr(p);
			log(LOG_ERR,
			    "TID %u got %u WR credits, need %u, len %u, "
			    "main body %u, frags %u, seq # %u, ACK una %u,"
			    " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
			    toep->tp_tid, credits, p->csum, p->len,
			    p->len - p->data_len, skb_shinfo(p)->nr_frags,
			    ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
			    toep->tp_wr_avail, count_pending_wrs(tp) - credits);
#endif
			/* Partial ack: remember the still-owed credits. */
			p->m_pkthdr.csum_data -= credits;
			break;
		} else {
			/* WR fully acked: retire it and free its mbuf. */
			dequeue_wr(toep);
			credits -= p->m_pkthdr.csum_data;
			bytes += p->m_pkthdr.len;
			CTR3(KTR_TOM,
			    "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
			    p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);

			m_free(p);
		}
	}

#if DEBUG_WR
	check_wr_invariants(tp);
#endif

	/* A stale (re-ordered) WR_ACK: ignore its sequence information. */
	if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
#if VALIDATE_SEQ
		struct tom_data *d = TOM_DATA(TOE_DEV(so));

		/*
		 * NOTE(review): missing ',' after LOG_ERR — this block does
		 * not compile if VALIDATE_SEQ is enabled.
		 */
		log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK "
		    "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
		    toep->tp_tid, tp->snd_una);
#endif
		goto out_free;
	}

	if (tp->snd_una != snd_una) {
		tp->snd_una = snd_una;
		tp->ts_recent_age = ticks;
#ifdef notyet
		/*
		 * Keep ARP entry "minty fresh"
		 */
		dst_confirm(sk->sk_dst_cache);
#endif
		/* Everything sent is now acked; Tx is idle. */
		if (tp->snd_una == tp->snd_nxt)
			toep->tp_flags &= ~TP_TX_WAIT_IDLE;
	}

	snd = so_sockbuf_snd(so);
	if (bytes) {
		CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
		/* NOTE(review): redundant re-assignment of snd below. */
		snd = so_sockbuf_snd(so);
		sockbuf_lock(snd);
		sbdrop_locked(snd, bytes);
		/* so_sowwakeup_locked() drops the sockbuf lock. */
		so_sowwakeup_locked(so);
	}

	/* More unsent data in the buffer: try to push it out. */
	if (snd->sb_sndptroff < snd->sb_cc)
		t3_push_frames(so, 0);

out_free:
	inp_wunlock(tp->t_inpcb);
	m_free(m);
}

/*
 * Handler for TX_DATA_ACK CPL messages.  Thin dispatch wrapper around
 * wr_ack(), which consumes the mbuf.
 */
static int
do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* NOTE(review): "so" is not declared here; VALIDATE_SOCK must
	 * expand to a no-op for this to compile — confirm. */
	VALIDATE_SOCK(so);

	wr_ack(toep, m);
	return 0;
}

/*
 * Handler for TRACE_PKT CPL messages.  Just sink these packets.
 */
static int
do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
{
	m_freem(m);
	return 0;
}

/*
 * Reset a connection that is on a listener's SYN queue or accept queue,
 * i.e., one that has not had a struct socket associated with it.
 * Must be called from process context.
 *
 * Modeled after code in inet_csk_listen_stop().
 */
static void
t3_reset_listen_child(struct socket *child)
{
	struct tcpcb *tp = so_sototcpcb(child);

	t3_send_reset(tp->t_toe);
}


/*
 * Per-child callback for so_listeners_apply_all(): abort an offloaded,
 * not-yet-accepted child connection.  Non-TOE children are left alone.
 */
static void
t3_child_disconnect(struct socket *so, void *arg)
{
	struct tcpcb *tp = so_sototcpcb(so);

	if (tp->t_flags & TF_TOE) {
		inp_wlock(tp->t_inpcb);
		t3_reset_listen_child(so);
		inp_wunlock(tp->t_inpcb);
	}
}

/*
 * Disconnect offloaded established but not yet accepted connections sitting
 * on a server's accept_queue.  We just send an ABORT_REQ at this point and
 * finish off the disconnect later as we may need to wait for the ABORT_RPL.
 */
void
t3_disconnect_acceptq(struct socket *listen_so)
{

	so_lock(listen_so);
	so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
	so_unlock(listen_so);
}

/*
 * Reset offloaded connections sitting on a server's syn queue.  As above
 * we send ABORT_REQ and finish off when we get ABORT_RPL.
 */

void
t3_reset_synq(struct listen_ctx *lctx)
{
	struct toepcb *toep;

	so_lock(lctx->lso);
	while (!LIST_EMPTY(&lctx->synq_head)) {
		toep = LIST_FIRST(&lctx->synq_head);
		LIST_REMOVE(toep, synq_entry);
		/* Detach from the (never-created) tcpcb before resetting. */
		toep->tp_tp = NULL;
		t3_send_reset(toep);
		cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
		toepcb_release(toep);
	}
	so_unlock(lctx->lso);
}


/*
 * Write the page pods describing a DDP gather list into adapter memory.
 * Each pod is sent as its own ULP_MEM_WRITE work request; the last
 * NUM_SENTINEL_PPODS pods are written invalid as end markers.
 */
int
t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
	       unsigned int nppods, unsigned int tag, unsigned int maxoff,
	       unsigned int pg_off, unsigned int color)
{
	unsigned int i, j, pidx;
	struct pagepod *p;
	struct mbuf *m;
	struct ulp_mem_io *req;
	unsigned int tid = toep->tp_tid;
	const struct tom_data *td = TOM_DATA(toep->tp_toedev);
	/* Pods live at ddp_llimit + tag * PPOD_SIZE in adapter memory. */
	unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;

	CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
	    gl, nppods, tag, maxoff, pg_off, color);

	for (i = 0; i < nppods; ++i) {
		m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
		m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
		req = mtod(m, struct ulp_mem_io *);
		m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
		req->wr.wr_lo = 0;
		/* Memory-write address is in 32-byte units. */
		req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
		    V_ULPTX_CMD(ULP_MEM_WRITE));
		req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
		    V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));

		p = (struct pagepod *)(req + 1);
		if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
			p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
			p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
			    V_PPOD_COLOR(color));
			p->pp_max_offset = htonl(maxoff);
			p->pp_page_offset = htonl(pg_off);
			p->pp_rsvd = 0;
			/*
			 * Each pod carries 5 page addresses but advances by
			 * only 4 pages: consecutive pods overlap by one
			 * entry (hardware pod format).  Slots past the end
			 * of the gather list are zeroed.
			 */
			for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
				p->pp_addr[j] = pidx < gl->dgl_nelem ?
				    htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
		} else
			p->pp_vld_tid = 0;	/* mark sentinel page pods invalid */
		send_or_defer(toep, m, 0);
		ppod_addr += PPOD_SIZE;
	}
	return (0);
}

/*
 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_cpl_barrier_ulp(struct cpl_barrier *b)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
	b->opcode = CPL_BARRIER;
}

/*
 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	/* NOTE(review): redundant re-assignment — txpkt already points at req. */
	txpkt = (struct ulp_txpkt *)req;
	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
	req->cpuno = htons(cpuno);
}

/*
 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
		     unsigned int word, uint64_t mask, uint64_t val)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
	    tid, word, mask, val);

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
	req->reply = V_NO_REPLY(1);	/* no completion CPL wanted */
	req->cpu_idx = 0;
	req->word = htons(word);
	req->mask = htobe64(mask);
	req->val = htobe64(val);
}

/*
 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
 */
static void
mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
		   unsigned int tid, unsigned int credits)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
	/* Return Rx credits and refresh the delayed-ACK mode tunable. */
	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
	    V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
	    V_RX_CREDITS(credits));
}

/*
 * Cancel one of the two HW DDP buffers: invalidate it, flip ACTIVE_BUF to
 * the other buffer, and read back the TCB to learn how much data landed in
 * the buffer before the cancel.  The whole sequence is sent as one compound
 * WR bracketed by CPL_BARRIERs.
 */
void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_barrier *lock;
	struct cpl_set_tcb_field *req;
	struct cpl_get_tcb *getreq;
	struct ddp_state *p = &toep->tp_ddp_state;

#if 0
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
#endif
	/* barrier + SET_TCB_FIELD + GET_TCB + barrier, laid out in order. */
	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
	    sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	lock = (struct cpl_barrier *)(wr + 1);
	mk_cpl_barrier_ulp(lock);

	req = (struct cpl_set_tcb_field *)(lock + 1);

	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

	/*
	 * Hmmm, not sure if this actually a good thing: reactivating
	 * the other buffer might be an issue if it has been completed
	 * already.  However, that is unlikely, since the fact that the UBUF
	 * is not completed indicates that there is no outstanding data.
	 */
	if (bufidx == 0)
		/* Invalidate buf 0 and make buf 1 the active buffer. */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF0_VALID(1),
		    V_TF_DDP_ACTIVE_BUF(1));
	else
		/* Invalidate buf 1 and make buf 0 the active buffer. */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF1_VALID(1), 0);

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/*
	 * Keep track of the number of outstanding CPL_GET_TCB requests.
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
	    "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @sk: the socket associated with the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.  Read the TCB to
 * determine how much data was written into the buffer before the overlay
 * took place.
4264 */ 4265void 4266t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, 4267 unsigned int tag1, unsigned int len) 4268{ 4269 unsigned int wrlen; 4270 struct mbuf *m; 4271 struct work_request_hdr *wr; 4272 struct cpl_get_tcb *getreq; 4273 struct cpl_set_tcb_field *req; 4274 struct ddp_state *p = &toep->tp_ddp_state; 4275 4276 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", 4277 bufidx, tag0, tag1, len); 4278#if 0 4279 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4280#endif 4281 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); 4282 m = m_gethdr_nofail(wrlen); 4283 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4284 wr = mtod(m, struct work_request_hdr *); 4285 m->m_pkthdr.len = m->m_len = wrlen; 4286 bzero(wr, wrlen); 4287 4288 4289 /* Set the ATOMIC flag to make sure that TP processes the following 4290 * CPLs in an atomic manner and no wire segments can be interleaved. 4291 */ 4292 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); 4293 req = (struct cpl_set_tcb_field *)(wr + 1); 4294 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG, 4295 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) | 4296 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32, 4297 V_TCB_RX_DDP_BUF0_TAG(tag0) | 4298 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32); 4299 req++; 4300 if (bufidx == 0) { 4301 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN, 4302 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4303 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 4304 req++; 4305 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4306 V_TF_DDP_PUSH_DISABLE_0(1) | 4307 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4308 V_TF_DDP_PUSH_DISABLE_0(0) | 4309 V_TF_DDP_BUF0_VALID(1)); 4310 } else { 4311 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN, 4312 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN), 4313 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len)); 4314 req++; 4315 mk_set_tcb_field_ulp(req, toep->tp_tid, 
W_TCB_RX_DDP_FLAGS, 4316 V_TF_DDP_PUSH_DISABLE_1(1) | 4317 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4318 V_TF_DDP_PUSH_DISABLE_1(0) | 4319 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1)); 4320 } 4321 4322 getreq = (struct cpl_get_tcb *)(req + 1); 4323 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4324 4325 /* Keep track of the number of oustanding CPL_GET_TCB requests 4326 */ 4327 p->get_tcb_count++; 4328 4329#ifdef T3_TRACE 4330 T3_TRACE4(TIDTB(sk), 4331 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u " 4332 "len %d", 4333 bufidx, tag0, tag1, len); 4334#endif 4335 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4336} 4337 4338/* 4339 * Sends a compound WR containing all the CPL messages needed to program the 4340 * two HW DDP buffers, namely optionally setting up the length and offset of 4341 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK. 4342 */ 4343void 4344t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0, 4345 unsigned int len1, unsigned int offset1, 4346 uint64_t ddp_flags, uint64_t flag_mask, int modulate) 4347{ 4348 unsigned int wrlen; 4349 struct mbuf *m; 4350 struct work_request_hdr *wr; 4351 struct cpl_set_tcb_field *req; 4352 4353 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ", 4354 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff); 4355 4356#if 0 4357 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4358#endif 4359 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) + 4360 (len1 ? sizeof(*req) : 0) + 4361 (modulate ? 
sizeof(struct cpl_rx_data_ack) : 0); 4362 m = m_gethdr_nofail(wrlen); 4363 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4364 wr = mtod(m, struct work_request_hdr *); 4365 bzero(wr, wrlen); 4366 4367 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4368 m->m_pkthdr.len = m->m_len = wrlen; 4369 4370 req = (struct cpl_set_tcb_field *)(wr + 1); 4371 if (len0) { /* program buffer 0 offset and length */ 4372 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET, 4373 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 4374 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4375 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) | 4376 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0)); 4377 req++; 4378 } 4379 if (len1) { /* program buffer 1 offset and length */ 4380 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET, 4381 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 4382 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32, 4383 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) | 4384 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32); 4385 req++; 4386 } 4387 4388 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask, 4389 ddp_flags); 4390 4391 if (modulate) { 4392 mk_rx_data_ack_ulp(toep, 4393 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid, 4394 toep->tp_copied_seq - toep->tp_rcv_wup); 4395 toep->tp_rcv_wup = toep->tp_copied_seq; 4396 } 4397 4398#ifdef T3_TRACE 4399 T3_TRACE5(TIDTB(sk), 4400 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x " 4401 "modulate %d", 4402 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff, 4403 modulate); 4404#endif 4405 4406 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4407} 4408 4409void 4410t3_init_wr_tab(unsigned int wr_len) 4411{ 4412 int i; 4413 4414 if (mbuf_wrs[1]) /* already initialized */ 4415 return; 4416 4417 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) { 4418 int sgl_len = (3 * i) / 2 + (i & 1); 4419 4420 sgl_len += 3; 4421 mbuf_wrs[i] = sgl_len <= wr_len ? 
4422 1 : 1 + (sgl_len - 2) / (wr_len - 1); 4423 } 4424 4425 wrlen = wr_len * 8; 4426} 4427 4428int 4429t3_init_cpl_io(void) 4430{ 4431#ifdef notyet 4432 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); 4433 if (!tcphdr_skb) { 4434 log(LOG_ERR, 4435 "Chelsio TCP offload: can't allocate sk_buff\n"); 4436 return -1; 4437 } 4438 skb_put(tcphdr_skb, sizeof(struct tcphdr)); 4439 tcphdr_skb->h.raw = tcphdr_skb->data; 4440 memset(tcphdr_skb->data, 0, tcphdr_skb->len); 4441#endif 4442 4443 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); 4444 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); 4445 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack); 4446 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data); 4447 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 4448 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 4449 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish); 4450 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req); 4451 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 4452 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); 4453 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp); 4454 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); 4455 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify); 4456 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt); 4457 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); 4458 return (0); 4459} 4460
|