cxgb_cpl_io.c revision 196039
1/************************************************************************** 2 3Copyright (c) 2007-2008, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 196039 2009-08-02 19:43:32Z rwatson $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/sockstate.h> 43#include <sys/sockopt.h> 44#include <sys/socket.h> 45#include <sys/sockbuf.h> 46#include <sys/sysctl.h> 47#include <sys/syslog.h> 48#include <sys/protosw.h> 49#include <sys/priv.h> 50 51#if __FreeBSD_version < 800044 52#define V_tcp_do_autosndbuf tcp_do_autosndbuf 53#define V_tcp_autosndbuf_max tcp_autosndbuf_max 54#define V_tcp_do_rfc1323 tcp_do_rfc1323 55#define V_tcp_do_autorcvbuf tcp_do_autorcvbuf 56#define V_tcp_autorcvbuf_max tcp_autorcvbuf_max 57#define V_tcpstat tcpstat 58#endif 59 60#include <net/if.h> 61#include <net/route.h> 62 63#include <netinet/in.h> 64#include <netinet/in_pcb.h> 65#include <netinet/in_systm.h> 66#include <netinet/in_var.h> 67 68 69#include <cxgb_osdep.h> 70#include <sys/mbufq.h> 71 72#include <netinet/ip.h> 73#include <netinet/tcp_var.h> 74#include <netinet/tcp_fsm.h> 75#include <netinet/tcp_offload.h> 76#include <netinet/tcp_seq.h> 77#include <netinet/tcp_syncache.h> 78#include <netinet/tcp_timer.h> 79#include <net/route.h> 80 81#include <t3cdev.h> 82#include <common/cxgb_firmware_exports.h> 83#include <common/cxgb_t3_cpl.h> 84#include <common/cxgb_tcb.h> 85#include <common/cxgb_ctl_defs.h> 86#include <cxgb_offload.h> 87#include <vm/vm.h> 88#include <vm/pmap.h> 89#include <machine/bus.h> 90#include <sys/mvec.h> 91#include <ulp/toecore/cxgb_toedev.h> 92#include <ulp/tom/cxgb_l2t.h> 93#include <ulp/tom/cxgb_defs.h> 94#include <ulp/tom/cxgb_tom.h> 95#include <ulp/tom/cxgb_t3_ddp.h> 96#include <ulp/tom/cxgb_toepcb.h> 97#include <ulp/tom/cxgb_tcp.h> 98#include 
<ulp/tom/cxgb_tcp_offload.h>

/*
 * For ULP connections HW may add headers, e.g., for digests, that aren't part
 * of the messages sent by the host but that are part of the TCP payload and
 * therefore consume TCP sequence space.  Tx connection parameters that
 * operate in TCP sequence space are affected by the HW additions and need to
 * compensate for them to accurately track TCP sequence numbers.  This array
 * contains the compensating extra lengths for ULP packets.  It is indexed by
 * a packet's ULP submode.
 */
const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};

#ifdef notyet
/*
 * This sk_buff holds a fake header-only TCP segment that we use whenever we
 * need to exploit SW TCP functionality that expects TCP headers, such as
 * tcp_create_openreq_child().  It's a RO buffer that may be used by multiple
 * CPUs without locking.
 */
static struct mbuf *tcphdr_mbuf __read_mostly;
#endif

/*
 * Size of WRs in bytes.  Note that we assume all devices we are handling have
 * the same WR size.
 */
static unsigned int wrlen __read_mostly;

/*
 * The number of WRs needed for an skb depends on the number of page fragments
 * in the skb and whether it has any payload in its main body.  This maps the
 * length of the gather list represented by an skb into the # of necessary WRs.
 */
static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;

/*
 * Max receive window supported by HW in bytes.  Only a small part of it can
 * be set through option0, the rest needs to be set through RX_DATA_ACK.
 */
#define MAX_RCV_WND ((1U << 27) - 1)

/*
 * Min receive window.  We want it to be large enough to accommodate receive
 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
 */
#define MIN_RCV_WND (24 * 1024U)
/* Extract the DSCP bits of an inpcb's TOS, masked for the CPL TOS field. */
#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)

/* Debug knobs; all disabled in production builds. */
#define VALIDATE_SEQ 0
#define VALIDATE_SOCK(so)
#define DEBUG_WR 0

/* Actions returned by the CPL close/abort helpers. */
#define TCP_TIMEWAIT	1
#define TCP_CLOSE	2
#define TCP_DROP	3

static void t3_send_reset(struct toepcb *toep);
static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
static void handle_syncache_event(int event, void *arg);

/*
 * Debug wrapper around sbappendstream_locked().  Before and after the append
 * it walks the sockbuf's mbuf chain and asserts that every mbuf is either a
 * plain mbuf or an EXT_EXTREF external mbuf, and that no m_next pointer
 * carries the 0xffffffff poison value.  Also asserts that the sockbuf has
 * SB_NOCOALESCE set, which this driver relies on to keep its mbufs intact.
 * Caller must hold the sockbuf lock.
 */
static inline void
SBAPPEND(struct sockbuf *sb, struct mbuf *n)
{
	struct mbuf *m;

	/* Sanity-check the chain already in the sockbuf. */
	m = sb->sb_mb;
	while (m) {
		KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
		    !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
			!!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
		KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
			m->m_next, m->m_nextpkt, m->m_flags));
		m = m->m_next;
	}
	/* Sanity-check the chain being appended. */
	m = n;
	while (m) {
		KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
		    !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
			!!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
		KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
			m->m_next, m->m_nextpkt, m->m_flags));
		m = m->m_next;
	}
	KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
	sbappendstream_locked(sb, n);
	/* Re-check the merged chain for the poison value. */
	m = sb->sb_mb;

	while (m) {
		KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
			m->m_next, m->m_nextpkt, m->m_flags));
		m = m->m_next;
	}
}

/* True if the TOE device is a first-generation (rev A) T3 part. */
static inline int
is_t3a(const struct toedev *dev)
{
	return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
}

/* Dump the interesting fields of a toepcb for debugging. */
static void
dump_toepcb(struct toepcb *toep)
{
	DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
	    toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
	    toep->tp_mtu_idx, toep->tp_tid);

	DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
	    toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
	    toep->tp_mss_clamp, toep->tp_flags);
}

#ifndef RTALLOC2_DEFINED
/*
 * rtalloc1() wrapper that returns the looked-up route unlocked,
 * or NULL if no route was found.
 */
static struct rtentry *
rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
{
	struct rtentry *rt = NULL;

	if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
		RT_UNLOCK(rt);

	return (rt);
}
#endif

/*
 * Determine whether to send a CPL message now or defer it.  A message is
 * deferred if the connection is in SYN_SENT since we don't know the TID yet.
 * For connections in other states the message is sent immediately.
 * If through_l2t is set the message is subject to ARP processing, otherwise
 * it is sent directly.
 */
static inline void
send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
{
	struct tcpcb *tp = toep->tp_tp;

	if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
		inp_wlock(tp->t_inpcb);
		mbufq_tail(&toep->out_of_order_queue, m);  // defer
		inp_wunlock(tp->t_inpcb);
	} else if (through_l2t)
		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);  // send through L2T
	else
		cxgb_ofld_send(TOEP_T3C_DEV(toep), m);  // send directly
}

/*
 * Map a CPL priority to an mbuf priority.  Currently the identity mapping;
 * the toepcb argument is unused but kept for per-connection overrides.
 */
static inline unsigned int
mkprio(unsigned int cntrl, const struct toepcb *toep)
{
        return (cntrl);
}

/*
 * Populate a TID_RELEASE WR.  The mbuf must be already properly sized.
 */
static inline void
mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
{
	struct cpl_tid_release *req;

	m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
	m->m_pkthdr.len = m->m_len = sizeof(*req);
	req = mtod(m, struct cpl_tid_release *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
}

/*
 * Fill in a TX_DATA work request header at the front of mbuf m for a send of
 * len payload bytes.  tail is the remainder of the send queue (NULL if this
 * WR covers the last data), used to decide whether to set the SHOVE bit.
 * On the first WR of a connection also requests ACK pages, ties the
 * connection to its queue set, and programs the send buffer size.
 * Caller must hold the inpcb lock.
 */
static inline void
make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct tx_data_wr *req;
	struct sockbuf *snd;

	inp_lock_assert(tp->t_inpcb);
	snd = so_sockbuf_snd(so);

	req = mtod(m, struct tx_data_wr *);
	m->m_len = sizeof(*req);
	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
	req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
	/* len includes the length of any HW ULP additions */
	req->len = htonl(len);
	req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
	/* V_TX_ULP_SUBMODE sets both the mode and submode */
	req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
	    V_TX_URG(/* skb_urgent(skb) */ 0 ) |
	    V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
		(tail ? 0 : 1))));
	req->sndseq = htonl(tp->snd_nxt);
	if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
		/* First data WR on this connection: init flags + CPU index. */
		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
		    V_TX_CPU_IDX(toep->tp_qset));

		/* Sendbuffer is in units of 32KB. */
		if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
			req->param |= htonl(V_TX_SNDBUF(V_tcp_autosndbuf_max >> 15));
		else {
			req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
		}

		toep->tp_flags |= TP_DATASENT;
	}
}

#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */

/*
 * Push queued send-buffer data to the hardware as TX_DATA work requests,
 * consuming WR credits.  Data that fits in IMM_LEN bytes is sent in-line in
 * the WR; larger sends are described by a gather list of mbuf data pointers.
 * Advances the sockbuf send pointer and the connection's write sequence, and
 * requests a WR completion when req_completion is set or half the credits
 * are unacked.  Returns the number of payload bytes handed to the hardware.
 */
int
t3_push_frames(struct socket *so, int req_completion)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	struct mbuf *tail, *m0, *last;
	struct t3cdev *cdev;
	struct tom_data *d;
	int state, bytes, count, total_bytes;
	bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
	struct sockbuf *snd;

	/* Nothing can be pushed before the TID exists or after close. */
	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
		DPRINTF("tcp state=%d\n", tp->t_state);
		return (0);
	}

	state = so_state_get(so);

	if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
		DPRINTF("disconnecting\n");

		return (0);
	}

	inp_lock_assert(tp->t_inpcb);

	snd = so_sockbuf_snd(so);
	sockbuf_lock(snd);

	d = TOM_DATA(toep->tp_toedev);
	cdev = d->cdev;

	/* Resume from the send pointer, or the head of the sockbuf. */
	last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;

	total_bytes = 0;
	DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
	    toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);

	/* tp_m_last was fully consumed last time; start at its successor. */
	if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
		KASSERT(tail, ("sbdrop error"));
		last = tail = tail->m_next;
	}

	if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
		DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
		sockbuf_unlock(snd);

		return (0);
	}

	toep->tp_m_last = NULL;
	while (toep->tp_wr_avail && (tail != NULL)) {
		count = bytes = 0;
		segp = segs;
		if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
			sockbuf_unlock(snd);
			return (0);
		}
		/*
		 * If the data in tail fits as in-line, then
		 * make an immediate data wr.
		 */
		if (tail->m_len <= IMM_LEN) {
			count = 1;
			bytes = tail->m_len;
			last = tail;
			tail = tail->m_next;
			m_set_sgl(m0, NULL);
			m_set_sgllen(m0, 0);
			make_tx_data_wr(so, m0, bytes, tail);
			m_append(m0, bytes, mtod(last, caddr_t));
			KASSERT(!m0->m_next, ("bad append"));
		} else {
			/* Build a gather list bounded by credits and segments. */
			while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
			    && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
				bytes += tail->m_len;
				last = tail;
				count++;
				/*
				 * technically an abuse to be using this for a VA
				 * but less gross than defining my own structure
				 * or calling pmap_kextract from here :-|
				 */
				segp->ds_addr = (bus_addr_t)tail->m_data;
				segp->ds_len = tail->m_len;
				DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
				    count, mbuf_wrs[count], tail->m_data, tail->m_len);
				segp++;
				tail = tail->m_next;
			}
			DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
			    toep->tp_wr_avail, count, mbuf_wrs[count], tail);

			m_set_sgl(m0, segs);
			m_set_sgllen(m0, count);
			make_tx_data_wr(so, m0, bytes, tail);
		}
		m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));

		/* Remember where to resume; tp_m_last marks full consumption. */
		if (tail) {
			snd->sb_sndptr = tail;
			toep->tp_m_last = NULL;
		} else
			toep->tp_m_last = snd->sb_sndptr = last;


		DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);

		snd->sb_sndptroff += bytes;
		total_bytes += bytes;
		toep->tp_write_seq += bytes;
		CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
		    " tail=%p sndptr=%p sndptroff=%d",
		    toep->tp_wr_avail, count, mbuf_wrs[count],
		    tail, snd->sb_sndptr, snd->sb_sndptroff);
		if (tail)
			CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
			    " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
			    total_bytes, toep->tp_m_last, tail->m_data,
			    tp->snd_una);
		else
			CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
			    " tp_m_last=%p snd_una=0x%08x",
			    total_bytes, toep->tp_m_last, tp->snd_una);


#ifdef KTR
{
	int i;

	i = 0;
	/* Trace the gather list, three segments per record. */
	while (i < count && m_get_sgllen(m0)) {
		if ((count - i) >= 3) {
			CTR6(KTR_TOM,
			    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
			    " len=%d pa=0x%zx len=%d",
			    segs[i].ds_addr, segs[i].ds_len,
			    segs[i + 1].ds_addr, segs[i + 1].ds_len,
			    segs[i + 2].ds_addr, segs[i + 2].ds_len);
			i += 3;
		} else if ((count - i) == 2) {
			CTR4(KTR_TOM,
			    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
			    " len=%d",
			    segs[i].ds_addr, segs[i].ds_len,
			    segs[i + 1].ds_addr, segs[i + 1].ds_len);
			i += 2;
		} else {
			CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
			    segs[i].ds_addr, segs[i].ds_len);
			i++;
		}

	}
}
#endif
		/*
		 * remember credits used
		 */
		m0->m_pkthdr.csum_data = mbuf_wrs[count];
		m0->m_pkthdr.len = bytes;
		toep->tp_wr_avail -= mbuf_wrs[count];
		toep->tp_wr_unacked += mbuf_wrs[count];

		/* Ask HW for a completion when requested or at half credits. */
		if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
		    toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
			struct work_request_hdr *wr = cplhdr(m0);

			wr->wr_hi |= htonl(F_WR_COMPL);
			toep->tp_wr_unacked = 0;
		}
		KASSERT((m0->m_pkthdr.csum_data > 0) &&
		    (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
			m0->m_pkthdr.csum_data));
		m0->m_type = MT_DONTFREE;
		enqueue_wr(toep, m0);
		DPRINTF("sending offload tx with %d bytes in %d segments\n",
		    bytes, count);
		l2t_send(cdev, m0, toep->tp_l2t);
	}
	sockbuf_unlock(snd);
	return (total_bytes);
}

/*
 * Close a connection by sending a CPL_CLOSE_CON_REQ message.  Cannot fail
 * under any circumstances.  We take the easy way out and always queue the
 * message to the write_queue.  We can optimize the case where the queue is
 * already empty though the optimization is probably not worth it.
 */
static void
close_conn(struct socket *so)
{
	struct mbuf *m;
	struct cpl_close_con_req *req;
	struct tom_data *d;
	struct inpcb *inp = so_sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	unsigned int tid;


	inp_wlock(inp);
	tp = so_sototcpcb(so);
	toep = tp->t_toe;

	/* Flush any pending payload before the FIN (no TID in SYN_SENT). */
	if (tp->t_state != TCPS_SYN_SENT)
		t3_push_frames(so, 1);

	/* Only one FIN per connection. */
	if (toep->tp_flags & TP_FIN_SENT) {
		inp_wunlock(inp);
		return;
	}

	tid = toep->tp_tid;

	d = TOM_DATA(toep->tp_toedev);

	m = m_gethdr_nofail(sizeof(*req));
	m_set_priority(m, CPL_PRIORITY_DATA);
	m_set_sgl(m, NULL);
	m_set_sgllen(m, 0);

	toep->tp_flags |= TP_FIN_SENT;
	req = mtod(m, struct cpl_close_con_req *);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
	req->wr.wr_lo = htonl(V_WR_TID(tid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
	req->rsvd = 0;
	inp_wunlock(inp);
	/*
	 * XXX - need to defer shutdown while there is still data in the queue
	 *
	 */
	CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
	cxgb_ofld_send(d->cdev, m);

}

/*
 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 * and send it along.
 */
static void
abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{
	struct cpl_abort_req *req = cplhdr(m);

	req->cmd = CPL_ABORT_NO_RST;
	cxgb_ofld_send(cdev, m);
}

/*
 * Send RX credits through an RX_DATA_ACK CPL message.  If nofail is 0 we are
 * permitted to return without sending the message in case we cannot allocate
 * an sk_buff.  Returns the number of credits sent.
 */
uint32_t
t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
{
	struct mbuf *m;
	struct cpl_rx_data_ack *req;
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = toep->tp_toedev;

	m = m_gethdr_nofail(sizeof(*req));

	DPRINTF("returning %u credits to HW\n", credits);

	req = mtod(m, struct cpl_rx_data_ack *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
	req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
	m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	return (credits);
}

/*
 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
 * This is only used in DDP mode, so we take the opportunity to also set the
 * DACK mode and flush any Rx credits.
 */
void
t3_send_rx_modulate(struct toepcb *toep)
{
	struct mbuf *m;
	struct cpl_rx_data_ack *req;

	m = m_gethdr_nofail(sizeof(*req));

	req = mtod(m, struct cpl_rx_data_ack *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
	req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
	    V_RX_DACK_MODE(1) |
	    V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
	toep->tp_rcv_wup = toep->tp_copied_seq;
}

/*
 * Handle receipt of an urgent pointer.
 */
static void
handle_urg_ptr(struct socket *so, uint32_t urg_seq)
{
	/*
	 * The whole body is compiled out: this is unported Linux-style code
	 * (sk_buff, sock_flag, ...) retained for a future port.
	 */
#ifdef URGENT_DATA_SUPPORTED
	struct tcpcb *tp = so_sototcpcb(so);

	urg_seq--;	/* initially points past the urgent data, per BSD */

	if (tp->urg_data && !after(urg_seq, tp->urg_seq))
		return;	/* duplicate pointer */
	sk_send_sigurg(sk);
	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		tp->copied_seq++;
		if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
			tom_eat_skb(sk, skb, 0);
	}
	tp->urg_data = TCP_URG_NOTYET;
	tp->urg_seq = urg_seq;
#endif
}

/*
 * Returns true if a socket cannot accept new Rx data.
 */
static inline int
so_no_receive(const struct socket *so)
{
	return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
}

/*
 * Process an urgent data notification.
 */
static void
rx_urg_notify(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_urg_notify *hdr = cplhdr(m);
	struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);

	VALIDATE_SOCK(so);

	if (!so_no_receive(so))
		handle_urg_ptr(so, ntohl(hdr->seq));

	m_freem(m);
}

/*
 * Handler for RX_URG_NOTIFY CPL messages.
 */
static int
do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	rx_urg_notify(toep, m);
	return (0);
}

/*
 * Whether a non-default delayed-ACK mode may be used for this connection.
 *
 * NOTE(review): the first operand of || makes the second clause unreachable:
 * any nonzero tp_ulp_mode (including ULP_MODE_TCPDDP) already short-circuits
 * the expression to true, so the tod_ttid check is never evaluated.  Possibly
 * the intent was to test tp_ulp_mode == ULP_MODE_TCPDDP only — confirm
 * against the T3 documentation before changing behavior.
 */
static __inline int
is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
{
	return (toep->tp_ulp_mode ||
	    (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
		dev->tod_ttid >= TOE_ID_CHELSIO_T3));
}

/*
 * Set of states for which we should return RX credits.
695 */ 696#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 697 698/* 699 * Called after some received data has been read. It returns RX credits 700 * to the HW for the amount of data processed. 701 */ 702void 703t3_cleanup_rbuf(struct tcpcb *tp, int copied) 704{ 705 struct toepcb *toep = tp->t_toe; 706 struct socket *so; 707 struct toedev *dev; 708 int dack_mode, must_send, read; 709 u32 thres, credits, dack = 0; 710 struct sockbuf *rcv; 711 712 so = inp_inpcbtosocket(tp->t_inpcb); 713 rcv = so_sockbuf_rcv(so); 714 715 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 716 (tp->t_state == TCPS_FIN_WAIT_2))) { 717 if (copied) { 718 sockbuf_lock(rcv); 719 toep->tp_copied_seq += copied; 720 sockbuf_unlock(rcv); 721 } 722 723 return; 724 } 725 726 inp_lock_assert(tp->t_inpcb); 727 728 sockbuf_lock(rcv); 729 if (copied) 730 toep->tp_copied_seq += copied; 731 else { 732 read = toep->tp_enqueued_bytes - rcv->sb_cc; 733 toep->tp_copied_seq += read; 734 } 735 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 736 toep->tp_enqueued_bytes = rcv->sb_cc; 737 sockbuf_unlock(rcv); 738 739 if (credits > rcv->sb_mbmax) { 740 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n", 741 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 742 credits = rcv->sb_mbmax; 743 } 744 745 746 /* 747 * XXX this won't accurately reflect credit return - we need 748 * to look at the difference between the amount that has been 749 * put in the recv sockbuf and what is there now 750 */ 751 752 if (__predict_false(!credits)) 753 return; 754 755 dev = toep->tp_toedev; 756 thres = TOM_TUNABLE(dev, rx_credit_thres); 757 758 if (__predict_false(thres == 0)) 759 return; 760 761 if (is_delack_mode_valid(dev, toep)) { 762 dack_mode = TOM_TUNABLE(dev, delack); 763 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 764 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 765 766 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 767 dack = F_RX_DACK_CHANGE 
| 768 V_RX_DACK_MODE(dack_mode); 769 } 770 } else 771 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 772 773 /* 774 * For coalescing to work effectively ensure the receive window has 775 * at least 16KB left. 776 */ 777 must_send = credits + 16384 >= tp->rcv_wnd; 778 779 if (must_send || credits >= thres) 780 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 781} 782 783static int 784cxgb_toe_disconnect(struct tcpcb *tp) 785{ 786 struct socket *so; 787 788 DPRINTF("cxgb_toe_disconnect\n"); 789 790 so = inp_inpcbtosocket(tp->t_inpcb); 791 close_conn(so); 792 return (0); 793} 794 795static int 796cxgb_toe_reset(struct tcpcb *tp) 797{ 798 struct toepcb *toep = tp->t_toe; 799 800 t3_send_reset(toep); 801 802 /* 803 * unhook from socket 804 */ 805 tp->t_flags &= ~TF_TOE; 806 toep->tp_tp = NULL; 807 tp->t_toe = NULL; 808 return (0); 809} 810 811static int 812cxgb_toe_send(struct tcpcb *tp) 813{ 814 struct socket *so; 815 816 DPRINTF("cxgb_toe_send\n"); 817 dump_toepcb(tp->t_toe); 818 819 so = inp_inpcbtosocket(tp->t_inpcb); 820 t3_push_frames(so, 1); 821 return (0); 822} 823 824static int 825cxgb_toe_rcvd(struct tcpcb *tp) 826{ 827 828 inp_lock_assert(tp->t_inpcb); 829 830 t3_cleanup_rbuf(tp, 0); 831 832 return (0); 833} 834 835static void 836cxgb_toe_detach(struct tcpcb *tp) 837{ 838 struct toepcb *toep; 839 840 /* 841 * XXX how do we handle teardown in the SYN_SENT state? 
842 * 843 */ 844 inp_lock_assert(tp->t_inpcb); 845 toep = tp->t_toe; 846 toep->tp_tp = NULL; 847 848 /* 849 * unhook from socket 850 */ 851 tp->t_flags &= ~TF_TOE; 852 tp->t_toe = NULL; 853} 854 855 856static struct toe_usrreqs cxgb_toe_usrreqs = { 857 .tu_disconnect = cxgb_toe_disconnect, 858 .tu_reset = cxgb_toe_reset, 859 .tu_send = cxgb_toe_send, 860 .tu_rcvd = cxgb_toe_rcvd, 861 .tu_detach = cxgb_toe_detach, 862 .tu_detach = cxgb_toe_detach, 863 .tu_syncache_event = handle_syncache_event, 864}; 865 866 867static void 868__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 869 uint64_t mask, uint64_t val, int no_reply) 870{ 871 struct cpl_set_tcb_field *req; 872 873 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 874 toep->tp_tid, word, mask, val); 875 876 req = mtod(m, struct cpl_set_tcb_field *); 877 m->m_pkthdr.len = m->m_len = sizeof(*req); 878 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 879 req->wr.wr_lo = 0; 880 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 881 req->reply = V_NO_REPLY(no_reply); 882 req->cpu_idx = 0; 883 req->word = htons(word); 884 req->mask = htobe64(mask); 885 req->val = htobe64(val); 886 887 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 888 send_or_defer(toep, m, 0); 889} 890 891static void 892t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) 893{ 894 struct mbuf *m; 895 struct tcpcb *tp = toep->tp_tp; 896 897 if (toep == NULL) 898 return; 899 900 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 901 printf("not seting field\n"); 902 return; 903 } 904 905 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 906 907 __set_tcb_field(toep, m, word, mask, val, 1); 908} 909 910/* 911 * Set one of the t_flags bits in the TCB. 
 */
static void
set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
{

	t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
 * Nagle is enabled in the TCB when the socket does NOT have TF_NODELAY set.
 */
static void
t3_set_nagle(struct toepcb *toep)
{
	struct tcpcb *tp = toep->tp_tp;

	set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
 */
void
t3_set_keepalive(struct toepcb *toep, int on_off)
{

	set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
}

/* Enable/disable hardware receive coalescing for this connection. */
void
t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
{
	set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
}

/* Enable/disable MSS-based delayed-ACK generation for this connection. */
void
t3_set_dack_mss(struct toepcb *toep, int on_off)
{

	set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
 */
static void
t3_set_tos(struct toepcb *toep)
{
	int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);

	t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
	    V_TCB_TOS(tos));
}


/*
 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
 * DDP is disabled (data is delivered to freelist).  [Note that, the peer should
 * set the PSH bit in the last segment, which would trigger delivery.]
 * We work around the issue by setting a DDP buffer in a partial placed state,
 * which guarantees that TP will schedule a timer.
 */
#define TP_DDP_TIMER_WORKAROUND_MASK\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
       V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
#define TP_DDP_TIMER_WORKAROUND_VAL\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
      32))

/*
 * Turn DDP on or off in the connection's TCB.  Disabling also applies the
 * partial-placed-buffer timer workaround described above.
 */
static void
t3_enable_ddp(struct toepcb *toep, int on)
{
	if (on) {

		t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
		    V_TF_DDP_OFF(0));
	} else
		t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_OFF(1) |
		    TP_DDP_TIMER_WORKAROUND_MASK,
		    V_TF_DDP_OFF(1) |
		    TP_DDP_TIMER_WORKAROUND_VAL);

}

/* Program the DDP tag/color for DDP buffer buf_idx (0 or 1) in the TCB. */
void
t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
{
	t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
	    V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
	    tag_color);
}

/* Program offset and length of DDP buffer buf_idx (0 or 1) in the TCB. */
void
t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
    unsigned int len)
{
	if (buf_idx == 0)
		t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
		    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
		    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
	else
		/*
		 * NOTE(review): here the mask shifts M_TCB_RX_DDP_BUF1_LEN
		 * inside the V_ macro while the value shifts outside it —
		 * asymmetric with the BUF0 case above.  Confirm against the
		 * T3 TCB field layout before touching.
		 */
		t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
		    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
		    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
		    V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
		    V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
}

/*
 * Select a congestion-control algorithm by name.  The lookup is compiled out;
 * with CONGESTION_CONTROL_SUPPORTED undefined this always succeeds.
 */
static int
t3_set_cong_control(struct socket *so, const char *name)
{
#ifdef CONGESTION_CONTROL_SUPPORTED
	int cong_algo;

	for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
		if (!strcmp(name, t3_cong_ops[cong_algo].name))
			break;

	if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
		return -EINVAL;
#endif
	return 0;
}

/*
 * Request a snapshot of the connection's TCB via a CPL_GET_TCB message.
 * Deferred (queued) while in SYN_SENT since the TID isn't valid yet.
 * Returns 0 on success or ENOMEM if no mbuf could be allocated.
 */
int
t3_get_tcb(struct toepcb *toep)
{
	struct cpl_get_tcb *req;
	struct tcpcb *tp = toep->tp_tp;
	struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);

	if (!m)
		return (ENOMEM);

	inp_lock_assert(tp->t_inpcb);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	req = mtod(m, struct cpl_get_tcb *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
	req->cpuno = htons(toep->tp_qset);
	req->rsvd = 0;
	if (tp->t_state == TCPS_SYN_SENT)
		mbufq_tail(&toep->out_of_order_queue, m);  // defer
	else
		cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
	return 0;
}

/*
 * Register the toepcb under its TID in the driver's TID table; takes an
 * extra reference on the toepcb for the table.
 */
static inline void
so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
{

	toepcb_hold(toep);

	cxgb_insert_tid(d->cdev, d->client, toep, tid);
}

/**
 * find_best_mtu - find the entry in the MTU table closest to an MTU
 * @d: TOM state
 * @mtu: the target MTU
 *
 * Returns the index of the value in the MTU table that is closest to but
 * does not exceed the target MTU.
1083 */ 1084static unsigned int 1085find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1086{ 1087 int i = 0; 1088 1089 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1090 ++i; 1091 return (i); 1092} 1093 1094static unsigned int 1095select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1096{ 1097 unsigned int idx; 1098 1099#ifdef notyet 1100 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt; 1101#endif 1102 if (tp) { 1103 tp->t_maxseg = pmtu - 40; 1104 if (tp->t_maxseg < td->mtus[0] - 40) 1105 tp->t_maxseg = td->mtus[0] - 40; 1106 idx = find_best_mtu(td, tp->t_maxseg + 40); 1107 1108 tp->t_maxseg = td->mtus[idx] - 40; 1109 } else 1110 idx = find_best_mtu(td, pmtu); 1111 1112 return (idx); 1113} 1114 1115static inline void 1116free_atid(struct t3cdev *cdev, unsigned int tid) 1117{ 1118 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1119 1120 if (toep) 1121 toepcb_release(toep); 1122} 1123 1124/* 1125 * Release resources held by an offload connection (TID, L2T entry, etc.) 
 */
static void
t3_release_offload_resources(struct toepcb *toep)
{
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev;
	struct socket *so;
	unsigned int tid = toep->tp_tid;
	struct sockbuf *rcv;

	CTR0(KTR_TOM, "t3_release_offload_resources");

	/* Nothing to release if the connection never attached to a device. */
	if (!tdev)
		return;

	cdev = TOEP_T3C_DEV(toep);
	if (!cdev)
		return;

	toep->tp_qset = 0;
	t3_release_ddp_resources(toep);

#ifdef CTRL_SKB_CACHE
	kfree_skb(CTRL_SKB_CACHE(tp));
	CTRL_SKB_CACHE(tp) = NULL;
#endif

	/* Discard any work requests still pending acknowledgement. */
	if (toep->tp_wr_avail != toep->tp_wr_max) {
		purge_wr_queue(toep);
		reset_wr_list(toep);
	}

	if (toep->tp_l2t) {
		l2t_release(L2DATA(cdev), toep->tp_l2t);
		toep->tp_l2t = NULL;
	}
	/* Detach the toepcb from the tcpcb before touching the socket. */
	toep->tp_tp = NULL;
	if (tp) {
		inp_lock_assert(tp->t_inpcb);
		so = inp_inpcbtosocket(tp->t_inpcb);
		rcv = so_sockbuf_rcv(so);
		/*
		 * cancel any offloaded reads
		 *
		 */
		sockbuf_lock(rcv);
		tp->t_toe = NULL;
		tp->t_flags &= ~TF_TOE;
		if (toep->tp_ddp_state.user_ddp_pending) {
			t3_cancel_ubuf(toep, rcv);
			toep->tp_ddp_state.user_ddp_pending = 0;
		}
		/* Wakes any reader blocked on the cancelled DDP; drops rcv lock. */
		so_sorwakeup_locked(so);

	}

	if (toep->tp_state == TCPS_SYN_SENT) {
		/* Connection never completed: only an atid was allocated. */
		free_atid(cdev, tid);
#ifdef notyet
		__skb_queue_purge(&tp->out_of_order_queue);
#endif
	} else {		// we have TID
		cxgb_remove_tid(cdev, toep, tid);
		toepcb_release(toep);
	}
#if 0
	log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
}

/*
 * Wire up the TOE usrreqs/socket ops so subsequent socket calls are routed
 * through the offload path.  Caller must have already set tp->t_toe.
 */
static void
install_offload_ops(struct socket *so)
{
	struct tcpcb *tp = so_sototcpcb(so);

	KASSERT(tp->t_toe != NULL, ("toepcb not set"));

	t3_install_socket_ops(so);
	tp->t_flags |= TF_TOE;
	tp->t_tu = &cxgb_toe_usrreqs;
}

/*
 * Determine the receive window scaling factor given a target max
 * receive window.
 */
static __inline int
select_rcv_wscale(int space, struct vnet *vnet)
{
	int wscale = 0;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	/*
	 * RFC 1323 window scaling: find the smallest shift that fits the
	 * window into 16 bits, capped at the protocol maximum of 14.
	 */
	if (V_tcp_do_rfc1323)
		for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;

	return (wscale);
}

/*
 * Determine the receive window size for a socket.
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
	struct tom_data *d = TOM_DATA(dev);
	unsigned int wnd;
	unsigned int max_rcv_wnd;
	struct sockbuf *rcv;

	rcv = so_sockbuf_rcv(so);

	/* Auto-sizing uses the global cap; otherwise the socket's own limit. */
	if (V_tcp_do_autorcvbuf)
		wnd = V_tcp_autorcvbuf_max;
	else
		wnd = rcv->sb_hiwat;

	/* XXX
	 * For receive coalescing to work effectively we need a receive window
	 * that can accommodate a coalesced segment.
	 */
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	/* PR 5138 */
	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
	    (uint32_t)d->rx_page_size * 23 :
	    MAX_RCV_WND);

	return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields.  This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
	struct sockbuf *snd, *rcv;

#ifdef notyet
	SOCK_LOCK_ASSERT(so);
#endif

	snd = so_sockbuf_snd(so);
	rcv = so_sockbuf_rcv(so);

	log(LOG_INFO, "initializing offload socket\n");
	/*
	 * We either need to fix push frames to work with sbcompress
	 * or we need to add this
	 */
	snd->sb_flags |= SB_NOCOALESCE;
	rcv->sb_flags |= SB_NOCOALESCE;

	/* Cross-link the tcpcb and the toepcb. */
	tp->t_toe = toep;
	toep->tp_tp = tp;
	toep->tp_toedev = dev;

	toep->tp_tid = tid;
	toep->tp_l2t = e;
	/* Full work-request credit allotment to start with. */
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_delack_mode = 0;

	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
	/*
	 * XXX broken
	 *
	 */
	tp->rcv_wnd = select_rcv_wnd(dev, so);

	/*
	 * Enable DDP only when tuned on, not vetoed per-socket, and the
	 * receive window is large enough to be worth it.
	 */
	toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
	toep->tp_qset_idx = 0;

	reset_wr_list(toep);
	DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
	struct tcpcb *tp = so_sototcpcb(so);
	int wscale = select_rcv_wscale(tp->rcv_wnd, so->so_vnet);

	/* High word of CPL option 0: Nagle, keepalive, window scale, MSS idx. */
	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
	    V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
	struct tcpcb *tp = so_sototcpcb(so);
	unsigned int val;

	/* Low word of CPL option 0: TOS, ULP mode, receive buffer in KB. */
	val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
	    V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
	return (val);
}

/*
 * CPL option 2: congestion-control flavor, valid only when the cong_alg
 * tunable has been set (-1 means "use the default").
 */
static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
	int flv_valid;

	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

	return (V_FLAVORS_VALID(flv_valid) |
	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
/* Sum the WR credits of all work requests still awaiting an ack. */
static int
count_pending_wrs(const struct toepcb *toep)
{
	const struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)
		n += m->m_pkthdr.csum_data;
	return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

/*
 * Build a CPL_ACT_OPEN_REQ in @m for an active open with the given atid and
 * L2T entry.  The inpcb supplies the 4-tuple; the toepcb supplies the
 * previously selected MTU index and ULP mode.
 */
static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
	struct cpl_act_open_req *req;
	struct inpcb *inp = so_sotoinpcb(so);
	struct tcpcb *tp = inp_inpcbtotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = toep->tp_toedev;

	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

	req = mtod(m, struct cpl_act_open_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
#if 0
	req->local_port = inp->inp_lport;
	req->peer_port = inp->inp_fport;
	memcpy(&req->local_ip, &inp->inp_laddr, 4);
	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
#endif
	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
	    V_TX_CHANNEL(e->smt_idx));
	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
	req->params = 0;
	req->opt2 = htonl(calc_opt2(so, tdev));
}

/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return (ECONNREFUSED);
	case CPL_ERR_ARP_MISS:
		return (EHOSTUNREACH);
	case CPL_ERR_CONN_TIMEDOUT:
		return (ETIMEDOUT);
	case CPL_ERR_TCAM_FULL:
		return (ENOMEM);
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return (EADDRINUSE);
	default:
		return (EIO);
	}
}

/*
 * Tear down a failed active open and report @errno to the TCP stack.
 * Called with the inpcb write lock held; drops it via inp_wunlock before
 * tcp_offload_drop.
 */
static void
fail_act_open(struct toepcb *toep, int errno)
{
	struct tcpcb *tp = toep->tp_tp;

	t3_release_offload_resources(toep);
	if (tp) {
		inp_wunlock(tp->t_inpcb);
		tcp_offload_drop(tp, errno);
	}

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	struct inpcb *inp;

	/* The tcpcb may already be gone if the connection was torn down. */
	if (toep->tp_tp == NULL)
		goto done;

	inp = toep->tp_tp->t_inpcb;

/*
 * Don't handle connection retry for now
 */
#ifdef notyet
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
			       jiffies + HZ / 2);
	} else
#endif
	{
		inp_wlock(inp);
		/*
		 * drops the inpcb lock
		 */
		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
	}

 done:
	m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int
act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	       status != CPL_ERR_ARP_MISS;
}

/*
Process an ACT_OPEN_RPL CPL message. 1492 */ 1493static int 1494do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 1495{ 1496 struct toepcb *toep = (struct toepcb *)ctx; 1497 struct cpl_act_open_rpl *rpl = cplhdr(m); 1498 1499 if (cdev->type != T3A && act_open_has_tid(rpl->status)) 1500 cxgb_queue_tid_release(cdev, GET_TID(rpl)); 1501 1502 active_open_failed(toep, m); 1503 return (0); 1504} 1505 1506/* 1507 * Handle an ARP failure for an active open. XXX purge ofo queue 1508 * 1509 * XXX badly broken for crossed SYNs as the ATID is no longer valid. 1510 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should 1511 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't 1512 * free the atid. Hmm. 1513 */ 1514#ifdef notyet 1515static void 1516act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) 1517{ 1518 struct toepcb *toep = m_get_toep(m); 1519 struct tcpcb *tp = toep->tp_tp; 1520 struct inpcb *inp = tp->t_inpcb; 1521 struct socket *so; 1522 1523 inp_wlock(inp); 1524 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { 1525 /* 1526 * drops the inpcb lock 1527 */ 1528 fail_act_open(so, EHOSTUNREACH); 1529 printf("freeing %p\n", m); 1530 1531 m_free(m); 1532 } else 1533 inp_wunlock(inp); 1534} 1535#endif 1536/* 1537 * Send an active open request. 
1538 */ 1539int 1540t3_connect(struct toedev *tdev, struct socket *so, 1541 struct rtentry *rt, struct sockaddr *nam) 1542{ 1543 struct mbuf *m; 1544 struct l2t_entry *e; 1545 struct tom_data *d = TOM_DATA(tdev); 1546 struct inpcb *inp = so_sotoinpcb(so); 1547 struct tcpcb *tp = intotcpcb(inp); 1548 struct toepcb *toep; /* allocated by init_offload_socket */ 1549 1550 int atid; 1551 1552 toep = toepcb_alloc(); 1553 if (toep == NULL) 1554 goto out_err; 1555 1556 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1557 goto out_err; 1558 1559 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1560 if (!e) 1561 goto free_tid; 1562 1563 inp_lock_assert(inp); 1564 m = m_gethdr(MT_DATA, M_WAITOK); 1565 1566#if 0 1567 m->m_toe.mt_toepcb = tp->t_toe; 1568 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1569#endif 1570 so_lock(so); 1571 1572 init_offload_socket(so, tdev, atid, e, rt, toep); 1573 1574 install_offload_ops(so); 1575 1576 mk_act_open_req(so, m, atid, e); 1577 so_unlock(so); 1578 1579 soisconnecting(so); 1580 toep = tp->t_toe; 1581 m_set_toep(m, tp->t_toe); 1582 1583 toep->tp_state = TCPS_SYN_SENT; 1584 l2t_send(d->cdev, (struct mbuf *)m, e); 1585 1586 if (toep->tp_ulp_mode) 1587 t3_enable_ddp(toep, 0); 1588 return (0); 1589 1590free_tid: 1591 printf("failing connect - free atid\n"); 1592 1593 free_atid(d->cdev, atid); 1594out_err: 1595 printf("return ENOMEM\n"); 1596 return (ENOMEM); 1597} 1598 1599/* 1600 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1601 * not send multiple ABORT_REQs for the same connection and also that we do 1602 * not try to send a message after the connection has closed. Returns 1 if 1603 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
1604 */ 1605static void 1606t3_send_reset(struct toepcb *toep) 1607{ 1608 1609 struct cpl_abort_req *req; 1610 unsigned int tid = toep->tp_tid; 1611 int mode = CPL_ABORT_SEND_RST; 1612 struct tcpcb *tp = toep->tp_tp; 1613 struct toedev *tdev = toep->tp_toedev; 1614 struct socket *so = NULL; 1615 struct mbuf *m; 1616 struct sockbuf *snd; 1617 1618 if (tp) { 1619 inp_lock_assert(tp->t_inpcb); 1620 so = inp_inpcbtosocket(tp->t_inpcb); 1621 } 1622 1623 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) || 1624 tdev == NULL)) 1625 return; 1626 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN); 1627 1628 snd = so_sockbuf_snd(so); 1629 /* Purge the send queue so we don't send anything after an abort. */ 1630 if (so) 1631 sbflush(snd); 1632 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev)) 1633 mode |= CPL_ABORT_POST_CLOSE_REQ; 1634 1635 m = m_gethdr_nofail(sizeof(*req)); 1636 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep)); 1637 set_arp_failure_handler(m, abort_arp_failure); 1638 1639 req = mtod(m, struct cpl_abort_req *); 1640 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); 1641 req->wr.wr_lo = htonl(V_WR_TID(tid)); 1642 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); 1643 req->rsvd0 = tp ? 
htonl(tp->snd_nxt) : 0; 1644 req->rsvd1 = !(toep->tp_flags & TP_DATASENT); 1645 req->cmd = mode; 1646 if (tp && (tp->t_state == TCPS_SYN_SENT)) 1647 mbufq_tail(&toep->out_of_order_queue, m); // defer 1648 else 1649 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); 1650} 1651 1652static int 1653t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) 1654{ 1655 struct inpcb *inp; 1656 int error, optval; 1657 1658 if (sopt->sopt_name == IP_OPTIONS) 1659 return (ENOPROTOOPT); 1660 1661 if (sopt->sopt_name != IP_TOS) 1662 return (EOPNOTSUPP); 1663 1664 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 1665 1666 if (error) 1667 return (error); 1668 1669 if (optval > IPTOS_PREC_CRITIC_ECP) 1670 return (EINVAL); 1671 1672 inp = so_sotoinpcb(so); 1673 inp_wlock(inp); 1674 inp_ip_tos_set(inp, optval); 1675#if 0 1676 inp->inp_ip_tos = optval; 1677#endif 1678 t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe); 1679 inp_wunlock(inp); 1680 1681 return (0); 1682} 1683 1684static int 1685t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1686{ 1687 int err = 0; 1688 size_t copied; 1689 1690 if (sopt->sopt_name != TCP_CONGESTION && 1691 sopt->sopt_name != TCP_NODELAY) 1692 return (EOPNOTSUPP); 1693 1694 if (sopt->sopt_name == TCP_CONGESTION) { 1695 char name[TCP_CA_NAME_MAX]; 1696 int optlen = sopt->sopt_valsize; 1697 struct tcpcb *tp; 1698 1699 if (sopt->sopt_dir == SOPT_GET) { 1700 KASSERT(0, ("unimplemented")); 1701 return (EOPNOTSUPP); 1702 } 1703 1704 if (optlen < 1) 1705 return (EINVAL); 1706 1707 err = copyinstr(sopt->sopt_val, name, 1708 min(TCP_CA_NAME_MAX - 1, optlen), &copied); 1709 if (err) 1710 return (err); 1711 if (copied < 1) 1712 return (EINVAL); 1713 1714 tp = so_sototcpcb(so); 1715 /* 1716 * XXX I need to revisit this 1717 */ 1718 if ((err = t3_set_cong_control(so, name)) == 0) { 1719#ifdef CONGESTION_CONTROL_SUPPORTED 1720 tp->t_cong_control = strdup(name, M_CXGB); 1721#endif 1722 } else 1723 return (err); 1724 } else { 1725 int optval, oldval; 1726 
struct inpcb *inp; 1727 struct tcpcb *tp; 1728 1729 if (sopt->sopt_dir == SOPT_GET) 1730 return (EOPNOTSUPP); 1731 1732 err = sooptcopyin(sopt, &optval, sizeof optval, 1733 sizeof optval); 1734 1735 if (err) 1736 return (err); 1737 1738 inp = so_sotoinpcb(so); 1739 inp_wlock(inp); 1740 tp = inp_inpcbtotcpcb(inp); 1741 1742 oldval = tp->t_flags; 1743 if (optval) 1744 tp->t_flags |= TF_NODELAY; 1745 else 1746 tp->t_flags &= ~TF_NODELAY; 1747 inp_wunlock(inp); 1748 1749 1750 if (oldval != tp->t_flags && (tp->t_toe != NULL)) 1751 t3_set_nagle(tp->t_toe); 1752 1753 } 1754 1755 return (0); 1756} 1757 1758int 1759t3_ctloutput(struct socket *so, struct sockopt *sopt) 1760{ 1761 int err; 1762 1763 if (sopt->sopt_level != IPPROTO_TCP) 1764 err = t3_ip_ctloutput(so, sopt); 1765 else 1766 err = t3_tcp_ctloutput(so, sopt); 1767 1768 if (err != EOPNOTSUPP) 1769 return (err); 1770 1771 return (tcp_ctloutput(so, sopt)); 1772} 1773 1774/* 1775 * Returns true if we need to explicitly request RST when we receive new data 1776 * on an RX-closed connection. 1777 */ 1778static inline int 1779need_rst_on_excess_rx(const struct toepcb *toep) 1780{ 1781 return (1); 1782} 1783 1784/* 1785 * Handles Rx data that arrives in a state where the socket isn't accepting 1786 * new data. 1787 */ 1788static void 1789handle_excess_rx(struct toepcb *toep, struct mbuf *m) 1790{ 1791 1792 if (need_rst_on_excess_rx(toep) && 1793 !(toep->tp_flags & TP_ABORT_SHUTDOWN)) 1794 t3_send_reset(toep); 1795 m_freem(m); 1796} 1797 1798/* 1799 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE) 1800 * by getting the DDP offset from the TCB. 
 */
static void
tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct ddp_state *q = &toep->tp_ddp_state;
	struct ddp_buf_state *bsp;
	struct cpl_get_tcb_rpl *hdr;
	unsigned int ddp_offset;
	struct socket *so;
	struct tcpcb *tp;
	struct sockbuf *rcv;
	int state;

	uint64_t t;
	__be64 *tcb;

	tp = toep->tp_tp;
	so = inp_inpcbtosocket(tp->t_inpcb);

	inp_lock_assert(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	/* Note that we only account for CPL_GET_TCB issued by the DDP code.
	 * We really need a cookie in order to dispatch the RPLs.
	 */
	q->get_tcb_count--;

	/* It is a possible that a previous CPL already invalidated UBUF DDP
	 * and moved the cur_buf idx and hence no further processing of this
	 * skb is required. However, the app might be sleeping on
	 * !q->get_tcb_count and we need to wake it up.
	 */
	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
		/* NOTE(review): this inner 'state' shadows the outer one. */
		int state = so_state_get(so);

		m_freem(m);
		/* so_sorwakeup_locked drops the sockbuf lock. */
		if (__predict_true((state & SS_NOFDREF) == 0))
			so_sorwakeup_locked(so);
		else
			sockbuf_unlock(rcv);

		return;
	}

	bsp = &q->buf_state[q->cur_buf];
	hdr = cplhdr(m);
	tcb = (__be64 *)(hdr + 1);
	/*
	 * Extract the current DDP placement offset of the active hardware
	 * buffer from the raw TCB image that follows the CPL header.  Buffer
	 * 0's offset sits in the upper half of its 64-bit word, buffer 1's
	 * in the lower half.
	 */
	if (q->cur_buf == 0) {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
	} else {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
	}
	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
	/* The delta since the last known offset is the newly DMAed length. */
	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset = ddp_offset;
	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;

	CTR5(KTR_TOM,
	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
	KASSERT(ddp_offset >= m->m_cur_offset,
	    ("ddp_offset=%u less than cur_offset=%u",
	     ddp_offset, m->m_cur_offset));

#if 0
{
	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;

	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;

	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
	rcv_nxt = t >> S_TCB_RCV_NXT;
	rcv_nxt &= M_TCB_RCV_NXT;

	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;

	T3_TRACE2(TIDTB(sk),
	    "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
	    ddp_flags, rcv_nxt - rx_hdr_offset);
	T3_TRACE4(TB(q),
	    "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
	    tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
	T3_TRACE3(TB(q),
	    "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
	    rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
	T3_TRACE2(TB(q),
	    "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
	    q->buf_state[0].flags, q->buf_state[1].flags);

}
#endif
	/*
	 * NOTE(review): this path returns with rcv still locked;
	 * handle_excess_rx does not appear to unlock it — verify against
	 * the lock contract of t3_send_reset/m_freem callers.
	 */
	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
		handle_excess_rx(toep, m);
		return;
	}

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
	}
#endif
	if (bsp->flags & DDP_BF_NOCOPY) {
#ifdef T3_TRACE
		T3_TRACE0(TB(q),
		    "tcb_rpl_as_ddp_complete: CANCEL UBUF");

		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
			printk("!cancel_ubuf");
			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
		}
#endif
		/* User buffer was active: complete it and flip buffers. */
		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
		q->cur_buf ^= 1;
	} else if (bsp->flags & DDP_BF_NOFLIP) {

		m->m_ddp_flags = 1;	/* always a kernel buffer */

		/* now HW buffer carries a user buffer */
		bsp->flags &= ~DDP_BF_NOFLIP;
		bsp->flags |= DDP_BF_NOCOPY;

		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
		 * any new data in which case we're done. If in addition the
		 * offset is 0, then there wasn't a completion for the kbuf
		 * and we need to decrement the posted count.
		 */
		if (m->m_pkthdr.len == 0) {
			if (ddp_offset == 0) {
				q->kbuf_posted--;
				bsp->flags |= DDP_BF_NODATA;
			}
			sockbuf_unlock(rcv);
			m_free(m);
			return;
		}
	} else {
		sockbuf_unlock(rcv);

		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
		 * but it got here way late and nobody cares anymore.
		 */
		m_free(m);
		return;
	}

	/* Account the DDPed bytes to the stack and queue the mbuf. */
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
	    m->m_seq, q->cur_buf, m->m_pkthdr.len);
	if (m->m_pkthdr.len == 0) {
		q->user_ddp_pending = 0;
		m_free(m);
	} else
		SBAPPEND(rcv, m);

	state = so_state_get(so);
	/* so_sorwakeup_locked drops the sockbuf lock. */
	if (__predict_true((state & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
 * in that case they are similar to DDP completions.
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* OK if socket doesn't exist */
	if (toep == NULL) {
		printf("null toep in do_get_tcb_rpl\n");
		return (CPL_RET_BUF_DONE);
	}

	inp_wlock(toep->tp_tp->t_inpcb);
	tcb_rpl_as_ddp_complete(toep, m);
	inp_wunlock(toep->tp_tp->t_inpcb);

	return (0);
}

/*
 * Account for data that was placed directly by DDP before this RX_DATA
 * message arrived: any gap between tp->rcv_nxt and the CPL's sequence
 * number is DDPed payload belonging to the current hardware buffer.
 * Caller holds the inpcb lock; the sockbuf lock is taken here.
 */
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data *hdr = cplhdr(m);
	unsigned int rcv_nxt = ntohl(hdr->seq);
	struct sockbuf *rcv;

	/* No gap means no DDP placement to account for. */
	if (tp->rcv_nxt == rcv_nxt)
		return;

	inp_lock_assert(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	q = &toep->tp_ddp_state;
	bsp = &q->buf_state[q->cur_buf];
	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
	    rcv_nxt, tp->rcv_nxt));
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "handle_ddp_data: neg len");
	}
#endif
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	/*
	 * For now, don't re-enable DDP after a connection fell out of  DDP
	 * mode.
	 */
	q->ubuf_ddp_ready = 0;
	sockbuf_unlock(rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_data *hdr = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct sockbuf *rcv;
	int state;
	int len = be16toh(hdr->len);

	inp_wlock(tp->t_inpcb);

	so = inp_inpcbtosocket(tp->t_inpcb);

	/* Socket no longer accepting data: reset and drop. */
	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		TRACE_EXIT;
		return;
	}

	/* Fold in any bytes that DDP already placed ahead of this CPL. */
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
		handle_ddp_data(toep, m);

	m->m_seq = ntohl(hdr->seq);
	m->m_ulp_mode = 0;	/* for iSCSI */

#if VALIDATE_SEQ
	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
		log(LOG_ERR,
		    "%s: TID %u: Bad sequence number %u, expected %u\n",
		    toep->tp_toedev->name, toep->tp_tid, m->m_seq,
		    tp->rcv_nxt);
		m_freem(m);
		inp_wunlock(tp->t_inpcb);
		return;
	}
#endif
	/* Strip the CPL header; only payload goes to the socket buffer. */
	m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
	/*
	 * We don't handle urgent data yet
	 */
	if (__predict_false(hdr->urg))
		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
	    tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
		    tp->rcv_nxt];
#endif
	/* Track the hardware's delayed-ack mode changes. */
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
		toep->tp_delack_mode = hdr->dack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

	/* Trust the CPL's length over the mbuf's if it is smaller. */
	if (len < m->m_pkthdr.len)
		m->m_pkthdr.len = m->m_len = len;

	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;
	CTR2(KTR_TOM,
	    "new_rx_data: seq 0x%x len %u",
	    m->m_seq, m->m_pkthdr.len);
	inp_wunlock(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);
#if 0
	if (sb_notify(rcv))
		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
#endif
	SBAPPEND(rcv, m);

#ifdef notyet
	/*
	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
	 *
	 */
	KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),

	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
	     so, rcv->sb_cc, rcv->sb_mbmax));
#endif

	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
	    rcv->sb_cc, rcv->sb_mbcnt);

	state = so_state_get(so);
	/* so_sorwakeup_locked drops the sockbuf lock. */
	if (__predict_true((state & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DATA CPL messages.
 */
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);

	new_rx_data(toep, m);

	return (0);
}

/*
 * Process an RX_DATA_DDP message: data was placed directly into a posted
 * DDP buffer; update sequence/offset bookkeeping and hand a zero-copy mbuf
 * describing the placement to the socket buffer.
 */
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	struct socket *so;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	int nomoredata = 0;
	unsigned int delack_mode;
	struct sockbuf *rcv;

	tp = toep->tp_tp;
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {

		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	/* The report says which of the two posted buffers received data. */
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len.  We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	inp_wunlock(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
	    m->m_len, rcv_nxt, m->m_seq);
	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
	 * account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
		     m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;	/* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;	/* changes original hdr->ddp_report */
#endif
	SBAPPEND(rcv, m);

	/* so_sorwakeup_locked drops the sockbuf lock. */
	if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
	    (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
	     || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
	F_DDP_PPOD_PARITY_ERR |\
F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\ 2290 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\ 2291 F_DDP_INVALID_PPOD) 2292 2293/* 2294 * Handler for RX_DATA_DDP CPL messages. 2295 */ 2296static int 2297do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2298{ 2299 struct toepcb *toep = ctx; 2300 const struct cpl_rx_data_ddp *hdr = cplhdr(m); 2301 2302 VALIDATE_SOCK(so); 2303 2304 if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) { 2305 log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n", 2306 GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status))); 2307 return (CPL_RET_BUF_DONE); 2308 } 2309#if 0 2310 skb->h.th = tcphdr_skb->h.th; 2311#endif 2312 new_rx_data_ddp(toep, m); 2313 return (0); 2314} 2315 2316static void 2317process_ddp_complete(struct toepcb *toep, struct mbuf *m) 2318{ 2319 struct tcpcb *tp = toep->tp_tp; 2320 struct socket *so; 2321 struct ddp_state *q; 2322 struct ddp_buf_state *bsp; 2323 struct cpl_rx_ddp_complete *hdr; 2324 unsigned int ddp_report, buf_idx, when, delack_mode; 2325 int nomoredata = 0; 2326 struct sockbuf *rcv; 2327 2328 inp_wlock(tp->t_inpcb); 2329 so = inp_inpcbtosocket(tp->t_inpcb); 2330 2331 if (__predict_false(so_no_receive(so))) { 2332 struct inpcb *inp = so_sotoinpcb(so); 2333 2334 handle_excess_rx(toep, m); 2335 inp_wunlock(inp); 2336 return; 2337 } 2338 q = &toep->tp_ddp_state; 2339 hdr = cplhdr(m); 2340 ddp_report = ntohl(hdr->ddp_report); 2341 buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1; 2342 m->m_pkthdr.csum_data = tp->rcv_nxt; 2343 2344 rcv = so_sockbuf_rcv(so); 2345 sockbuf_lock(rcv); 2346 2347 bsp = &q->buf_state[buf_idx]; 2348 when = bsp->cur_offset; 2349 m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when; 2350 tp->rcv_nxt += m->m_len; 2351 tp->t_rcvtime = ticks; 2352 2353 delack_mode = G_DDP_DACK_MODE(ddp_report); 2354 if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) { 2355 toep->tp_delack_mode = delack_mode; 2356 toep->tp_delack_seq = 
tp->rcv_nxt; 2357 } 2358#ifdef notyet 2359 skb_reset_transport_header(skb); 2360 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */ 2361#endif 2362 inp_wunlock(tp->t_inpcb); 2363 2364 KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2365 CTR5(KTR_TOM, 2366 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u " 2367 "ddp_report 0x%x offset %u, len %u", 2368 tp->rcv_nxt, bsp->cur_offset, ddp_report, 2369 G_DDP_OFFSET(ddp_report), m->m_len); 2370 2371 m->m_cur_offset = bsp->cur_offset; 2372 bsp->cur_offset += m->m_len; 2373 2374 if (!(bsp->flags & DDP_BF_NOFLIP)) { 2375 q->cur_buf ^= 1; /* flip buffers */ 2376 if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length) 2377 nomoredata=1; 2378 } 2379 2380 CTR4(KTR_TOM, 2381 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u " 2382 "ddp_report %u offset %u", 2383 tp->rcv_nxt, bsp->cur_offset, ddp_report, 2384 G_DDP_OFFSET(ddp_report)); 2385 2386 m->m_ddp_gl = (unsigned char *)bsp->gl; 2387 m->m_flags |= M_DDP; 2388 m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1; 2389 if (bsp->flags & DDP_BF_NOCOPY) 2390 bsp->flags &= ~DDP_BF_NOCOPY; 2391 if (nomoredata) 2392 m->m_ddp_flags |= DDP_BF_NODATA; 2393 2394 SBAPPEND(rcv, m); 2395 if ((so_state_get(so) & SS_NOFDREF) == 0) 2396 so_sorwakeup_locked(so); 2397 else 2398 sockbuf_unlock(rcv); 2399} 2400 2401/* 2402 * Handler for RX_DDP_COMPLETE CPL messages. 2403 */ 2404static int 2405do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2406{ 2407 struct toepcb *toep = ctx; 2408 2409 VALIDATE_SOCK(so); 2410#if 0 2411 skb->h.th = tcphdr_skb->h.th; 2412#endif 2413 process_ddp_complete(toep, m); 2414 return (0); 2415} 2416 2417/* 2418 * Move a socket to TIME_WAIT state. We need to make some adjustments to the 2419 * socket state before calling tcp_time_wait to comply with its expectations. 2420 */ 2421static void 2422enter_timewait(struct tcpcb *tp) 2423{ 2424 /* 2425 * Bump rcv_nxt for the peer FIN. 
We don't do this at the time we 2426 * process peer_close because we don't want to carry the peer FIN in 2427 * the socket's receive queue and if we increment rcv_nxt without 2428 * having the FIN in the receive queue we'll confuse facilities such 2429 * as SIOCINQ. 2430 */ 2431 inp_wlock(tp->t_inpcb); 2432 tp->rcv_nxt++; 2433 2434 tp->ts_recent_age = 0; /* defeat recycling */ 2435 tp->t_srtt = 0; /* defeat tcp_update_metrics */ 2436 inp_wunlock(tp->t_inpcb); 2437 tcp_offload_twstart(tp); 2438} 2439 2440/* 2441 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE. This 2442 * function deals with the data that may be reported along with the FIN. 2443 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to 2444 * perform normal FIN-related processing. In the latter case 1 indicates that 2445 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the 2446 * skb can be freed. 2447 */ 2448static int 2449handle_peer_close_data(struct socket *so, struct mbuf *m) 2450{ 2451 struct tcpcb *tp = so_sototcpcb(so); 2452 struct toepcb *toep = tp->t_toe; 2453 struct ddp_state *q; 2454 struct ddp_buf_state *bsp; 2455 struct cpl_peer_close *req = cplhdr(m); 2456 unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */ 2457 struct sockbuf *rcv; 2458 2459 if (tp->rcv_nxt == rcv_nxt) /* no data */ 2460 return (0); 2461 2462 CTR0(KTR_TOM, "handle_peer_close_data"); 2463 if (__predict_false(so_no_receive(so))) { 2464 handle_excess_rx(toep, m); 2465 2466 /* 2467 * Although we discard the data we want to process the FIN so 2468 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP + 2469 * PEER_CLOSE without data. In particular this PEER_CLOSE 2470 * may be what will close the connection. We return 1 because 2471 * handle_excess_rx() already freed the packet. 
2472 */ 2473 return (1); 2474 } 2475 2476 inp_lock_assert(tp->t_inpcb); 2477 q = &toep->tp_ddp_state; 2478 rcv = so_sockbuf_rcv(so); 2479 sockbuf_lock(rcv); 2480 2481 bsp = &q->buf_state[q->cur_buf]; 2482 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; 2483 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2484 m->m_ddp_gl = (unsigned char *)bsp->gl; 2485 m->m_flags |= M_DDP; 2486 m->m_cur_offset = bsp->cur_offset; 2487 m->m_ddp_flags = 2488 DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; 2489 m->m_seq = tp->rcv_nxt; 2490 tp->rcv_nxt = rcv_nxt; 2491 bsp->cur_offset += m->m_pkthdr.len; 2492 if (!(bsp->flags & DDP_BF_NOFLIP)) 2493 q->cur_buf ^= 1; 2494#ifdef notyet 2495 skb_reset_transport_header(skb); 2496 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */ 2497#endif 2498 tp->t_rcvtime = ticks; 2499 SBAPPEND(rcv, m); 2500 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0)) 2501 so_sorwakeup_locked(so); 2502 else 2503 sockbuf_unlock(rcv); 2504 2505 return (1); 2506} 2507 2508/* 2509 * Handle a peer FIN. 2510 */ 2511static void 2512do_peer_fin(struct toepcb *toep, struct mbuf *m) 2513{ 2514 struct socket *so; 2515 struct tcpcb *tp = toep->tp_tp; 2516 int keep, action; 2517 2518 action = keep = 0; 2519 CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state); 2520 if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { 2521 printf("abort_pending set\n"); 2522 2523 goto out; 2524 } 2525 inp_wlock(tp->t_inpcb); 2526 so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); 2527 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) { 2528 keep = handle_peer_close_data(so, m); 2529 if (keep < 0) { 2530 inp_wunlock(tp->t_inpcb); 2531 return; 2532 } 2533 } 2534 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 2535 CTR1(KTR_TOM, 2536 "waking up waiters for cantrcvmore on %p ", so); 2537 socantrcvmore(so); 2538 2539 /* 2540 * If connection is half-synchronized 2541 * (ie NEEDSYN flag on) then delay ACK, 2542 * so it may be piggybacked when SYN is sent. 
2543 * Otherwise, since we received a FIN then no 2544 * more input can be expected, send ACK now. 2545 */ 2546 if (tp->t_flags & TF_NEEDSYN) 2547 tp->t_flags |= TF_DELACK; 2548 else 2549 tp->t_flags |= TF_ACKNOW; 2550 tp->rcv_nxt++; 2551 } 2552 2553 switch (tp->t_state) { 2554 case TCPS_SYN_RECEIVED: 2555 tp->t_starttime = ticks; 2556 /* FALLTHROUGH */ 2557 case TCPS_ESTABLISHED: 2558 tp->t_state = TCPS_CLOSE_WAIT; 2559 break; 2560 case TCPS_FIN_WAIT_1: 2561 tp->t_state = TCPS_CLOSING; 2562 break; 2563 case TCPS_FIN_WAIT_2: 2564 /* 2565 * If we've sent an abort_req we must have sent it too late, 2566 * HW will send us a reply telling us so, and this peer_close 2567 * is really the last message for this connection and needs to 2568 * be treated as an abort_rpl, i.e., transition the connection 2569 * to TCP_CLOSE (note that the host stack does this at the 2570 * time of generating the RST but we must wait for HW). 2571 * Otherwise we enter TIME_WAIT. 2572 */ 2573 t3_release_offload_resources(toep); 2574 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2575 action = TCP_CLOSE; 2576 } else { 2577 action = TCP_TIMEWAIT; 2578 } 2579 break; 2580 default: 2581 log(LOG_ERR, 2582 "%s: TID %u received PEER_CLOSE in bad state %d\n", 2583 toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state); 2584 } 2585 inp_wunlock(tp->t_inpcb); 2586 2587 if (action == TCP_TIMEWAIT) { 2588 enter_timewait(tp); 2589 } else if (action == TCP_DROP) { 2590 tcp_offload_drop(tp, 0); 2591 } else if (action == TCP_CLOSE) { 2592 tcp_offload_close(tp); 2593 } 2594 2595#ifdef notyet 2596 /* Do not send POLL_HUP for half duplex close. */ 2597 if ((sk->sk_shutdown & SEND_SHUTDOWN) || 2598 sk->sk_state == TCP_CLOSE) 2599 sk_wake_async(so, 1, POLL_HUP); 2600 else 2601 sk_wake_async(so, 1, POLL_IN); 2602#endif 2603 2604out: 2605 if (!keep) 2606 m_free(m); 2607} 2608 2609/* 2610 * Handler for PEER_CLOSE CPL messages. 
2611 */ 2612static int 2613do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2614{ 2615 struct toepcb *toep = (struct toepcb *)ctx; 2616 2617 VALIDATE_SOCK(so); 2618 2619 do_peer_fin(toep, m); 2620 return (0); 2621} 2622 2623static void 2624process_close_con_rpl(struct toepcb *toep, struct mbuf *m) 2625{ 2626 struct cpl_close_con_rpl *rpl = cplhdr(m); 2627 struct tcpcb *tp = toep->tp_tp; 2628 struct socket *so; 2629 int action = 0; 2630 struct sockbuf *rcv; 2631 2632 inp_wlock(tp->t_inpcb); 2633 so = inp_inpcbtosocket(tp->t_inpcb); 2634 2635 tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ 2636 2637 if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { 2638 inp_wunlock(tp->t_inpcb); 2639 goto out; 2640 } 2641 2642 CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep, 2643 tp->t_state, !!(so_state_get(so) & SS_NOFDREF)); 2644 2645 switch (tp->t_state) { 2646 case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */ 2647 t3_release_offload_resources(toep); 2648 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2649 action = TCP_CLOSE; 2650 2651 } else { 2652 action = TCP_TIMEWAIT; 2653 } 2654 break; 2655 case TCPS_LAST_ACK: 2656 /* 2657 * In this state we don't care about pending abort_rpl. 2658 * If we've sent abort_req it was post-close and was sent too 2659 * late, this close_con_rpl is the actual last message. 2660 */ 2661 t3_release_offload_resources(toep); 2662 action = TCP_CLOSE; 2663 break; 2664 case TCPS_FIN_WAIT_1: 2665 /* 2666 * If we can't receive any more 2667 * data, then closing user can proceed. 2668 * Starting the timer is contrary to the 2669 * specification, but if we don't get a FIN 2670 * we'll hang forever. 2671 * 2672 * XXXjl: 2673 * we should release the tp also, and use a 2674 * compressed state. 
2675 */ 2676 if (so) 2677 rcv = so_sockbuf_rcv(so); 2678 else 2679 break; 2680 2681 if (rcv->sb_state & SBS_CANTRCVMORE) { 2682 int timeout; 2683 2684 if (so) 2685 soisdisconnected(so); 2686 timeout = (tcp_fast_finwait2_recycle) ? 2687 tcp_finwait2_timeout : tcp_maxidle; 2688 tcp_timer_activate(tp, TT_2MSL, timeout); 2689 } 2690 tp->t_state = TCPS_FIN_WAIT_2; 2691 if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 && 2692 (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) { 2693 action = TCP_DROP; 2694 } 2695 2696 break; 2697 default: 2698 log(LOG_ERR, 2699 "%s: TID %u received CLOSE_CON_RPL in bad state %d\n", 2700 toep->tp_toedev->tod_name, toep->tp_tid, 2701 tp->t_state); 2702 } 2703 inp_wunlock(tp->t_inpcb); 2704 2705 2706 if (action == TCP_TIMEWAIT) { 2707 enter_timewait(tp); 2708 } else if (action == TCP_DROP) { 2709 tcp_offload_drop(tp, 0); 2710 } else if (action == TCP_CLOSE) { 2711 tcp_offload_close(tp); 2712 } 2713out: 2714 m_freem(m); 2715} 2716 2717/* 2718 * Handler for CLOSE_CON_RPL CPL messages. 2719 */ 2720static int 2721do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, 2722 void *ctx) 2723{ 2724 struct toepcb *toep = (struct toepcb *)ctx; 2725 2726 process_close_con_rpl(toep, m); 2727 return (0); 2728} 2729 2730/* 2731 * Process abort replies. We only process these messages if we anticipate 2732 * them as the coordination between SW and HW in this area is somewhat lacking 2733 * and sometimes we get ABORT_RPLs after we are done with the connection that 2734 * originated the ABORT_REQ. 
 */
static void
process_abort_rpl(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int needclose = 0;	/* close must happen after inp lock is dropped */

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(sk),
		  "process_abort_rpl: GTS rpl pending %d",
		  sock_flag(sk, ABORT_RPL_PENDING));
#endif

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	/* Only act on a reply we are actually expecting; otherwise just free m. */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		/*
		 * XXX panic on tcpdrop
		 */
		/*
		 * First branch: record that one ABORT_RPL arrived and keep
		 * waiting.  NOTE(review): the !is_t3a() guard suggests T3A
		 * parts deliver the reply differently (possibly twice) —
		 * confirm against the T3 hardware documentation.
		 */
		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
			toep->tp_flags |= TP_ABORT_RPL_RCVD;
		else {
			/* Final reply: clear the pending state. */
			toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
			if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
			    !is_t3a(toep->tp_toedev)) {
				/* Reaching here with REQ_RCVD set is a logic error. */
				if (toep->tp_flags & TP_ABORT_REQ_RCVD)
					panic("TP_ABORT_REQ_RCVD set");
				t3_release_offload_resources(toep);
				needclose = 1;
			}
		}
	}
	inp_wunlock(tp->t_inpcb);

	/* tcp_offload_close() is called without the inp lock held. */
	if (needclose)
		tcp_offload_close(tp);

	m_free(m);
}

/*
 * Handle an ABORT_RPL_RSS CPL message.
 */
static int
do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_abort_rpl_rss *rpl = cplhdr(m);
	struct toepcb *toep;

	/*
	 * Ignore replies to post-close aborts indicating that the abort was
	 * requested too late.  These connections are terminated when we get
	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
	 * arrives the TID is either no longer used or it has been recycled.
2791 */ 2792 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2793discard: 2794 m_free(m); 2795 return (0); 2796 } 2797 2798 toep = (struct toepcb *)ctx; 2799 2800 /* 2801 * Sometimes we've already closed the socket, e.g., a post-close 2802 * abort races with ABORT_REQ_RSS, the latter frees the socket 2803 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2804 * but FW turns the ABORT_REQ into a regular one and so we get 2805 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2806 */ 2807 if (!toep) 2808 goto discard; 2809 2810 if (toep->tp_tp == NULL) { 2811 log(LOG_NOTICE, "removing tid for abort\n"); 2812 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2813 if (toep->tp_l2t) 2814 l2t_release(L2DATA(cdev), toep->tp_l2t); 2815 2816 toepcb_release(toep); 2817 goto discard; 2818 } 2819 2820 log(LOG_NOTICE, "toep=%p\n", toep); 2821 log(LOG_NOTICE, "tp=%p\n", toep->tp_tp); 2822 2823 toepcb_hold(toep); 2824 process_abort_rpl(toep, m); 2825 toepcb_release(toep); 2826 return (0); 2827} 2828 2829/* 2830 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2831 * indicate whether RST should be sent in response. 2832 */ 2833static int 2834abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2835{ 2836 struct tcpcb *tp = so_sototcpcb(so); 2837 2838 switch (abort_reason) { 2839 case CPL_ERR_BAD_SYN: 2840#if 0 2841 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2842#endif 2843 case CPL_ERR_CONN_RESET: 2844 // XXX need to handle SYN_RECV due to crossed SYNs 2845 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2846 case CPL_ERR_XMIT_TIMEDOUT: 2847 case CPL_ERR_PERSIST_TIMEDOUT: 2848 case CPL_ERR_FINWAIT2_TIMEDOUT: 2849 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2850#if 0 2851 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2852#endif 2853 return (ETIMEDOUT); 2854 default: 2855 return (EIO); 2856 } 2857} 2858 2859static inline void 2860set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2861{ 2862 struct cpl_abort_rpl *rpl = cplhdr(m); 2863 2864 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2865 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2866 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2867 2868 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2869 rpl->cmd = cmd; 2870} 2871 2872static void 2873send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2874{ 2875 struct mbuf *reply_mbuf; 2876 struct cpl_abort_req_rss *req = cplhdr(m); 2877 2878 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2879 m_set_priority(m, CPL_PRIORITY_DATA); 2880 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2881 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2882 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2883 m_free(m); 2884} 2885 2886/* 2887 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2888 */ 2889static inline int 2890is_neg_adv_abort(unsigned int status) 2891{ 2892 return status == CPL_ERR_RTX_NEG_ADVICE || 2893 status == CPL_ERR_PERSIST_NEG_ADVICE; 2894} 2895 2896static void 2897send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2898{ 2899 struct mbuf *reply_mbuf; 2900 struct cpl_abort_req_rss *req = cplhdr(m); 2901 2902 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2903 2904 if (!reply_mbuf) { 2905 /* Defer the reply. Stick rst_status into req->cmd. 
*/ 2906 req->status = rst_status; 2907 t3_defer_reply(m, tdev, send_deferred_abort_rpl); 2908 return; 2909 } 2910 2911 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA); 2912 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status); 2913 m_free(m); 2914 2915 /* 2916 * XXX need to sync with ARP as for SYN_RECV connections we can send 2917 * these messages while ARP is pending. For other connection states 2918 * it's not a problem. 2919 */ 2920 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2921} 2922 2923#ifdef notyet 2924static void 2925cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) 2926{ 2927 CXGB_UNIMPLEMENTED(); 2928#ifdef notyet 2929 struct request_sock *req = child->sk_user_data; 2930 2931 inet_csk_reqsk_queue_removed(parent, req); 2932 synq_remove(tcp_sk(child)); 2933 __reqsk_free(req); 2934 child->sk_user_data = NULL; 2935#endif 2936} 2937 2938 2939/* 2940 * Performs the actual work to abort a SYN_RECV connection. 2941 */ 2942static void 2943do_abort_syn_rcv(struct socket *child, struct socket *parent) 2944{ 2945 struct tcpcb *parenttp = so_sototcpcb(parent); 2946 struct tcpcb *childtp = so_sototcpcb(child); 2947 2948 /* 2949 * If the server is still open we clean up the child connection, 2950 * otherwise the server already did the clean up as it was purging 2951 * its SYN queue and the skb was just sitting in its backlog. 2952 */ 2953 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { 2954 cleanup_syn_rcv_conn(child, parent); 2955 inp_wlock(childtp->t_inpcb); 2956 t3_release_offload_resources(childtp->t_toe); 2957 inp_wunlock(childtp->t_inpcb); 2958 tcp_offload_close(childtp); 2959 } 2960} 2961#endif 2962 2963/* 2964 * Handle abort requests for a SYN_RECV connection. These need extra work 2965 * because the socket is on its parent's SYN queue. 
2966 */ 2967static int 2968abort_syn_rcv(struct socket *so, struct mbuf *m) 2969{ 2970 CXGB_UNIMPLEMENTED(); 2971#ifdef notyet 2972 struct socket *parent; 2973 struct toedev *tdev = toep->tp_toedev; 2974 struct t3cdev *cdev = TOM_DATA(tdev)->cdev; 2975 struct socket *oreq = so->so_incomp; 2976 struct t3c_tid_entry *t3c_stid; 2977 struct tid_info *t; 2978 2979 if (!oreq) 2980 return -1; /* somehow we are not on the SYN queue */ 2981 2982 t = &(T3C_DATA(cdev))->tid_maps; 2983 t3c_stid = lookup_stid(t, oreq->ts_recent); 2984 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; 2985 2986 so_lock(parent); 2987 do_abort_syn_rcv(so, parent); 2988 send_abort_rpl(m, tdev, CPL_ABORT_NO_RST); 2989 so_unlock(parent); 2990#endif 2991 return (0); 2992} 2993 2994/* 2995 * Process abort requests. If we are waiting for an ABORT_RPL we ignore this 2996 * request except that we need to reply to it. 2997 */ 2998static void 2999process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev) 3000{ 3001 int rst_status = CPL_ABORT_NO_RST; 3002 const struct cpl_abort_req_rss *req = cplhdr(m); 3003 struct tcpcb *tp = toep->tp_tp; 3004 struct socket *so; 3005 int needclose = 0; 3006 3007 inp_wlock(tp->t_inpcb); 3008 so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); 3009 if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) { 3010 toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN); 3011 m_free(m); 3012 goto skip; 3013 } 3014 3015 toep->tp_flags &= ~TP_ABORT_REQ_RCVD; 3016 /* 3017 * Three cases to consider: 3018 * a) We haven't sent an abort_req; close the connection. 3019 * b) We have sent a post-close abort_req that will get to TP too late 3020 * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will 3021 * be ignored and the connection should be closed now. 3022 * c) We have sent a regular abort_req that will get to TP too late. 3023 * That will generate an abort_rpl with status 0, wait for it. 
3024 */ 3025 if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) || 3026 (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) { 3027 int error; 3028 3029 error = abort_status_to_errno(so, req->status, 3030 &rst_status); 3031 so_error_set(so, error); 3032 3033 if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0)) 3034 so_sorwakeup(so); 3035 /* 3036 * SYN_RECV needs special processing. If abort_syn_rcv() 3037 * returns 0 is has taken care of the abort. 3038 */ 3039 if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m)) 3040 goto skip; 3041 3042 t3_release_offload_resources(toep); 3043 needclose = 1; 3044 } 3045 inp_wunlock(tp->t_inpcb); 3046 3047 if (needclose) 3048 tcp_offload_close(tp); 3049 3050 send_abort_rpl(m, tdev, rst_status); 3051 return; 3052skip: 3053 inp_wunlock(tp->t_inpcb); 3054} 3055 3056/* 3057 * Handle an ABORT_REQ_RSS CPL message. 3058 */ 3059static int 3060do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3061{ 3062 const struct cpl_abort_req_rss *req = cplhdr(m); 3063 struct toepcb *toep = (struct toepcb *)ctx; 3064 3065 if (is_neg_adv_abort(req->status)) { 3066 m_free(m); 3067 return (0); 3068 } 3069 3070 log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid); 3071 3072 if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) { 3073 cxgb_remove_tid(cdev, toep, toep->tp_tid); 3074 toep->tp_flags |= TP_ABORT_REQ_RCVD; 3075 3076 send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST); 3077 if (toep->tp_l2t) 3078 l2t_release(L2DATA(cdev), toep->tp_l2t); 3079 3080 /* 3081 * Unhook 3082 */ 3083 toep->tp_tp->t_toe = NULL; 3084 toep->tp_tp->t_flags &= ~TF_TOE; 3085 toep->tp_tp = NULL; 3086 /* 3087 * XXX need to call syncache_chkrst - but we don't 3088 * have a way of doing that yet 3089 */ 3090 toepcb_release(toep); 3091 log(LOG_ERR, "abort for unestablished connection :-(\n"); 3092 return (0); 3093 } 3094 if (toep->tp_tp == NULL) { 3095 log(LOG_NOTICE, "disconnected toepcb\n"); 3096 /* should be freed 
momentarily */ 3097 return (0); 3098 } 3099 3100 3101 toepcb_hold(toep); 3102 process_abort_req(toep, m, toep->tp_toedev); 3103 toepcb_release(toep); 3104 return (0); 3105} 3106#ifdef notyet 3107static void 3108pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m) 3109{ 3110 struct toedev *tdev = TOE_DEV(parent); 3111 3112 do_abort_syn_rcv(child, parent); 3113 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) { 3114 struct cpl_pass_accept_rpl *rpl = cplhdr(m); 3115 3116 rpl->opt0h = htonl(F_TCAM_BYPASS); 3117 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 3118 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 3119 } else 3120 m_free(m); 3121} 3122#endif 3123static void 3124handle_pass_open_arp_failure(struct socket *so, struct mbuf *m) 3125{ 3126 CXGB_UNIMPLEMENTED(); 3127 3128#ifdef notyet 3129 struct t3cdev *cdev; 3130 struct socket *parent; 3131 struct socket *oreq; 3132 struct t3c_tid_entry *t3c_stid; 3133 struct tid_info *t; 3134 struct tcpcb *otp, *tp = so_sototcpcb(so); 3135 struct toepcb *toep = tp->t_toe; 3136 3137 /* 3138 * If the connection is being aborted due to the parent listening 3139 * socket going away there's nothing to do, the ABORT_REQ will close 3140 * the connection. 3141 */ 3142 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 3143 m_free(m); 3144 return; 3145 } 3146 3147 oreq = so->so_incomp; 3148 otp = so_sototcpcb(oreq); 3149 3150 cdev = T3C_DEV(so); 3151 t = &(T3C_DATA(cdev))->tid_maps; 3152 t3c_stid = lookup_stid(t, otp->ts_recent); 3153 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; 3154 3155 so_lock(parent); 3156 pass_open_abort(so, parent, m); 3157 so_unlock(parent); 3158#endif 3159} 3160 3161/* 3162 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly 3163 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV 3164 * connection. 
3165 */ 3166static void 3167pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m) 3168{ 3169 3170#ifdef notyet 3171 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3172 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk); 3173#endif 3174 handle_pass_open_arp_failure(m_get_socket(m), m); 3175} 3176 3177/* 3178 * Populate a reject CPL_PASS_ACCEPT_RPL WR. 3179 */ 3180static void 3181mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf) 3182{ 3183 struct cpl_pass_accept_req *req = cplhdr(req_mbuf); 3184 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf); 3185 unsigned int tid = GET_TID(req); 3186 3187 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP); 3188 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3189 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3190 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet 3191 rpl->opt0h = htonl(F_TCAM_BYPASS); 3192 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 3193 rpl->opt2 = 0; 3194 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3195} 3196 3197/* 3198 * Send a deferred reject to an accept request. 
3199 */ 3200static void 3201reject_pass_request(struct toedev *tdev, struct mbuf *m) 3202{ 3203 struct mbuf *reply_mbuf; 3204 3205 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl)); 3206 mk_pass_accept_rpl(reply_mbuf, m); 3207 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 3208 m_free(m); 3209} 3210 3211static void 3212handle_syncache_event(int event, void *arg) 3213{ 3214 struct toepcb *toep = arg; 3215 3216 switch (event) { 3217 case TOE_SC_ENTRY_PRESENT: 3218 /* 3219 * entry already exists - free toepcb 3220 * and l2t 3221 */ 3222 printf("syncache entry present\n"); 3223 toepcb_release(toep); 3224 break; 3225 case TOE_SC_DROP: 3226 /* 3227 * The syncache has given up on this entry 3228 * either it timed out, or it was evicted 3229 * we need to explicitly release the tid 3230 */ 3231 printf("syncache entry dropped\n"); 3232 toepcb_release(toep); 3233 break; 3234 default: 3235 log(LOG_ERR, "unknown syncache event %d\n", event); 3236 break; 3237 } 3238} 3239 3240static void 3241syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep) 3242{ 3243 struct in_conninfo inc; 3244 struct toeopt toeo; 3245 struct tcphdr th; 3246 struct inpcb *inp; 3247 int mss, wsf, sack, ts; 3248 uint32_t rcv_isn = ntohl(req->rcv_isn); 3249 3250 bzero(&toeo, sizeof(struct toeopt)); 3251 inp = so_sotoinpcb(lso); 3252 3253 /* 3254 * Fill out information for entering us into the syncache 3255 */ 3256 bzero(&inc, sizeof(inc)); 3257 inc.inc_fport = th.th_sport = req->peer_port; 3258 inc.inc_lport = th.th_dport = req->local_port; 3259 th.th_seq = req->rcv_isn; 3260 th.th_flags = TH_SYN; 3261 3262 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; 3263 3264 inc.inc_len = 0; 3265 inc.inc_faddr.s_addr = req->peer_ip; 3266 inc.inc_laddr.s_addr = req->local_ip; 3267 3268 DPRINTF("syncache add of %d:%d %d:%d\n", 3269 ntohl(req->local_ip), ntohs(req->local_port), 3270 ntohl(req->peer_ip), 
ntohs(req->peer_port)); 3271 3272 mss = req->tcp_options.mss; 3273 wsf = req->tcp_options.wsf; 3274 ts = req->tcp_options.tstamp; 3275 sack = req->tcp_options.sack; 3276 toeo.to_mss = mss; 3277 toeo.to_wscale = wsf; 3278 toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3279 tcp_offload_syncache_add(&inc, &toeo, &th, inp, &lso, &cxgb_toe_usrreqs, 3280toep); 3281} 3282 3283 3284/* 3285 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket 3286 * lock held. Note that the sock here is a listening socket that is not owned 3287 * by the TOE. 3288 */ 3289static void 3290process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, 3291 struct listen_ctx *lctx) 3292{ 3293 int rt_flags; 3294 struct l2t_entry *e; 3295 struct iff_mac tim; 3296 struct mbuf *reply_mbuf, *ddp_mbuf = NULL; 3297 struct cpl_pass_accept_rpl *rpl; 3298 struct cpl_pass_accept_req *req = cplhdr(m); 3299 unsigned int tid = GET_TID(req); 3300 struct tom_data *d = TOM_DATA(tdev); 3301 struct t3cdev *cdev = d->cdev; 3302 struct tcpcb *tp = so_sototcpcb(so); 3303 struct toepcb *newtoep; 3304 struct rtentry *dst; 3305 struct sockaddr_in nam; 3306 struct t3c_data *td = T3C_DATA(cdev); 3307 3308 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3309 if (__predict_false(reply_mbuf == NULL)) { 3310 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3311 t3_defer_reply(m, tdev, reject_pass_request); 3312 else { 3313 cxgb_queue_tid_release(cdev, tid); 3314 m_free(m); 3315 } 3316 DPRINTF("failed to get reply_mbuf\n"); 3317 3318 goto out; 3319 } 3320 3321 if (tp->t_state != TCPS_LISTEN) { 3322 DPRINTF("socket not in listen state\n"); 3323 3324 goto reject; 3325 } 3326 3327 tim.mac_addr = req->dst_mac; 3328 tim.vlan_tag = ntohs(req->vlan_tag); 3329 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { 3330 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n"); 3331 goto reject; 3332 } 3333 3334#ifdef notyet 3335 /* 3336 * XXX do 
route lookup to confirm that we're still listening on this 3337 * address 3338 */ 3339 if (ip_route_input(skb, req->local_ip, req->peer_ip, 3340 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev)) 3341 goto reject; 3342 rt_flags = ((struct rtable *)skb->dst)->rt_flags & 3343 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); 3344 dst_release(skb->dst); // done with the input route, release it 3345 skb->dst = NULL; 3346 3347 if ((rt_flags & RTF_LOCAL) == 0) 3348 goto reject; 3349#endif 3350 /* 3351 * XXX 3352 */ 3353 rt_flags = RTF_LOCAL; 3354 if ((rt_flags & RTF_LOCAL) == 0) 3355 goto reject; 3356 3357 /* 3358 * Calculate values and add to syncache 3359 */ 3360 3361 newtoep = toepcb_alloc(); 3362 if (newtoep == NULL) 3363 goto reject; 3364 3365 bzero(&nam, sizeof(struct sockaddr_in)); 3366 3367 nam.sin_len = sizeof(struct sockaddr_in); 3368 nam.sin_family = AF_INET; 3369 nam.sin_addr.s_addr =req->peer_ip; 3370 dst = rtalloc2((struct sockaddr *)&nam, 1, 0); 3371 3372 if (dst == NULL) { 3373 printf("failed to find route\n"); 3374 goto reject; 3375 } 3376 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev, 3377 (struct sockaddr *)&nam); 3378 if (e == NULL) { 3379 DPRINTF("failed to get l2t\n"); 3380 } 3381 /* 3382 * Point to our listen socket until accept 3383 */ 3384 newtoep->tp_tp = tp; 3385 newtoep->tp_flags = TP_SYN_RCVD; 3386 newtoep->tp_tid = tid; 3387 newtoep->tp_toedev = tdev; 3388 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3389 3390 cxgb_insert_tid(cdev, d->client, newtoep, tid); 3391 so_lock(so); 3392 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); 3393 so_unlock(so); 3394 3395 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) && 3396 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; 3397 3398 if (newtoep->tp_ulp_mode) { 3399 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3400 3401 if (ddp_mbuf == NULL) 3402 newtoep->tp_ulp_mode = 0; 3403 } 3404 3405 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d", 3406 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); 3407 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); 3408 /* 3409 * XXX workaround for lack of syncache drop 3410 */ 3411 toepcb_hold(newtoep); 3412 syncache_add_accept_req(req, so, newtoep); 3413 3414 rpl = cplhdr(reply_mbuf); 3415 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl); 3416 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3417 rpl->wr.wr_lo = 0; 3418 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3419 rpl->opt2 = htonl(calc_opt2(so, tdev)); 3420 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3421 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten 3422 3423 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | 3424 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); 3425 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) | 3426 CPL_PASS_OPEN_ACCEPT); 3427 3428 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status); 3429 3430 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep)); 3431 3432 l2t_send(cdev, reply_mbuf, e); 3433 m_free(m); 3434 if (newtoep->tp_ulp_mode) { 3435 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS, 3436 V_TF_DDP_OFF(1) | 3437 TP_DDP_TIMER_WORKAROUND_MASK, 3438 V_TF_DDP_OFF(1) | 3439 TP_DDP_TIMER_WORKAROUND_VAL, 1); 3440 } else 3441 DPRINTF("no DDP\n"); 3442 3443 return; 3444reject: 3445 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3446 mk_pass_accept_rpl(reply_mbuf, m); 3447 else 3448 mk_tid_release(reply_mbuf, newtoep, tid); 3449 cxgb_ofld_send(cdev, reply_mbuf); 3450 m_free(m); 3451out: 3452#if 0 3453 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3454#else 3455 return; 3456#endif 3457} 3458 3459/* 3460 * Handle 
a CPL_PASS_ACCEPT_REQ message. 3461 */ 3462static int 3463do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3464{ 3465 struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx; 3466 struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */ 3467 struct tom_data *d = listen_ctx->tom_data; 3468 3469#if VALIDATE_TID 3470 struct cpl_pass_accept_req *req = cplhdr(m); 3471 unsigned int tid = GET_TID(req); 3472 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps; 3473 3474 if (unlikely(!lsk)) { 3475 printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n", 3476 cdev->name, 3477 (unsigned long)((union listen_entry *)ctx - 3478 t->stid_tab)); 3479 return CPL_RET_BUF_DONE; 3480 } 3481 if (unlikely(tid >= t->ntids)) { 3482 printk(KERN_ERR "%s: passive open TID %u too large\n", 3483 cdev->name, tid); 3484 return CPL_RET_BUF_DONE; 3485 } 3486 /* 3487 * For T3A the current user of the TID may have closed but its last 3488 * message(s) may have been backlogged so the TID appears to be still 3489 * in use. Just take the TID away, the connection can close at its 3490 * own leisure. For T3B this situation is a bug. 3491 */ 3492 if (!valid_new_tid(t, tid) && 3493 cdev->type != T3A) { 3494 printk(KERN_ERR "%s: passive open uses existing TID %u\n", 3495 cdev->name, tid); 3496 return CPL_RET_BUF_DONE; 3497 } 3498#endif 3499 3500 process_pass_accept_req(lso, m, &d->tdev, listen_ctx); 3501 return (0); 3502} 3503 3504/* 3505 * Called when a connection is established to translate the TCP options 3506 * reported by HW to FreeBSD's native format. 3507 */ 3508static void 3509assign_rxopt(struct socket *so, unsigned int opt) 3510{ 3511 struct tcpcb *tp = so_sototcpcb(so); 3512 struct toepcb *toep = tp->t_toe; 3513 const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep)); 3514 3515 inp_lock_assert(tp->t_inpcb); 3516 3517 toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3518 tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? 
TF_RCVD_TSTMP : 0; 3519 tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0; 3520 tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0; 3521 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 3522 (TF_RCVD_SCALE|TF_REQ_SCALE)) 3523 tp->rcv_scale = tp->request_r_scale; 3524} 3525 3526/* 3527 * Completes some final bits of initialization for just established connections 3528 * and changes their state to TCP_ESTABLISHED. 3529 * 3530 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1. 3531 */ 3532static void 3533make_established(struct socket *so, u32 snd_isn, unsigned int opt) 3534{ 3535 struct tcpcb *tp = so_sototcpcb(so); 3536 struct toepcb *toep = tp->t_toe; 3537 3538 toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn; 3539 assign_rxopt(so, opt); 3540 3541 /* 3542 *XXXXXXXXXXX 3543 * 3544 */ 3545#ifdef notyet 3546 so->so_proto->pr_ctloutput = t3_ctloutput; 3547#endif 3548 3549#if 0 3550 inet_sk(sk)->id = tp->write_seq ^ jiffies; 3551#endif 3552 /* 3553 * XXX not clear what rcv_wup maps to 3554 */ 3555 /* 3556 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't 3557 * pass through opt0. 
3558 */ 3559 if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10)) 3560 toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10); 3561 3562 dump_toepcb(toep); 3563 3564#ifdef notyet 3565/* 3566 * no clean interface for marking ARP up to date 3567 */ 3568 dst_confirm(sk->sk_dst_cache); 3569#endif 3570 tp->t_starttime = ticks; 3571 tp->t_state = TCPS_ESTABLISHED; 3572 soisconnected(so); 3573} 3574 3575static int 3576syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep) 3577{ 3578 3579 struct in_conninfo inc; 3580 struct toeopt toeo; 3581 struct tcphdr th; 3582 int mss, wsf, sack, ts; 3583 struct mbuf *m = NULL; 3584 const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev); 3585 unsigned int opt; 3586 3587#ifdef MAC 3588#error "no MAC support" 3589#endif 3590 3591 opt = ntohs(req->tcp_opt); 3592 3593 bzero(&toeo, sizeof(struct toeopt)); 3594 3595 /* 3596 * Fill out information for entering us into the syncache 3597 */ 3598 bzero(&inc, sizeof(inc)); 3599 inc.inc_fport = th.th_sport = req->peer_port; 3600 inc.inc_lport = th.th_dport = req->local_port; 3601 th.th_seq = req->rcv_isn; 3602 th.th_flags = TH_ACK; 3603 3604 inc.inc_len = 0; 3605 inc.inc_faddr.s_addr = req->peer_ip; 3606 inc.inc_laddr.s_addr = req->local_ip; 3607 3608 mss = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3609 wsf = G_TCPOPT_WSCALE_OK(opt); 3610 ts = G_TCPOPT_TSTAMP(opt); 3611 sack = G_TCPOPT_SACK(opt); 3612 3613 toeo.to_mss = mss; 3614 toeo.to_wscale = G_TCPOPT_SND_WSCALE(opt); 3615 toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3616 3617 DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n", 3618 ntohl(req->local_ip), ntohs(req->local_port), 3619 ntohl(req->peer_ip), ntohs(req->peer_port), 3620 mss, wsf, ts, sack); 3621 return tcp_offload_syncache_expand(&inc, &toeo, &th, so, m); 3622} 3623 3624 3625/* 3626 * Process a CPL_PASS_ESTABLISH message. 
 XXX a lot of the locking doesn't work
 * if we are in TCP_SYN_RECV due to crossed SYNs
 */
static int
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_pass_establish *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so, *lso;
	struct t3c_data *td = T3C_DATA(cdev);
	struct sockbuf *snd, *rcv;

	// Complete socket initialization now that we have the SND_ISN

	struct toedev *tdev;


	tdev = toep->tp_toedev;

	inp_wlock(tp->t_inpcb);

	/*
	 *
	 * XXX need to add reference while we're manipulating
	 */
	/* 'so' is still the listening socket at this point. */
	so = lso = inp_inpcbtosocket(tp->t_inpcb);

	inp_wunlock(tp->t_inpcb);

	/* Remove ourselves from the listener's SYN queue. */
	so_lock(so);
	LIST_REMOVE(toep, synq_entry);
	so_unlock(so);

	if (!syncache_expand_establish_req(req, &so, toep)) {
		/*
		 * No entry
		 */
		CXGB_UNIMPLEMENTED();
	}
	if (so == NULL) {
		/*
		 * Couldn't create the socket
		 */
		CXGB_UNIMPLEMENTED();
	}

	/* From here on 'so'/'tp' refer to the newly created connection. */
	tp = so_sototcpcb(so);
	inp_wlock(tp->t_inpcb);

	snd = so_sockbuf_snd(so);
	rcv = so_sockbuf_rcv(so);

	/* The TOE owns segmentation; don't let sockbuf code coalesce mbufs. */
	snd->sb_flags |= SB_NOCOALESCE;
	rcv->sb_flags |= SB_NOCOALESCE;

	toep->tp_tp = tp;
	toep->tp_flags = 0;
	tp->t_toe = toep;
	reset_wr_list(toep);
	tp->rcv_wnd = select_rcv_wnd(tdev, so);
	tp->rcv_nxt = toep->tp_copied_seq;
	install_offload_ops(so);

	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
	toep->tp_qset_idx = 0;
	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);

	/*
	 * XXX Cancel any keep alive timer
	 */

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * XXX workaround for lack of syncache drop
	 */
	/* Drops the extra hold taken in process_pass_accept_req(). */
	toepcb_release(toep);
	inp_wunlock(tp->t_inpcb);

	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
#ifdef notyet
	/*
	 * XXX not sure how these checks map to us
	 */
	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
		sk->sk_state_change(sk);
		sk_wake_async(so, 0, POLL_OUT);
	}
	/*
	 * The state for the new connection is now up to date.
	 * Next check if we should add the connection to the parent's
	 * accept queue.  When the parent closes it resets connections
	 * on its SYN queue, so check if we are being reset.  If so we
	 * don't need to do anything more, the coming ABORT_RPL will
	 * destroy this socket.  Otherwise move the connection to the
	 * accept queue.
	 *
	 * Note that we reset the synq before closing the server so if
	 * we are not being reset the stid is still open.
	 */
	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
		__kfree_skb(skb);
		goto unlock;
	}
#endif
	m_free(m);

	return (0);
}

/*
 * Fill in the right TID for CPL messages waiting in the out-of-order queue
 * and send them to the TOE.
 */
static void
fixup_and_send_ofo(struct toepcb *toep)
{
	struct mbuf *m;
	struct toedev *tdev = toep->tp_toedev;
	struct tcpcb *tp = toep->tp_tp;
	unsigned int tid = toep->tp_tid;

	log(LOG_NOTICE, "fixup_and_send_ofo\n");

	inp_lock_assert(tp->t_inpcb);
	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
		/*
		 * A variety of messages can be waiting but the fields we'll
		 * be touching are common to all so any message type will do.
		 */
		struct cpl_close_con_req *p = cplhdr(m);

		p->wr.wr_lo = htonl(V_WR_TID(tid));
		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	}
}

/*
 * Updates socket state from an active establish CPL message.  Runs with the
 * socket lock held.  Consumes 'm'.
 */
static void
socket_act_establish(struct socket *so, struct mbuf *m)
{
	struct cpl_act_establish *req = cplhdr(m);
	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
		    toep->tp_tid, tp->t_state);

	tp->ts_recent_age = ticks;
	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * Now that we finally have a TID send any CPL messages that we had to
	 * defer for lack of a TID.
	 */
	if (mbufq_len(&toep->out_of_order_queue))
		fixup_and_send_ofo(toep);

	if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
		/*
		 * XXX does this even make sense?
		 */
		so_sorwakeup(so);
	}
	m_free(m);
#ifdef notyet
/*
 * XXX assume no write requests permitted while socket connection is
 * incomplete
 */
	/*
	 * Currently the send queue must be empty at this point because the
	 * socket layer does not send anything before a connection is
	 * established.  To be future proof though we handle the possibility
	 * that there are pending buffers to send (either TX_DATA or
	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
	 * buffers according to the just learned write_seq, and then we send
	 * them on their way.
	 */
	fixup_pending_writeq_buffers(sk);
	if (t3_push_frames(so, 1))
		sk->sk_write_space(sk);
#endif

	toep->tp_state = tp->t_state;
	KMOD_TCPSTAT_INC(tcps_connects);

}

/*
 * Process a CPL_ACT_ESTABLISH message.
 */
static int
do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_act_establish *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct toedev *tdev;
	struct tom_data *d;

	if (tp == NULL) {
		/* Connection already went away; just free the atid. */
		free_atid(cdev, atid);
		return (0);
	}
	inp_wlock(tp->t_inpcb);

	/*
	 * XXX
	 */
	so = inp_inpcbtosocket(tp->t_inpcb);
	tdev = toep->tp_toedev; /* blow up here if link was down */
	d = TOM_DATA(tdev);

	/*
	 * It's OK if the TID is currently in use, the owning socket may have
	 * backlogged its last CPL message(s).  Just take it away.
	 */
	toep->tp_tid = tid;
	toep->tp_tp = tp;
	so_insert_tid(d, toep, tid);
	free_atid(cdev, atid);
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));

	socket_act_establish(so, m);
	inp_wunlock(tp->t_inpcb);
	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);

	return (0);
}

/*
 * Process an acknowledgment of WR completion.  Advance snd_una and send the
 * next batch of work requests from the write queue.
3877 */ 3878static void 3879wr_ack(struct toepcb *toep, struct mbuf *m) 3880{ 3881 struct tcpcb *tp = toep->tp_tp; 3882 struct cpl_wr_ack *hdr = cplhdr(m); 3883 struct socket *so; 3884 unsigned int credits = ntohs(hdr->credits); 3885 u32 snd_una = ntohl(hdr->snd_una); 3886 int bytes = 0; 3887 struct sockbuf *snd; 3888 3889 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3890 3891 inp_wlock(tp->t_inpcb); 3892 so = inp_inpcbtosocket(tp->t_inpcb); 3893 toep->tp_wr_avail += credits; 3894 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3895 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3896 3897 while (credits) { 3898 struct mbuf *p = peek_wr(toep); 3899 3900 if (__predict_false(!p)) { 3901 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3902 "nothing pending, state %u wr_avail=%u\n", 3903 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3904 break; 3905 } 3906 CTR2(KTR_TOM, 3907 "wr_ack: p->credits=%d p->bytes=%d", 3908 p->m_pkthdr.csum_data, p->m_pkthdr.len); 3909 KASSERT(p->m_pkthdr.csum_data != 0, 3910 ("empty request still on list")); 3911 3912 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3913 3914#if DEBUG_WR > 1 3915 struct tx_data_wr *w = cplhdr(p); 3916 log(LOG_ERR, 3917 "TID %u got %u WR credits, need %u, len %u, " 3918 "main body %u, frags %u, seq # %u, ACK una %u," 3919 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3920 toep->tp_tid, credits, p->csum, p->len, 3921 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3922 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3923 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3924#endif 3925 p->m_pkthdr.csum_data -= credits; 3926 break; 3927 } else { 3928 dequeue_wr(toep); 3929 credits -= p->m_pkthdr.csum_data; 3930 bytes += p->m_pkthdr.len; 3931 CTR3(KTR_TOM, 3932 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3933 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3934 3935 m_free(p); 3936 } 3937 } 3938 3939#if DEBUG_WR 3940 
check_wr_invariants(tp); 3941#endif 3942 3943 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3944#if VALIDATE_SEQ 3945 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3946 3947 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3948 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3949 toep->tp_tid, tp->snd_una); 3950#endif 3951 goto out_free; 3952 } 3953 3954 if (tp->snd_una != snd_una) { 3955 tp->snd_una = snd_una; 3956 tp->ts_recent_age = ticks; 3957#ifdef notyet 3958 /* 3959 * Keep ARP entry "minty fresh" 3960 */ 3961 dst_confirm(sk->sk_dst_cache); 3962#endif 3963 if (tp->snd_una == tp->snd_nxt) 3964 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3965 } 3966 3967 snd = so_sockbuf_snd(so); 3968 if (bytes) { 3969 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3970 snd = so_sockbuf_snd(so); 3971 sockbuf_lock(snd); 3972 sbdrop_locked(snd, bytes); 3973 so_sowwakeup_locked(so); 3974 } 3975 3976 if (snd->sb_sndptroff < snd->sb_cc) 3977 t3_push_frames(so, 0); 3978 3979out_free: 3980 inp_wunlock(tp->t_inpcb); 3981 m_free(m); 3982} 3983 3984/* 3985 * Handler for TX_DATA_ACK CPL messages. 3986 */ 3987static int 3988do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3989{ 3990 struct toepcb *toep = (struct toepcb *)ctx; 3991 3992 VALIDATE_SOCK(so); 3993 3994 wr_ack(toep, m); 3995 return 0; 3996} 3997 3998/* 3999 * Handler for TRACE_PKT CPL messages. Just sink these packets. 4000 */ 4001static int 4002do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 4003{ 4004 m_freem(m); 4005 return 0; 4006} 4007 4008/* 4009 * Reset a connection that is on a listener's SYN queue or accept queue, 4010 * i.e., one that has not had a struct socket associated with it. 4011 * Must be called from process context. 4012 * 4013 * Modeled after code in inet_csk_listen_stop(). 
4014 */ 4015static void 4016t3_reset_listen_child(struct socket *child) 4017{ 4018 struct tcpcb *tp = so_sototcpcb(child); 4019 4020 t3_send_reset(tp->t_toe); 4021} 4022 4023 4024static void 4025t3_child_disconnect(struct socket *so, void *arg) 4026{ 4027 struct tcpcb *tp = so_sototcpcb(so); 4028 4029 if (tp->t_flags & TF_TOE) { 4030 inp_wlock(tp->t_inpcb); 4031 t3_reset_listen_child(so); 4032 inp_wunlock(tp->t_inpcb); 4033 } 4034} 4035 4036/* 4037 * Disconnect offloaded established but not yet accepted connections sitting 4038 * on a server's accept_queue. We just send an ABORT_REQ at this point and 4039 * finish off the disconnect later as we may need to wait for the ABORT_RPL. 4040 */ 4041void 4042t3_disconnect_acceptq(struct socket *listen_so) 4043{ 4044 4045 so_lock(listen_so); 4046 so_listeners_apply_all(listen_so, t3_child_disconnect, NULL); 4047 so_unlock(listen_so); 4048} 4049 4050/* 4051 * Reset offloaded connections sitting on a server's syn queue. As above 4052 * we send ABORT_REQ and finish off when we get ABORT_RPL. 
 */

void
t3_reset_synq(struct listen_ctx *lctx)
{
	struct toepcb *toep;

	so_lock(lctx->lso);
	while (!LIST_EMPTY(&lctx->synq_head)) {
		toep = LIST_FIRST(&lctx->synq_head);
		LIST_REMOVE(toep, synq_entry);
		/* Detach from the (now dead) tcpcb before resetting. */
		toep->tp_tp = NULL;
		t3_send_reset(toep);
		cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
		toepcb_release(toep);
	}
	so_unlock(lctx->lso);
}


/*
 * Write 'nppods' page pods for DDP tag 'tag' into adapter memory, describing
 * the pages of gather list 'gl'.  Each pod is sent as a ULP_MEM_WRITE work
 * request.  The last NUM_SENTINEL_PPODS pods are written invalid as HW
 * sentinels.  Always returns 0.
 */
int
t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
		   unsigned int nppods, unsigned int tag, unsigned int maxoff,
		   unsigned int pg_off, unsigned int color)
{
	unsigned int i, j, pidx;
	struct pagepod *p;
	struct mbuf *m;
	struct ulp_mem_io *req;
	unsigned int tid = toep->tp_tid;
	const struct tom_data *td = TOM_DATA(toep->tp_toedev);
	unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;

	CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
	    gl, nppods, tag, maxoff, pg_off, color);

	for (i = 0; i < nppods; ++i) {
		m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
		m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
		req = mtod(m, struct ulp_mem_io *);
		m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
		req->wr.wr_lo = 0;
		/* ULP memory addresses are in 32-byte units (>> 5). */
		req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
					   V_ULPTX_CMD(ULP_MEM_WRITE));
		req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
				 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));

		p = (struct pagepod *)(req + 1);
		if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
			p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
			p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
						     V_PPOD_COLOR(color));
			p->pp_max_offset = htonl(maxoff);
			p->pp_page_offset = htonl(pg_off);
			p->pp_rsvd = 0;
			/*
			 * Each pod holds 5 page addresses but advances by 4
			 * pages: consecutive pods deliberately overlap by one
			 * entry.
			 */
			for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
				p->pp_addr[j] = pidx < gl->dgl_nelem ?
				    htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
		} else
			p->pp_vld_tid = 0;   /* mark sentinel page pods invalid */
		send_or_defer(toep, m, 0);
		ppod_addr += PPOD_SIZE;
	}
	return (0);
}

/*
 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_cpl_barrier_ulp(struct cpl_barrier *b)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
	b->opcode = CPL_BARRIER;
}

/*
 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	txpkt = (struct ulp_txpkt *)req;	/* NOTE(review): redundant re-assignment, harmless */
	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
	req->cpuno = htons(cpuno);
}

/*
 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
 */
static inline void
mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
		     unsigned int word, uint64_t mask, uint64_t val)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
	    tid, word, mask, val);

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
	req->reply = V_NO_REPLY(1);
	req->cpu_idx = 0;
	req->word = htons(word);
	req->mask = htobe64(mask);
	req->val = htobe64(val);
}

/*
 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
 */
static void
mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
    unsigned int tid, unsigned int credits)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
				 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
				 V_RX_CREDITS(credits));
}

/*
 * Cancel HW DDP buffer 'bufidx' (0 or 1) by sending a compound work request
 * (barrier / set-tcb-field / get-tcb / barrier).  The GET_TCB reply lets the
 * caller learn how much data landed in the buffer before cancellation.
 */
void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_barrier *lock;
	struct cpl_set_tcb_field *req;
	struct cpl_get_tcb *getreq;
	struct ddp_state *p = &toep->tp_ddp_state;

#if 0
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
#endif
	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
		sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	/* Leading barrier keeps the compound atomic with respect to TP. */
	lock = (struct cpl_barrier *)(wr + 1);
	mk_cpl_barrier_ulp(lock);

	req = (struct cpl_set_tcb_field *)(lock + 1);

	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

	/* Hmmm, not sure if this actually a good thing: reactivating
	 * the other buffer might be an issue if it has been completed
	 * already.  However, that is unlikely, since the fact that the UBUF
	 * is not completed indicates that there is no outstanding data.
	 */
	if (bufidx == 0)
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF0_VALID(1),
				     V_TF_DDP_ACTIVE_BUF(1));
	else
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF1_VALID(1), 0);

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	/* Trailing barrier closes the atomic sequence. */
	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/* Keep track of the number of outstanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
		  "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @sk: the socket associated with the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.  Read the TCB to
 * determine how much data was written into the buffer before the overlay
 * took place.
 */
void
t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
		  unsigned int tag1, unsigned int len)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_get_tcb *getreq;
	struct cpl_set_tcb_field *req;
	struct ddp_state *p = &toep->tp_ddp_state;

	/* NOTE(review): trace string says "t3_setup_ppods" but this is
	 * t3_overlay_ddpbuf; left unchanged as it is a runtime string. */
	CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
	    bufidx, tag0, tag1, len);
#if 0
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
#endif
	wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	m->m_pkthdr.len = m->m_len = wrlen;
	bzero(wr, wrlen);


	/* Set the ATOMIC flag to make sure that TP processes the following
	 * CPLs in an atomic manner and no wire segments can be interleaved.
	 */
	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
	req = (struct cpl_set_tcb_field *)(wr + 1);
	/* Both tags live in one 64-bit TCB word: buf0 low, buf1 high. */
	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
			     V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
			     V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
			     V_TCB_RX_DDP_BUF0_TAG(tag0) |
			     V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
	req++;
	if (bufidx == 0) {
		 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
			    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
			    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
		 req++;
		 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
			    V_TF_DDP_PUSH_DISABLE_0(1) |
			    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
			    V_TF_DDP_PUSH_DISABLE_0(0) |
			    V_TF_DDP_BUF0_VALID(1));
	} else {
		 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
			    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
			    V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
		 req++;
		 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
			    V_TF_DDP_PUSH_DISABLE_1(1) |
			    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
			    V_TF_DDP_PUSH_DISABLE_1(0) |
			    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
	}

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	/* Keep track of the number of outstanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE4(TIDTB(sk),
		  "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
		  "len %d",
		  bufidx, tag0, tag1, len);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/*
 * Sends a compound WR containing all the CPL messages needed to program the
 * two HW DDP buffers, namely optionally setting up the length and offset of
 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
 */
void
t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
		 unsigned int len1, unsigned int offset1,
		 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_set_tcb_field *req;

	CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
	    len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);

#if 0
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
#endif
	/* WR size depends on which optional sub-messages are included. */
	wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
		(len1 ? sizeof(*req) : 0) +
		(modulate ? sizeof(struct cpl_rx_data_ack) : 0);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	req = (struct cpl_set_tcb_field *)(wr + 1);
	if (len0) {                  /* program buffer 0 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
			V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
			V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
			V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
			V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
		req++;
	}
	if (len1) {                  /* program buffer 1 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
			V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
			V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
			V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
			V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
		req++;
	}

	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
			     ddp_flags);

	if (modulate) {
		/* Return accumulated RX credits along with the setup. */
		mk_rx_data_ack_ulp(toep,
		    (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
		    toep->tp_copied_seq - toep->tp_rcv_wup);
		toep->tp_rcv_wup = toep->tp_copied_seq;
	}

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
		  "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
		  "modulate %d",
		  len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
		  modulate);
#endif

	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/*
 * Initialize the per-WR-count mbuf table and the global WR length, based on
 * the adapter's work-request size (in flits).  Idempotent.
 */
void
t3_init_wr_tab(unsigned int wr_len)
{
	int i;

	if (mbuf_wrs[1])     /* already initialized */
		return;

	for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
		/* SGL flit count for i buffers, plus 3 flits of WR header. */
		int sgl_len = (3 * i) / 2 + (i & 1);

		sgl_len += 3;
		mbuf_wrs[i] = sgl_len <= wr_len ?
			1 : 1 + (sgl_len - 2) / (wr_len - 1);
	}

	wrlen = wr_len * 8;
}

/*
 * Register all CPL message handlers with the TOM dispatch table.
 * Always returns 0.
 */
int
t3_init_cpl_io(void)
{
#ifdef notyet
	tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
	if (!tcphdr_skb) {
		log(LOG_ERR,
		       "Chelsio TCP offload: can't allocate sk_buff\n");
		return -1;
	}
	skb_put(tcphdr_skb, sizeof(struct tcphdr));
	tcphdr_skb->h.raw = tcphdr_skb->data;
	memset(tcphdr_skb->data, 0, tcphdr_skb->len);
#endif

	t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
	t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
	t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
	t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
	t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
	t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
	t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
	t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
	t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
	return (0);
}