cxgb_cpl_io.c revision 181011
1/************************************************************************** 2 3Copyright (c) 2007-2008, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 181011 2008-07-30 20:08:34Z kmacy $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/sockbuf.h> 43#include <sys/sockstate.h> 44#include <sys/sockopt.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/syslog.h> 48#include <sys/protosw.h> 49#include <sys/priv.h> 50 51#include <net/if.h> 52#include <net/route.h> 53 54#include <netinet/in.h> 55#include <netinet/in_pcb.h> 56#include <netinet/in_systm.h> 57#include <netinet/in_var.h> 58 59 60#include <dev/cxgb/cxgb_osdep.h> 61#include <dev/cxgb/sys/mbufq.h> 62 63#include <netinet/ip.h> 64#include <netinet/tcp_var.h> 65#include <netinet/tcp_fsm.h> 66#include <netinet/tcp_offload.h> 67#include <netinet/tcp_seq.h> 68#include <netinet/tcp_syncache.h> 69#include <netinet/tcp_timer.h> 70#include <net/route.h> 71 72#include <dev/cxgb/t3cdev.h> 73#include <dev/cxgb/common/cxgb_firmware_exports.h> 74#include <dev/cxgb/common/cxgb_t3_cpl.h> 75#include <dev/cxgb/common/cxgb_tcb.h> 76#include <dev/cxgb/common/cxgb_ctl_defs.h> 77#include <dev/cxgb/cxgb_offload.h> 78#include <vm/vm.h> 79#include <vm/pmap.h> 80#include <machine/bus.h> 81#include <dev/cxgb/sys/mvec.h> 82#include <dev/cxgb/ulp/toecore/cxgb_toedev.h> 83#include <dev/cxgb/ulp/tom/cxgb_defs.h> 84#include <dev/cxgb/ulp/tom/cxgb_tom.h> 85#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h> 86#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 87#include <dev/cxgb/ulp/tom/cxgb_tcp.h> 88 89#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h> 90 91/* 92 * For ULP connections HW may add headers, e.g., for digests, that aren't part 93 * of the messages sent by the host but that are part of the TCP payload and 94 * therefore 
consume TCP sequence space. Tx connection parameters that 95 * operate in TCP sequence space are affected by the HW additions and need to 96 * compensate for them to accurately track TCP sequence numbers. This array 97 * contains the compensating extra lengths for ULP packets. It is indexed by 98 * a packet's ULP submode. 99 */ 100const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 101 102#ifdef notyet 103/* 104 * This sk_buff holds a fake header-only TCP segment that we use whenever we 105 * need to exploit SW TCP functionality that expects TCP headers, such as 106 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple 107 * CPUs without locking. 108 */ 109static struct mbuf *tcphdr_mbuf __read_mostly; 110#endif 111 112/* 113 * Size of WRs in bytes. Note that we assume all devices we are handling have 114 * the same WR size. 115 */ 116static unsigned int wrlen __read_mostly; 117 118/* 119 * The number of WRs needed for an skb depends on the number of page fragments 120 * in the skb and whether it has any payload in its main body. This maps the 121 * length of the gather list represented by an skb into the # of necessary WRs. 122 */ 123static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; 124 125/* 126 * Max receive window supported by HW in bytes. Only a small part of it can 127 * be set through option0, the rest needs to be set through RX_DATA_ACK. 128 */ 129#define MAX_RCV_WND ((1U << 27) - 1) 130 131/* 132 * Min receive window. We want it to be large enough to accommodate receive 133 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
134 */ 135#define MIN_RCV_WND (24 * 1024U) 136#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS) 137 138#define VALIDATE_SEQ 0 139#define VALIDATE_SOCK(so) 140#define DEBUG_WR 0 141 142#define TCP_TIMEWAIT 1 143#define TCP_CLOSE 2 144#define TCP_DROP 3 145 146extern int tcp_do_autorcvbuf; 147extern int tcp_do_autosndbuf; 148extern int tcp_autorcvbuf_max; 149extern int tcp_autosndbuf_max; 150 151static void t3_send_reset(struct toepcb *toep); 152static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status); 153static inline void free_atid(struct t3cdev *cdev, unsigned int tid); 154static void handle_syncache_event(int event, void *arg); 155 156static inline void 157SBAPPEND(struct sockbuf *sb, struct mbuf *n) 158{ 159 struct mbuf *m; 160 161 m = sb->sb_mb; 162 while (m) { 163 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 164 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 165 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 166 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 167 m->m_next, m->m_nextpkt, m->m_flags)); 168 m = m->m_next; 169 } 170 m = n; 171 while (m) { 172 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 173 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 174 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 175 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 176 m->m_next, m->m_nextpkt, m->m_flags)); 177 m = m->m_next; 178 } 179 KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set")); 180 sbappendstream_locked(sb, n); 181 m = sb->sb_mb; 182 183 while (m) { 184 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 185 m->m_next, m->m_nextpkt, m->m_flags)); 186 m = m->m_next; 187 } 188} 189 190static inline int 191is_t3a(const struct toedev *dev) 192{ 193 
return (dev->tod_ttid == TOE_ID_CHELSIO_T3); 194} 195 196static void 197dump_toepcb(struct toepcb *toep) 198{ 199 DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n", 200 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, 201 toep->tp_mtu_idx, toep->tp_tid); 202 203 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", 204 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 205 toep->tp_mss_clamp, toep->tp_flags); 206} 207 208#ifndef RTALLOC2_DEFINED 209static struct rtentry * 210rtalloc2(struct sockaddr *dst, int report, u_long ignflags) 211{ 212 struct rtentry *rt = NULL; 213 214 if ((rt = rtalloc1(dst, report, ignflags)) != NULL) 215 RT_UNLOCK(rt); 216 217 return (rt); 218} 219#endif 220 221/* 222 * Determine whether to send a CPL message now or defer it. A message is 223 * deferred if the connection is in SYN_SENT since we don't know the TID yet. 224 * For connections in other states the message is sent immediately. 225 * If through_l2t is set the message is subject to ARP processing, otherwise 226 * it is sent directly. 227 */ 228static inline void 229send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t) 230{ 231 struct tcpcb *tp = toep->tp_tp; 232 233 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) { 234 inp_wlock(tp->t_inpcb); 235 mbufq_tail(&toep->out_of_order_queue, m); // defer 236 inp_wunlock(tp->t_inpcb); 237 } else if (through_l2t) 238 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T 239 else 240 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly 241} 242 243static inline unsigned int 244mkprio(unsigned int cntrl, const struct toepcb *toep) 245{ 246 return (cntrl); 247} 248 249/* 250 * Populate a TID_RELEASE WR. The skb must be already propely sized. 
251 */ 252static inline void 253mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid) 254{ 255 struct cpl_tid_release *req; 256 257 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep)); 258 m->m_pkthdr.len = m->m_len = sizeof(*req); 259 req = mtod(m, struct cpl_tid_release *); 260 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 261 req->wr.wr_lo = 0; 262 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); 263} 264 265static inline void 266make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) 267{ 268 struct tcpcb *tp = so_sototcpcb(so); 269 struct toepcb *toep = tp->t_toe; 270 struct tx_data_wr *req; 271 struct sockbuf *snd; 272 273 inp_lock_assert(tp->t_inpcb); 274 snd = so_sockbuf_snd(so); 275 276 req = mtod(m, struct tx_data_wr *); 277 m->m_len = sizeof(*req); 278 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 279 req->wr_lo = htonl(V_WR_TID(toep->tp_tid)); 280 /* len includes the length of any HW ULP additions */ 281 req->len = htonl(len); 282 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx)); 283 /* V_TX_ULP_SUBMODE sets both the mode and submode */ 284 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) | 285 V_TX_URG(/* skb_urgent(skb) */ 0 ) | 286 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) && 287 (tail ? 0 : 1)))); 288 req->sndseq = htonl(tp->snd_nxt); 289 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) { 290 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 291 V_TX_CPU_IDX(toep->tp_qset)); 292 293 /* Sendbuffer is in units of 32KB. 
294 */ 295 if (tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE) 296 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15)); 297 else { 298 req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15)); 299 } 300 301 toep->tp_flags |= TP_DATASENT; 302 } 303} 304 305#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */ 306 307int 308t3_push_frames(struct socket *so, int req_completion) 309{ 310 struct tcpcb *tp = so_sototcpcb(so); 311 struct toepcb *toep = tp->t_toe; 312 313 struct mbuf *tail, *m0, *last; 314 struct t3cdev *cdev; 315 struct tom_data *d; 316 int state, bytes, count, total_bytes; 317 bus_dma_segment_t segs[TX_MAX_SEGS], *segp; 318 struct sockbuf *snd; 319 320 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { 321 DPRINTF("tcp state=%d\n", tp->t_state); 322 return (0); 323 } 324 325 state = so_state_get(so); 326 327 if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { 328 DPRINTF("disconnecting\n"); 329 330 return (0); 331 } 332 333 inp_lock_assert(tp->t_inpcb); 334 335 snd = so_sockbuf_snd(so); 336 sockbuf_lock(snd); 337 338 d = TOM_DATA(toep->tp_toedev); 339 cdev = d->cdev; 340 341 last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb; 342 343 total_bytes = 0; 344 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n", 345 toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last); 346 347 if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) { 348 KASSERT(tail, ("sbdrop error")); 349 last = tail = tail->m_next; 350 } 351 352 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { 353 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail); 354 sockbuf_unlock(snd); 355 356 return (0); 357 } 358 359 toep->tp_m_last = NULL; 360 while (toep->tp_wr_avail && (tail != NULL)) { 361 count = bytes = 0; 362 segp = segs; 363 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { 364 sockbuf_unlock(snd); 365 return (0); 366 } 367 /* 368 * If the data in tail fits as in-line, then 369 * make an immediate data wr. 
370 */ 371 if (tail->m_len <= IMM_LEN) { 372 count = 1; 373 bytes = tail->m_len; 374 last = tail; 375 tail = tail->m_next; 376 m_set_sgl(m0, NULL); 377 m_set_sgllen(m0, 0); 378 make_tx_data_wr(so, m0, bytes, tail); 379 m_append(m0, bytes, mtod(last, caddr_t)); 380 KASSERT(!m0->m_next, ("bad append")); 381 } else { 382 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) 383 && (tail != NULL) && (count < TX_MAX_SEGS-1)) { 384 bytes += tail->m_len; 385 last = tail; 386 count++; 387 /* 388 * technically an abuse to be using this for a VA 389 * but less gross than defining my own structure 390 * or calling pmap_kextract from here :-| 391 */ 392 segp->ds_addr = (bus_addr_t)tail->m_data; 393 segp->ds_len = tail->m_len; 394 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n", 395 count, mbuf_wrs[count], tail->m_data, tail->m_len); 396 segp++; 397 tail = tail->m_next; 398 } 399 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n", 400 toep->tp_wr_avail, count, mbuf_wrs[count], tail); 401 402 m_set_sgl(m0, segs); 403 m_set_sgllen(m0, count); 404 make_tx_data_wr(so, m0, bytes, tail); 405 } 406 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep)); 407 408 if (tail) { 409 snd->sb_sndptr = tail; 410 toep->tp_m_last = NULL; 411 } else 412 toep->tp_m_last = snd->sb_sndptr = last; 413 414 415 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last); 416 417 snd->sb_sndptroff += bytes; 418 total_bytes += bytes; 419 toep->tp_write_seq += bytes; 420 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d" 421 " tail=%p sndptr=%p sndptroff=%d", 422 toep->tp_wr_avail, count, mbuf_wrs[count], 423 tail, snd->sb_sndptr, snd->sb_sndptroff); 424 if (tail) 425 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d" 426 " tp_m_last=%p tailbuf=%p snd_una=0x%08x", 427 total_bytes, toep->tp_m_last, tail->m_data, 428 tp->snd_una); 429 else 430 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d" 431 " tp_m_last=%p snd_una=0x%08x", 432 total_bytes, toep->tp_m_last, tp->snd_una); 433 434 435#ifdef KTR 436{ 437 int i; 438 
439 i = 0; 440 while (i < count && m_get_sgllen(m0)) { 441 if ((count - i) >= 3) { 442 CTR6(KTR_TOM, 443 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx" 444 " len=%d pa=0x%zx len=%d", 445 segs[i].ds_addr, segs[i].ds_len, 446 segs[i + 1].ds_addr, segs[i + 1].ds_len, 447 segs[i + 2].ds_addr, segs[i + 2].ds_len); 448 i += 3; 449 } else if ((count - i) == 2) { 450 CTR4(KTR_TOM, 451 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx" 452 " len=%d", 453 segs[i].ds_addr, segs[i].ds_len, 454 segs[i + 1].ds_addr, segs[i + 1].ds_len); 455 i += 2; 456 } else { 457 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d", 458 segs[i].ds_addr, segs[i].ds_len); 459 i++; 460 } 461 462 } 463} 464#endif 465 /* 466 * remember credits used 467 */ 468 m0->m_pkthdr.csum_data = mbuf_wrs[count]; 469 m0->m_pkthdr.len = bytes; 470 toep->tp_wr_avail -= mbuf_wrs[count]; 471 toep->tp_wr_unacked += mbuf_wrs[count]; 472 473 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) || 474 toep->tp_wr_unacked >= toep->tp_wr_max / 2) { 475 struct work_request_hdr *wr = cplhdr(m0); 476 477 wr->wr_hi |= htonl(F_WR_COMPL); 478 toep->tp_wr_unacked = 0; 479 } 480 KASSERT((m0->m_pkthdr.csum_data > 0) && 481 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d", 482 m0->m_pkthdr.csum_data)); 483 m0->m_type = MT_DONTFREE; 484 enqueue_wr(toep, m0); 485 DPRINTF("sending offload tx with %d bytes in %d segments\n", 486 bytes, count); 487 l2t_send(cdev, m0, toep->tp_l2t); 488 } 489 sockbuf_unlock(snd); 490 return (total_bytes); 491} 492 493/* 494 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail 495 * under any circumstances. We take the easy way out and always queue the 496 * message to the write_queue. We can optimize the case where the queue is 497 * already empty though the optimization is probably not worth it. 
498 */ 499static void 500close_conn(struct socket *so) 501{ 502 struct mbuf *m; 503 struct cpl_close_con_req *req; 504 struct tom_data *d; 505 struct inpcb *inp = so_sotoinpcb(so); 506 struct tcpcb *tp; 507 struct toepcb *toep; 508 unsigned int tid; 509 510 511 inp_wlock(inp); 512 tp = so_sototcpcb(so); 513 toep = tp->t_toe; 514 515 if (tp->t_state != TCPS_SYN_SENT) 516 t3_push_frames(so, 1); 517 518 if (toep->tp_flags & TP_FIN_SENT) { 519 inp_wunlock(inp); 520 return; 521 } 522 523 tid = toep->tp_tid; 524 525 d = TOM_DATA(toep->tp_toedev); 526 527 m = m_gethdr_nofail(sizeof(*req)); 528 m_set_priority(m, CPL_PRIORITY_DATA); 529 m_set_sgl(m, NULL); 530 m_set_sgllen(m, 0); 531 532 toep->tp_flags |= TP_FIN_SENT; 533 req = mtod(m, struct cpl_close_con_req *); 534 535 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); 536 req->wr.wr_lo = htonl(V_WR_TID(tid)); 537 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 538 req->rsvd = 0; 539 inp_wunlock(inp); 540 /* 541 * XXX - need to defer shutdown while there is still data in the queue 542 * 543 */ 544 CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid); 545 cxgb_ofld_send(d->cdev, m); 546 547} 548 549/* 550 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant 551 * and send it along. 552 */ 553static void 554abort_arp_failure(struct t3cdev *cdev, struct mbuf *m) 555{ 556 struct cpl_abort_req *req = cplhdr(m); 557 558 req->cmd = CPL_ABORT_NO_RST; 559 cxgb_ofld_send(cdev, m); 560} 561 562/* 563 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are 564 * permitted to return without sending the message in case we cannot allocate 565 * an sk_buff. Returns the number of credits sent. 
566 */ 567uint32_t 568t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail) 569{ 570 struct mbuf *m; 571 struct cpl_rx_data_ack *req; 572 struct toepcb *toep = tp->t_toe; 573 struct toedev *tdev = toep->tp_toedev; 574 575 m = m_gethdr_nofail(sizeof(*req)); 576 577 DPRINTF("returning %u credits to HW\n", credits); 578 579 req = mtod(m, struct cpl_rx_data_ack *); 580 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 581 req->wr.wr_lo = 0; 582 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 583 req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); 584 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep)); 585 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 586 return (credits); 587} 588 589/* 590 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled. 591 * This is only used in DDP mode, so we take the opportunity to also set the 592 * DACK mode and flush any Rx credits. 593 */ 594void 595t3_send_rx_modulate(struct toepcb *toep) 596{ 597 struct mbuf *m; 598 struct cpl_rx_data_ack *req; 599 600 m = m_gethdr_nofail(sizeof(*req)); 601 602 req = mtod(m, struct cpl_rx_data_ack *); 603 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 604 req->wr.wr_lo = 0; 605 m->m_pkthdr.len = m->m_len = sizeof(*req); 606 607 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 608 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 609 V_RX_DACK_MODE(1) | 610 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup)); 611 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 612 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 613 toep->tp_rcv_wup = toep->tp_copied_seq; 614} 615 616/* 617 * Handle receipt of an urgent pointer. 
618 */ 619static void 620handle_urg_ptr(struct socket *so, uint32_t urg_seq) 621{ 622#ifdef URGENT_DATA_SUPPORTED 623 struct tcpcb *tp = so_sototcpcb(so); 624 625 urg_seq--; /* initially points past the urgent data, per BSD */ 626 627 if (tp->urg_data && !after(urg_seq, tp->urg_seq)) 628 return; /* duplicate pointer */ 629 sk_send_sigurg(sk); 630 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 631 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) { 632 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 633 634 tp->copied_seq++; 635 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len) 636 tom_eat_skb(sk, skb, 0); 637 } 638 tp->urg_data = TCP_URG_NOTYET; 639 tp->urg_seq = urg_seq; 640#endif 641} 642 643/* 644 * Returns true if a socket cannot accept new Rx data. 645 */ 646static inline int 647so_no_receive(const struct socket *so) 648{ 649 return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING)); 650} 651 652/* 653 * Process an urgent data notification. 654 */ 655static void 656rx_urg_notify(struct toepcb *toep, struct mbuf *m) 657{ 658 struct cpl_rx_urg_notify *hdr = cplhdr(m); 659 struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); 660 661 VALIDATE_SOCK(so); 662 663 if (!so_no_receive(so)) 664 handle_urg_ptr(so, ntohl(hdr->seq)); 665 666 m_freem(m); 667} 668 669/* 670 * Handler for RX_URG_NOTIFY CPL messages. 671 */ 672static int 673do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx) 674{ 675 struct toepcb *toep = (struct toepcb *)ctx; 676 677 rx_urg_notify(toep, m); 678 return (0); 679} 680 681static __inline int 682is_delack_mode_valid(struct toedev *dev, struct toepcb *toep) 683{ 684 return (toep->tp_ulp_mode || 685 (toep->tp_ulp_mode == ULP_MODE_TCPDDP && 686 dev->tod_ttid >= TOE_ID_CHELSIO_T3)); 687} 688 689/* 690 * Set of states for which we should return RX credits. 
691 */ 692#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 693 694/* 695 * Called after some received data has been read. It returns RX credits 696 * to the HW for the amount of data processed. 697 */ 698void 699t3_cleanup_rbuf(struct tcpcb *tp, int copied) 700{ 701 struct toepcb *toep = tp->t_toe; 702 struct socket *so; 703 struct toedev *dev; 704 int dack_mode, must_send, read; 705 u32 thres, credits, dack = 0; 706 struct sockbuf *rcv; 707 708 so = inp_inpcbtosocket(tp->t_inpcb); 709 rcv = so_sockbuf_rcv(so); 710 711 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 712 (tp->t_state == TCPS_FIN_WAIT_2))) { 713 if (copied) { 714 sockbuf_lock(rcv); 715 toep->tp_copied_seq += copied; 716 sockbuf_unlock(rcv); 717 } 718 719 return; 720 } 721 722 inp_lock_assert(tp->t_inpcb); 723 724 sockbuf_lock(rcv); 725 if (copied) 726 toep->tp_copied_seq += copied; 727 else { 728 read = toep->tp_enqueued_bytes - rcv->sb_cc; 729 toep->tp_copied_seq += read; 730 } 731 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 732 toep->tp_enqueued_bytes = rcv->sb_cc; 733 sockbuf_unlock(rcv); 734 735 if (credits > rcv->sb_mbmax) { 736 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n", 737 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 738 credits = rcv->sb_mbmax; 739 } 740 741 742 /* 743 * XXX this won't accurately reflect credit return - we need 744 * to look at the difference between the amount that has been 745 * put in the recv sockbuf and what is there now 746 */ 747 748 if (__predict_false(!credits)) 749 return; 750 751 dev = toep->tp_toedev; 752 thres = TOM_TUNABLE(dev, rx_credit_thres); 753 754 if (__predict_false(thres == 0)) 755 return; 756 757 if (is_delack_mode_valid(dev, toep)) { 758 dack_mode = TOM_TUNABLE(dev, delack); 759 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 760 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 761 762 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 763 dack = F_RX_DACK_CHANGE 
| 764 V_RX_DACK_MODE(dack_mode); 765 } 766 } else 767 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 768 769 /* 770 * For coalescing to work effectively ensure the receive window has 771 * at least 16KB left. 772 */ 773 must_send = credits + 16384 >= tp->rcv_wnd; 774 775 if (must_send || credits >= thres) 776 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 777} 778 779static int 780cxgb_toe_disconnect(struct tcpcb *tp) 781{ 782 struct socket *so; 783 784 DPRINTF("cxgb_toe_disconnect\n"); 785 786 so = inp_inpcbtosocket(tp->t_inpcb); 787 close_conn(so); 788 return (0); 789} 790 791static int 792cxgb_toe_reset(struct tcpcb *tp) 793{ 794 struct toepcb *toep = tp->t_toe; 795 796 t3_send_reset(toep); 797 798 /* 799 * unhook from socket 800 */ 801 tp->t_flags &= ~TF_TOE; 802 toep->tp_tp = NULL; 803 tp->t_toe = NULL; 804 return (0); 805} 806 807static int 808cxgb_toe_send(struct tcpcb *tp) 809{ 810 struct socket *so; 811 812 DPRINTF("cxgb_toe_send\n"); 813 dump_toepcb(tp->t_toe); 814 815 so = inp_inpcbtosocket(tp->t_inpcb); 816 t3_push_frames(so, 1); 817 return (0); 818} 819 820static int 821cxgb_toe_rcvd(struct tcpcb *tp) 822{ 823 824 inp_lock_assert(tp->t_inpcb); 825 826 t3_cleanup_rbuf(tp, 0); 827 828 return (0); 829} 830 831static void 832cxgb_toe_detach(struct tcpcb *tp) 833{ 834 struct toepcb *toep; 835 836 /* 837 * XXX how do we handle teardown in the SYN_SENT state? 
838 * 839 */ 840 inp_lock_assert(tp->t_inpcb); 841 toep = tp->t_toe; 842 toep->tp_tp = NULL; 843 844 /* 845 * unhook from socket 846 */ 847 tp->t_flags &= ~TF_TOE; 848 tp->t_toe = NULL; 849} 850 851 852static struct toe_usrreqs cxgb_toe_usrreqs = { 853 .tu_disconnect = cxgb_toe_disconnect, 854 .tu_reset = cxgb_toe_reset, 855 .tu_send = cxgb_toe_send, 856 .tu_rcvd = cxgb_toe_rcvd, 857 .tu_detach = cxgb_toe_detach, 858 .tu_detach = cxgb_toe_detach, 859 .tu_syncache_event = handle_syncache_event, 860}; 861 862 863static void 864__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 865 uint64_t mask, uint64_t val, int no_reply) 866{ 867 struct cpl_set_tcb_field *req; 868 869 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 870 toep->tp_tid, word, mask, val); 871 872 req = mtod(m, struct cpl_set_tcb_field *); 873 m->m_pkthdr.len = m->m_len = sizeof(*req); 874 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 875 req->wr.wr_lo = 0; 876 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 877 req->reply = V_NO_REPLY(no_reply); 878 req->cpu_idx = 0; 879 req->word = htons(word); 880 req->mask = htobe64(mask); 881 req->val = htobe64(val); 882 883 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 884 send_or_defer(toep, m, 0); 885} 886 887static void 888t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) 889{ 890 struct mbuf *m; 891 struct tcpcb *tp = toep->tp_tp; 892 893 if (toep == NULL) 894 return; 895 896 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 897 printf("not seting field\n"); 898 return; 899 } 900 901 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 902 903 __set_tcb_field(toep, m, word, mask, val, 1); 904} 905 906/* 907 * Set one of the t_flags bits in the TCB. 
908 */ 909static void 910set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val) 911{ 912 913 t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); 914} 915 916/* 917 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. 918 */ 919static void 920t3_set_nagle(struct toepcb *toep) 921{ 922 struct tcpcb *tp = toep->tp_tp; 923 924 set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); 925} 926 927/* 928 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. 929 */ 930void 931t3_set_keepalive(struct toepcb *toep, int on_off) 932{ 933 934 set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off); 935} 936 937void 938t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off) 939{ 940 set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off); 941} 942 943void 944t3_set_dack_mss(struct toepcb *toep, int on_off) 945{ 946 947 set_tcb_tflag(toep, S_TF_DACK_MSS, on_off); 948} 949 950/* 951 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. 952 */ 953static void 954t3_set_tos(struct toepcb *toep) 955{ 956 int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb); 957 958 t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), 959 V_TCB_TOS(tos)); 960} 961 962 963/* 964 * In DDP mode, TP fails to schedule a timer to push RX data to the host when 965 * DDP is disabled (data is delivered to freelist). [Note that, the peer should 966 * set the PSH bit in the last segment, which would trigger delivery.] 967 * We work around the issue by setting a DDP buffer in a partial placed state, 968 * which guarantees that TP will schedule a timer. 
969 */ 970#define TP_DDP_TIMER_WORKAROUND_MASK\ 971 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\ 972 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\ 973 V_TCB_RX_DDP_BUF0_LEN(3)) << 32)) 974#define TP_DDP_TIMER_WORKAROUND_VAL\ 975 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\ 976 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\ 977 32)) 978 979static void 980t3_enable_ddp(struct toepcb *toep, int on) 981{ 982 if (on) { 983 984 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), 985 V_TF_DDP_OFF(0)); 986 } else 987 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, 988 V_TF_DDP_OFF(1) | 989 TP_DDP_TIMER_WORKAROUND_MASK, 990 V_TF_DDP_OFF(1) | 991 TP_DDP_TIMER_WORKAROUND_VAL); 992 993} 994 995void 996t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color) 997{ 998 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx, 999 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), 1000 tag_color); 1001} 1002 1003void 1004t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset, 1005 unsigned int len) 1006{ 1007 if (buf_idx == 0) 1008 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET, 1009 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 1010 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 1011 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | 1012 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 1013 else 1014 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET, 1015 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 1016 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), 1017 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | 1018 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32)); 1019} 1020 1021static int 1022t3_set_cong_control(struct socket *so, const char *name) 1023{ 1024#ifdef CONGESTION_CONTROL_SUPPORTED 1025 int cong_algo; 1026 1027 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++) 1028 if (!strcmp(name, t3_cong_ops[cong_algo].name)) 1029 break; 1030 1031 if (cong_algo >= ARRAY_SIZE(t3_cong_ops)) 
1032 return -EINVAL; 1033#endif 1034 return 0; 1035} 1036 1037int 1038t3_get_tcb(struct toepcb *toep) 1039{ 1040 struct cpl_get_tcb *req; 1041 struct tcpcb *tp = toep->tp_tp; 1042 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 1043 1044 if (!m) 1045 return (ENOMEM); 1046 1047 inp_lock_assert(tp->t_inpcb); 1048 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 1049 req = mtod(m, struct cpl_get_tcb *); 1050 m->m_pkthdr.len = m->m_len = sizeof(*req); 1051 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 1052 req->wr.wr_lo = 0; 1053 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); 1054 req->cpuno = htons(toep->tp_qset); 1055 req->rsvd = 0; 1056 if (tp->t_state == TCPS_SYN_SENT) 1057 mbufq_tail(&toep->out_of_order_queue, m); // defer 1058 else 1059 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 1060 return 0; 1061} 1062 1063static inline void 1064so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid) 1065{ 1066 1067 toepcb_hold(toep); 1068 1069 cxgb_insert_tid(d->cdev, d->client, toep, tid); 1070} 1071 1072/** 1073 * find_best_mtu - find the entry in the MTU table closest to an MTU 1074 * @d: TOM state 1075 * @mtu: the target MTU 1076 * 1077 * Returns the index of the value in the MTU table that is closest to but 1078 * does not exceed the target MTU. 
1079 */ 1080static unsigned int 1081find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1082{ 1083 int i = 0; 1084 1085 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1086 ++i; 1087 return (i); 1088} 1089 1090static unsigned int 1091select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1092{ 1093 unsigned int idx; 1094 1095#ifdef notyet 1096 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt; 1097#endif 1098 if (tp) { 1099 tp->t_maxseg = pmtu - 40; 1100 if (tp->t_maxseg < td->mtus[0] - 40) 1101 tp->t_maxseg = td->mtus[0] - 40; 1102 idx = find_best_mtu(td, tp->t_maxseg + 40); 1103 1104 tp->t_maxseg = td->mtus[idx] - 40; 1105 } else 1106 idx = find_best_mtu(td, pmtu); 1107 1108 return (idx); 1109} 1110 1111static inline void 1112free_atid(struct t3cdev *cdev, unsigned int tid) 1113{ 1114 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1115 1116 if (toep) 1117 toepcb_release(toep); 1118} 1119 1120/* 1121 * Release resources held by an offload connection (TID, L2T entry, etc.) 
1122 */ 1123static void 1124t3_release_offload_resources(struct toepcb *toep) 1125{ 1126 struct tcpcb *tp = toep->tp_tp; 1127 struct toedev *tdev = toep->tp_toedev; 1128 struct t3cdev *cdev; 1129 struct socket *so; 1130 unsigned int tid = toep->tp_tid; 1131 struct sockbuf *rcv; 1132 1133 CTR0(KTR_TOM, "t3_release_offload_resources"); 1134 1135 if (!tdev) 1136 return; 1137 1138 cdev = TOEP_T3C_DEV(toep); 1139 if (!cdev) 1140 return; 1141 1142 toep->tp_qset = 0; 1143 t3_release_ddp_resources(toep); 1144 1145#ifdef CTRL_SKB_CACHE 1146 kfree_skb(CTRL_SKB_CACHE(tp)); 1147 CTRL_SKB_CACHE(tp) = NULL; 1148#endif 1149 1150 if (toep->tp_wr_avail != toep->tp_wr_max) { 1151 purge_wr_queue(toep); 1152 reset_wr_list(toep); 1153 } 1154 1155 if (toep->tp_l2t) { 1156 l2t_release(L2DATA(cdev), toep->tp_l2t); 1157 toep->tp_l2t = NULL; 1158 } 1159 toep->tp_tp = NULL; 1160 if (tp) { 1161 inp_lock_assert(tp->t_inpcb); 1162 so = inp_inpcbtosocket(tp->t_inpcb); 1163 rcv = so_sockbuf_rcv(so); 1164 /* 1165 * cancel any offloaded reads 1166 * 1167 */ 1168 sockbuf_lock(rcv); 1169 tp->t_toe = NULL; 1170 tp->t_flags &= ~TF_TOE; 1171 if (toep->tp_ddp_state.user_ddp_pending) { 1172 t3_cancel_ubuf(toep, rcv); 1173 toep->tp_ddp_state.user_ddp_pending = 0; 1174 } 1175 so_sorwakeup_locked(so); 1176 1177 } 1178 1179 if (toep->tp_state == TCPS_SYN_SENT) { 1180 free_atid(cdev, tid); 1181#ifdef notyet 1182 __skb_queue_purge(&tp->out_of_order_queue); 1183#endif 1184 } else { // we have TID 1185 cxgb_remove_tid(cdev, toep, tid); 1186 toepcb_release(toep); 1187 } 1188#if 0 1189 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state); 1190#endif 1191} 1192 1193static void 1194install_offload_ops(struct socket *so) 1195{ 1196 struct tcpcb *tp = so_sototcpcb(so); 1197 1198 KASSERT(tp->t_toe != NULL, ("toepcb not set")); 1199 1200 t3_install_socket_ops(so); 1201 tp->t_flags |= TF_TOE; 1202 tp->t_tu = &cxgb_toe_usrreqs; 1203} 1204 1205/* 1206 * Determine the receive window scaling factor given a target max 
1207 * receive window. 1208 */ 1209static __inline int 1210select_rcv_wscale(int space) 1211{ 1212 int wscale = 0; 1213 1214 if (space > MAX_RCV_WND) 1215 space = MAX_RCV_WND; 1216 1217 if (tcp_do_rfc1323) 1218 for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ; 1219 1220 return (wscale); 1221} 1222 1223/* 1224 * Determine the receive window size for a socket. 1225 */ 1226static unsigned long 1227select_rcv_wnd(struct toedev *dev, struct socket *so) 1228{ 1229 struct tom_data *d = TOM_DATA(dev); 1230 unsigned int wnd; 1231 unsigned int max_rcv_wnd; 1232 struct sockbuf *rcv; 1233 1234 rcv = so_sockbuf_rcv(so); 1235 1236 if (tcp_do_autorcvbuf) 1237 wnd = tcp_autorcvbuf_max; 1238 else 1239 wnd = rcv->sb_hiwat; 1240 1241 1242 1243 /* XXX 1244 * For receive coalescing to work effectively we need a receive window 1245 * that can accomodate a coalesced segment. 1246 */ 1247 if (wnd < MIN_RCV_WND) 1248 wnd = MIN_RCV_WND; 1249 1250 /* PR 5138 */ 1251 max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ? 1252 (uint32_t)d->rx_page_size * 23 : 1253 MAX_RCV_WND); 1254 1255 return min(wnd, max_rcv_wnd); 1256} 1257 1258/* 1259 * Assign offload parameters to some socket fields. This code is used by 1260 * both active and passive opens. 
1261 */ 1262static inline void 1263init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid, 1264 struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep) 1265{ 1266 struct tcpcb *tp = so_sototcpcb(so); 1267 struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev); 1268 struct sockbuf *snd, *rcv; 1269 1270#ifdef notyet 1271 SOCK_LOCK_ASSERT(so); 1272#endif 1273 1274 snd = so_sockbuf_snd(so); 1275 rcv = so_sockbuf_rcv(so); 1276 1277 log(LOG_INFO, "initializing offload socket\n"); 1278 /* 1279 * We either need to fix push frames to work with sbcompress 1280 * or we need to add this 1281 */ 1282 snd->sb_flags |= SB_NOCOALESCE; 1283 rcv->sb_flags |= SB_NOCOALESCE; 1284 1285 tp->t_toe = toep; 1286 toep->tp_tp = tp; 1287 toep->tp_toedev = dev; 1288 1289 toep->tp_tid = tid; 1290 toep->tp_l2t = e; 1291 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs); 1292 toep->tp_wr_unacked = 0; 1293 toep->tp_delack_mode = 0; 1294 1295 toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu); 1296 /* 1297 * XXX broken 1298 * 1299 */ 1300 tp->rcv_wnd = select_rcv_wnd(dev, so); 1301 1302 toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) && 1303 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; 1304 toep->tp_qset_idx = 0; 1305 1306 reset_wr_list(toep); 1307 DPRINTF("initialization done\n"); 1308} 1309 1310/* 1311 * The next two functions calculate the option 0 value for a socket. 
1312 */ 1313static inline unsigned int 1314calc_opt0h(struct socket *so, int mtu_idx) 1315{ 1316 struct tcpcb *tp = so_sototcpcb(so); 1317 int wscale = select_rcv_wscale(tp->rcv_wnd); 1318 1319 return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) | 1320 V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS | 1321 V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx); 1322} 1323 1324static inline unsigned int 1325calc_opt0l(struct socket *so, int ulp_mode) 1326{ 1327 struct tcpcb *tp = so_sototcpcb(so); 1328 unsigned int val; 1329 1330 val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) | 1331 V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ)); 1332 1333 DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val); 1334 return (val); 1335} 1336 1337static inline unsigned int 1338calc_opt2(const struct socket *so, struct toedev *dev) 1339{ 1340 int flv_valid; 1341 1342 flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1); 1343 1344 return (V_FLAVORS_VALID(flv_valid) | 1345 V_CONG_CONTROL_FLAVOR(flv_valid ? 
TOM_TUNABLE(dev, cong_alg) : 0)); 1346} 1347 1348#if DEBUG_WR > 1 1349static int 1350count_pending_wrs(const struct toepcb *toep) 1351{ 1352 const struct mbuf *m; 1353 int n = 0; 1354 1355 wr_queue_walk(toep, m) 1356 n += m->m_pkthdr.csum_data; 1357 return (n); 1358} 1359#endif 1360 1361#if 0 1362(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1) 1363#endif 1364 1365static void 1366mk_act_open_req(struct socket *so, struct mbuf *m, 1367 unsigned int atid, const struct l2t_entry *e) 1368{ 1369 struct cpl_act_open_req *req; 1370 struct inpcb *inp = so_sotoinpcb(so); 1371 struct tcpcb *tp = inp_inpcbtotcpcb(inp); 1372 struct toepcb *toep = tp->t_toe; 1373 struct toedev *tdev = toep->tp_toedev; 1374 1375 m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep)); 1376 1377 req = mtod(m, struct cpl_act_open_req *); 1378 m->m_pkthdr.len = m->m_len = sizeof(*req); 1379 1380 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 1381 req->wr.wr_lo = 0; 1382 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); 1383 inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port); 1384#if 0 1385 req->local_port = inp->inp_lport; 1386 req->peer_port = inp->inp_fport; 1387 memcpy(&req->local_ip, &inp->inp_laddr, 4); 1388 memcpy(&req->peer_ip, &inp->inp_faddr, 4); 1389#endif 1390 req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) | 1391 V_TX_CHANNEL(e->smt_idx)); 1392 req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode)); 1393 req->params = 0; 1394 req->opt2 = htonl(calc_opt2(so, tdev)); 1395} 1396 1397 1398/* 1399 * Convert an ACT_OPEN_RPL status to an errno. 
1400 */ 1401static int 1402act_open_rpl_status_to_errno(int status) 1403{ 1404 switch (status) { 1405 case CPL_ERR_CONN_RESET: 1406 return (ECONNREFUSED); 1407 case CPL_ERR_ARP_MISS: 1408 return (EHOSTUNREACH); 1409 case CPL_ERR_CONN_TIMEDOUT: 1410 return (ETIMEDOUT); 1411 case CPL_ERR_TCAM_FULL: 1412 return (ENOMEM); 1413 case CPL_ERR_CONN_EXIST: 1414 log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n"); 1415 return (EADDRINUSE); 1416 default: 1417 return (EIO); 1418 } 1419} 1420 1421static void 1422fail_act_open(struct toepcb *toep, int errno) 1423{ 1424 struct tcpcb *tp = toep->tp_tp; 1425 1426 t3_release_offload_resources(toep); 1427 if (tp) { 1428 inp_wunlock(tp->t_inpcb); 1429 tcp_offload_drop(tp, errno); 1430 } 1431 1432#ifdef notyet 1433 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 1434#endif 1435} 1436 1437/* 1438 * Handle active open failures. 1439 */ 1440static void 1441active_open_failed(struct toepcb *toep, struct mbuf *m) 1442{ 1443 struct cpl_act_open_rpl *rpl = cplhdr(m); 1444 struct inpcb *inp; 1445 1446 if (toep->tp_tp == NULL) 1447 goto done; 1448 1449 inp = toep->tp_tp->t_inpcb; 1450 1451/* 1452 * Don't handle connection retry for now 1453 */ 1454#ifdef notyet 1455 struct inet_connection_sock *icsk = inet_csk(sk); 1456 1457 if (rpl->status == CPL_ERR_CONN_EXIST && 1458 icsk->icsk_retransmit_timer.function != act_open_retry_timer) { 1459 icsk->icsk_retransmit_timer.function = act_open_retry_timer; 1460 sk_reset_timer(so, &icsk->icsk_retransmit_timer, 1461 jiffies + HZ / 2); 1462 } else 1463#endif 1464 { 1465 inp_wlock(inp); 1466 /* 1467 * drops the inpcb lock 1468 */ 1469 fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status)); 1470 } 1471 1472 done: 1473 m_free(m); 1474} 1475 1476/* 1477 * Return whether a failed active open has allocated a TID 1478 */ 1479static inline int 1480act_open_has_tid(int status) 1481{ 1482 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && 1483 status != CPL_ERR_ARP_MISS; 1484} 1485 1486/* 1487 * 
Process an ACT_OPEN_RPL CPL message. 1488 */ 1489static int 1490do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 1491{ 1492 struct toepcb *toep = (struct toepcb *)ctx; 1493 struct cpl_act_open_rpl *rpl = cplhdr(m); 1494 1495 if (cdev->type != T3A && act_open_has_tid(rpl->status)) 1496 cxgb_queue_tid_release(cdev, GET_TID(rpl)); 1497 1498 active_open_failed(toep, m); 1499 return (0); 1500} 1501 1502/* 1503 * Handle an ARP failure for an active open. XXX purge ofo queue 1504 * 1505 * XXX badly broken for crossed SYNs as the ATID is no longer valid. 1506 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should 1507 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't 1508 * free the atid. Hmm. 1509 */ 1510#ifdef notyet 1511static void 1512act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) 1513{ 1514 struct toepcb *toep = m_get_toep(m); 1515 struct tcpcb *tp = toep->tp_tp; 1516 struct inpcb *inp = tp->t_inpcb; 1517 struct socket *so; 1518 1519 inp_wlock(inp); 1520 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { 1521 /* 1522 * drops the inpcb lock 1523 */ 1524 fail_act_open(so, EHOSTUNREACH); 1525 printf("freeing %p\n", m); 1526 1527 m_free(m); 1528 } else 1529 inp_wunlock(inp); 1530} 1531#endif 1532/* 1533 * Send an active open request. 
1534 */ 1535int 1536t3_connect(struct toedev *tdev, struct socket *so, 1537 struct rtentry *rt, struct sockaddr *nam) 1538{ 1539 struct mbuf *m; 1540 struct l2t_entry *e; 1541 struct tom_data *d = TOM_DATA(tdev); 1542 struct inpcb *inp = so_sotoinpcb(so); 1543 struct tcpcb *tp = intotcpcb(inp); 1544 struct toepcb *toep; /* allocated by init_offload_socket */ 1545 1546 int atid; 1547 1548 toep = toepcb_alloc(); 1549 if (toep == NULL) 1550 goto out_err; 1551 1552 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1553 goto out_err; 1554 1555 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1556 if (!e) 1557 goto free_tid; 1558 1559 inp_lock_assert(inp); 1560 m = m_gethdr(MT_DATA, M_WAITOK); 1561 1562#if 0 1563 m->m_toe.mt_toepcb = tp->t_toe; 1564 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1565#endif 1566 so_lock(so); 1567 1568 init_offload_socket(so, tdev, atid, e, rt, toep); 1569 1570 install_offload_ops(so); 1571 1572 mk_act_open_req(so, m, atid, e); 1573 so_unlock(so); 1574 1575 soisconnecting(so); 1576 toep = tp->t_toe; 1577 m_set_toep(m, tp->t_toe); 1578 1579 toep->tp_state = TCPS_SYN_SENT; 1580 l2t_send(d->cdev, (struct mbuf *)m, e); 1581 1582 if (toep->tp_ulp_mode) 1583 t3_enable_ddp(toep, 0); 1584 return (0); 1585 1586free_tid: 1587 printf("failing connect - free atid\n"); 1588 1589 free_atid(d->cdev, atid); 1590out_err: 1591 printf("return ENOMEM\n"); 1592 return (ENOMEM); 1593} 1594 1595/* 1596 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1597 * not send multiple ABORT_REQs for the same connection and also that we do 1598 * not try to send a message after the connection has closed. Returns 1 if 1599 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
1600 */ 1601static void 1602t3_send_reset(struct toepcb *toep) 1603{ 1604 1605 struct cpl_abort_req *req; 1606 unsigned int tid = toep->tp_tid; 1607 int mode = CPL_ABORT_SEND_RST; 1608 struct tcpcb *tp = toep->tp_tp; 1609 struct toedev *tdev = toep->tp_toedev; 1610 struct socket *so = NULL; 1611 struct mbuf *m; 1612 struct sockbuf *snd; 1613 1614 if (tp) { 1615 inp_lock_assert(tp->t_inpcb); 1616 so = inp_inpcbtosocket(tp->t_inpcb); 1617 } 1618 1619 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) || 1620 tdev == NULL)) 1621 return; 1622 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN); 1623 1624 snd = so_sockbuf_snd(so); 1625 /* Purge the send queue so we don't send anything after an abort. */ 1626 if (so) 1627 sbflush(snd); 1628 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev)) 1629 mode |= CPL_ABORT_POST_CLOSE_REQ; 1630 1631 m = m_gethdr_nofail(sizeof(*req)); 1632 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep)); 1633 set_arp_failure_handler(m, abort_arp_failure); 1634 1635 req = mtod(m, struct cpl_abort_req *); 1636 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); 1637 req->wr.wr_lo = htonl(V_WR_TID(tid)); 1638 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); 1639 req->rsvd0 = tp ? 
htonl(tp->snd_nxt) : 0; 1640 req->rsvd1 = !(toep->tp_flags & TP_DATASENT); 1641 req->cmd = mode; 1642 if (tp && (tp->t_state == TCPS_SYN_SENT)) 1643 mbufq_tail(&toep->out_of_order_queue, m); // defer 1644 else 1645 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); 1646} 1647 1648static int 1649t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) 1650{ 1651 struct inpcb *inp; 1652 int error, optval; 1653 1654 if (sopt->sopt_name == IP_OPTIONS) 1655 return (ENOPROTOOPT); 1656 1657 if (sopt->sopt_name != IP_TOS) 1658 return (EOPNOTSUPP); 1659 1660 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 1661 1662 if (error) 1663 return (error); 1664 1665 if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread)) 1666 return (EPERM); 1667 1668 inp = so_sotoinpcb(so); 1669 inp_wlock(inp); 1670 inp_ip_tos_set(inp, optval); 1671#if 0 1672 inp->inp_ip_tos = optval; 1673#endif 1674 t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe); 1675 inp_wunlock(inp); 1676 1677 return (0); 1678} 1679 1680static int 1681t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1682{ 1683 int err = 0; 1684 size_t copied; 1685 1686 if (sopt->sopt_name != TCP_CONGESTION && 1687 sopt->sopt_name != TCP_NODELAY) 1688 return (EOPNOTSUPP); 1689 1690 if (sopt->sopt_name == TCP_CONGESTION) { 1691 char name[TCP_CA_NAME_MAX]; 1692 int optlen = sopt->sopt_valsize; 1693 struct tcpcb *tp; 1694 1695 if (sopt->sopt_dir == SOPT_GET) { 1696 KASSERT(0, ("unimplemented")); 1697 return (EOPNOTSUPP); 1698 } 1699 1700 if (optlen < 1) 1701 return (EINVAL); 1702 1703 err = copyinstr(sopt->sopt_val, name, 1704 min(TCP_CA_NAME_MAX - 1, optlen), &copied); 1705 if (err) 1706 return (err); 1707 if (copied < 1) 1708 return (EINVAL); 1709 1710 tp = so_sototcpcb(so); 1711 /* 1712 * XXX I need to revisit this 1713 */ 1714 if ((err = t3_set_cong_control(so, name)) == 0) { 1715#ifdef CONGESTION_CONTROL_SUPPORTED 1716 tp->t_cong_control = strdup(name, M_CXGB); 1717#endif 1718 } else 1719 return (err); 1720 } else { 1721 int 
optval, oldval; 1722 struct inpcb *inp; 1723 struct tcpcb *tp; 1724 1725 if (sopt->sopt_dir == SOPT_GET) 1726 return (EOPNOTSUPP); 1727 1728 err = sooptcopyin(sopt, &optval, sizeof optval, 1729 sizeof optval); 1730 1731 if (err) 1732 return (err); 1733 1734 inp = so_sotoinpcb(so); 1735 tp = inp_inpcbtotcpcb(inp); 1736 1737 inp_wlock(inp); 1738 1739 oldval = tp->t_flags; 1740 if (optval) 1741 tp->t_flags |= TF_NODELAY; 1742 else 1743 tp->t_flags &= ~TF_NODELAY; 1744 inp_wunlock(inp); 1745 1746 1747 if (oldval != tp->t_flags && (tp->t_toe != NULL)) 1748 t3_set_nagle(tp->t_toe); 1749 1750 } 1751 1752 return (0); 1753} 1754 1755int 1756t3_ctloutput(struct socket *so, struct sockopt *sopt) 1757{ 1758 int err; 1759 1760 if (sopt->sopt_level != IPPROTO_TCP) 1761 err = t3_ip_ctloutput(so, sopt); 1762 else 1763 err = t3_tcp_ctloutput(so, sopt); 1764 1765 if (err != EOPNOTSUPP) 1766 return (err); 1767 1768 return (tcp_ctloutput(so, sopt)); 1769} 1770 1771/* 1772 * Returns true if we need to explicitly request RST when we receive new data 1773 * on an RX-closed connection. 1774 */ 1775static inline int 1776need_rst_on_excess_rx(const struct toepcb *toep) 1777{ 1778 return (1); 1779} 1780 1781/* 1782 * Handles Rx data that arrives in a state where the socket isn't accepting 1783 * new data. 1784 */ 1785static void 1786handle_excess_rx(struct toepcb *toep, struct mbuf *m) 1787{ 1788 1789 if (need_rst_on_excess_rx(toep) && 1790 !(toep->tp_flags & TP_ABORT_SHUTDOWN)) 1791 t3_send_reset(toep); 1792 m_freem(m); 1793} 1794 1795/* 1796 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE) 1797 * by getting the DDP offset from the TCB. 
1798 */ 1799static void 1800tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) 1801{ 1802 struct ddp_state *q = &toep->tp_ddp_state; 1803 struct ddp_buf_state *bsp; 1804 struct cpl_get_tcb_rpl *hdr; 1805 unsigned int ddp_offset; 1806 struct socket *so; 1807 struct tcpcb *tp; 1808 struct sockbuf *rcv; 1809 int state; 1810 1811 uint64_t t; 1812 __be64 *tcb; 1813 1814 tp = toep->tp_tp; 1815 so = inp_inpcbtosocket(tp->t_inpcb); 1816 1817 inp_lock_assert(tp->t_inpcb); 1818 rcv = so_sockbuf_rcv(so); 1819 sockbuf_lock(rcv); 1820 1821 /* Note that we only accout for CPL_GET_TCB issued by the DDP code. 1822 * We really need a cookie in order to dispatch the RPLs. 1823 */ 1824 q->get_tcb_count--; 1825 1826 /* It is a possible that a previous CPL already invalidated UBUF DDP 1827 * and moved the cur_buf idx and hence no further processing of this 1828 * skb is required. However, the app might be sleeping on 1829 * !q->get_tcb_count and we need to wake it up. 1830 */ 1831 if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) { 1832 int state = so_state_get(so); 1833 1834 m_freem(m); 1835 if (__predict_true((state & SS_NOFDREF) == 0)) 1836 so_sorwakeup_locked(so); 1837 else 1838 sockbuf_unlock(rcv); 1839 1840 return; 1841 } 1842 1843 bsp = &q->buf_state[q->cur_buf]; 1844 hdr = cplhdr(m); 1845 tcb = (__be64 *)(hdr + 1); 1846 if (q->cur_buf == 0) { 1847 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]); 1848 ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET); 1849 } else { 1850 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]); 1851 ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET; 1852 } 1853 ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET; 1854 m->m_cur_offset = bsp->cur_offset; 1855 bsp->cur_offset = ddp_offset; 1856 m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset; 1857 1858 CTR5(KTR_TOM, 1859 "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u", 1860 q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset); 1861 KASSERT(ddp_offset 
>= m->m_cur_offset, 1862 ("ddp_offset=%u less than cur_offset=%u", 1863 ddp_offset, m->m_cur_offset)); 1864 1865#if 0 1866{ 1867 unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx; 1868 1869 t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]); 1870 ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS; 1871 1872 t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]); 1873 rcv_nxt = t >> S_TCB_RCV_NXT; 1874 rcv_nxt &= M_TCB_RCV_NXT; 1875 1876 t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]); 1877 rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET); 1878 rx_hdr_offset &= M_TCB_RX_HDR_OFFSET; 1879 1880 T3_TRACE2(TIDTB(sk), 1881 "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x", 1882 ddp_flags, rcv_nxt - rx_hdr_offset); 1883 T3_TRACE4(TB(q), 1884 "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u", 1885 tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf); 1886 T3_TRACE3(TB(q), 1887 "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u", 1888 rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset); 1889 T3_TRACE2(TB(q), 1890 "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x", 1891 q->buf_state[0].flags, q->buf_state[1].flags); 1892 1893} 1894#endif 1895 if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) { 1896 handle_excess_rx(toep, m); 1897 return; 1898 } 1899 1900#ifdef T3_TRACE 1901 if ((int)m->m_pkthdr.len < 0) { 1902 t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len"); 1903 } 1904#endif 1905 if (bsp->flags & DDP_BF_NOCOPY) { 1906#ifdef T3_TRACE 1907 T3_TRACE0(TB(q), 1908 "tcb_rpl_as_ddp_complete: CANCEL UBUF"); 1909 1910 if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) { 1911 printk("!cancel_ubuf"); 1912 t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf"); 1913 } 1914#endif 1915 m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1; 1916 bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA); 1917 q->cur_buf ^= 1; 1918 } else if (bsp->flags & DDP_BF_NOFLIP) { 1919 1920 m->m_ddp_flags = 1; /* always a kernel buffer */ 1921 1922 /* 
now HW buffer carries a user buffer */ 1923 bsp->flags &= ~DDP_BF_NOFLIP; 1924 bsp->flags |= DDP_BF_NOCOPY; 1925 1926 /* It is possible that the CPL_GET_TCB_RPL doesn't indicate 1927 * any new data in which case we're done. If in addition the 1928 * offset is 0, then there wasn't a completion for the kbuf 1929 * and we need to decrement the posted count. 1930 */ 1931 if (m->m_pkthdr.len == 0) { 1932 if (ddp_offset == 0) { 1933 q->kbuf_posted--; 1934 bsp->flags |= DDP_BF_NODATA; 1935 } 1936 sockbuf_unlock(rcv); 1937 m_free(m); 1938 return; 1939 } 1940 } else { 1941 sockbuf_unlock(rcv); 1942 1943 /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP, 1944 * but it got here way late and nobody cares anymore. 1945 */ 1946 m_free(m); 1947 return; 1948 } 1949 1950 m->m_ddp_gl = (unsigned char *)bsp->gl; 1951 m->m_flags |= M_DDP; 1952 m->m_seq = tp->rcv_nxt; 1953 tp->rcv_nxt += m->m_pkthdr.len; 1954 tp->t_rcvtime = ticks; 1955 CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u", 1956 m->m_seq, q->cur_buf, m->m_pkthdr.len); 1957 if (m->m_pkthdr.len == 0) { 1958 q->user_ddp_pending = 0; 1959 m_free(m); 1960 } else 1961 SBAPPEND(rcv, m); 1962 1963 state = so_state_get(so); 1964 if (__predict_true((state & SS_NOFDREF) == 0)) 1965 so_sorwakeup_locked(so); 1966 else 1967 sockbuf_unlock(rcv); 1968} 1969 1970/* 1971 * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code, 1972 * in that case they are similar to DDP completions. 
1973 */ 1974static int 1975do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 1976{ 1977 struct toepcb *toep = (struct toepcb *)ctx; 1978 1979 /* OK if socket doesn't exist */ 1980 if (toep == NULL) { 1981 printf("null toep in do_get_tcb_rpl\n"); 1982 return (CPL_RET_BUF_DONE); 1983 } 1984 1985 inp_wlock(toep->tp_tp->t_inpcb); 1986 tcb_rpl_as_ddp_complete(toep, m); 1987 inp_wunlock(toep->tp_tp->t_inpcb); 1988 1989 return (0); 1990} 1991 1992static void 1993handle_ddp_data(struct toepcb *toep, struct mbuf *m) 1994{ 1995 struct tcpcb *tp = toep->tp_tp; 1996 struct socket *so; 1997 struct ddp_state *q; 1998 struct ddp_buf_state *bsp; 1999 struct cpl_rx_data *hdr = cplhdr(m); 2000 unsigned int rcv_nxt = ntohl(hdr->seq); 2001 struct sockbuf *rcv; 2002 2003 if (tp->rcv_nxt == rcv_nxt) 2004 return; 2005 2006 inp_lock_assert(tp->t_inpcb); 2007 so = inp_inpcbtosocket(tp->t_inpcb); 2008 rcv = so_sockbuf_rcv(so); 2009 sockbuf_lock(rcv); 2010 2011 q = &toep->tp_ddp_state; 2012 bsp = &q->buf_state[q->cur_buf]; 2013 KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x", 2014 rcv_nxt, tp->rcv_nxt)); 2015 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; 2016 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2017 CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d", 2018 rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len); 2019 2020#ifdef T3_TRACE 2021 if ((int)m->m_pkthdr.len < 0) { 2022 t3_ddp_error(so, "handle_ddp_data: neg len"); 2023 } 2024#endif 2025 m->m_ddp_gl = (unsigned char *)bsp->gl; 2026 m->m_flags |= M_DDP; 2027 m->m_cur_offset = bsp->cur_offset; 2028 m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; 2029 if (bsp->flags & DDP_BF_NOCOPY) 2030 bsp->flags &= ~DDP_BF_NOCOPY; 2031 2032 m->m_seq = tp->rcv_nxt; 2033 tp->rcv_nxt = rcv_nxt; 2034 bsp->cur_offset += m->m_pkthdr.len; 2035 if (!(bsp->flags & DDP_BF_NOFLIP)) 2036 q->cur_buf ^= 1; 2037 /* 2038 * For now, don't re-enable DDP after a connection fell out of 
DDP 2039 * mode. 2040 */ 2041 q->ubuf_ddp_ready = 0; 2042 sockbuf_unlock(rcv); 2043} 2044 2045/* 2046 * Process new data received for a connection. 2047 */ 2048static void 2049new_rx_data(struct toepcb *toep, struct mbuf *m) 2050{ 2051 struct cpl_rx_data *hdr = cplhdr(m); 2052 struct tcpcb *tp = toep->tp_tp; 2053 struct socket *so; 2054 struct sockbuf *rcv; 2055 int state; 2056 int len = be16toh(hdr->len); 2057 2058 inp_wlock(tp->t_inpcb); 2059 2060 so = inp_inpcbtosocket(tp->t_inpcb); 2061 2062 if (__predict_false(so_no_receive(so))) { 2063 handle_excess_rx(toep, m); 2064 inp_wunlock(tp->t_inpcb); 2065 TRACE_EXIT; 2066 return; 2067 } 2068 2069 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) 2070 handle_ddp_data(toep, m); 2071 2072 m->m_seq = ntohl(hdr->seq); 2073 m->m_ulp_mode = 0; /* for iSCSI */ 2074 2075#if VALIDATE_SEQ 2076 if (__predict_false(m->m_seq != tp->rcv_nxt)) { 2077 log(LOG_ERR, 2078 "%s: TID %u: Bad sequence number %u, expected %u\n", 2079 toep->tp_toedev->name, toep->tp_tid, m->m_seq, 2080 tp->rcv_nxt); 2081 m_freem(m); 2082 inp_wunlock(tp->t_inpcb); 2083 return; 2084 } 2085#endif 2086 m_adj(m, sizeof(*hdr)); 2087 2088#ifdef URGENT_DATA_SUPPORTED 2089 /* 2090 * We don't handle urgent data yet 2091 */ 2092 if (__predict_false(hdr->urg)) 2093 handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg)); 2094 if (__predict_false(tp->urg_data == TCP_URG_NOTYET && 2095 tp->urg_seq - tp->rcv_nxt < skb->len)) 2096 tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq - 2097 tp->rcv_nxt]; 2098#endif 2099 if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) { 2100 toep->tp_delack_mode = hdr->dack_mode; 2101 toep->tp_delack_seq = tp->rcv_nxt; 2102 } 2103 CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d", 2104 m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes); 2105 2106 if (len < m->m_pkthdr.len) 2107 m->m_pkthdr.len = m->m_len = len; 2108 2109 tp->rcv_nxt += m->m_pkthdr.len; 2110 tp->t_rcvtime = ticks; 
2111 toep->tp_enqueued_bytes += m->m_pkthdr.len; 2112 CTR2(KTR_TOM, 2113 "new_rx_data: seq 0x%x len %u", 2114 m->m_seq, m->m_pkthdr.len); 2115 inp_wunlock(tp->t_inpcb); 2116 rcv = so_sockbuf_rcv(so); 2117 sockbuf_lock(rcv); 2118#if 0 2119 if (sb_notify(rcv)) 2120 DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len); 2121#endif 2122 SBAPPEND(rcv, m); 2123 2124#ifdef notyet 2125 /* 2126 * We're giving too many credits to the card - but disable this check so we can keep on moving :-| 2127 * 2128 */ 2129 KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1), 2130 2131 ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d", 2132 so, rcv->sb_cc, rcv->sb_mbmax)); 2133#endif 2134 2135 2136 CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d", 2137 rcv->sb_cc, rcv->sb_mbcnt); 2138 2139 state = so_state_get(so); 2140 if (__predict_true((state & SS_NOFDREF) == 0)) 2141 so_sorwakeup_locked(so); 2142 else 2143 sockbuf_unlock(rcv); 2144} 2145 2146/* 2147 * Handler for RX_DATA CPL messages. 2148 */ 2149static int 2150do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2151{ 2152 struct toepcb *toep = (struct toepcb *)ctx; 2153 2154 DPRINTF("rx_data len=%d\n", m->m_pkthdr.len); 2155 2156 new_rx_data(toep, m); 2157 2158 return (0); 2159} 2160 2161static void 2162new_rx_data_ddp(struct toepcb *toep, struct mbuf *m) 2163{ 2164 struct tcpcb *tp; 2165 struct ddp_state *q; 2166 struct ddp_buf_state *bsp; 2167 struct cpl_rx_data_ddp *hdr; 2168 struct socket *so; 2169 unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx; 2170 int nomoredata = 0; 2171 unsigned int delack_mode; 2172 struct sockbuf *rcv; 2173 2174 tp = toep->tp_tp; 2175 inp_wlock(tp->t_inpcb); 2176 so = inp_inpcbtosocket(tp->t_inpcb); 2177 2178 if (__predict_false(so_no_receive(so))) { 2179 2180 handle_excess_rx(toep, m); 2181 inp_wunlock(tp->t_inpcb); 2182 return; 2183 } 2184 2185 q = &toep->tp_ddp_state; 2186 hdr = cplhdr(m); 2187 ddp_report = ntohl(hdr->u.ddp_report); 2188 buf_idx = (ddp_report >> 
S_DDP_BUF_IDX) & 1; 2189 bsp = &q->buf_state[buf_idx]; 2190 2191 CTR4(KTR_TOM, 2192 "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u " 2193 "hdr seq 0x%x len %u", 2194 tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq), 2195 ntohs(hdr->len)); 2196 CTR3(KTR_TOM, 2197 "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d", 2198 G_DDP_OFFSET(ddp_report), ddp_report, buf_idx); 2199 2200 ddp_len = ntohs(hdr->len); 2201 rcv_nxt = ntohl(hdr->seq) + ddp_len; 2202 2203 delack_mode = G_DDP_DACK_MODE(ddp_report); 2204 if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) { 2205 toep->tp_delack_mode = delack_mode; 2206 toep->tp_delack_seq = tp->rcv_nxt; 2207 } 2208 2209 m->m_seq = tp->rcv_nxt; 2210 tp->rcv_nxt = rcv_nxt; 2211 2212 tp->t_rcvtime = ticks; 2213 /* 2214 * Store the length in m->m_len. We are changing the meaning of 2215 * m->m_len here, we need to be very careful that nothing from now on 2216 * interprets ->len of this packet the usual way. 2217 */ 2218 m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq; 2219 inp_wunlock(tp->t_inpcb); 2220 CTR3(KTR_TOM, 2221 "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ", 2222 m->m_len, rcv_nxt, m->m_seq); 2223 /* 2224 * Figure out where the new data was placed in the buffer and store it 2225 * in when. Assumes the buffer offset starts at 0, consumer needs to 2226 * account for page pod's pg_offset. 
2227 */ 2228 end_offset = G_DDP_OFFSET(ddp_report) + ddp_len; 2229 m->m_cur_offset = end_offset - m->m_pkthdr.len; 2230 2231 rcv = so_sockbuf_rcv(so); 2232 sockbuf_lock(rcv); 2233 2234 m->m_ddp_gl = (unsigned char *)bsp->gl; 2235 m->m_flags |= M_DDP; 2236 bsp->cur_offset = end_offset; 2237 toep->tp_enqueued_bytes += m->m_pkthdr.len; 2238 2239 /* 2240 * Length is only meaningful for kbuf 2241 */ 2242 if (!(bsp->flags & DDP_BF_NOCOPY)) 2243 KASSERT(m->m_len <= bsp->gl->dgl_length, 2244 ("length received exceeds ddp pages: len=%d dgl_length=%d", 2245 m->m_len, bsp->gl->dgl_length)); 2246 2247 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2248 KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next)); 2249 /* 2250 * Bit 0 of flags stores whether the DDP buffer is completed. 2251 * Note that other parts of the code depend on this being in bit 0. 2252 */ 2253 if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) { 2254 panic("spurious ddp completion"); 2255 } else { 2256 m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE); 2257 if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP)) 2258 q->cur_buf ^= 1; /* flip buffers */ 2259 } 2260 2261 if (bsp->flags & DDP_BF_NOCOPY) { 2262 m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY); 2263 bsp->flags &= ~DDP_BF_NOCOPY; 2264 } 2265 2266 if (ddp_report & F_DDP_PSH) 2267 m->m_ddp_flags |= DDP_BF_PSH; 2268 if (nomoredata) 2269 m->m_ddp_flags |= DDP_BF_NODATA; 2270 2271#ifdef notyet 2272 skb_reset_transport_header(skb); 2273 tcp_hdr(skb)->fin = 0; /* changes original hdr->ddp_report */ 2274#endif 2275 SBAPPEND(rcv, m); 2276 2277 if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) || 2278 (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1)) 2279 || !(m->m_ddp_flags & DDP_BF_NOCOPY)))) 2280 so_sorwakeup_locked(so); 2281 else 2282 sockbuf_unlock(rcv); 2283} 2284 2285#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\ 2286 F_DDP_PPOD_PARITY_ERR | 
		F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
	F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
	F_DDP_INVALID_PPOD)

/*
 * Handler for RX_DATA_DDP CPL messages.  Drops messages whose DDP validity
 * bits report any of the DDP_ERR conditions, otherwise hands the mbuf to
 * new_rx_data_ddp().
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		/* Tell the dispatcher it still owns (and should free) m. */
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

/*
 * Process a CPL_RX_DDP_COMPLETE: the number of bytes newly placed is
 * G_DDP_OFFSET(ddp_report) minus the buffer offset recorded at the previous
 * report (bsp->cur_offset).  The mbuf is turned into a zero-copy placeholder
 * (M_DDP, m_ddp_gl) and appended to the socket's receive buffer.
 */
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;
	struct sockbuf *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = so_sotoinpcb(so);

		/* Receive side is gone; handle_excess_rx() consumes m. */
		handle_excess_rx(toep, m);
		inp_wunlock(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	/* Bytes placed since the last report for this DDP buffer. */
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	/* Track the delayed-ACK mode HW is actually using. */
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	inp_wunlock(tp->t_inpcb);

	KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	/* NOTE(review): tp is read below after inp_wunlock; trace-only use. */
	CTR5(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report 0x%x offset %u, len %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report), m->m_len);

	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset += m->m_len;

	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;                     /* flip buffers */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata=1;
	}

	CTR4(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report %u offset %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	/* Bit 0 marks the DDP buffer complete; other code depends on bit 0. */
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

	SBAPPEND(rcv, m);
	/* so_sorwakeup_locked() drops the sockbuf lock. */
	if ((so_state_get(so) & SS_NOFDREF) == 0)
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct tcpcb *tp)
{
	/*
	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	inp_wlock(tp->t_inpcb);
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	     /* defeat recycling */
	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
	inp_wunlock(tp->t_inpcb);
	tcp_offload_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 *
 * NOTE(review): despite the comment above, this implementation only ever
 * returns 0 or 1; no -1 path is present in this revision.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
	struct sockbuf *rcv;

	if (tp->rcv_nxt == rcv_nxt)			/* no data */
		return (0);

	CTR0(KTR_TOM, "handle_peer_close_data");
	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
		 */
		return (1);
	}

	inp_lock_assert(tp->t_inpcb);
	q = &toep->tp_ddp_state;
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[q->cur_buf];
	/* Data carried along with the FIN: everything up to rcv_nxt. */
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags =
	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	tp->t_rcvtime = ticks;
	SBAPPEND(rcv, m);
	/* so_sorwakeup_locked() drops the sockbuf lock. */
	if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);

	return (1);
}

/*
 * Handle a peer FIN.
 */
static void
do_peer_fin(struct toepcb *toep, struct mbuf *m)
{
	struct socket *so;
	struct tcpcb *tp = toep->tp_tp;
	int keep, action;

	action = keep = 0;
	CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		printf("abort_pending set\n");

		goto out;
	}
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
		keep = handle_peer_close_data(so, m);
		/*
		 * NOTE(review): handle_peer_close_data() currently returns
		 * only 0 or 1, so this early-return path is dead code.
		 */
		if (keep < 0) {
			inp_wunlock(tp->t_inpcb);
			return;
		}
	}
	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		CTR1(KTR_TOM,
		    "waking up waiters for cantrcvmore on %p ", so);
		socantrcvmore(so);

		/*
		 * If connection is half-synchronized
		 * (ie NEEDSYN flag on) then delay ACK,
		 * so it may be piggybacked when SYN is sent.
		 * Otherwise, since we received a FIN then no
		 * more input can be expected, send ACK now.
		 */
		if (tp->t_flags & TF_NEEDSYN)
			tp->t_flags |= TF_DELACK;
		else
			tp->t_flags |= TF_ACKNOW;
		tp->rcv_nxt++;
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
	    tp->t_starttime = ticks;
	/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;
	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;
	case TCPS_FIN_WAIT_2:
		/*
		 * If we've sent an abort_req we must have sent it too late,
		 * HW will send us a reply telling us so, and this peer_close
		 * is really the last message for this connection and needs to
		 * be treated as an abort_rpl, i.e., transition the connection
		 * to TCP_CLOSE (note that the host stack does this at the
		 * time of generating the RST but we must wait for HW).
		 * Otherwise we enter TIME_WAIT.
		 */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;
		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
		    toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);

	/* Dispatch the chosen action only after dropping the inpcb lock. */
	if (action == TCP_TIMEWAIT) {
		enter_timewait(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}

#ifdef notyet
	/* Do not send POLL_HUP for half duplex close. */
	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
	    sk->sk_state == TCP_CLOSE)
		sk_wake_async(so, 1, POLL_HUP);
	else
		sk_wake_async(so, 1, POLL_IN);
#endif

out:
	/* keep != 0 means the mbuf was queued (or already freed) above. */
	if (!keep)
		m_free(m);
}

/*
 * Handler for PEER_CLOSE CPL messages.
 */
static int
do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	VALIDATE_SOCK(so);

	do_peer_fin(toep, m);
	return (0);
}

/*
 * Process a CLOSE_CON_RPL for our FIN: record the peer-acknowledged snd_una
 * (excluding the FIN) and advance the TCP close state machine.  The chosen
 * action is dispatched only after the inpcb lock is dropped.
 */
static void
process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_close_con_rpl *rpl = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int action = 0;
	struct sockbuf  *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */

	/* An abort is pending on non-T3A: let the abort path finish it. */
	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		inp_wunlock(tp->t_inpcb);
		goto out;
	}

	CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
	    tp->t_state, !!(so_state_get(so) & SS_NOFDREF));

	switch (tp->t_state) {
	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;

		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	case TCPS_LAST_ACK:
		/*
		 * In this state we don't care about pending abort_rpl.
		 * If we've sent abort_req it was post-close and was sent too
		 * late, this close_con_rpl is the actual last message.
		 */
		t3_release_offload_resources(toep);
		action = TCP_CLOSE;
		break;
	case TCPS_FIN_WAIT_1:
		/*
		 * If we can't receive any more
		 * data, then closing user can proceed.
		 * Starting the timer is contrary to the
		 * specification, but if we don't get a FIN
		 * we'll hang forever.
		 *
		 * XXXjl:
		 * we should release the tp also, and use a
		 * compressed state.
		 */
		/* Without a socket we cannot inspect the receive side. */
		if (so)
			rcv = so_sockbuf_rcv(so);
		else
			break;

		if (rcv->sb_state & SBS_CANTRCVMORE) {
			int timeout;

			if (so)
				soisdisconnected(so);
			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : tcp_maxidle;
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
		tp->t_state = TCPS_FIN_WAIT_2;
		if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
			action = TCP_DROP;
		}

		break;
	default:
		log(LOG_ERR,
		       "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		       toep->tp_toedev->tod_name, toep->tp_tid,
		       tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);


	/* Dispatch the chosen action only after dropping the inpcb lock. */
	if (action == TCP_TIMEWAIT) {
		enter_timewait(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}
out:
	m_freem(m);
}

/*
 * Handler for CLOSE_CON_RPL CPL messages.
 */
static int
do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
			    void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	process_close_con_rpl(toep, m);
	return (0);
}

/*
 * Process abort replies.  We only process these messages if we anticipate
 * them as the coordination between SW and HW in this area is somewhat lacking
 * and sometimes we get ABORT_RPLs after we are done with the connection that
 * originated the ABORT_REQ.
2732 */ 2733static void 2734process_abort_rpl(struct toepcb *toep, struct mbuf *m) 2735{ 2736 struct tcpcb *tp = toep->tp_tp; 2737 struct socket *so; 2738 int needclose = 0; 2739 2740#ifdef T3_TRACE 2741 T3_TRACE1(TIDTB(sk), 2742 "process_abort_rpl: GTS rpl pending %d", 2743 sock_flag(sk, ABORT_RPL_PENDING)); 2744#endif 2745 2746 inp_wlock(tp->t_inpcb); 2747 so = inp_inpcbtosocket(tp->t_inpcb); 2748 2749 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2750 /* 2751 * XXX panic on tcpdrop 2752 */ 2753 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev)) 2754 toep->tp_flags |= TP_ABORT_RPL_RCVD; 2755 else { 2756 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); 2757 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || 2758 !is_t3a(toep->tp_toedev)) { 2759 if (toep->tp_flags & TP_ABORT_REQ_RCVD) 2760 panic("TP_ABORT_REQ_RCVD set"); 2761 t3_release_offload_resources(toep); 2762 needclose = 1; 2763 } 2764 } 2765 } 2766 inp_wunlock(tp->t_inpcb); 2767 2768 if (needclose) 2769 tcp_offload_close(tp); 2770 2771 m_free(m); 2772} 2773 2774/* 2775 * Handle an ABORT_RPL_RSS CPL message. 2776 */ 2777static int 2778do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2779{ 2780 struct cpl_abort_rpl_rss *rpl = cplhdr(m); 2781 struct toepcb *toep; 2782 2783 /* 2784 * Ignore replies to post-close aborts indicating that the abort was 2785 * requested too late. These connections are terminated when we get 2786 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 2787 * arrives the TID is either no longer used or it has been recycled. 
2788 */ 2789 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2790discard: 2791 m_free(m); 2792 return (0); 2793 } 2794 2795 toep = (struct toepcb *)ctx; 2796 2797 /* 2798 * Sometimes we've already closed the socket, e.g., a post-close 2799 * abort races with ABORT_REQ_RSS, the latter frees the socket 2800 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2801 * but FW turns the ABORT_REQ into a regular one and so we get 2802 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2803 */ 2804 if (!toep) 2805 goto discard; 2806 2807 if (toep->tp_tp == NULL) { 2808 log(LOG_NOTICE, "removing tid for abort\n"); 2809 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2810 if (toep->tp_l2t) 2811 l2t_release(L2DATA(cdev), toep->tp_l2t); 2812 2813 toepcb_release(toep); 2814 goto discard; 2815 } 2816 2817 log(LOG_NOTICE, "toep=%p\n", toep); 2818 log(LOG_NOTICE, "tp=%p\n", toep->tp_tp); 2819 2820 toepcb_hold(toep); 2821 process_abort_rpl(toep, m); 2822 toepcb_release(toep); 2823 return (0); 2824} 2825 2826/* 2827 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2828 * indicate whether RST should be sent in response. 2829 */ 2830static int 2831abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2832{ 2833 struct tcpcb *tp = so_sototcpcb(so); 2834 2835 switch (abort_reason) { 2836 case CPL_ERR_BAD_SYN: 2837#if 0 2838 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2839#endif 2840 case CPL_ERR_CONN_RESET: 2841 // XXX need to handle SYN_RECV due to crossed SYNs 2842 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2843 case CPL_ERR_XMIT_TIMEDOUT: 2844 case CPL_ERR_PERSIST_TIMEDOUT: 2845 case CPL_ERR_FINWAIT2_TIMEDOUT: 2846 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2847#if 0 2848 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2849#endif 2850 return (ETIMEDOUT); 2851 default: 2852 return (EIO); 2853 } 2854} 2855 2856static inline void 2857set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2858{ 2859 struct cpl_abort_rpl *rpl = cplhdr(m); 2860 2861 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2862 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2863 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2864 2865 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2866 rpl->cmd = cmd; 2867} 2868 2869static void 2870send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2871{ 2872 struct mbuf *reply_mbuf; 2873 struct cpl_abort_req_rss *req = cplhdr(m); 2874 2875 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2876 m_set_priority(m, CPL_PRIORITY_DATA); 2877 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2878 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2879 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2880 m_free(m); 2881} 2882 2883/* 2884 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2885 */ 2886static inline int 2887is_neg_adv_abort(unsigned int status) 2888{ 2889 return status == CPL_ERR_RTX_NEG_ADVICE || 2890 status == CPL_ERR_PERSIST_NEG_ADVICE; 2891} 2892 2893static void 2894send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2895{ 2896 struct mbuf *reply_mbuf; 2897 struct cpl_abort_req_rss *req = cplhdr(m); 2898 2899 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2900 2901 if (!reply_mbuf) { 2902 /* Defer the reply. Stick rst_status into req->cmd. 
*/ 2903 req->status = rst_status; 2904 t3_defer_reply(m, tdev, send_deferred_abort_rpl); 2905 return; 2906 } 2907 2908 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA); 2909 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status); 2910 m_free(m); 2911 2912 /* 2913 * XXX need to sync with ARP as for SYN_RECV connections we can send 2914 * these messages while ARP is pending. For other connection states 2915 * it's not a problem. 2916 */ 2917 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2918} 2919 2920#ifdef notyet 2921static void 2922cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) 2923{ 2924 CXGB_UNIMPLEMENTED(); 2925#ifdef notyet 2926 struct request_sock *req = child->sk_user_data; 2927 2928 inet_csk_reqsk_queue_removed(parent, req); 2929 synq_remove(tcp_sk(child)); 2930 __reqsk_free(req); 2931 child->sk_user_data = NULL; 2932#endif 2933} 2934 2935 2936/* 2937 * Performs the actual work to abort a SYN_RECV connection. 2938 */ 2939static void 2940do_abort_syn_rcv(struct socket *child, struct socket *parent) 2941{ 2942 struct tcpcb *parenttp = so_sototcpcb(parent); 2943 struct tcpcb *childtp = so_sototcpcb(child); 2944 2945 /* 2946 * If the server is still open we clean up the child connection, 2947 * otherwise the server already did the clean up as it was purging 2948 * its SYN queue and the skb was just sitting in its backlog. 2949 */ 2950 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { 2951 cleanup_syn_rcv_conn(child, parent); 2952 inp_wlock(childtp->t_inpcb); 2953 t3_release_offload_resources(childtp->t_toe); 2954 inp_wunlock(childtp->t_inpcb); 2955 tcp_offload_close(childtp); 2956 } 2957} 2958#endif 2959 2960/* 2961 * Handle abort requests for a SYN_RECV connection. These need extra work 2962 * because the socket is on its parent's SYN queue. 
 */
static int
abort_syn_rcv(struct socket *so, struct mbuf *m)
{
	/* Not implemented yet; panics/logs via CXGB_UNIMPLEMENTED(). */
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct socket *parent;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
	struct socket *oreq = so->so_incomp;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;

	if (!oreq)
		return -1;        /* somehow we are not on the SYN queue */

	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, oreq->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	do_abort_syn_rcv(so, parent);
	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
	so_unlock(parent);
#endif
	return (0);
}

/*
 * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
 * request except that we need to reply to it.
 */
static void
process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
{
	int rst_status = CPL_ABORT_NO_RST;
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int needclose = 0;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	/*
	 * NOTE(review): the first ABORT_REQ only records TP_ABORT_REQ_RCVD
	 * and is dropped without a reply; presumably HW delivers the abort
	 * twice and the second instance is processed below — confirm against
	 * the T3 CPL specification.
	 */
	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
		m_free(m);
		goto skip;
	}

	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
	/*
	 * Three cases to consider:
	 * a) We haven't sent an abort_req; close the connection.
	 * b) We have sent a post-close abort_req that will get to TP too late
	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
	 *    be ignored and the connection should be closed now.
	 * c) We have sent a regular abort_req that will get to TP too late.
	 *    That will generate an abort_rpl with status 0, wait for it.
	 */
	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
	    (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
		int error;

		error = abort_status_to_errno(so, req->status,
		    &rst_status);
		so_error_set(so, error);

		if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
			so_sorwakeup(so);
		/*
		 * SYN_RECV needs special processing.  If abort_syn_rcv()
		 * returns 0 is has taken care of the abort.
		 */
		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
			goto skip;

		t3_release_offload_resources(toep);
		needclose = 1;
	}
	inp_wunlock(tp->t_inpcb);

	if (needclose)
		tcp_offload_close(tp);

	send_abort_rpl(m, tdev, rst_status);
	return;
skip:
	inp_wunlock(tp->t_inpcb);
}

/*
 * Handle an ABORT_REQ_RSS CPL message.
 */
static int
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;

	/* Negative advice (retransmit/persist hints) is not a real abort. */
	if (is_neg_adv_abort(req->status)) {
		m_free(m);
		return (0);
	}

	log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);

	/* Connection aborted while still in SYN_RCVD: tear down the embryo. */
	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		toep->tp_flags |= TP_ABORT_REQ_RCVD;

		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		/*
		 *  Unhook
		 */
		toep->tp_tp->t_toe = NULL;
		toep->tp_tp->t_flags &= ~TF_TOE;
		toep->tp_tp = NULL;
		/*
		 * XXX need to call syncache_chkrst - but we don't
		 * have a way of doing that yet
		 */
		toepcb_release(toep);
		log(LOG_ERR, "abort for unestablished connection :-(\n");
		return (0);
	}
	if (toep->tp_tp == NULL) {
		log(LOG_NOTICE, "disconnected toepcb\n");
		/* should be freed momentarily */
		return (0);
	}


	/* Hold the toepcb across processing so it can't vanish under us. */
	toepcb_hold(toep);
	process_abort_req(toep, m, toep->tp_toedev);
	toepcb_release(toep);
	return (0);
}
#ifdef notyet
static void
pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
{
	struct toedev *tdev = TOE_DEV(parent);

	do_abort_syn_rcv(child, parent);
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
		struct cpl_pass_accept_rpl *rpl = cplhdr(m);

		rpl->opt0h = htonl(F_TCAM_BYPASS);
		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	} else
		m_free(m);
}
#endif
static void
handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
{
	/* Not implemented yet; the real logic is under #ifdef notyet. */
	CXGB_UNIMPLEMENTED();

#ifdef notyet
	struct t3cdev *cdev;
	struct socket *parent;
	struct socket *oreq;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;
	struct tcpcb *otp, *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		m_free(m);
		return;
	}

	oreq = so->so_incomp;
	otp = so_sototcpcb(oreq);

	cdev = T3C_DEV(so);
	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, otp->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	pass_open_abort(so, parent, m);
	so_unlock(parent);
#endif
}

/*
 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
 * connection.
3162 */ 3163static void 3164pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m) 3165{ 3166 3167#ifdef notyet 3168 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3169 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk); 3170#endif 3171 handle_pass_open_arp_failure(m_get_socket(m), m); 3172} 3173 3174/* 3175 * Populate a reject CPL_PASS_ACCEPT_RPL WR. 3176 */ 3177static void 3178mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf) 3179{ 3180 struct cpl_pass_accept_req *req = cplhdr(req_mbuf); 3181 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf); 3182 unsigned int tid = GET_TID(req); 3183 3184 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP); 3185 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3186 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3187 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet 3188 rpl->opt0h = htonl(F_TCAM_BYPASS); 3189 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 3190 rpl->opt2 = 0; 3191 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3192} 3193 3194/* 3195 * Send a deferred reject to an accept request. 
3196 */ 3197static void 3198reject_pass_request(struct toedev *tdev, struct mbuf *m) 3199{ 3200 struct mbuf *reply_mbuf; 3201 3202 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl)); 3203 mk_pass_accept_rpl(reply_mbuf, m); 3204 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 3205 m_free(m); 3206} 3207 3208static void 3209handle_syncache_event(int event, void *arg) 3210{ 3211 struct toepcb *toep = arg; 3212 3213 switch (event) { 3214 case TOE_SC_ENTRY_PRESENT: 3215 /* 3216 * entry already exists - free toepcb 3217 * and l2t 3218 */ 3219 printf("syncache entry present\n"); 3220 toepcb_release(toep); 3221 break; 3222 case TOE_SC_DROP: 3223 /* 3224 * The syncache has given up on this entry 3225 * either it timed out, or it was evicted 3226 * we need to explicitly release the tid 3227 */ 3228 printf("syncache entry dropped\n"); 3229 toepcb_release(toep); 3230 break; 3231 default: 3232 log(LOG_ERR, "unknown syncache event %d\n", event); 3233 break; 3234 } 3235} 3236 3237static void 3238syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep) 3239{ 3240 struct in_conninfo inc; 3241 struct tcpopt to; 3242 struct tcphdr th; 3243 struct inpcb *inp; 3244 int mss, wsf, sack, ts; 3245 uint32_t rcv_isn = ntohl(req->rcv_isn); 3246 3247 bzero(&to, sizeof(struct tcpopt)); 3248 inp = so_sotoinpcb(lso); 3249 3250 /* 3251 * Fill out information for entering us into the syncache 3252 */ 3253 inc.inc_fport = th.th_sport = req->peer_port; 3254 inc.inc_lport = th.th_dport = req->local_port; 3255 th.th_seq = req->rcv_isn; 3256 th.th_flags = TH_SYN; 3257 3258 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; 3259 3260 3261 inc.inc_isipv6 = 0; 3262 inc.inc_len = 0; 3263 inc.inc_faddr.s_addr = req->peer_ip; 3264 inc.inc_laddr.s_addr = req->local_ip; 3265 3266 DPRINTF("syncache add of %d:%d %d:%d\n", 3267 ntohl(req->local_ip), ntohs(req->local_port), 3268 ntohl(req->peer_ip), 
ntohs(req->peer_port)); 3269 3270 mss = req->tcp_options.mss; 3271 wsf = req->tcp_options.wsf; 3272 ts = req->tcp_options.tstamp; 3273 sack = req->tcp_options.sack; 3274 to.to_mss = mss; 3275 to.to_wscale = wsf; 3276 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3277 tcp_offload_syncache_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep); 3278} 3279 3280 3281/* 3282 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket 3283 * lock held. Note that the sock here is a listening socket that is not owned 3284 * by the TOE. 3285 */ 3286static void 3287process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, 3288 struct listen_ctx *lctx) 3289{ 3290 int rt_flags; 3291 struct l2t_entry *e; 3292 struct iff_mac tim; 3293 struct mbuf *reply_mbuf, *ddp_mbuf = NULL; 3294 struct cpl_pass_accept_rpl *rpl; 3295 struct cpl_pass_accept_req *req = cplhdr(m); 3296 unsigned int tid = GET_TID(req); 3297 struct tom_data *d = TOM_DATA(tdev); 3298 struct t3cdev *cdev = d->cdev; 3299 struct tcpcb *tp = so_sototcpcb(so); 3300 struct toepcb *newtoep; 3301 struct rtentry *dst; 3302 struct sockaddr_in nam; 3303 struct t3c_data *td = T3C_DATA(cdev); 3304 3305 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3306 if (__predict_false(reply_mbuf == NULL)) { 3307 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3308 t3_defer_reply(m, tdev, reject_pass_request); 3309 else { 3310 cxgb_queue_tid_release(cdev, tid); 3311 m_free(m); 3312 } 3313 DPRINTF("failed to get reply_mbuf\n"); 3314 3315 goto out; 3316 } 3317 3318 if (tp->t_state != TCPS_LISTEN) { 3319 DPRINTF("socket not in listen state\n"); 3320 3321 goto reject; 3322 } 3323 3324 tim.mac_addr = req->dst_mac; 3325 tim.vlan_tag = ntohs(req->vlan_tag); 3326 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { 3327 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n"); 3328 goto reject; 3329 } 3330 3331#ifdef notyet 3332 /* 3333 * XXX do route 
lookup to confirm that we're still listening on this 3334 * address 3335 */ 3336 if (ip_route_input(skb, req->local_ip, req->peer_ip, 3337 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev)) 3338 goto reject; 3339 rt_flags = ((struct rtable *)skb->dst)->rt_flags & 3340 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); 3341 dst_release(skb->dst); // done with the input route, release it 3342 skb->dst = NULL; 3343 3344 if ((rt_flags & RTF_LOCAL) == 0) 3345 goto reject; 3346#endif 3347 /* 3348 * XXX 3349 */ 3350 rt_flags = RTF_LOCAL; 3351 if ((rt_flags & RTF_LOCAL) == 0) 3352 goto reject; 3353 3354 /* 3355 * Calculate values and add to syncache 3356 */ 3357 3358 newtoep = toepcb_alloc(); 3359 if (newtoep == NULL) 3360 goto reject; 3361 3362 bzero(&nam, sizeof(struct sockaddr_in)); 3363 3364 nam.sin_len = sizeof(struct sockaddr_in); 3365 nam.sin_family = AF_INET; 3366 nam.sin_addr.s_addr =req->peer_ip; 3367 dst = rtalloc2((struct sockaddr *)&nam, 1, 0); 3368 3369 if (dst == NULL) { 3370 printf("failed to find route\n"); 3371 goto reject; 3372 } 3373 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev, 3374 (struct sockaddr *)&nam); 3375 if (e == NULL) { 3376 DPRINTF("failed to get l2t\n"); 3377 } 3378 /* 3379 * Point to our listen socket until accept 3380 */ 3381 newtoep->tp_tp = tp; 3382 newtoep->tp_flags = TP_SYN_RCVD; 3383 newtoep->tp_tid = tid; 3384 newtoep->tp_toedev = tdev; 3385 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3386 3387 cxgb_insert_tid(cdev, d->client, newtoep, tid); 3388 so_lock(so); 3389 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); 3390 so_unlock(so); 3391 3392 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) && 3393 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; 3394 3395 if (newtoep->tp_ulp_mode) { 3396 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3397 3398 if (ddp_mbuf == NULL) 3399 newtoep->tp_ulp_mode = 0; 3400 } 3401 3402 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d", 3403 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); 3404 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); 3405 /* 3406 * XXX workaround for lack of syncache drop 3407 */ 3408 toepcb_hold(newtoep); 3409 syncache_add_accept_req(req, so, newtoep); 3410 3411 rpl = cplhdr(reply_mbuf); 3412 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl); 3413 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3414 rpl->wr.wr_lo = 0; 3415 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3416 rpl->opt2 = htonl(calc_opt2(so, tdev)); 3417 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3418 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten 3419 3420 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | 3421 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); 3422 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) | 3423 CPL_PASS_OPEN_ACCEPT); 3424 3425 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status); 3426 3427 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep)); 3428 3429 l2t_send(cdev, reply_mbuf, e); 3430 m_free(m); 3431 if (newtoep->tp_ulp_mode) { 3432 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS, 3433 V_TF_DDP_OFF(1) | 3434 TP_DDP_TIMER_WORKAROUND_MASK, 3435 V_TF_DDP_OFF(1) | 3436 TP_DDP_TIMER_WORKAROUND_VAL, 1); 3437 } else 3438 printf("not offloading\n"); 3439 3440 3441 3442 return; 3443reject: 3444 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3445 mk_pass_accept_rpl(reply_mbuf, m); 3446 else 3447 mk_tid_release(reply_mbuf, newtoep, tid); 3448 cxgb_ofld_send(cdev, reply_mbuf); 3449 m_free(m); 3450out: 3451#if 0 3452 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3453#else 3454 return; 3455#endif 3456} 3457 
/*
 * Handle a CPL_PASS_ACCEPT_REQ message (incoming passive-open SYN).
 *
 * Top-level CPL dispatch hook.  The handler context is the listen_ctx
 * registered for the offloaded listening socket; it supplies the listen
 * socket and the per-device tom_data.  All real work is done by
 * process_pass_accept_req(), which also takes ownership of the mbuf.
 * Returns 0 (mbuf consumed).
 */
static int
do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
	struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
	struct tom_data *d = listen_ctx->tom_data;

#if VALIDATE_TID
	/*
	 * NOTE(review): debug-only TID validation inherited from the Linux
	 * driver (printk/KERN_ERR/unlikely and the undeclared 'lsk' are not
	 * FreeBSD symbols), so this branch cannot compile here and is
	 * normally not built.
	 */
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	if (unlikely(!lsk)) {
		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
		       cdev->name,
		       (unsigned long)((union listen_entry *)ctx -
			t->stid_tab));
		return CPL_RET_BUF_DONE;
	}
	if (unlikely(tid >= t->ntids)) {
		printk(KERN_ERR "%s: passive open TID %u too large\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
	/*
	 * For T3A the current user of the TID may have closed but its last
	 * message(s) may have been backlogged so the TID appears to be still
	 * in use.  Just take the TID away, the connection can close at its
	 * own leisure.  For T3B this situation is a bug.
	 */
	if (!valid_new_tid(t, tid) &&
	    cdev->type != T3A) {
		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
#endif

	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
	return (0);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
3506 */ 3507static void 3508assign_rxopt(struct socket *so, unsigned int opt) 3509{ 3510 struct tcpcb *tp = so_sototcpcb(so); 3511 struct toepcb *toep = tp->t_toe; 3512 const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep)); 3513 3514 inp_lock_assert(tp->t_inpcb); 3515 3516 toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3517 tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0; 3518 tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0; 3519 tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0; 3520 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 3521 (TF_RCVD_SCALE|TF_REQ_SCALE)) 3522 tp->rcv_scale = tp->request_r_scale; 3523} 3524 3525/* 3526 * Completes some final bits of initialization for just established connections 3527 * and changes their state to TCP_ESTABLISHED. 3528 * 3529 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1. 3530 */ 3531static void 3532make_established(struct socket *so, u32 snd_isn, unsigned int opt) 3533{ 3534 struct tcpcb *tp = so_sototcpcb(so); 3535 struct toepcb *toep = tp->t_toe; 3536 3537 toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn; 3538 assign_rxopt(so, opt); 3539 3540 /* 3541 *XXXXXXXXXXX 3542 * 3543 */ 3544#ifdef notyet 3545 so->so_proto->pr_ctloutput = t3_ctloutput; 3546#endif 3547 3548#if 0 3549 inet_sk(sk)->id = tp->write_seq ^ jiffies; 3550#endif 3551 /* 3552 * XXX not clear what rcv_wup maps to 3553 */ 3554 /* 3555 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't 3556 * pass through opt0. 
3557 */ 3558 if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10)) 3559 toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10); 3560 3561 dump_toepcb(toep); 3562 3563#ifdef notyet 3564/* 3565 * no clean interface for marking ARP up to date 3566 */ 3567 dst_confirm(sk->sk_dst_cache); 3568#endif 3569 tp->t_starttime = ticks; 3570 tp->t_state = TCPS_ESTABLISHED; 3571 soisconnected(so); 3572} 3573 3574static int 3575syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep) 3576{ 3577 3578 struct in_conninfo inc; 3579 struct tcpopt to; 3580 struct tcphdr th; 3581 int mss, wsf, sack, ts; 3582 struct mbuf *m = NULL; 3583 const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev); 3584 unsigned int opt; 3585 3586#ifdef MAC 3587#error "no MAC support" 3588#endif 3589 3590 opt = ntohs(req->tcp_opt); 3591 3592 bzero(&to, sizeof(struct tcpopt)); 3593 3594 /* 3595 * Fill out information for entering us into the syncache 3596 */ 3597 inc.inc_fport = th.th_sport = req->peer_port; 3598 inc.inc_lport = th.th_dport = req->local_port; 3599 th.th_seq = req->rcv_isn; 3600 th.th_flags = TH_ACK; 3601 3602 inc.inc_isipv6 = 0; 3603 inc.inc_len = 0; 3604 inc.inc_faddr.s_addr = req->peer_ip; 3605 inc.inc_laddr.s_addr = req->local_ip; 3606 3607 mss = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3608 wsf = G_TCPOPT_WSCALE_OK(opt); 3609 ts = G_TCPOPT_TSTAMP(opt); 3610 sack = G_TCPOPT_SACK(opt); 3611 3612 to.to_mss = mss; 3613 to.to_wscale = G_TCPOPT_SND_WSCALE(opt); 3614 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3615 3616 DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n", 3617 ntohl(req->local_ip), ntohs(req->local_port), 3618 ntohl(req->peer_ip), ntohs(req->peer_port), 3619 mss, wsf, ts, sack); 3620 return tcp_offload_syncache_expand(&inc, &to, &th, so, m); 3621} 3622 3623 3624/* 3625 * Process a CPL_PASS_ESTABLISH message. 
XXX a lot of the locking doesn't work 3626 * if we are in TCP_SYN_RECV due to crossed SYNs 3627 */ 3628static int 3629do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3630{ 3631 struct cpl_pass_establish *req = cplhdr(m); 3632 struct toepcb *toep = (struct toepcb *)ctx; 3633 struct tcpcb *tp = toep->tp_tp; 3634 struct socket *so, *lso; 3635 struct t3c_data *td = T3C_DATA(cdev); 3636 struct sockbuf *snd, *rcv; 3637 3638 // Complete socket initialization now that we have the SND_ISN 3639 3640 struct toedev *tdev; 3641 3642 3643 tdev = toep->tp_toedev; 3644 3645 inp_wlock(tp->t_inpcb); 3646 3647 /* 3648 * 3649 * XXX need to add reference while we're manipulating 3650 */ 3651 so = lso = inp_inpcbtosocket(tp->t_inpcb); 3652 3653 inp_wunlock(tp->t_inpcb); 3654 3655 so_lock(so); 3656 LIST_REMOVE(toep, synq_entry); 3657 so_unlock(so); 3658 3659 if (!syncache_expand_establish_req(req, &so, toep)) { 3660 /* 3661 * No entry 3662 */ 3663 CXGB_UNIMPLEMENTED(); 3664 } 3665 if (so == NULL) { 3666 /* 3667 * Couldn't create the socket 3668 */ 3669 CXGB_UNIMPLEMENTED(); 3670 } 3671 3672 tp = so_sototcpcb(so); 3673 inp_wlock(tp->t_inpcb); 3674 3675 snd = so_sockbuf_snd(so); 3676 rcv = so_sockbuf_rcv(so); 3677 3678 snd->sb_flags |= SB_NOCOALESCE; 3679 rcv->sb_flags |= SB_NOCOALESCE; 3680 3681 toep->tp_tp = tp; 3682 toep->tp_flags = 0; 3683 tp->t_toe = toep; 3684 reset_wr_list(toep); 3685 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3686 tp->rcv_nxt = toep->tp_copied_seq; 3687 install_offload_ops(so); 3688 3689 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs); 3690 toep->tp_wr_unacked = 0; 3691 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); 3692 toep->tp_qset_idx = 0; 3693 toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu); 3694 3695 /* 3696 * XXX Cancel any keep alive timer 3697 */ 3698 3699 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); 3700 3701 /* 3702 * XXX workaround for lack of syncache drop 3703 */ 3704 
toepcb_release(toep); 3705 inp_wunlock(tp->t_inpcb); 3706 3707 CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid); 3708 cxgb_log_tcb(cdev->adapter, toep->tp_tid); 3709#ifdef notyet 3710 /* 3711 * XXX not sure how these checks map to us 3712 */ 3713 if (unlikely(sk->sk_socket)) { // simultaneous opens only 3714 sk->sk_state_change(sk); 3715 sk_wake_async(so, 0, POLL_OUT); 3716 } 3717 /* 3718 * The state for the new connection is now up to date. 3719 * Next check if we should add the connection to the parent's 3720 * accept queue. When the parent closes it resets connections 3721 * on its SYN queue, so check if we are being reset. If so we 3722 * don't need to do anything more, the coming ABORT_RPL will 3723 * destroy this socket. Otherwise move the connection to the 3724 * accept queue. 3725 * 3726 * Note that we reset the synq before closing the server so if 3727 * we are not being reset the stid is still open. 3728 */ 3729 if (unlikely(!tp->forward_skb_hint)) { // removed from synq 3730 __kfree_skb(skb); 3731 goto unlock; 3732 } 3733#endif 3734 m_free(m); 3735 3736 return (0); 3737} 3738 3739/* 3740 * Fill in the right TID for CPL messages waiting in the out-of-order queue 3741 * and send them to the TOE. 3742 */ 3743static void 3744fixup_and_send_ofo(struct toepcb *toep) 3745{ 3746 struct mbuf *m; 3747 struct toedev *tdev = toep->tp_toedev; 3748 struct tcpcb *tp = toep->tp_tp; 3749 unsigned int tid = toep->tp_tid; 3750 3751 log(LOG_NOTICE, "fixup_and_send_ofo\n"); 3752 3753 inp_lock_assert(tp->t_inpcb); 3754 while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) { 3755 /* 3756 * A variety of messages can be waiting but the fields we'll 3757 * be touching are common to all so any message type will do. 
3758 */ 3759 struct cpl_close_con_req *p = cplhdr(m); 3760 3761 p->wr.wr_lo = htonl(V_WR_TID(tid)); 3762 OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid)); 3763 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 3764 } 3765} 3766 3767/* 3768 * Updates socket state from an active establish CPL message. Runs with the 3769 * socket lock held. 3770 */ 3771static void 3772socket_act_establish(struct socket *so, struct mbuf *m) 3773{ 3774 struct cpl_act_establish *req = cplhdr(m); 3775 u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ 3776 struct tcpcb *tp = so_sototcpcb(so); 3777 struct toepcb *toep = tp->t_toe; 3778 3779 if (__predict_false(tp->t_state != TCPS_SYN_SENT)) 3780 log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n", 3781 toep->tp_tid, tp->t_state); 3782 3783 tp->ts_recent_age = ticks; 3784 tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn; 3785 toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs; 3786 3787 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); 3788 3789 /* 3790 * Now that we finally have a TID send any CPL messages that we had to 3791 * defer for lack of a TID. 3792 */ 3793 if (mbufq_len(&toep->out_of_order_queue)) 3794 fixup_and_send_ofo(toep); 3795 3796 if (__predict_false(so_state_get(so) & SS_NOFDREF)) { 3797 /* 3798 * XXX does this even make sense? 3799 */ 3800 so_sorwakeup(so); 3801 } 3802 m_free(m); 3803#ifdef notyet 3804/* 3805 * XXX assume no write requests permitted while socket connection is 3806 * incomplete 3807 */ 3808 /* 3809 * Currently the send queue must be empty at this point because the 3810 * socket layer does not send anything before a connection is 3811 * established. To be future proof though we handle the possibility 3812 * that there are pending buffers to send (either TX_DATA or 3813 * CLOSE_CON_REQ). First we need to adjust the sequence number of the 3814 * buffers according to the just learned write_seq, and then we send 3815 * them on their way. 
3816 */ 3817 fixup_pending_writeq_buffers(sk); 3818 if (t3_push_frames(so, 1)) 3819 sk->sk_write_space(sk); 3820#endif 3821 3822 toep->tp_state = tp->t_state; 3823 tcpstat.tcps_connects++; 3824 3825} 3826 3827/* 3828 * Process a CPL_ACT_ESTABLISH message. 3829 */ 3830static int 3831do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3832{ 3833 struct cpl_act_establish *req = cplhdr(m); 3834 unsigned int tid = GET_TID(req); 3835 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); 3836 struct toepcb *toep = (struct toepcb *)ctx; 3837 struct tcpcb *tp = toep->tp_tp; 3838 struct socket *so; 3839 struct toedev *tdev; 3840 struct tom_data *d; 3841 3842 if (tp == NULL) { 3843 free_atid(cdev, atid); 3844 return (0); 3845 } 3846 inp_wlock(tp->t_inpcb); 3847 3848 /* 3849 * XXX 3850 */ 3851 so = inp_inpcbtosocket(tp->t_inpcb); 3852 tdev = toep->tp_toedev; /* blow up here if link was down */ 3853 d = TOM_DATA(tdev); 3854 3855 /* 3856 * It's OK if the TID is currently in use, the owning socket may have 3857 * backlogged its last CPL message(s). Just take it away. 3858 */ 3859 toep->tp_tid = tid; 3860 toep->tp_tp = tp; 3861 so_insert_tid(d, toep, tid); 3862 free_atid(cdev, atid); 3863 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); 3864 3865 socket_act_establish(so, m); 3866 inp_wunlock(tp->t_inpcb); 3867 CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid); 3868 cxgb_log_tcb(cdev->adapter, toep->tp_tid); 3869 3870 return (0); 3871} 3872 3873/* 3874 * Process an acknowledgment of WR completion. Advance snd_una and send the 3875 * next batch of work requests from the write queue. 
3876 */ 3877static void 3878wr_ack(struct toepcb *toep, struct mbuf *m) 3879{ 3880 struct tcpcb *tp = toep->tp_tp; 3881 struct cpl_wr_ack *hdr = cplhdr(m); 3882 struct socket *so; 3883 unsigned int credits = ntohs(hdr->credits); 3884 u32 snd_una = ntohl(hdr->snd_una); 3885 int bytes = 0; 3886 struct sockbuf *snd; 3887 3888 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3889 3890 inp_wlock(tp->t_inpcb); 3891 so = inp_inpcbtosocket(tp->t_inpcb); 3892 toep->tp_wr_avail += credits; 3893 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3894 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3895 3896 while (credits) { 3897 struct mbuf *p = peek_wr(toep); 3898 3899 if (__predict_false(!p)) { 3900 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3901 "nothing pending, state %u wr_avail=%u\n", 3902 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3903 break; 3904 } 3905 CTR2(KTR_TOM, 3906 "wr_ack: p->credits=%d p->bytes=%d", 3907 p->m_pkthdr.csum_data, p->m_pkthdr.len); 3908 KASSERT(p->m_pkthdr.csum_data != 0, 3909 ("empty request still on list")); 3910 3911 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3912 3913#if DEBUG_WR > 1 3914 struct tx_data_wr *w = cplhdr(p); 3915 log(LOG_ERR, 3916 "TID %u got %u WR credits, need %u, len %u, " 3917 "main body %u, frags %u, seq # %u, ACK una %u," 3918 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3919 toep->tp_tid, credits, p->csum, p->len, 3920 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3921 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3922 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3923#endif 3924 p->m_pkthdr.csum_data -= credits; 3925 break; 3926 } else { 3927 dequeue_wr(toep); 3928 credits -= p->m_pkthdr.csum_data; 3929 bytes += p->m_pkthdr.len; 3930 CTR3(KTR_TOM, 3931 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3932 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3933 3934 m_free(p); 3935 } 3936 } 3937 3938#if DEBUG_WR 3939 
check_wr_invariants(tp); 3940#endif 3941 3942 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3943#if VALIDATE_SEQ 3944 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3945 3946 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3947 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3948 toep->tp_tid, tp->snd_una); 3949#endif 3950 goto out_free; 3951 } 3952 3953 if (tp->snd_una != snd_una) { 3954 tp->snd_una = snd_una; 3955 tp->ts_recent_age = ticks; 3956#ifdef notyet 3957 /* 3958 * Keep ARP entry "minty fresh" 3959 */ 3960 dst_confirm(sk->sk_dst_cache); 3961#endif 3962 if (tp->snd_una == tp->snd_nxt) 3963 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3964 } 3965 3966 snd = so_sockbuf_snd(so); 3967 if (bytes) { 3968 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3969 snd = so_sockbuf_snd(so); 3970 sockbuf_lock(snd); 3971 sbdrop_locked(snd, bytes); 3972 so_sowwakeup_locked(so); 3973 } 3974 3975 if (snd->sb_sndptroff < snd->sb_cc) 3976 t3_push_frames(so, 0); 3977 3978out_free: 3979 inp_wunlock(tp->t_inpcb); 3980 m_free(m); 3981} 3982 3983/* 3984 * Handler for TX_DATA_ACK CPL messages. 3985 */ 3986static int 3987do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3988{ 3989 struct toepcb *toep = (struct toepcb *)ctx; 3990 3991 VALIDATE_SOCK(so); 3992 3993 wr_ack(toep, m); 3994 return 0; 3995} 3996 3997/* 3998 * Handler for TRACE_PKT CPL messages. Just sink these packets. 3999 */ 4000static int 4001do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 4002{ 4003 m_freem(m); 4004 return 0; 4005} 4006 4007/* 4008 * Reset a connection that is on a listener's SYN queue or accept queue, 4009 * i.e., one that has not had a struct socket associated with it. 4010 * Must be called from process context. 4011 * 4012 * Modeled after code in inet_csk_listen_stop(). 
 */
static void
t3_reset_listen_child(struct socket *child)
{
	struct tcpcb *tp = so_sototcpcb(child);

	/* Send an ABORT_REQ for the offloaded, not-yet-accepted child. */
	t3_send_reset(tp->t_toe);
}


/*
 * so_listeners_apply_all() callback: abort one child connection if it is
 * offloaded (TF_TOE).  'arg' is unused.
 */
static void
t3_child_disconnect(struct socket *so, void *arg)
{
	struct tcpcb *tp = so_sototcpcb(so);

	if (tp->t_flags & TF_TOE) {
		inp_wlock(tp->t_inpcb);
		t3_reset_listen_child(so);
		inp_wunlock(tp->t_inpcb);
	}
}

/*
 * Disconnect offloaded established but not yet accepted connections sitting
 * on a server's accept_queue.  We just send an ABORT_REQ at this point and
 * finish off the disconnect later as we may need to wait for the ABORT_RPL.
 */
void
t3_disconnect_acceptq(struct socket *listen_so)
{

	/* Walk the accept queue under the listen socket's lock. */
	so_lock(listen_so);
	so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
	so_unlock(listen_so);
}

/*
 * Reset offloaded connections sitting on a server's syn queue.  As above
 * we send ABORT_REQ and finish off when we get ABORT_RPL.
4052 */ 4053 4054void 4055t3_reset_synq(struct listen_ctx *lctx) 4056{ 4057 struct toepcb *toep; 4058 4059 so_lock(lctx->lso); 4060 while (!LIST_EMPTY(&lctx->synq_head)) { 4061 toep = LIST_FIRST(&lctx->synq_head); 4062 LIST_REMOVE(toep, synq_entry); 4063 toep->tp_tp = NULL; 4064 t3_send_reset(toep); 4065 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); 4066 toepcb_release(toep); 4067 } 4068 so_unlock(lctx->lso); 4069} 4070 4071 4072int 4073t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl, 4074 unsigned int nppods, unsigned int tag, unsigned int maxoff, 4075 unsigned int pg_off, unsigned int color) 4076{ 4077 unsigned int i, j, pidx; 4078 struct pagepod *p; 4079 struct mbuf *m; 4080 struct ulp_mem_io *req; 4081 unsigned int tid = toep->tp_tid; 4082 const struct tom_data *td = TOM_DATA(toep->tp_toedev); 4083 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; 4084 4085 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", 4086 gl, nppods, tag, maxoff, pg_off, color); 4087 4088 for (i = 0; i < nppods; ++i) { 4089 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); 4090 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4091 req = mtod(m, struct ulp_mem_io *); 4092 m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE; 4093 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4094 req->wr.wr_lo = 0; 4095 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) | 4096 V_ULPTX_CMD(ULP_MEM_WRITE)); 4097 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) | 4098 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1)); 4099 4100 p = (struct pagepod *)(req + 1); 4101 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) { 4102 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); 4103 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) | 4104 V_PPOD_COLOR(color)); 4105 p->pp_max_offset = htonl(maxoff); 4106 p->pp_page_offset = htonl(pg_off); 4107 p->pp_rsvd = 0; 4108 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) 4109 p->pp_addr[j] = 
pidx < gl->dgl_nelem ? 4110 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0; 4111 } else 4112 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */ 4113 send_or_defer(toep, m, 0); 4114 ppod_addr += PPOD_SIZE; 4115 } 4116 return (0); 4117} 4118 4119/* 4120 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command. 4121 */ 4122static inline void 4123mk_cpl_barrier_ulp(struct cpl_barrier *b) 4124{ 4125 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b; 4126 4127 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4128 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8)); 4129 b->opcode = CPL_BARRIER; 4130} 4131 4132/* 4133 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command. 4134 */ 4135static inline void 4136mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno) 4137{ 4138 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 4139 4140 txpkt = (struct ulp_txpkt *)req; 4141 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4142 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 4143 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid)); 4144 req->cpuno = htons(cpuno); 4145} 4146 4147/* 4148 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command. 4149 */ 4150static inline void 4151mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, 4152 unsigned int word, uint64_t mask, uint64_t val) 4153{ 4154 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 4155 4156 CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 4157 tid, word, mask, val); 4158 4159 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4160 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 4161 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); 4162 req->reply = V_NO_REPLY(1); 4163 req->cpu_idx = 0; 4164 req->word = htons(word); 4165 req->mask = htobe64(mask); 4166 req->val = htobe64(val); 4167} 4168 4169/* 4170 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command. 
4171 */ 4172static void 4173mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack, 4174 unsigned int tid, unsigned int credits) 4175{ 4176 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack; 4177 4178 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4179 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8)); 4180 OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid)); 4181 ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 4182 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) | 4183 V_RX_CREDITS(credits)); 4184} 4185 4186void 4187t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx) 4188{ 4189 unsigned int wrlen; 4190 struct mbuf *m; 4191 struct work_request_hdr *wr; 4192 struct cpl_barrier *lock; 4193 struct cpl_set_tcb_field *req; 4194 struct cpl_get_tcb *getreq; 4195 struct ddp_state *p = &toep->tp_ddp_state; 4196 4197#if 0 4198 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4199#endif 4200 wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) + 4201 sizeof(*getreq); 4202 m = m_gethdr_nofail(wrlen); 4203 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4204 wr = mtod(m, struct work_request_hdr *); 4205 bzero(wr, wrlen); 4206 4207 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4208 m->m_pkthdr.len = m->m_len = wrlen; 4209 4210 lock = (struct cpl_barrier *)(wr + 1); 4211 mk_cpl_barrier_ulp(lock); 4212 4213 req = (struct cpl_set_tcb_field *)(lock + 1); 4214 4215 CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx); 4216 4217 /* Hmmm, not sure if this actually a good thing: reactivating 4218 * the other buffer might be an issue if it has been completed 4219 * already. However, that is unlikely, since the fact that the UBUF 4220 * is not completed indicates that there is no oustanding data. 
	 */
	if (bufidx == 0)
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF0_VALID(1),
				     V_TF_DDP_ACTIVE_BUF(1));
	else
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF1_VALID(1), 0);

	/* Follow the flag update with a GET_TCB so we learn how much data
	 * landed in the cancelled buffer; the reply is counted below. */
	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/* Keep track of the number of outstanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
		  "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @toep: the offloaded connection that owns the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.  Read the TCB to
 * determine how much data was written into the buffer before the overlay
 * took place.
4263 */ 4264void 4265t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, 4266 unsigned int tag1, unsigned int len) 4267{ 4268 unsigned int wrlen; 4269 struct mbuf *m; 4270 struct work_request_hdr *wr; 4271 struct cpl_get_tcb *getreq; 4272 struct cpl_set_tcb_field *req; 4273 struct ddp_state *p = &toep->tp_ddp_state; 4274 4275 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", 4276 bufidx, tag0, tag1, len); 4277#if 0 4278 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4279#endif 4280 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); 4281 m = m_gethdr_nofail(wrlen); 4282 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4283 wr = mtod(m, struct work_request_hdr *); 4284 m->m_pkthdr.len = m->m_len = wrlen; 4285 bzero(wr, wrlen); 4286 4287 4288 /* Set the ATOMIC flag to make sure that TP processes the following 4289 * CPLs in an atomic manner and no wire segments can be interleaved. 4290 */ 4291 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); 4292 req = (struct cpl_set_tcb_field *)(wr + 1); 4293 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG, 4294 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) | 4295 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32, 4296 V_TCB_RX_DDP_BUF0_TAG(tag0) | 4297 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32); 4298 req++; 4299 if (bufidx == 0) { 4300 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN, 4301 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4302 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 4303 req++; 4304 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4305 V_TF_DDP_PUSH_DISABLE_0(1) | 4306 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4307 V_TF_DDP_PUSH_DISABLE_0(0) | 4308 V_TF_DDP_BUF0_VALID(1)); 4309 } else { 4310 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN, 4311 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN), 4312 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len)); 4313 req++; 4314 mk_set_tcb_field_ulp(req, toep->tp_tid, 
W_TCB_RX_DDP_FLAGS, 4315 V_TF_DDP_PUSH_DISABLE_1(1) | 4316 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4317 V_TF_DDP_PUSH_DISABLE_1(0) | 4318 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1)); 4319 } 4320 4321 getreq = (struct cpl_get_tcb *)(req + 1); 4322 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4323 4324 /* Keep track of the number of oustanding CPL_GET_TCB requests 4325 */ 4326 p->get_tcb_count++; 4327 4328#ifdef T3_TRACE 4329 T3_TRACE4(TIDTB(sk), 4330 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u " 4331 "len %d", 4332 bufidx, tag0, tag1, len); 4333#endif 4334 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4335} 4336 4337/* 4338 * Sends a compound WR containing all the CPL messages needed to program the 4339 * two HW DDP buffers, namely optionally setting up the length and offset of 4340 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK. 4341 */ 4342void 4343t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0, 4344 unsigned int len1, unsigned int offset1, 4345 uint64_t ddp_flags, uint64_t flag_mask, int modulate) 4346{ 4347 unsigned int wrlen; 4348 struct mbuf *m; 4349 struct work_request_hdr *wr; 4350 struct cpl_set_tcb_field *req; 4351 4352 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ", 4353 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff); 4354 4355#if 0 4356 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4357#endif 4358 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) + 4359 (len1 ? sizeof(*req) : 0) + 4360 (modulate ? 
sizeof(struct cpl_rx_data_ack) : 0); 4361 m = m_gethdr_nofail(wrlen); 4362 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4363 wr = mtod(m, struct work_request_hdr *); 4364 bzero(wr, wrlen); 4365 4366 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4367 m->m_pkthdr.len = m->m_len = wrlen; 4368 4369 req = (struct cpl_set_tcb_field *)(wr + 1); 4370 if (len0) { /* program buffer 0 offset and length */ 4371 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET, 4372 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 4373 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4374 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) | 4375 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0)); 4376 req++; 4377 } 4378 if (len1) { /* program buffer 1 offset and length */ 4379 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET, 4380 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 4381 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32, 4382 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) | 4383 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32); 4384 req++; 4385 } 4386 4387 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask, 4388 ddp_flags); 4389 4390 if (modulate) { 4391 mk_rx_data_ack_ulp(toep, 4392 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid, 4393 toep->tp_copied_seq - toep->tp_rcv_wup); 4394 toep->tp_rcv_wup = toep->tp_copied_seq; 4395 } 4396 4397#ifdef T3_TRACE 4398 T3_TRACE5(TIDTB(sk), 4399 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x " 4400 "modulate %d", 4401 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff, 4402 modulate); 4403#endif 4404 4405 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4406} 4407 4408void 4409t3_init_wr_tab(unsigned int wr_len) 4410{ 4411 int i; 4412 4413 if (mbuf_wrs[1]) /* already initialized */ 4414 return; 4415 4416 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) { 4417 int sgl_len = (3 * i) / 2 + (i & 1); 4418 4419 sgl_len += 3; 4420 mbuf_wrs[i] = sgl_len <= wr_len ? 
4421 1 : 1 + (sgl_len - 2) / (wr_len - 1); 4422 } 4423 4424 wrlen = wr_len * 8; 4425} 4426 4427int 4428t3_init_cpl_io(void) 4429{ 4430#ifdef notyet 4431 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); 4432 if (!tcphdr_skb) { 4433 log(LOG_ERR, 4434 "Chelsio TCP offload: can't allocate sk_buff\n"); 4435 return -1; 4436 } 4437 skb_put(tcphdr_skb, sizeof(struct tcphdr)); 4438 tcphdr_skb->h.raw = tcphdr_skb->data; 4439 memset(tcphdr_skb->data, 0, tcphdr_skb->len); 4440#endif 4441 4442 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); 4443 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); 4444 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack); 4445 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data); 4446 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 4447 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 4448 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish); 4449 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req); 4450 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 4451 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); 4452 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp); 4453 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); 4454 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify); 4455 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt); 4456 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); 4457 return (0); 4458} 4459 4460