cxgb_cpl_io.c revision 178767
1/************************************************************************** 2 3Copyright (c) 2007-2008, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 178767 2008-05-05 01:41:53Z kmacy $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/socket.h> 43#include <sys/sysctl.h> 44#include <sys/syslog.h> 45#include <sys/protosw.h> 46#include <sys/priv.h> 47 48#include <net/if.h> 49#include <net/route.h> 50 51#include <netinet/in.h> 52#include <netinet/in_pcb.h> 53#include <netinet/in_systm.h> 54#include <netinet/in_var.h> 55 56 57#include <dev/cxgb/cxgb_osdep.h> 58#include <dev/cxgb/sys/mbufq.h> 59 60#include <netinet/ip.h> 61#include <netinet/tcp_var.h> 62#include <netinet/tcp_fsm.h> 63#include <netinet/tcp_offload.h> 64#include <netinet/tcp_seq.h> 65#include <netinet/tcp_syncache.h> 66#include <netinet/tcp_timer.h> 67#include <net/route.h> 68 69#include <dev/cxgb/t3cdev.h> 70#include <dev/cxgb/common/cxgb_firmware_exports.h> 71#include <dev/cxgb/common/cxgb_t3_cpl.h> 72#include <dev/cxgb/common/cxgb_tcb.h> 73#include <dev/cxgb/common/cxgb_ctl_defs.h> 74#include <dev/cxgb/cxgb_offload.h> 75#include <vm/vm.h> 76#include <vm/pmap.h> 77#include <machine/bus.h> 78#include <dev/cxgb/sys/mvec.h> 79#include <dev/cxgb/ulp/toecore/cxgb_toedev.h> 80#include <dev/cxgb/ulp/tom/cxgb_defs.h> 81#include <dev/cxgb/ulp/tom/cxgb_tom.h> 82#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h> 83#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 84#include <dev/cxgb/ulp/tom/cxgb_tcp.h> 85 86#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h> 87 88/* 89 * For ULP connections HW may add headers, e.g., for digests, that aren't part 90 * of the messages sent by the host but that are part of the TCP payload and 91 * therefore consume TCP sequence space. 
Tx connection parameters that 92 * operate in TCP sequence space are affected by the HW additions and need to 93 * compensate for them to accurately track TCP sequence numbers. This array 94 * contains the compensating extra lengths for ULP packets. It is indexed by 95 * a packet's ULP submode. 96 */ 97const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 98 99#ifdef notyet 100/* 101 * This sk_buff holds a fake header-only TCP segment that we use whenever we 102 * need to exploit SW TCP functionality that expects TCP headers, such as 103 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple 104 * CPUs without locking. 105 */ 106static struct mbuf *tcphdr_mbuf __read_mostly; 107#endif 108 109/* 110 * Size of WRs in bytes. Note that we assume all devices we are handling have 111 * the same WR size. 112 */ 113static unsigned int wrlen __read_mostly; 114 115/* 116 * The number of WRs needed for an skb depends on the number of page fragments 117 * in the skb and whether it has any payload in its main body. This maps the 118 * length of the gather list represented by an skb into the # of necessary WRs. 119 */ 120static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; 121 122/* 123 * Max receive window supported by HW in bytes. Only a small part of it can 124 * be set through option0, the rest needs to be set through RX_DATA_ACK. 125 */ 126#define MAX_RCV_WND ((1U << 27) - 1) 127 128/* 129 * Min receive window. We want it to be large enough to accommodate receive 130 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
131 */ 132#define MIN_RCV_WND (24 * 1024U) 133#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS) 134 135#define VALIDATE_SEQ 0 136#define VALIDATE_SOCK(so) 137#define DEBUG_WR 0 138 139#define TCP_TIMEWAIT 1 140#define TCP_CLOSE 2 141#define TCP_DROP 3 142 143extern int tcp_do_autorcvbuf; 144extern int tcp_do_autosndbuf; 145extern int tcp_autorcvbuf_max; 146extern int tcp_autosndbuf_max; 147 148static void t3_send_reset(struct toepcb *toep); 149static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status); 150static inline void free_atid(struct t3cdev *cdev, unsigned int tid); 151static void handle_syncache_event(int event, void *arg); 152 153static inline void 154SBAPPEND(struct sockbuf *sb, struct mbuf *n) 155{ 156 struct mbuf *m; 157 158 m = sb->sb_mb; 159 while (m) { 160 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 161 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 162 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 163 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 164 m->m_next, m->m_nextpkt, m->m_flags)); 165 m = m->m_next; 166 } 167 m = n; 168 while (m) { 169 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 170 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 171 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 172 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 173 m->m_next, m->m_nextpkt, m->m_flags)); 174 m = m->m_next; 175 } 176 KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set")); 177 sbappendstream_locked(sb, n); 178 m = sb->sb_mb; 179 180 while (m) { 181 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 182 m->m_next, m->m_nextpkt, m->m_flags)); 183 m = m->m_next; 184 } 185} 186 187static inline int 188is_t3a(const struct toedev *dev) 189{ 190 
return (dev->tod_ttid == TOE_ID_CHELSIO_T3); 191} 192 193static void 194dump_toepcb(struct toepcb *toep) 195{ 196 DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n", 197 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, 198 toep->tp_mtu_idx, toep->tp_tid); 199 200 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", 201 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 202 toep->tp_mss_clamp, toep->tp_flags); 203} 204 205#ifndef RTALLOC2_DEFINED 206static struct rtentry * 207rtalloc2(struct sockaddr *dst, int report, u_long ignflags) 208{ 209 struct rtentry *rt = NULL; 210 211 if ((rt = rtalloc1(dst, report, ignflags)) != NULL) 212 RT_UNLOCK(rt); 213 214 return (rt); 215} 216#endif 217 218/* 219 * Determine whether to send a CPL message now or defer it. A message is 220 * deferred if the connection is in SYN_SENT since we don't know the TID yet. 221 * For connections in other states the message is sent immediately. 222 * If through_l2t is set the message is subject to ARP processing, otherwise 223 * it is sent directly. 224 */ 225static inline void 226send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t) 227{ 228 struct tcpcb *tp = toep->tp_tp; 229 230 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) { 231 inp_wlock(tp->t_inpcb); 232 mbufq_tail(&toep->out_of_order_queue, m); // defer 233 inp_wunlock(tp->t_inpcb); 234 } else if (through_l2t) 235 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T 236 else 237 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly 238} 239 240static inline unsigned int 241mkprio(unsigned int cntrl, const struct toepcb *toep) 242{ 243 return (cntrl); 244} 245 246/* 247 * Populate a TID_RELEASE WR. The skb must be already propely sized. 
248 */ 249static inline void 250mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid) 251{ 252 struct cpl_tid_release *req; 253 254 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep)); 255 m->m_pkthdr.len = m->m_len = sizeof(*req); 256 req = mtod(m, struct cpl_tid_release *); 257 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 258 req->wr.wr_lo = 0; 259 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); 260} 261 262static inline void 263make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) 264{ 265 struct tcpcb *tp = so_sototcpcb(so); 266 struct toepcb *toep = tp->t_toe; 267 struct tx_data_wr *req; 268 struct sockbuf *snd; 269 270 inp_lock_assert(tp->t_inpcb); 271 snd = so_sockbuf_snd(so); 272 273 req = mtod(m, struct tx_data_wr *); 274 m->m_len = sizeof(*req); 275 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 276 req->wr_lo = htonl(V_WR_TID(toep->tp_tid)); 277 /* len includes the length of any HW ULP additions */ 278 req->len = htonl(len); 279 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx)); 280 /* V_TX_ULP_SUBMODE sets both the mode and submode */ 281 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) | 282 V_TX_URG(/* skb_urgent(skb) */ 0 ) | 283 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) && 284 (tail ? 0 : 1)))); 285 req->sndseq = htonl(tp->snd_nxt); 286 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) { 287 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 288 V_TX_CPU_IDX(toep->tp_qset)); 289 290 /* Sendbuffer is in units of 32KB. 
291 */ 292 if (tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE) 293 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15)); 294 else { 295 req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15)); 296 } 297 298 toep->tp_flags |= TP_DATASENT; 299 } 300} 301 302#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */ 303 304int 305t3_push_frames(struct socket *so, int req_completion) 306{ 307 struct tcpcb *tp = so_sototcpcb(so); 308 struct toepcb *toep = tp->t_toe; 309 310 struct mbuf *tail, *m0, *last; 311 struct t3cdev *cdev; 312 struct tom_data *d; 313 int state, bytes, count, total_bytes; 314 bus_dma_segment_t segs[TX_MAX_SEGS], *segp; 315 struct sockbuf *snd; 316 317 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { 318 DPRINTF("tcp state=%d\n", tp->t_state); 319 return (0); 320 } 321 322 state = so_state_get(so); 323 324 if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { 325 DPRINTF("disconnecting\n"); 326 327 return (0); 328 } 329 330 inp_lock_assert(tp->t_inpcb); 331 332 snd = so_sockbuf_snd(so); 333 sockbuf_lock(snd); 334 335 d = TOM_DATA(toep->tp_toedev); 336 cdev = d->cdev; 337 338 last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb; 339 340 total_bytes = 0; 341 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n", 342 toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last); 343 344 if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) { 345 KASSERT(tail, ("sbdrop error")); 346 last = tail = tail->m_next; 347 } 348 349 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { 350 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail); 351 sockbuf_unlock(snd); 352 353 return (0); 354 } 355 356 toep->tp_m_last = NULL; 357 while (toep->tp_wr_avail && (tail != NULL)) { 358 count = bytes = 0; 359 segp = segs; 360 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { 361 sockbuf_unlock(snd); 362 return (0); 363 } 364 /* 365 * If the data in tail fits as in-line, then 366 * make an immediate data wr. 
367 */ 368 if (tail->m_len <= IMM_LEN) { 369 count = 1; 370 bytes = tail->m_len; 371 last = tail; 372 tail = tail->m_next; 373 m_set_sgl(m0, NULL); 374 m_set_sgllen(m0, 0); 375 make_tx_data_wr(so, m0, bytes, tail); 376 m_append(m0, bytes, mtod(last, caddr_t)); 377 KASSERT(!m0->m_next, ("bad append")); 378 } else { 379 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) 380 && (tail != NULL) && (count < TX_MAX_SEGS-1)) { 381 bytes += tail->m_len; 382 last = tail; 383 count++; 384 /* 385 * technically an abuse to be using this for a VA 386 * but less gross than defining my own structure 387 * or calling pmap_kextract from here :-| 388 */ 389 segp->ds_addr = (bus_addr_t)tail->m_data; 390 segp->ds_len = tail->m_len; 391 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n", 392 count, mbuf_wrs[count], tail->m_data, tail->m_len); 393 segp++; 394 tail = tail->m_next; 395 } 396 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n", 397 toep->tp_wr_avail, count, mbuf_wrs[count], tail); 398 399 m_set_sgl(m0, segs); 400 m_set_sgllen(m0, count); 401 make_tx_data_wr(so, m0, bytes, tail); 402 } 403 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep)); 404 405 if (tail) { 406 snd->sb_sndptr = tail; 407 toep->tp_m_last = NULL; 408 } else 409 toep->tp_m_last = snd->sb_sndptr = last; 410 411 412 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last); 413 414 snd->sb_sndptroff += bytes; 415 total_bytes += bytes; 416 toep->tp_write_seq += bytes; 417 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d", 418 toep->tp_wr_avail, count, mbuf_wrs[count], tail, snd->sb_sndptr, snd->sb_sndptroff); 419 if (tail) 420 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x", 421 total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una); 422 else 423 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p snd_una=0x%08x", 424 total_bytes, toep->tp_m_last, tp->snd_una); 425 426 427#ifdef KTR 428{ 429 int i; 430 431 i = 0; 432 while (i < count 
&& m_get_sgllen(m0)) { 433 if ((count - i) >= 3) { 434 CTR6(KTR_TOM, 435 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d pa=0x%zx len=%d", 436 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len, 437 segs[i + 2].ds_addr, segs[i + 2].ds_len); 438 i += 3; 439 } else if ((count - i) == 2) { 440 CTR4(KTR_TOM, 441 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d", 442 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len); 443 i += 2; 444 } else { 445 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d", 446 segs[i].ds_addr, segs[i].ds_len); 447 i++; 448 } 449 450 } 451} 452#endif 453 /* 454 * remember credits used 455 */ 456 m0->m_pkthdr.csum_data = mbuf_wrs[count]; 457 m0->m_pkthdr.len = bytes; 458 toep->tp_wr_avail -= mbuf_wrs[count]; 459 toep->tp_wr_unacked += mbuf_wrs[count]; 460 461 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) || 462 toep->tp_wr_unacked >= toep->tp_wr_max / 2) { 463 struct work_request_hdr *wr = cplhdr(m0); 464 465 wr->wr_hi |= htonl(F_WR_COMPL); 466 toep->tp_wr_unacked = 0; 467 } 468 KASSERT((m0->m_pkthdr.csum_data > 0) && 469 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d", 470 m0->m_pkthdr.csum_data)); 471 m0->m_type = MT_DONTFREE; 472 enqueue_wr(toep, m0); 473 DPRINTF("sending offload tx with %d bytes in %d segments\n", 474 bytes, count); 475 l2t_send(cdev, m0, toep->tp_l2t); 476 } 477 sockbuf_unlock(snd); 478 return (total_bytes); 479} 480 481/* 482 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail 483 * under any circumstances. We take the easy way out and always queue the 484 * message to the write_queue. We can optimize the case where the queue is 485 * already empty though the optimization is probably not worth it. 
486 */ 487static void 488close_conn(struct socket *so) 489{ 490 struct mbuf *m; 491 struct cpl_close_con_req *req; 492 struct tom_data *d; 493 struct inpcb *inp = so_sotoinpcb(so); 494 struct tcpcb *tp; 495 struct toepcb *toep; 496 unsigned int tid; 497 498 499 inp_wlock(inp); 500 tp = so_sototcpcb(so); 501 toep = tp->t_toe; 502 503 if (tp->t_state != TCPS_SYN_SENT) 504 t3_push_frames(so, 1); 505 506 if (toep->tp_flags & TP_FIN_SENT) { 507 inp_wunlock(inp); 508 return; 509 } 510 511 tid = toep->tp_tid; 512 513 d = TOM_DATA(toep->tp_toedev); 514 515 m = m_gethdr_nofail(sizeof(*req)); 516 m_set_priority(m, CPL_PRIORITY_DATA); 517 m_set_sgl(m, NULL); 518 m_set_sgllen(m, 0); 519 520 toep->tp_flags |= TP_FIN_SENT; 521 req = mtod(m, struct cpl_close_con_req *); 522 523 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); 524 req->wr.wr_lo = htonl(V_WR_TID(tid)); 525 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 526 req->rsvd = 0; 527 inp_wunlock(inp); 528 /* 529 * XXX - need to defer shutdown while there is still data in the queue 530 * 531 */ 532 CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid); 533 cxgb_ofld_send(d->cdev, m); 534 535} 536 537/* 538 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant 539 * and send it along. 540 */ 541static void 542abort_arp_failure(struct t3cdev *cdev, struct mbuf *m) 543{ 544 struct cpl_abort_req *req = cplhdr(m); 545 546 req->cmd = CPL_ABORT_NO_RST; 547 cxgb_ofld_send(cdev, m); 548} 549 550/* 551 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are 552 * permitted to return without sending the message in case we cannot allocate 553 * an sk_buff. Returns the number of credits sent. 
554 */ 555uint32_t 556t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail) 557{ 558 struct mbuf *m; 559 struct cpl_rx_data_ack *req; 560 struct toepcb *toep = tp->t_toe; 561 struct toedev *tdev = toep->tp_toedev; 562 563 m = m_gethdr_nofail(sizeof(*req)); 564 565 DPRINTF("returning %u credits to HW\n", credits); 566 567 req = mtod(m, struct cpl_rx_data_ack *); 568 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 569 req->wr.wr_lo = 0; 570 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 571 req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); 572 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep)); 573 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 574 return (credits); 575} 576 577/* 578 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled. 579 * This is only used in DDP mode, so we take the opportunity to also set the 580 * DACK mode and flush any Rx credits. 581 */ 582void 583t3_send_rx_modulate(struct toepcb *toep) 584{ 585 struct mbuf *m; 586 struct cpl_rx_data_ack *req; 587 588 m = m_gethdr_nofail(sizeof(*req)); 589 590 req = mtod(m, struct cpl_rx_data_ack *); 591 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 592 req->wr.wr_lo = 0; 593 m->m_pkthdr.len = m->m_len = sizeof(*req); 594 595 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 596 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 597 V_RX_DACK_MODE(1) | 598 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup)); 599 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 600 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 601 toep->tp_rcv_wup = toep->tp_copied_seq; 602} 603 604/* 605 * Handle receipt of an urgent pointer. 
606 */ 607static void 608handle_urg_ptr(struct socket *so, uint32_t urg_seq) 609{ 610#ifdef URGENT_DATA_SUPPORTED 611 struct tcpcb *tp = so_sototcpcb(so); 612 613 urg_seq--; /* initially points past the urgent data, per BSD */ 614 615 if (tp->urg_data && !after(urg_seq, tp->urg_seq)) 616 return; /* duplicate pointer */ 617 sk_send_sigurg(sk); 618 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 619 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) { 620 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 621 622 tp->copied_seq++; 623 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len) 624 tom_eat_skb(sk, skb, 0); 625 } 626 tp->urg_data = TCP_URG_NOTYET; 627 tp->urg_seq = urg_seq; 628#endif 629} 630 631/* 632 * Returns true if a socket cannot accept new Rx data. 633 */ 634static inline int 635so_no_receive(const struct socket *so) 636{ 637 return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING)); 638} 639 640/* 641 * Process an urgent data notification. 642 */ 643static void 644rx_urg_notify(struct toepcb *toep, struct mbuf *m) 645{ 646 struct cpl_rx_urg_notify *hdr = cplhdr(m); 647 struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); 648 649 VALIDATE_SOCK(so); 650 651 if (!so_no_receive(so)) 652 handle_urg_ptr(so, ntohl(hdr->seq)); 653 654 m_freem(m); 655} 656 657/* 658 * Handler for RX_URG_NOTIFY CPL messages. 659 */ 660static int 661do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx) 662{ 663 struct toepcb *toep = (struct toepcb *)ctx; 664 665 rx_urg_notify(toep, m); 666 return (0); 667} 668 669static __inline int 670is_delack_mode_valid(struct toedev *dev, struct toepcb *toep) 671{ 672 return (toep->tp_ulp_mode || 673 (toep->tp_ulp_mode == ULP_MODE_TCPDDP && 674 dev->tod_ttid >= TOE_ID_CHELSIO_T3)); 675} 676 677/* 678 * Set of states for which we should return RX credits. 
679 */ 680#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 681 682/* 683 * Called after some received data has been read. It returns RX credits 684 * to the HW for the amount of data processed. 685 */ 686void 687t3_cleanup_rbuf(struct tcpcb *tp, int copied) 688{ 689 struct toepcb *toep = tp->t_toe; 690 struct socket *so; 691 struct toedev *dev; 692 int dack_mode, must_send, read; 693 u32 thres, credits, dack = 0; 694 struct sockbuf *rcv; 695 696 so = inp_inpcbtosocket(tp->t_inpcb); 697 rcv = so_sockbuf_rcv(so); 698 699 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 700 (tp->t_state == TCPS_FIN_WAIT_2))) { 701 if (copied) { 702 sockbuf_lock(rcv); 703 toep->tp_copied_seq += copied; 704 sockbuf_unlock(rcv); 705 } 706 707 return; 708 } 709 710 inp_lock_assert(tp->t_inpcb); 711 712 sockbuf_lock(rcv); 713 if (copied) 714 toep->tp_copied_seq += copied; 715 else { 716 read = toep->tp_enqueued_bytes - rcv->sb_cc; 717 toep->tp_copied_seq += read; 718 } 719 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 720 toep->tp_enqueued_bytes = rcv->sb_cc; 721 sockbuf_unlock(rcv); 722 723 if (credits > rcv->sb_mbmax) { 724 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n", 725 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 726 credits = rcv->sb_mbmax; 727 } 728 729 730 /* 731 * XXX this won't accurately reflect credit return - we need 732 * to look at the difference between the amount that has been 733 * put in the recv sockbuf and what is there now 734 */ 735 736 if (__predict_false(!credits)) 737 return; 738 739 dev = toep->tp_toedev; 740 thres = TOM_TUNABLE(dev, rx_credit_thres); 741 742 if (__predict_false(thres == 0)) 743 return; 744 745 if (is_delack_mode_valid(dev, toep)) { 746 dack_mode = TOM_TUNABLE(dev, delack); 747 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 748 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 749 750 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 751 dack = F_RX_DACK_CHANGE 
| 752 V_RX_DACK_MODE(dack_mode); 753 } 754 } else 755 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 756 757 /* 758 * For coalescing to work effectively ensure the receive window has 759 * at least 16KB left. 760 */ 761 must_send = credits + 16384 >= tp->rcv_wnd; 762 763 if (must_send || credits >= thres) 764 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 765} 766 767static int 768cxgb_toe_disconnect(struct tcpcb *tp) 769{ 770 struct socket *so; 771 772 DPRINTF("cxgb_toe_disconnect\n"); 773 774 so = inp_inpcbtosocket(tp->t_inpcb); 775 close_conn(so); 776 return (0); 777} 778 779static int 780cxgb_toe_reset(struct tcpcb *tp) 781{ 782 struct toepcb *toep = tp->t_toe; 783 784 t3_send_reset(toep); 785 786 /* 787 * unhook from socket 788 */ 789 tp->t_flags &= ~TF_TOE; 790 toep->tp_tp = NULL; 791 tp->t_toe = NULL; 792 return (0); 793} 794 795static int 796cxgb_toe_send(struct tcpcb *tp) 797{ 798 struct socket *so; 799 800 DPRINTF("cxgb_toe_send\n"); 801 dump_toepcb(tp->t_toe); 802 803 so = inp_inpcbtosocket(tp->t_inpcb); 804 t3_push_frames(so, 1); 805 return (0); 806} 807 808static int 809cxgb_toe_rcvd(struct tcpcb *tp) 810{ 811 812 inp_lock_assert(tp->t_inpcb); 813 814 t3_cleanup_rbuf(tp, 0); 815 816 return (0); 817} 818 819static void 820cxgb_toe_detach(struct tcpcb *tp) 821{ 822 struct toepcb *toep; 823 824 /* 825 * XXX how do we handle teardown in the SYN_SENT state? 
826 * 827 */ 828 inp_lock_assert(tp->t_inpcb); 829 toep = tp->t_toe; 830 toep->tp_tp = NULL; 831 832 /* 833 * unhook from socket 834 */ 835 tp->t_flags &= ~TF_TOE; 836 tp->t_toe = NULL; 837} 838 839 840static struct toe_usrreqs cxgb_toe_usrreqs = { 841 .tu_disconnect = cxgb_toe_disconnect, 842 .tu_reset = cxgb_toe_reset, 843 .tu_send = cxgb_toe_send, 844 .tu_rcvd = cxgb_toe_rcvd, 845 .tu_detach = cxgb_toe_detach, 846 .tu_detach = cxgb_toe_detach, 847 .tu_syncache_event = handle_syncache_event, 848}; 849 850 851static void 852__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 853 uint64_t mask, uint64_t val, int no_reply) 854{ 855 struct cpl_set_tcb_field *req; 856 857 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 858 toep->tp_tid, word, mask, val); 859 860 req = mtod(m, struct cpl_set_tcb_field *); 861 m->m_pkthdr.len = m->m_len = sizeof(*req); 862 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 863 req->wr.wr_lo = 0; 864 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 865 req->reply = V_NO_REPLY(no_reply); 866 req->cpu_idx = 0; 867 req->word = htons(word); 868 req->mask = htobe64(mask); 869 req->val = htobe64(val); 870 871 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 872 send_or_defer(toep, m, 0); 873} 874 875static void 876t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) 877{ 878 struct mbuf *m; 879 struct tcpcb *tp = toep->tp_tp; 880 881 if (toep == NULL) 882 return; 883 884 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 885 printf("not seting field\n"); 886 return; 887 } 888 889 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 890 891 __set_tcb_field(toep, m, word, mask, val, 1); 892} 893 894/* 895 * Set one of the t_flags bits in the TCB. 
896 */ 897static void 898set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val) 899{ 900 901 t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); 902} 903 904/* 905 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. 906 */ 907static void 908t3_set_nagle(struct toepcb *toep) 909{ 910 struct tcpcb *tp = toep->tp_tp; 911 912 set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); 913} 914 915/* 916 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. 917 */ 918void 919t3_set_keepalive(struct toepcb *toep, int on_off) 920{ 921 922 set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off); 923} 924 925void 926t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off) 927{ 928 set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off); 929} 930 931void 932t3_set_dack_mss(struct toepcb *toep, int on_off) 933{ 934 935 set_tcb_tflag(toep, S_TF_DACK_MSS, on_off); 936} 937 938/* 939 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. 940 */ 941static void 942t3_set_tos(struct toepcb *toep) 943{ 944 int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb); 945 946 t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), 947 V_TCB_TOS(tos)); 948} 949 950 951/* 952 * In DDP mode, TP fails to schedule a timer to push RX data to the host when 953 * DDP is disabled (data is delivered to freelist). [Note that, the peer should 954 * set the PSH bit in the last segment, which would trigger delivery.] 955 * We work around the issue by setting a DDP buffer in a partial placed state, 956 * which guarantees that TP will schedule a timer. 
957 */ 958#define TP_DDP_TIMER_WORKAROUND_MASK\ 959 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\ 960 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\ 961 V_TCB_RX_DDP_BUF0_LEN(3)) << 32)) 962#define TP_DDP_TIMER_WORKAROUND_VAL\ 963 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\ 964 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\ 965 32)) 966 967static void 968t3_enable_ddp(struct toepcb *toep, int on) 969{ 970 if (on) { 971 972 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), 973 V_TF_DDP_OFF(0)); 974 } else 975 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, 976 V_TF_DDP_OFF(1) | 977 TP_DDP_TIMER_WORKAROUND_MASK, 978 V_TF_DDP_OFF(1) | 979 TP_DDP_TIMER_WORKAROUND_VAL); 980 981} 982 983void 984t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color) 985{ 986 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx, 987 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), 988 tag_color); 989} 990 991void 992t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset, 993 unsigned int len) 994{ 995 if (buf_idx == 0) 996 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET, 997 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 998 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 999 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | 1000 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 1001 else 1002 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET, 1003 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 1004 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), 1005 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | 1006 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32)); 1007} 1008 1009static int 1010t3_set_cong_control(struct socket *so, const char *name) 1011{ 1012#ifdef CONGESTION_CONTROL_SUPPORTED 1013 int cong_algo; 1014 1015 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++) 1016 if (!strcmp(name, t3_cong_ops[cong_algo].name)) 1017 break; 1018 1019 if (cong_algo >= ARRAY_SIZE(t3_cong_ops)) 1020 return 
-EINVAL; 1021#endif 1022 return 0; 1023} 1024 1025int 1026t3_get_tcb(struct toepcb *toep) 1027{ 1028 struct cpl_get_tcb *req; 1029 struct tcpcb *tp = toep->tp_tp; 1030 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 1031 1032 if (!m) 1033 return (ENOMEM); 1034 1035 inp_lock_assert(tp->t_inpcb); 1036 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 1037 req = mtod(m, struct cpl_get_tcb *); 1038 m->m_pkthdr.len = m->m_len = sizeof(*req); 1039 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 1040 req->wr.wr_lo = 0; 1041 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); 1042 req->cpuno = htons(toep->tp_qset); 1043 req->rsvd = 0; 1044 if (tp->t_state == TCPS_SYN_SENT) 1045 mbufq_tail(&toep->out_of_order_queue, m); // defer 1046 else 1047 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 1048 return 0; 1049} 1050 1051static inline void 1052so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid) 1053{ 1054 1055 toepcb_hold(toep); 1056 1057 cxgb_insert_tid(d->cdev, d->client, toep, tid); 1058} 1059 1060/** 1061 * find_best_mtu - find the entry in the MTU table closest to an MTU 1062 * @d: TOM state 1063 * @mtu: the target MTU 1064 * 1065 * Returns the index of the value in the MTU table that is closest to but 1066 * does not exceed the target MTU. 
1067 */ 1068static unsigned int 1069find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1070{ 1071 int i = 0; 1072 1073 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1074 ++i; 1075 return (i); 1076} 1077 1078static unsigned int 1079select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1080{ 1081 unsigned int idx; 1082 1083#ifdef notyet 1084 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt; 1085#endif 1086 if (tp) { 1087 tp->t_maxseg = pmtu - 40; 1088 if (tp->t_maxseg < td->mtus[0] - 40) 1089 tp->t_maxseg = td->mtus[0] - 40; 1090 idx = find_best_mtu(td, tp->t_maxseg + 40); 1091 1092 tp->t_maxseg = td->mtus[idx] - 40; 1093 } else 1094 idx = find_best_mtu(td, pmtu); 1095 1096 return (idx); 1097} 1098 1099static inline void 1100free_atid(struct t3cdev *cdev, unsigned int tid) 1101{ 1102 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1103 1104 if (toep) 1105 toepcb_release(toep); 1106} 1107 1108/* 1109 * Release resources held by an offload connection (TID, L2T entry, etc.) 
 */
static void
t3_release_offload_resources(struct toepcb *toep)
{
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev;
	struct socket *so;
	unsigned int tid = toep->tp_tid;
	struct sockbuf *rcv;

	CTR0(KTR_TOM, "t3_release_offload_resources");

	if (!tdev)
		return;

	cdev = TOEP_T3C_DEV(toep);
	if (!cdev)
		return;

	toep->tp_qset = 0;
	t3_release_ddp_resources(toep);

#ifdef CTRL_SKB_CACHE
	kfree_skb(CTRL_SKB_CACHE(tp));
	CTRL_SKB_CACHE(tp) = NULL;
#endif

	/* Discard any work requests that were queued but never sent. */
	if (toep->tp_wr_avail != toep->tp_wr_max) {
		purge_wr_queue(toep);
		reset_wr_list(toep);
	}

	if (toep->tp_l2t) {
		l2t_release(L2DATA(cdev), toep->tp_l2t);
		toep->tp_l2t = NULL;
	}
	/* Detach the toepcb from the tcpcb before notifying readers. */
	toep->tp_tp = NULL;
	if (tp) {
		inp_lock_assert(tp->t_inpcb);
		so = inp_inpcbtosocket(tp->t_inpcb);
		rcv = so_sockbuf_rcv(so);
		/*
		 * cancel any offloaded reads
		 *
		 */
		sockbuf_lock(rcv);
		tp->t_toe = NULL;
		tp->t_flags &= ~TF_TOE;
		if (toep->tp_ddp_state.user_ddp_pending) {
			t3_cancel_ubuf(toep, rcv);
			toep->tp_ddp_state.user_ddp_pending = 0;
		}
		so_sorwakeup_locked(so);

	}

	/*
	 * A connection still in SYN_SENT only ever owned an ATID;
	 * established connections hold a TID plus the TID table's reference
	 * on the toepcb (see so_insert_tid()), which is dropped here.
	 */
	if (toep->tp_state == TCPS_SYN_SENT) {
		free_atid(cdev, tid);
#ifdef notyet
		__skb_queue_purge(&tp->out_of_order_queue);
#endif
	} else {		// we have TID
		cxgb_remove_tid(cdev, toep, tid);
		toepcb_release(toep);
	}
#if 0
	log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
}

/*
 * Switch a socket over to the offload protocol operations vector.  The
 * tcpcb must already have its toepcb attached.
 */
static void
install_offload_ops(struct socket *so)
{
	struct tcpcb *tp = so_sototcpcb(so);

	KASSERT(tp->t_toe != NULL, ("toepcb not set"));

	t3_install_socket_ops(so);
	tp->t_flags |= TF_TOE;
	tp->t_tu = &cxgb_toe_usrreqs;
}

/*
 * Determine the receive window scaling factor given a target max
 * receive window.
 */
static __inline int
select_rcv_wscale(int space)
{
	int wscale = 0;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	/* Only scale when RFC 1323 window scaling is enabled system-wide;
	 * 14 is the maximum shift permitted by the RFC. */
	if (tcp_do_rfc1323)
		for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;

	return (wscale);
}

/*
 * Determine the receive window size for a socket.
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
	struct tom_data *d = TOM_DATA(dev);
	unsigned int wnd;
	unsigned int max_rcv_wnd;
	struct sockbuf *rcv;

	rcv = so_sockbuf_rcv(so);

	/* Start from the autosizing cap when autosizing is on, else from
	 * the socket's configured high-water mark. */
	if (tcp_do_autorcvbuf)
		wnd = tcp_autorcvbuf_max;
	else
		wnd = rcv->sb_hiwat;



	/* XXX
	 * For receive coalescing to work effectively we need a receive window
	 * that can accomodate a coalesced segment.
	 */
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	/* PR 5138 */
	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
	    (uint32_t)d->rx_page_size * 23 :
	    MAX_RCV_WND);

	return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields. This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
	struct sockbuf *snd, *rcv;

#ifdef notyet
	SOCK_LOCK_ASSERT(so);
#endif

	snd = so_sockbuf_snd(so);
	rcv = so_sockbuf_rcv(so);

	log(LOG_INFO, "initializing offload socket\n");
	/*
	 * We either need to fix push frames to work with sbcompress
	 * or we need to add this
	 */
	snd->sb_flags |= SB_NOCOALESCE;
	rcv->sb_flags |= SB_NOCOALESCE;

	/* Cross-link the tcpcb and the toepcb. */
	tp->t_toe = toep;
	toep->tp_tp = tp;
	toep->tp_toedev = dev;

	toep->tp_tid = tid;
	toep->tp_l2t = e;
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_delack_mode = 0;

	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
	/*
	 * XXX broken
	 *
	 */
	tp->rcv_wnd = select_rcv_wnd(dev, so);

	/* Enable DDP only when the tunable allows it, the socket has not
	 * opted out, and the window is large enough to be worthwhile. */
	toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
	toep->tp_qset_idx = 0;

	reset_wr_list(toep);
	DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
	struct tcpcb *tp = so_sototcpcb(so);
	int wscale = select_rcv_wscale(tp->rcv_wnd);

	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
	    V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
	struct tcpcb *tp = so_sototcpcb(so);
	unsigned int val;

	/* Receive buffer size is encoded in 1KB units (>> 10). */
	val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
	    V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
	return (val);
}

/*
 * Compute the option 2 value: advertises the configured congestion-control
 * flavor when the cong_alg tunable has been set (!= -1).
 */
static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
	int flv_valid;

	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

	return (V_FLAVORS_VALID(flv_valid) |
	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
/*
 * Sum the work-request credits recorded on the queued mbufs (debug only).
 */
static int
count_pending_wrs(const struct toepcb *toep)
{
	const struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)
		n += m->m_pkthdr.csum_data;
	return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

/*
 * Populate an mbuf with a CPL_ACT_OPEN_REQ for an active open on the given
 * ATID and L2T entry.
 */
static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
	struct cpl_act_open_req *req;
	struct inpcb *inp = so_sotoinpcb(so);
	struct tcpcb *tp = inp_inpcbtotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = toep->tp_toedev;

	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

	req = mtod(m, struct cpl_act_open_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
#if 0
	req->local_port = inp->inp_lport;
	req->peer_port = inp->inp_fport;
	memcpy(&req->local_ip, &inp->inp_laddr, 4);
	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
#endif
	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
	    V_TX_CHANNEL(e->smt_idx));
	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
	req->params = 0;
	req->opt2 = htonl(calc_opt2(so, tdev));
}


/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return (ECONNREFUSED);
	case CPL_ERR_ARP_MISS:
		return (EHOSTUNREACH);
	case CPL_ERR_CONN_TIMEDOUT:
		return (ETIMEDOUT);
	case CPL_ERR_TCAM_FULL:
		return (ENOMEM);
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return (EADDRINUSE);
	default:
		return (EIO);
	}
}

/*
 * Tear down a failed active open: release offload resources and report the
 * error to the TCP stack.  Expects the inpcb write lock held and drops it
 * before calling tcp_offload_drop().
 */
static void
fail_act_open(struct toepcb *toep, int errno)
{
	struct tcpcb *tp = toep->tp_tp;

	t3_release_offload_resources(toep);
	if (tp) {
		inp_wunlock(tp->t_inpcb);
		tcp_offload_drop(tp, errno);
	}

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	struct inpcb *inp;

	if (toep->tp_tp == NULL)
		goto done;

	inp = toep->tp_tp->t_inpcb;

/*
 * Don't handle connection retry for now
 */
#ifdef notyet
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
		    jiffies + HZ / 2);
	} else
#endif
	{
		inp_wlock(inp);
		/*
		 * drops the inpcb lock
		 */
		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
	}

done:
	m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int
act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	    status != CPL_ERR_ARP_MISS;
}

/*
 * Process an ACT_OPEN_RPL CPL message.
 */
static int
do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct cpl_act_open_rpl *rpl = cplhdr(m);

	/* On non-T3A parts, return the TID the failed open allocated. */
	if (cdev->type != T3A && act_open_has_tid(rpl->status))
		cxgb_queue_tid_release(cdev, GET_TID(rpl));

	active_open_failed(toep, m);
	return (0);
}

/*
 * Handle an ARP failure for an active open. XXX purge ofo queue
 *
 * XXX badly broken for crossed SYNs as the ATID is no longer valid.
 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't
 * free the atid. Hmm.
 */
#ifdef notyet
static void
act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
{
	struct toepcb *toep = m_get_toep(m);
	struct tcpcb *tp = toep->tp_tp;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so;

	inp_wlock(inp);
	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
		/*
		 * drops the inpcb lock
		 */
		fail_act_open(so, EHOSTUNREACH);
		printf("freeing %p\n", m);

		m_free(m);
	} else
		inp_wunlock(inp);
}
#endif
/*
 * Send an active open request.
1522 */ 1523int 1524t3_connect(struct toedev *tdev, struct socket *so, 1525 struct rtentry *rt, struct sockaddr *nam) 1526{ 1527 struct mbuf *m; 1528 struct l2t_entry *e; 1529 struct tom_data *d = TOM_DATA(tdev); 1530 struct inpcb *inp = so_sotoinpcb(so); 1531 struct tcpcb *tp = intotcpcb(inp); 1532 struct toepcb *toep; /* allocated by init_offload_socket */ 1533 1534 int atid; 1535 1536 toep = toepcb_alloc(); 1537 if (toep == NULL) 1538 goto out_err; 1539 1540 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1541 goto out_err; 1542 1543 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1544 if (!e) 1545 goto free_tid; 1546 1547 inp_lock_assert(inp); 1548 m = m_gethdr(MT_DATA, M_WAITOK); 1549 1550#if 0 1551 m->m_toe.mt_toepcb = tp->t_toe; 1552 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1553#endif 1554 so_lock(so); 1555 1556 init_offload_socket(so, tdev, atid, e, rt, toep); 1557 1558 install_offload_ops(so); 1559 1560 mk_act_open_req(so, m, atid, e); 1561 so_unlock(so); 1562 1563 soisconnecting(so); 1564 toep = tp->t_toe; 1565 m_set_toep(m, tp->t_toe); 1566 1567 toep->tp_state = TCPS_SYN_SENT; 1568 l2t_send(d->cdev, (struct mbuf *)m, e); 1569 1570 if (toep->tp_ulp_mode) 1571 t3_enable_ddp(toep, 0); 1572 return (0); 1573 1574free_tid: 1575 printf("failing connect - free atid\n"); 1576 1577 free_atid(d->cdev, atid); 1578out_err: 1579 printf("return ENOMEM\n"); 1580 return (ENOMEM); 1581} 1582 1583/* 1584 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1585 * not send multiple ABORT_REQs for the same connection and also that we do 1586 * not try to send a message after the connection has closed. Returns 1 if 1587 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
1588 */ 1589static void 1590t3_send_reset(struct toepcb *toep) 1591{ 1592 1593 struct cpl_abort_req *req; 1594 unsigned int tid = toep->tp_tid; 1595 int mode = CPL_ABORT_SEND_RST; 1596 struct tcpcb *tp = toep->tp_tp; 1597 struct toedev *tdev = toep->tp_toedev; 1598 struct socket *so = NULL; 1599 struct mbuf *m; 1600 struct sockbuf *snd; 1601 1602 if (tp) { 1603 inp_lock_assert(tp->t_inpcb); 1604 so = inp_inpcbtosocket(tp->t_inpcb); 1605 } 1606 1607 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) || 1608 tdev == NULL)) 1609 return; 1610 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN); 1611 1612 snd = so_sockbuf_snd(so); 1613 /* Purge the send queue so we don't send anything after an abort. */ 1614 if (so) 1615 sbflush(snd); 1616 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev)) 1617 mode |= CPL_ABORT_POST_CLOSE_REQ; 1618 1619 m = m_gethdr_nofail(sizeof(*req)); 1620 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep)); 1621 set_arp_failure_handler(m, abort_arp_failure); 1622 1623 req = mtod(m, struct cpl_abort_req *); 1624 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); 1625 req->wr.wr_lo = htonl(V_WR_TID(tid)); 1626 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); 1627 req->rsvd0 = tp ? 
htonl(tp->snd_nxt) : 0; 1628 req->rsvd1 = !(toep->tp_flags & TP_DATASENT); 1629 req->cmd = mode; 1630 if (tp && (tp->t_state == TCPS_SYN_SENT)) 1631 mbufq_tail(&toep->out_of_order_queue, m); // defer 1632 else 1633 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); 1634} 1635 1636static int 1637t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) 1638{ 1639 struct inpcb *inp; 1640 int error, optval; 1641 1642 if (sopt->sopt_name == IP_OPTIONS) 1643 return (ENOPROTOOPT); 1644 1645 if (sopt->sopt_name != IP_TOS) 1646 return (EOPNOTSUPP); 1647 1648 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 1649 1650 if (error) 1651 return (error); 1652 1653 if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread)) 1654 return (EPERM); 1655 1656 inp = so_sotoinpcb(so); 1657 inp_wlock(inp); 1658 inp_ip_tos_set(inp, optval); 1659#if 0 1660 inp->inp_ip_tos = optval; 1661#endif 1662 t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe); 1663 inp_wunlock(inp); 1664 1665 return (0); 1666} 1667 1668static int 1669t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1670{ 1671 int err = 0; 1672 size_t copied; 1673 1674 if (sopt->sopt_name != TCP_CONGESTION && 1675 sopt->sopt_name != TCP_NODELAY) 1676 return (EOPNOTSUPP); 1677 1678 if (sopt->sopt_name == TCP_CONGESTION) { 1679 char name[TCP_CA_NAME_MAX]; 1680 int optlen = sopt->sopt_valsize; 1681 struct tcpcb *tp; 1682 1683 if (optlen < 1) 1684 return (EINVAL); 1685 1686 err = copyinstr(sopt->sopt_val, name, 1687 min(TCP_CA_NAME_MAX - 1, optlen), &copied); 1688 if (err) 1689 return (err); 1690 if (copied < 1) 1691 return (EINVAL); 1692 1693 tp = so_sototcpcb(so); 1694 /* 1695 * XXX I need to revisit this 1696 */ 1697 if ((err = t3_set_cong_control(so, name)) == 0) { 1698#ifdef CONGESTION_CONTROL_SUPPORTED 1699 tp->t_cong_control = strdup(name, M_CXGB); 1700#endif 1701 } else 1702 return (err); 1703 } else { 1704 int optval, oldval; 1705 struct inpcb *inp; 1706 struct tcpcb *tp; 1707 1708 err = sooptcopyin(sopt, &optval, sizeof 
optval, 1709 sizeof optval); 1710 1711 if (err) 1712 return (err); 1713 1714 inp = so_sotoinpcb(so); 1715 tp = inp_inpcbtotcpcb(inp); 1716 1717 inp_wlock(inp); 1718 1719 oldval = tp->t_flags; 1720 if (optval) 1721 tp->t_flags |= TF_NODELAY; 1722 else 1723 tp->t_flags &= ~TF_NODELAY; 1724 inp_wunlock(inp); 1725 1726 1727 if (oldval != tp->t_flags && (tp->t_toe != NULL)) 1728 t3_set_nagle(tp->t_toe); 1729 1730 } 1731 1732 return (0); 1733} 1734 1735int 1736t3_ctloutput(struct socket *so, struct sockopt *sopt) 1737{ 1738 int err; 1739 1740 if (sopt->sopt_level != IPPROTO_TCP) 1741 err = t3_ip_ctloutput(so, sopt); 1742 else 1743 err = t3_tcp_ctloutput(so, sopt); 1744 1745 if (err != EOPNOTSUPP) 1746 return (err); 1747 1748 return (tcp_ctloutput(so, sopt)); 1749} 1750 1751/* 1752 * Returns true if we need to explicitly request RST when we receive new data 1753 * on an RX-closed connection. 1754 */ 1755static inline int 1756need_rst_on_excess_rx(const struct toepcb *toep) 1757{ 1758 return (1); 1759} 1760 1761/* 1762 * Handles Rx data that arrives in a state where the socket isn't accepting 1763 * new data. 1764 */ 1765static void 1766handle_excess_rx(struct toepcb *toep, struct mbuf *m) 1767{ 1768 1769 if (need_rst_on_excess_rx(toep) && 1770 !(toep->tp_flags & TP_ABORT_SHUTDOWN)) 1771 t3_send_reset(toep); 1772 m_freem(m); 1773} 1774 1775/* 1776 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE) 1777 * by getting the DDP offset from the TCB. 
 */
/*
 * Treat a CPL_GET_TCB_RPL as a DDP completion: extract the hardware's DDP
 * placement offset from the returned TCB image, synthesize an mbuf
 * describing the newly placed bytes and append it to the receive buffer.
 * Called with the inpcb lock held; takes the receive sockbuf lock and
 * releases it (directly or via so_sorwakeup_locked()) on every path.
 */
static void
tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct ddp_state *q = &toep->tp_ddp_state;
	struct ddp_buf_state *bsp;
	struct cpl_get_tcb_rpl *hdr;
	unsigned int ddp_offset;
	struct socket *so;
	struct tcpcb *tp;
	struct sockbuf *rcv;
	int state;

	uint64_t t;
	__be64 *tcb;

	tp = toep->tp_tp;
	so = inp_inpcbtosocket(tp->t_inpcb);

	inp_lock_assert(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	/* Note that we only accout for CPL_GET_TCB issued by the DDP code.
	 * We really need a cookie in order to dispatch the RPLs.
	 */
	q->get_tcb_count--;

	/* It is a possible that a previous CPL already invalidated UBUF DDP
	 * and moved the cur_buf idx and hence no further processing of this
	 * skb is required. However, the app might be sleeping on
	 * !q->get_tcb_count and we need to wake it up.
	 */
	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
		/* NOTE(review): this declaration shadows the function-scope
		 * 'state' above. */
		int state = so_state_get(so);

		m_freem(m);
		if (__predict_true((state & SS_NOFDREF) == 0))
			so_sorwakeup_locked(so);
		else
			sockbuf_unlock(rcv);

		return;
	}

	bsp = &q->buf_state[q->cur_buf];
	hdr = cplhdr(m);
	tcb = (__be64 *)(hdr + 1);
	/* Read the current DDP offset for the active buffer out of the TCB
	 * image returned by the hardware. */
	if (q->cur_buf == 0) {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
	} else {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
	}
	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
	/* The newly placed bytes span [old cur_offset, ddp_offset). */
	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset = ddp_offset;
	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;

	CTR5(KTR_TOM,
	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
	KASSERT(ddp_offset >= m->m_cur_offset,
	    ("ddp_offset=%u less than cur_offset=%u",
		ddp_offset, m->m_cur_offset));

#if 0
{
	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;

	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;

	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
	rcv_nxt = t >> S_TCB_RCV_NXT;
	rcv_nxt &= M_TCB_RCV_NXT;

	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;

	T3_TRACE2(TIDTB(sk),
	    "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
	    ddp_flags, rcv_nxt - rx_hdr_offset);
	T3_TRACE4(TB(q),
	    "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
	    tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
	T3_TRACE3(TB(q),
	    "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
	    rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
	T3_TRACE2(TB(q),
	    "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
	    q->buf_state[0].flags, q->buf_state[1].flags);

}
#endif
	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
		handle_excess_rx(toep, m);
		return;
	}

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
	}
#endif
	if (bsp->flags & DDP_BF_NOCOPY) {
#ifdef T3_TRACE
		T3_TRACE0(TB(q),
		    "tcb_rpl_as_ddp_complete: CANCEL UBUF");

		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
			printk("!cancel_ubuf");
			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
		}
#endif
		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
		q->cur_buf ^= 1;
	} else if (bsp->flags & DDP_BF_NOFLIP) {

		m->m_ddp_flags = 1;    /* always a kernel buffer */

		/* now HW buffer carries a user buffer */
		bsp->flags &= ~DDP_BF_NOFLIP;
		bsp->flags |= DDP_BF_NOCOPY;

		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
		 * any new data in which case we're done. If in addition the
		 * offset is 0, then there wasn't a completion for the kbuf
		 * and we need to decrement the posted count.
		 */
		if (m->m_pkthdr.len == 0) {
			if (ddp_offset == 0) {
				q->kbuf_posted--;
				bsp->flags |= DDP_BF_NODATA;
			}
			sockbuf_unlock(rcv);
			m_free(m);
			return;
		}
	} else {
		sockbuf_unlock(rcv);

		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
		 * but it got here way late and nobody cares anymore.
		 */
		m_free(m);
		return;
	}

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
	    m->m_seq, q->cur_buf, m->m_pkthdr.len);
	if (m->m_pkthdr.len == 0) {
		q->user_ddp_pending = 0;
		m_free(m);
	} else
		SBAPPEND(rcv, m);

	state = so_state_get(so);
	if (__predict_true((state & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code,
 * in that case they are similar to DDP completions.
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* OK if socket doesn't exist */
	if (toep == NULL) {
		printf("null toep in do_get_tcb_rpl\n");
		return (CPL_RET_BUF_DONE);
	}

	inp_wlock(toep->tp_tp->t_inpcb);
	tcb_rpl_as_ddp_complete(toep, m);
	inp_wunlock(toep->tp_tp->t_inpcb);

	return (0);
}

/*
 * Account for bytes the hardware placed directly (DDP) ahead of this
 * CPL_RX_DATA: if the CPL's sequence number is past rcv_nxt, the gap was
 * DDPed into the current buffer; turn this mbuf into a zero-copy record
 * (M_DDP) describing those bytes.  No-op when the sequence numbers match.
 */
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data *hdr = cplhdr(m);
	unsigned int rcv_nxt = ntohl(hdr->seq);
	struct sockbuf *rcv;

	if (tp->rcv_nxt == rcv_nxt)
		return;

	inp_lock_assert(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	q = &toep->tp_ddp_state;
	bsp = &q->buf_state[q->cur_buf];
	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
		rcv_nxt, tp->rcv_nxt));
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "handle_ddp_data: neg len");
	}
#endif
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	/*
	 * For now, don't re-enable DDP after a connection fell out of DDP
	 * mode.
	 */
	q->ubuf_ddp_ready = 0;
	sockbuf_unlock(rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_data *hdr = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct sockbuf *rcv;
	int state;
	int len = be16toh(hdr->len);

	inp_wlock(tp->t_inpcb);

	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		TRACE_EXIT;
		return;
	}

	/* In DDP mode, first account for bytes the HW placed directly. */
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
		handle_ddp_data(toep, m);

	m->m_seq = ntohl(hdr->seq);
	m->m_ulp_mode = 0;	/* for iSCSI */

#if VALIDATE_SEQ
	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
		log(LOG_ERR,
		    "%s: TID %u: Bad sequence number %u, expected %u\n",
		    toep->tp_toedev->name, toep->tp_tid, m->m_seq,
		    tp->rcv_nxt);
		m_freem(m);
		inp_wunlock(tp->t_inpcb);
		return;
	}
#endif
	/* Strip the CPL header; only payload goes to the socket buffer. */
	m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
	/*
	 * We don't handle urgent data yet
	 */
	if (__predict_false(hdr->urg))
		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
	    tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
		    tp->rcv_nxt];
#endif
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
		toep->tp_delack_mode = hdr->dack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

	if (len < m->m_pkthdr.len)
		m->m_pkthdr.len = m->m_len = len;

	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;
	CTR2(KTR_TOM,
	    "new_rx_data: seq 0x%x len %u",
	    m->m_seq, m->m_pkthdr.len);
	/* Drop the inpcb lock before taking the sockbuf lock. */
	inp_wunlock(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);
#if 0
	if (sb_notify(rcv))
		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
#endif
	SBAPPEND(rcv, m);

#ifdef notyet
	/*
	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
	 *
	 */
	KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),

	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
		so, rcv->sb_cc, rcv->sb_mbmax));
#endif


	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
	    rcv->sb_cc, rcv->sb_mbcnt);

	state = so_state_get(so);
	if (__predict_true((state & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DATA CPL messages.
 */
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);

	new_rx_data(toep, m);

	return (0);
}

/*
 * Handle a CPL_RX_DATA_DDP: the payload was already placed in a DDP buffer
 * by the hardware, so build a zero-copy mbuf (M_DDP) describing the placed
 * region and append it to the receive buffer.
 */
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	struct socket *so;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	int nomoredata = 0;
	unsigned int delack_mode;
	struct sockbuf *rcv;

	tp = toep->tp_tp;
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {

		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len. We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	inp_wunlock(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
	    m->m_len, rcv_nxt, m->m_seq);
	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in when. Assumes the buffer offset starts at 0, consumer needs to
	 * account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
			m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;	/* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
#endif
	SBAPPEND(rcv, m);

	if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
	    (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
		|| !(m->m_ddp_flags & DDP_BF_NOCOPY))))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
		 F_DDP_PPOD_PARITY_ERR | \
			 F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
		 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
		 F_DDP_INVALID_PPOD)

/*
 * Handler for RX_DATA_DDP CPL messages.  Drops (and logs) messages whose
 * DDP validity field reports a placement error; otherwise hands the mbuf
 * to new_rx_data_ddp().  Returning CPL_RET_BUF_DONE tells the dispatcher
 * it still owns the buffer.
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

/*
 * Process an RX_DDP_COMPLETE message: HW reports that a DDP buffer has been
 * completed.  Compute the number of newly placed bytes as the difference
 * between the reported DDP offset and the buffer state's cur_offset, advance
 * rcv_nxt by that amount, tag the mbuf with the buffer's gather list and DDP
 * flags, and append it to the socket's receive buffer so the consumer can
 * find the placed data.
 */
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;
	struct sockbuf *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = so_sotoinpcb(so);

		/* Receive side is shut down; hand off to the excess-rx path. */
		handle_excess_rx(toep, m);
		inp_wunlock(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	/* Bytes newly placed since the last report for this buffer. */
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	inp_wunlock(tp->t_inpcb);

	KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR5(KTR_TOM,
		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
		  "ddp_report 0x%x offset %u, len %u",
		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
		  G_DDP_OFFSET(ddp_report), m->m_len);

	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset += m->m_len;

	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;                     /* flip buffers */
		/* A short completion means the peer has no more data for now. */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata = 1;
	}

	CTR4(KTR_TOM,
		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
		  "ddp_report %u offset %u",
		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
		  G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	/* Bit 0 marks the DDP buffer completed. */
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

	SBAPPEND(rcv, m);
	if ((so_state_get(so) & SS_NOFDREF) == 0)
		so_sorwakeup_locked(so);	/* drops the sockbuf lock */
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct tcpcb *tp)
{
	/*
	 * Bump rcv_nxt for the peer FIN.
	 * We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	inp_wlock(tp->t_inpcb);
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	     /* defeat recycling */
	tp->t_srtt = 0;              /* defeat tcp_update_metrics */
	inp_wunlock(tp->t_inpcb);
	tcp_offload_twstart(tp);
}

/*
 * Same adjustments as enter_timewait(), but finishes via
 * tcp_offload_twstart_disconnect() for the locally-initiated close path.
 */
static void
enter_timewait_disconnect(struct tcpcb *tp)
{
	/*
	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	inp_wlock(tp->t_inpcb);
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	     /* defeat recycling */
	tp->t_srtt = 0;              /* defeat tcp_update_metrics */
	inp_wunlock(tp->t_inpcb);
	tcp_offload_twstart_disconnect(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 *
 * NOTE(review): the visible body never returns -1; only 0 and 1 are
 * produced.  Verify against do_peer_fin(), which tests for keep < 0.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
	struct sockbuf *rcv;

	if (tp->rcv_nxt == rcv_nxt)			/* no data */
		return (0);

	CTR0(KTR_TOM, "handle_peer_close_data");
	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
		 */
		return (1);
	}

	inp_lock_assert(tp->t_inpcb);
	q = &toep->tp_ddp_state;
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[q->cur_buf];
	/* Bytes implicitly completed ahead of the FIN. */
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags =
	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	tp->t_rcvtime = ticks;
	SBAPPEND(rcv, m);
	if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);	/* drops the sockbuf lock */
	else
		sockbuf_unlock(rcv);

	return (1);
}

/*
 * Handle a peer FIN.
 */
static void
do_peer_fin(struct toepcb *toep, struct mbuf *m)
{
	struct socket *so;
	struct tcpcb *tp = toep->tp_tp;
	int keep, action;

	action = keep = 0;
	CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		printf("abort_pending set\n");

		goto out;
	}
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
		/* The FIN may carry implicit DDP-completed data. */
		keep = handle_peer_close_data(so, m);
		if (keep < 0) {
			inp_wunlock(tp->t_inpcb);
			return;
		}
	}
	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		CTR1(KTR_TOM,
		    "waking up waiters for cantrcvmore on %p ", so);
		socantrcvmore(so);

		/*
		 * If connection is half-synchronized
		 * (ie NEEDSYN flag on) then delay ACK,
		 * so it may be piggybacked when SYN is sent.
		 * Otherwise, since we received a FIN then no
		 * more input can be expected, send ACK now.
		 */
		if (tp->t_flags & TF_NEEDSYN)
			tp->t_flags |= TF_DELACK;
		else
			tp->t_flags |= TF_ACKNOW;
		tp->rcv_nxt++;
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
	/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;
	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;
	case TCPS_FIN_WAIT_2:
		/*
		 * If we've sent an abort_req we must have sent it too late,
		 * HW will send us a reply telling us so, and this peer_close
		 * is really the last message for this connection and needs to
		 * be treated as an abort_rpl, i.e., transition the connection
		 * to TCP_CLOSE (note that the host stack does this at the
		 * time of generating the RST but we must wait for HW).
		 * Otherwise we enter TIME_WAIT.
		 */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;
		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
		    toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);

	/* Deferred actions run after the inpcb lock is dropped. */
	if (action == TCP_TIMEWAIT) {
		enter_timewait(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}

#ifdef notyet
	/* Do not send POLL_HUP for half duplex close. */
	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
	    sk->sk_state == TCP_CLOSE)
		sk_wake_async(so, 1, POLL_HUP);
	else
		sk_wake_async(so, 1, POLL_IN);
#endif

out:
	if (!keep)
		m_free(m);
}

/*
 * Handler for PEER_CLOSE CPL messages.
 */
static int
do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	VALIDATE_SOCK(so);

	do_peer_fin(toep, m);
	return (0);
}

/*
 * Process a CLOSE_CON_RPL, HW's acknowledgment of our FIN.  snd_una is
 * advanced to the reported snd_nxt (minus the FIN) and the connection is
 * moved along the active-close state machine.
 */
static void
process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_close_con_rpl *rpl = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int action = 0;
	struct sockbuf *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */

	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		inp_wunlock(tp->t_inpcb);
		goto out;
	}

	CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
	    tp->t_state, !!(so_state_get(so) & SS_NOFDREF));

	switch (tp->t_state) {
	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;

		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	case TCPS_LAST_ACK:
		/*
		 * In this state we don't care about pending abort_rpl.
		 * If we've sent abort_req it was post-close and was sent too
		 * late, this close_con_rpl is the actual last message.
		 */
		t3_release_offload_resources(toep);
		action = TCP_CLOSE;
		break;
	case TCPS_FIN_WAIT_1:
		/*
		 * If we can't receive any more
		 * data, then closing user can proceed.
		 * Starting the timer is contrary to the
		 * specification, but if we don't get a FIN
		 * we'll hang forever.
		 *
		 * XXXjl:
		 * we should release the tp also, and use a
		 * compressed state.
		 */
		if (so)
			rcv = so_sockbuf_rcv(so);
		else
			break;

		if (rcv->sb_state & SBS_CANTRCVMORE) {
			int timeout;

			if (so)
				soisdisconnected(so);
			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : tcp_maxidle;
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
		tp->t_state = TCPS_FIN_WAIT_2;
		if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
			action = TCP_DROP;
		}

		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		    toep->tp_toedev->tod_name, toep->tp_tid,
		    tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);

	/* Deferred actions run after the inpcb lock is dropped. */
	if (action == TCP_TIMEWAIT) {
		enter_timewait_disconnect(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}
out:
	m_freem(m);
}

/*
 * Handler for CLOSE_CON_RPL CPL messages.
2715 */ 2716static int 2717do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, 2718 void *ctx) 2719{ 2720 struct toepcb *toep = (struct toepcb *)ctx; 2721 2722 process_close_con_rpl(toep, m); 2723 return (0); 2724} 2725 2726/* 2727 * Process abort replies. We only process these messages if we anticipate 2728 * them as the coordination between SW and HW in this area is somewhat lacking 2729 * and sometimes we get ABORT_RPLs after we are done with the connection that 2730 * originated the ABORT_REQ. 2731 */ 2732static void 2733process_abort_rpl(struct toepcb *toep, struct mbuf *m) 2734{ 2735 struct tcpcb *tp = toep->tp_tp; 2736 struct socket *so; 2737 int needclose = 0; 2738 2739#ifdef T3_TRACE 2740 T3_TRACE1(TIDTB(sk), 2741 "process_abort_rpl: GTS rpl pending %d", 2742 sock_flag(sk, ABORT_RPL_PENDING)); 2743#endif 2744 2745 inp_wlock(tp->t_inpcb); 2746 so = inp_inpcbtosocket(tp->t_inpcb); 2747 2748 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2749 /* 2750 * XXX panic on tcpdrop 2751 */ 2752 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev)) 2753 toep->tp_flags |= TP_ABORT_RPL_RCVD; 2754 else { 2755 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); 2756 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || 2757 !is_t3a(toep->tp_toedev)) { 2758 if (toep->tp_flags & TP_ABORT_REQ_RCVD) 2759 panic("TP_ABORT_REQ_RCVD set"); 2760 t3_release_offload_resources(toep); 2761 needclose = 1; 2762 } 2763 } 2764 } 2765 inp_wunlock(tp->t_inpcb); 2766 2767 if (needclose) 2768 tcp_offload_close(tp); 2769 2770 m_free(m); 2771} 2772 2773/* 2774 * Handle an ABORT_RPL_RSS CPL message. 2775 */ 2776static int 2777do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2778{ 2779 struct cpl_abort_rpl_rss *rpl = cplhdr(m); 2780 struct toepcb *toep; 2781 2782 /* 2783 * Ignore replies to post-close aborts indicating that the abort was 2784 * requested too late. 
These connections are terminated when we get 2785 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 2786 * arrives the TID is either no longer used or it has been recycled. 2787 */ 2788 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2789discard: 2790 m_free(m); 2791 return (0); 2792 } 2793 2794 toep = (struct toepcb *)ctx; 2795 2796 /* 2797 * Sometimes we've already closed the socket, e.g., a post-close 2798 * abort races with ABORT_REQ_RSS, the latter frees the socket 2799 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2800 * but FW turns the ABORT_REQ into a regular one and so we get 2801 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2802 */ 2803 if (!toep) 2804 goto discard; 2805 2806 if (toep->tp_tp == NULL) { 2807 log(LOG_NOTICE, "removing tid for abort\n"); 2808 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2809 if (toep->tp_l2t) 2810 l2t_release(L2DATA(cdev), toep->tp_l2t); 2811 2812 toepcb_release(toep); 2813 goto discard; 2814 } 2815 2816 log(LOG_NOTICE, "toep=%p\n", toep); 2817 log(LOG_NOTICE, "tp=%p\n", toep->tp_tp); 2818 2819 toepcb_hold(toep); 2820 process_abort_rpl(toep, m); 2821 toepcb_release(toep); 2822 return (0); 2823} 2824 2825/* 2826 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2827 * indicate whether RST should be sent in response. 2828 */ 2829static int 2830abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2831{ 2832 struct tcpcb *tp = so_sototcpcb(so); 2833 2834 switch (abort_reason) { 2835 case CPL_ERR_BAD_SYN: 2836#if 0 2837 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2838#endif 2839 case CPL_ERR_CONN_RESET: 2840 // XXX need to handle SYN_RECV due to crossed SYNs 2841 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2842 case CPL_ERR_XMIT_TIMEDOUT: 2843 case CPL_ERR_PERSIST_TIMEDOUT: 2844 case CPL_ERR_FINWAIT2_TIMEDOUT: 2845 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2846#if 0 2847 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2848#endif 2849 return (ETIMEDOUT); 2850 default: 2851 return (EIO); 2852 } 2853} 2854 2855static inline void 2856set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2857{ 2858 struct cpl_abort_rpl *rpl = cplhdr(m); 2859 2860 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2861 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2862 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2863 2864 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2865 rpl->cmd = cmd; 2866} 2867 2868static void 2869send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2870{ 2871 struct mbuf *reply_mbuf; 2872 struct cpl_abort_req_rss *req = cplhdr(m); 2873 2874 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2875 m_set_priority(m, CPL_PRIORITY_DATA); 2876 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2877 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2878 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2879 m_free(m); 2880} 2881 2882/* 2883 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2884 */ 2885static inline int 2886is_neg_adv_abort(unsigned int status) 2887{ 2888 return status == CPL_ERR_RTX_NEG_ADVICE || 2889 status == CPL_ERR_PERSIST_NEG_ADVICE; 2890} 2891 2892static void 2893send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2894{ 2895 struct mbuf *reply_mbuf; 2896 struct cpl_abort_req_rss *req = cplhdr(m); 2897 2898 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2899 2900 if (!reply_mbuf) { 2901 /* Defer the reply. Stick rst_status into req->cmd. 
*/ 2902 req->status = rst_status; 2903 t3_defer_reply(m, tdev, send_deferred_abort_rpl); 2904 return; 2905 } 2906 2907 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA); 2908 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status); 2909 m_free(m); 2910 2911 /* 2912 * XXX need to sync with ARP as for SYN_RECV connections we can send 2913 * these messages while ARP is pending. For other connection states 2914 * it's not a problem. 2915 */ 2916 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2917} 2918 2919#ifdef notyet 2920static void 2921cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) 2922{ 2923 CXGB_UNIMPLEMENTED(); 2924#ifdef notyet 2925 struct request_sock *req = child->sk_user_data; 2926 2927 inet_csk_reqsk_queue_removed(parent, req); 2928 synq_remove(tcp_sk(child)); 2929 __reqsk_free(req); 2930 child->sk_user_data = NULL; 2931#endif 2932} 2933 2934 2935/* 2936 * Performs the actual work to abort a SYN_RECV connection. 2937 */ 2938static void 2939do_abort_syn_rcv(struct socket *child, struct socket *parent) 2940{ 2941 struct tcpcb *parenttp = so_sototcpcb(parent); 2942 struct tcpcb *childtp = so_sototcpcb(child); 2943 2944 /* 2945 * If the server is still open we clean up the child connection, 2946 * otherwise the server already did the clean up as it was purging 2947 * its SYN queue and the skb was just sitting in its backlog. 2948 */ 2949 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { 2950 cleanup_syn_rcv_conn(child, parent); 2951 inp_wlock(childtp->t_inpcb); 2952 t3_release_offload_resources(childtp->t_toe); 2953 inp_wunlock(childtp->t_inpcb); 2954 tcp_offload_close(childtp); 2955 } 2956} 2957#endif 2958 2959/* 2960 * Handle abort requests for a SYN_RECV connection. These need extra work 2961 * because the socket is on its parent's SYN queue. 
 */
static int
abort_syn_rcv(struct socket *so, struct mbuf *m)
{
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct socket *parent;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
	struct socket *oreq = so->so_incomp;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;

	if (!oreq)
		return -1;        /* somehow we are not on the SYN queue */

	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, oreq->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	do_abort_syn_rcv(so, parent);
	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
	so_unlock(parent);
#endif
	return (0);
}

/*
 * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
 * request except that we need to reply to it.
 */
static void
process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
{
	int rst_status = CPL_ABORT_NO_RST;
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int needclose = 0;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
		/*
		 * First ABORT_REQ for this tid: record it and wait for the
		 * next one before acting (presumably HW delivers the abort
		 * twice — TODO confirm against the T3 CPL spec).
		 */
		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
		m_free(m);
		goto skip;
	}

	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
	/*
	 * Three cases to consider:
	 * a) We haven't sent an abort_req; close the connection.
	 * b) We have sent a post-close abort_req that will get to TP too late
	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
	 *    be ignored and the connection should be closed now.
	 * c) We have sent a regular abort_req that will get to TP too late.
	 *    That will generate an abort_rpl with status 0, wait for it.
	 */
	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
	    (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
		int error;

		error = abort_status_to_errno(so, req->status,
		    &rst_status);
		so_error_set(so, error);

		if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
			so_sorwakeup(so);
		/*
		 * SYN_RECV needs special processing.  If abort_syn_rcv()
		 * returns 0 it has taken care of the abort.
		 */
		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
			goto skip;

		t3_release_offload_resources(toep);
		needclose = 1;
	}
	inp_wunlock(tp->t_inpcb);

	if (needclose)
		tcp_offload_close(tp);

	send_abort_rpl(m, tdev, rst_status);
	return;
skip:
	inp_wunlock(tp->t_inpcb);
}

/*
 * Handle an ABORT_REQ_RSS CPL message.
 */
static int
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;

	if (is_neg_adv_abort(req->status)) {
		m_free(m);
		return (0);
	}

	log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);

	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
		/* Embryonic connection: release the tid and unhook the tcpcb. */
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		toep->tp_flags |= TP_ABORT_REQ_RCVD;

		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		/*
		 * Unhook
		 */
		toep->tp_tp->t_toe = NULL;
		toep->tp_tp->t_flags &= ~TF_TOE;
		toep->tp_tp = NULL;
		/*
		 * XXX need to call syncache_chkrst - but we don't
		 * have a way of doing that yet
		 */
		toepcb_release(toep);
		log(LOG_ERR, "abort for unestablished connection :-(\n");
		return (0);
	}
	if (toep->tp_tp == NULL) {
		log(LOG_NOTICE, "disconnected toepcb\n");
		/*
		 * should be freed momentarily
		 * NOTE(review): m is neither freed here nor returned via
		 * CPL_RET_BUF_DONE — looks like an mbuf leak; verify the
		 * dispatcher's ownership convention for a 0 return.
		 */
		return (0);
	}


	toepcb_hold(toep);
	process_abort_req(toep, m, toep->tp_toedev);
	toepcb_release(toep);
	return (0);
}
#ifdef notyet
static void
pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
{
	struct toedev *tdev = TOE_DEV(parent);

	do_abort_syn_rcv(child, parent);
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
		struct cpl_pass_accept_rpl *rpl = cplhdr(m);

		rpl->opt0h = htonl(F_TCAM_BYPASS);
		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	} else
		m_free(m);
}
#endif
static void
handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
{
	CXGB_UNIMPLEMENTED();

#ifdef notyet
	struct t3cdev *cdev;
	struct socket *parent;
	struct socket *oreq;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;
	struct tcpcb *otp, *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		m_free(m);
		return;
	}

	oreq = so->so_incomp;
	otp = so_sototcpcb(oreq);

	cdev = T3C_DEV(so);
	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, otp->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	pass_open_abort(so, parent, m);
	so_unlock(parent);
#endif
}

/*
 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
 * connection.
 */
static void
pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
#endif
	handle_pass_open_arp_failure(m_get_socket(m), m);
}

/*
 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
 */
static void
mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
{
	struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
	struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
	unsigned int tid = GET_TID(req);

	m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->peer_ip = req->peer_ip;	// req->peer_ip not overwritten yet
	rpl->opt0h = htonl(F_TCAM_BYPASS);
	rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
	rpl->opt2 = 0;
	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
}

/*
 * Send a deferred reject to an accept request.
 */
static void
reject_pass_request(struct toedev *tdev, struct mbuf *m)
{
	struct mbuf *reply_mbuf;

	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
	mk_pass_accept_rpl(reply_mbuf, m);
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
	m_free(m);
}

/*
 * Syncache callback: both events release the extra toepcb reference taken
 * in process_pass_accept_req().
 */
static void
handle_syncache_event(int event, void *arg)
{
	struct toepcb *toep = arg;

	switch (event) {
	case TOE_SC_ENTRY_PRESENT:
		/*
		 * entry already exists - free toepcb
		 * and l2t
		 */
		printf("syncache entry present\n");
		toepcb_release(toep);
		break;
	case TOE_SC_DROP:
		/*
		 * The syncache has given up on this entry
		 * either it timed out, or it was evicted
		 * we need to explicitly release the tid
		 */
		printf("syncache entry dropped\n");
		toepcb_release(toep);
		break;
	default:
		log(LOG_ERR, "unknown syncache event %d\n", event);
		break;
	}
}

/*
 * Translate a CPL_PASS_ACCEPT_REQ into a tcphdr/tcpopt/in_conninfo triple
 * and enter the embryonic connection into the host syncache.
 */
static void
syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
{
	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	struct inpcb *inp;
	int mss, wsf, sack, ts;
	uint32_t rcv_isn = ntohl(req->rcv_isn);

	bzero(&to, sizeof(struct tcpopt));
	inp = so_sotoinpcb(lso);

	/*
	 * Fill out information for entering us into the syncache
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_SYN;

	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;


	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	DPRINTF("syncache add of %d:%d %d:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port));

	/* TCP options as already parsed by HW. */
	mss = req->tcp_options.mss;
	wsf = req->tcp_options.wsf;
	ts = req->tcp_options.tstamp;
	sack = req->tcp_options.sack;
	to.to_mss = mss;
	to.to_wscale = wsf;
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
	syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep);
}


/*
 * Process a CPL_PASS_ACCEPT_REQ message.  Does the part that needs the socket
 * lock held.  Note that the sock here is a listening socket that is not owned
 * by the TOE.
 */
static void
process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
    struct listen_ctx *lctx)
{
	int rt_flags;
	struct l2t_entry *e;
	struct iff_mac tim;
	struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
	struct cpl_pass_accept_rpl *rpl;
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tom_data *d = TOM_DATA(tdev);
	struct t3cdev *cdev = d->cdev;
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *newtoep;
	struct rtentry *dst;
	struct sockaddr_in nam;
	struct t3c_data *td = T3C_DATA(cdev);

	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
	if (__predict_false(reply_mbuf == NULL)) {
		/* No memory for a reply now; defer or drop the tid. */
		if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
			t3_defer_reply(m, tdev, reject_pass_request);
		else {
			cxgb_queue_tid_release(cdev, tid);
			m_free(m);
		}
		DPRINTF("failed to get reply_mbuf\n");

		goto out;
	}

	if (tp->t_state != TCPS_LISTEN) {
		DPRINTF("socket not in listen state\n");

		goto reject;
	}

	tim.mac_addr = req->dst_mac;
	tim.vlan_tag = ntohs(req->vlan_tag);
	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
		DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
		goto reject;
	}

#ifdef notyet
	/*
	 * XXX do route lookup to confirm that we're still listening on this
	 * address
	 */
	if (ip_route_input(skb, req->local_ip, req->peer_ip,
			   G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
		goto reject;
	rt_flags = ((struct rtable *)skb->dst)->rt_flags &
		(RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
	dst_release(skb->dst);	// done with the input route, release it
	skb->dst = NULL;

	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;
#endif
	/*
	 * XXX route lookup not implemented; assume the destination is local.
	 */
	rt_flags = RTF_LOCAL;
	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;

	/*
	 * Calculate values and add to syncache
	 */

	newtoep = toepcb_alloc();
	if (newtoep == NULL)
		goto reject;
	/*
	 * NOTE(review): the reject paths above can reach mk_tid_release()
	 * with newtoep uninitialized — verify mk_tid_release's use of it.
	 */

	bzero(&nam, sizeof(struct sockaddr_in));

	nam.sin_len = sizeof(struct sockaddr_in);
	nam.sin_family = AF_INET;
	nam.sin_addr.s_addr =req->peer_ip;
	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);

	if (dst == NULL) {
		printf("failed to find route\n");
		goto reject;
	}
	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
	    (struct sockaddr *)&nam);
	if (e == NULL) {
		/*
		 * NOTE(review): only logs; e is dereferenced (e->idx,
		 * e->smt_idx) below — possible NULL dereference.
		 */
		DPRINTF("failed to get l2t\n");
	}
	/*
	 * Point to our listen socket until accept
	 */
	newtoep->tp_tp = tp;
	newtoep->tp_flags = TP_SYN_RCVD;
	newtoep->tp_tid = tid;
	newtoep->tp_toedev = tdev;
	tp->rcv_wnd = select_rcv_wnd(tdev, so);

	cxgb_insert_tid(cdev, d->client, newtoep, tid);
	so_lock(so);
	LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
	so_unlock(so);

	/* Enable DDP only if tuned on, allowed by the socket, and the window is big enough. */
	newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;

	if (newtoep->tp_ulp_mode) {
		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);

		if (ddp_mbuf == NULL)
			newtoep->tp_ulp_mode = 0;
	}

	CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
	    TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
	/*
	 * XXX workaround for lack of syncache drop
	 */
	toepcb_hold(newtoep);
	syncache_add_accept_req(req, so, newtoep);

	rpl = cplhdr(reply_mbuf);
	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	rpl->wr.wr_lo = 0;
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->opt2 = htonl(calc_opt2(so, tdev));
	rpl->rsvd = rpl->opt2;                /* workaround for HW bug */
	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten

	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
	    V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
	rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
				  CPL_PASS_OPEN_ACCEPT);

	DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);

	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));

	l2t_send(cdev, reply_mbuf, e);
	m_free(m);
	if (newtoep->tp_ulp_mode) {
		__set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
				V_TF_DDP_OFF(1) |
				TP_DDP_TIMER_WORKAROUND_MASK,
				V_TF_DDP_OFF(1) |
		    TP_DDP_TIMER_WORKAROUND_VAL, 1);
	} else
		printf("not offloading\n");



	return;
reject:
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
		mk_pass_accept_rpl(reply_mbuf, m);
	else
		mk_tid_release(reply_mbuf, newtoep, tid);
	cxgb_ofld_send(cdev, reply_mbuf);
	m_free(m);
out:
#if 0
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#else
	return;
#endif
}

/*
 * Handle a CPL_PASS_ACCEPT_REQ message.
 *
 * Thin dispatch wrapper: recovers the listen context registered for the
 * server TID and hands the work off to process_pass_accept_req().
 */
static int
do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
	struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
	struct tom_data *d = listen_ctx->tom_data;

#if VALIDATE_TID
	/* Dead Linux-origin validation code (printk/unlikely) — compiled out. */
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	if (unlikely(!lsk)) {
		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
		       cdev->name,
		       (unsigned long)((union listen_entry *)ctx -
					t->stid_tab));
		return CPL_RET_BUF_DONE;
	}
	if (unlikely(tid >= t->ntids)) {
		printk(KERN_ERR "%s: passive open TID %u too large\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
	/*
	 * For T3A the current user of the TID may have closed but its last
	 * message(s) may have been backlogged so the TID appears to be still
	 * in use.  Just take the TID away, the connection can close at its
	 * own leisure.  For T3B this situation is a bug.
	 */
	if (!valid_new_tid(t, tid) &&
	    cdev->type != T3A) {
		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
#endif

	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
	return (0);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.  The hardware reports the
 * negotiated options packed into "opt" (a cpl tcp_opt word); this unpacks
 * them into tp->t_flags and the toepcb's MSS clamp.
 */
static void
assign_rxopt(struct socket *so, unsigned int opt)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));

	inp_lock_assert(tp->t_inpcb);

	/* MTU table index -> MSS; 40 = IP + TCP fixed header bytes. */
	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	tp->t_flags        |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
	tp->t_flags        |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
	tp->t_flags        |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
	/* Window scaling is in effect only if both sides asked for it. */
	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE|TF_REQ_SCALE))
		tp->rcv_scale = tp->request_r_scale;
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void
make_established(struct socket *so, u32 snd_isn, unsigned int opt)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
	assign_rxopt(so, opt);

	/*
	 *XXXXXXXXXXX
	 *
	 */
#ifdef notyet
	so->so_proto->pr_ctloutput = t3_ctloutput;
#endif

#if 0
	inet_sk(sk)->id = tp->write_seq ^ jiffies;
#endif
	/*
	 * XXX not clear what rcv_wup maps to
	 */
	/*
	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
	 * pass through opt0.  opt0's RCV_BUFSIZ field is only M_RCV_BUFSIZ
	 * KB wide, so a larger window must be topped up via credit return.
	 */
	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);

	dump_toepcb(toep);

#ifdef notyet
/*
 * no clean interface for marking ARP up to date
 */
	dst_confirm(sk->sk_dst_cache);
#endif
	tp->t_starttime = ticks;
	tp->t_state = TCPS_ESTABLISHED;
	soisconnected(so);
}

/*
 * Reconstruct the connection tuple and negotiated TCP options from a
 * CPL_PASS_ESTABLISH and expand the matching syncache entry into a full
 * socket (returned through *so).  Returns the syncache_offload_expand()
 * result (non-zero on success).
 *
 * NOTE(review): "th" and "inc" are only partially initialized here — the
 * fields not assigned below are stack garbage.  Presumably
 * syncache_offload_expand() reads only the fields set; verify.
 */
static int
syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
{

	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	int mss, wsf, sack, ts;
	struct mbuf *m = NULL;
	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
	unsigned int opt;

#ifdef MAC
#error	"no MAC support"
#endif

	opt = ntohs(req->tcp_opt);

	bzero(&to, sizeof(struct tcpopt));

	/*
	 * Fill out information for entering us into the syncache
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_ACK;

	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	/* Unpack the hardware-reported negotiated options (cf. assign_rxopt). */
	mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	wsf = G_TCPOPT_WSCALE_OK(opt);
	ts = G_TCPOPT_TSTAMP(opt);
	sack = G_TCPOPT_SACK(opt);

	to.to_mss = mss;
	to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);

	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port),
	    mss, wsf, ts, sack);
	return syncache_offload_expand(&inc, &to, &th, so, m);
}


/*
 * Process a CPL_PASS_ESTABLISH message.
 * XXX a lot of the locking doesn't work
 * if we are in TCP_SYN_RECV due to crossed SYNs
 */
static int
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_pass_establish *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so, *lso;
	struct t3c_data *td = T3C_DATA(cdev);
	struct sockbuf *snd, *rcv;
	
	// Complete socket initialization now that we have the SND_ISN
	
	struct toedev *tdev;


	tdev = toep->tp_toedev;

	inp_wlock(tp->t_inpcb);

	/*
	 *
	 * XXX need to add reference while we're manipulating
	 */
	so = lso = inp_inpcbtosocket(tp->t_inpcb);

	inp_wunlock(tp->t_inpcb);

	/* Take the embryonic connection off the listener's SYN queue. */
	so_lock(so);
	LIST_REMOVE(toep, synq_entry);
	so_unlock(so);
	
	if (!syncache_expand_establish_req(req, &so, toep)) {
		/*
		 * No entry
		 */
		CXGB_UNIMPLEMENTED();
	}
	if (so == NULL) {
		/*
		 * Couldn't create the socket
		 */
		CXGB_UNIMPLEMENTED();
	}

	/* From here on "so"/"tp" are the newly created connection, not the
	 * listener. */
	tp = so_sototcpcb(so);
	inp_wlock(tp->t_inpcb);

	snd = so_sockbuf_snd(so);
	rcv = so_sockbuf_rcv(so);

	/* The TOE hands us complete buffers; coalescing would be wasted work. */
	snd->sb_flags |= SB_NOCOALESCE;
	rcv->sb_flags |= SB_NOCOALESCE;

	toep->tp_tp = tp;
	toep->tp_flags = 0;
	tp->t_toe = toep;
	reset_wr_list(toep);
	tp->rcv_wnd = select_rcv_wnd(tdev, so);
	tp->rcv_nxt = toep->tp_copied_seq;
	install_offload_ops(so);

	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
	toep->tp_qset_idx = 0;
	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
	
	/*
	 * XXX Cancel any keep alive timer
	 */
	     
	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * XXX workaround for lack of syncache drop
	 * (drops the extra reference taken in process_pass_accept_req).
	 * NOTE(review): toep->tp_tid is read below after the release —
	 * assumes another reference keeps toep alive here; verify.
	 */
	toepcb_release(toep);
	inp_wunlock(tp->t_inpcb);
	
	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
#ifdef notyet
	/*
	 * XXX not sure how these checks map to us
	 */
	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
		sk->sk_state_change(sk);
		sk_wake_async(so, 0, POLL_OUT);
	}
	/*
	 * The state for the new connection is now up to date.
	 * Next check if we should add the connection to the parent's
	 * accept queue.  When the parent closes it resets connections
	 * on its SYN queue, so check if we are being reset.  If so we
	 * don't need to do anything more, the coming ABORT_RPL will
	 * destroy this socket.  Otherwise move the connection to the
	 * accept queue.
	 *
	 * Note that we reset the synq before closing the server so if
	 * we are not being reset the stid is still open.
	 */
	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
		__kfree_skb(skb);
		goto unlock;
	}
#endif
	m_free(m);

	return (0);
}

/*
 * Fill in the right TID for CPL messages waiting in the out-of-order queue
 * and send them to the TOE.  Used once an active open finally learns its TID
 * (see socket_act_establish()).
 */
static void
fixup_and_send_ofo(struct toepcb *toep)
{
	struct mbuf *m;
	struct toedev *tdev = toep->tp_toedev;
	struct tcpcb *tp = toep->tp_tp;
	unsigned int tid = toep->tp_tid;

	log(LOG_NOTICE, "fixup_and_send_ofo\n");
	
	inp_lock_assert(tp->t_inpcb);
	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
		/*
		 * A variety of messages can be waiting but the fields we'll
		 * be touching are common to all so any message type will do.
		 */
		struct cpl_close_con_req *p = cplhdr(m);

		p->wr.wr_lo = htonl(V_WR_TID(tid));
		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	}
}

/*
 * Updates socket state from an active establish CPL message.  Runs with the
 * socket lock held.
 */
static void
socket_act_establish(struct socket *so, struct mbuf *m)
{
	struct cpl_act_establish *req = cplhdr(m);
	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	
	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
		    toep->tp_tid, tp->t_state);
	
	tp->ts_recent_age = ticks;
	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
	
	/*
	 * Now that we finally have a TID send any CPL messages that we had to
	 * defer for lack of a TID.
	 */
	if (mbufq_len(&toep->out_of_order_queue))
		fixup_and_send_ofo(toep);

	if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
		/*
		 * XXX does this even make sense?
		 */
		so_sorwakeup(so);
	}
	m_free(m);
#ifdef notyet
/*
 * XXX assume no write requests permitted while socket connection is
 * incomplete
 */
	/*
	 * Currently the send queue must be empty at this point because the
	 * socket layer does not send anything before a connection is
	 * established.  To be future proof though we handle the possibility
	 * that there are pending buffers to send (either TX_DATA or
	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
	 * buffers according to the just learned write_seq, and then we send
	 * them on their way.
	 */
	fixup_pending_writeq_buffers(sk);
	if (t3_push_frames(so, 1))
		sk->sk_write_space(sk);
#endif

	toep->tp_state = tp->t_state;
	tcpstat.tcps_connects++;
				
}

/*
 * Process a CPL_ACT_ESTABLISH message.  Completes an active open: exchanges
 * the provisional ATID for the real TID and finishes socket establishment.
 */
static int
do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_act_establish *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so; 
	struct toedev *tdev;
	struct tom_data *d;
	
	if (tp == NULL) {
		/* Connection already gone; just return the ATID. */
		free_atid(cdev, atid);
		return (0);
	}
	inp_wlock(tp->t_inpcb);

	/*
	 * XXX
	 */
	so = inp_inpcbtosocket(tp->t_inpcb);
	tdev = toep->tp_toedev; /* blow up here if link was down */
	d = TOM_DATA(tdev);

	/*
	 * It's OK if the TID is currently in use, the owning socket may have
	 * backlogged its last CPL message(s).  Just take it away.
	 */
	toep->tp_tid = tid;
	toep->tp_tp = tp;
	so_insert_tid(d, toep, tid);
	free_atid(cdev, atid);
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));

	socket_act_establish(so, m);
	inp_wunlock(tp->t_inpcb);
	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);

	return (0);
}

/*
 * Process an acknowledgment of WR completion.  Advance snd_una and send the
 * next batch of work requests from the write queue.
3875 */ 3876static void 3877wr_ack(struct toepcb *toep, struct mbuf *m) 3878{ 3879 struct tcpcb *tp = toep->tp_tp; 3880 struct cpl_wr_ack *hdr = cplhdr(m); 3881 struct socket *so; 3882 unsigned int credits = ntohs(hdr->credits); 3883 u32 snd_una = ntohl(hdr->snd_una); 3884 int bytes = 0; 3885 struct sockbuf *snd; 3886 3887 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3888 3889 inp_wlock(tp->t_inpcb); 3890 so = inp_inpcbtosocket(tp->t_inpcb); 3891 toep->tp_wr_avail += credits; 3892 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3893 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3894 3895 while (credits) { 3896 struct mbuf *p = peek_wr(toep); 3897 3898 if (__predict_false(!p)) { 3899 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3900 "nothing pending, state %u wr_avail=%u\n", 3901 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3902 break; 3903 } 3904 CTR2(KTR_TOM, 3905 "wr_ack: p->credits=%d p->bytes=%d", 3906 p->m_pkthdr.csum_data, p->m_pkthdr.len); 3907 KASSERT(p->m_pkthdr.csum_data != 0, 3908 ("empty request still on list")); 3909 3910 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3911 3912#if DEBUG_WR > 1 3913 struct tx_data_wr *w = cplhdr(p); 3914 log(LOG_ERR, 3915 "TID %u got %u WR credits, need %u, len %u, " 3916 "main body %u, frags %u, seq # %u, ACK una %u," 3917 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3918 toep->tp_tid, credits, p->csum, p->len, 3919 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3920 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3921 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3922#endif 3923 p->m_pkthdr.csum_data -= credits; 3924 break; 3925 } else { 3926 dequeue_wr(toep); 3927 credits -= p->m_pkthdr.csum_data; 3928 bytes += p->m_pkthdr.len; 3929 CTR3(KTR_TOM, 3930 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3931 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3932 3933 m_free(p); 3934 } 3935 } 3936 3937#if DEBUG_WR 3938 
check_wr_invariants(tp); 3939#endif 3940 3941 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3942#if VALIDATE_SEQ 3943 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3944 3945 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3946 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3947 toep->tp_tid, tp->snd_una); 3948#endif 3949 goto out_free; 3950 } 3951 3952 if (tp->snd_una != snd_una) { 3953 tp->snd_una = snd_una; 3954 tp->ts_recent_age = ticks; 3955#ifdef notyet 3956 /* 3957 * Keep ARP entry "minty fresh" 3958 */ 3959 dst_confirm(sk->sk_dst_cache); 3960#endif 3961 if (tp->snd_una == tp->snd_nxt) 3962 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3963 } 3964 3965 snd = so_sockbuf_snd(so); 3966 if (bytes) { 3967 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3968 snd = so_sockbuf_snd(so); 3969 sockbuf_lock(snd); 3970 sbdrop_locked(snd, bytes); 3971 so_sowwakeup_locked(so); 3972 } 3973 3974 if (snd->sb_sndptroff < snd->sb_cc) 3975 t3_push_frames(so, 0); 3976 3977out_free: 3978 inp_wunlock(tp->t_inpcb); 3979 m_free(m); 3980} 3981 3982/* 3983 * Handler for TX_DATA_ACK CPL messages. 3984 */ 3985static int 3986do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3987{ 3988 struct toepcb *toep = (struct toepcb *)ctx; 3989 3990 VALIDATE_SOCK(so); 3991 3992 wr_ack(toep, m); 3993 return 0; 3994} 3995 3996/* 3997 * Handler for TRACE_PKT CPL messages. Just sink these packets. 3998 */ 3999static int 4000do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 4001{ 4002 m_freem(m); 4003 return 0; 4004} 4005 4006/* 4007 * Reset a connection that is on a listener's SYN queue or accept queue, 4008 * i.e., one that has not had a struct socket associated with it. 4009 * Must be called from process context. 4010 * 4011 * Modeled after code in inet_csk_listen_stop(). 
4012 */ 4013static void 4014t3_reset_listen_child(struct socket *child) 4015{ 4016 struct tcpcb *tp = so_sototcpcb(child); 4017 4018 t3_send_reset(tp->t_toe); 4019} 4020 4021 4022static void 4023t3_child_disconnect(struct socket *so, void *arg) 4024{ 4025 struct tcpcb *tp = so_sototcpcb(so); 4026 4027 if (tp->t_flags & TF_TOE) { 4028 inp_wlock(tp->t_inpcb); 4029 t3_reset_listen_child(so); 4030 inp_wunlock(tp->t_inpcb); 4031 } 4032} 4033 4034/* 4035 * Disconnect offloaded established but not yet accepted connections sitting 4036 * on a server's accept_queue. We just send an ABORT_REQ at this point and 4037 * finish off the disconnect later as we may need to wait for the ABORT_RPL. 4038 */ 4039void 4040t3_disconnect_acceptq(struct socket *listen_so) 4041{ 4042 4043 so_lock(listen_so); 4044 so_listeners_apply_all(listen_so, t3_child_disconnect, NULL); 4045 so_unlock(listen_so); 4046} 4047 4048/* 4049 * Reset offloaded connections sitting on a server's syn queue. As above 4050 * we send ABORT_REQ and finish off when we get ABORT_RPL. 
4051 */ 4052 4053void 4054t3_reset_synq(struct listen_ctx *lctx) 4055{ 4056 struct toepcb *toep; 4057 4058 so_lock(lctx->lso); 4059 while (!LIST_EMPTY(&lctx->synq_head)) { 4060 toep = LIST_FIRST(&lctx->synq_head); 4061 LIST_REMOVE(toep, synq_entry); 4062 toep->tp_tp = NULL; 4063 t3_send_reset(toep); 4064 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); 4065 toepcb_release(toep); 4066 } 4067 so_unlock(lctx->lso); 4068} 4069 4070 4071int 4072t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl, 4073 unsigned int nppods, unsigned int tag, unsigned int maxoff, 4074 unsigned int pg_off, unsigned int color) 4075{ 4076 unsigned int i, j, pidx; 4077 struct pagepod *p; 4078 struct mbuf *m; 4079 struct ulp_mem_io *req; 4080 unsigned int tid = toep->tp_tid; 4081 const struct tom_data *td = TOM_DATA(toep->tp_toedev); 4082 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; 4083 4084 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", 4085 gl, nppods, tag, maxoff, pg_off, color); 4086 4087 for (i = 0; i < nppods; ++i) { 4088 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); 4089 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4090 req = mtod(m, struct ulp_mem_io *); 4091 m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE; 4092 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4093 req->wr.wr_lo = 0; 4094 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) | 4095 V_ULPTX_CMD(ULP_MEM_WRITE)); 4096 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) | 4097 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1)); 4098 4099 p = (struct pagepod *)(req + 1); 4100 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) { 4101 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); 4102 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) | 4103 V_PPOD_COLOR(color)); 4104 p->pp_max_offset = htonl(maxoff); 4105 p->pp_page_offset = htonl(pg_off); 4106 p->pp_rsvd = 0; 4107 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) 4108 p->pp_addr[j] = 
pidx < gl->dgl_nelem ? 4109 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0; 4110 } else 4111 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */ 4112 send_or_defer(toep, m, 0); 4113 ppod_addr += PPOD_SIZE; 4114 } 4115 return (0); 4116} 4117 4118/* 4119 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command. 4120 */ 4121static inline void 4122mk_cpl_barrier_ulp(struct cpl_barrier *b) 4123{ 4124 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b; 4125 4126 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4127 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8)); 4128 b->opcode = CPL_BARRIER; 4129} 4130 4131/* 4132 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command. 4133 */ 4134static inline void 4135mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno) 4136{ 4137 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 4138 4139 txpkt = (struct ulp_txpkt *)req; 4140 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4141 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 4142 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid)); 4143 req->cpuno = htons(cpuno); 4144} 4145 4146/* 4147 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command. 4148 */ 4149static inline void 4150mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, 4151 unsigned int word, uint64_t mask, uint64_t val) 4152{ 4153 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 4154 4155 CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 4156 tid, word, mask, val); 4157 4158 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4159 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 4160 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); 4161 req->reply = V_NO_REPLY(1); 4162 req->cpu_idx = 0; 4163 req->word = htons(word); 4164 req->mask = htobe64(mask); 4165 req->val = htobe64(val); 4166} 4167 4168/* 4169 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command. 
 */
static void
mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
		   unsigned int tid, unsigned int credits)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
	/* Return rx credits with the configured delayed-ack mode. */
	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
				 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
				 V_RX_CREDITS(credits));
}

/*
 * Cancel HW DDP buffer "bufidx" (0 or 1) for a connection.  Builds a single
 * atomic compound work request: barrier, SET_TCB_FIELD invalidating the
 * buffer / flipping the active buffer, GET_TCB (to learn how much data
 * landed before the cancel), and a trailing barrier.
 */
void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_barrier *lock;
	struct cpl_set_tcb_field *req;
	struct cpl_get_tcb *getreq;
	struct ddp_state *p = &toep->tp_ddp_state;

#if 0
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
#endif
	/* Layout: wr | barrier | set_tcb_field | get_tcb | barrier. */
	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
		sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);
	
	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	lock = (struct cpl_barrier *)(wr + 1);
	mk_cpl_barrier_ulp(lock);

	req = (struct cpl_set_tcb_field *)(lock + 1);

	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

	/* Hmmm, not sure if this actually a good thing: reactivating
	 * the other buffer might be an issue if it has been completed
	 * already. However, that is unlikely, since the fact that the UBUF
	 * is not completed indicates that there is no oustanding data.
	 */
	if (bufidx == 0)
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF0_VALID(1),
				     V_TF_DDP_ACTIVE_BUF(1));
	else
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF1_VALID(1), 0);

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/* Keep track of the number of oustanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;
	
#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
		  "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @sk: the socket associated with the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.  Read the TCB to
 * determine how much data was written into the buffer before the overlay
 * took place.
 */
void
t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
		  unsigned int tag1, unsigned int len)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_get_tcb *getreq;
	struct cpl_set_tcb_field *req;
	struct ddp_state *p = &toep->tp_ddp_state;

	CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
	    bufidx, tag0, tag1, len);
#if 0
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
#endif
	/* Layout: wr | 3 x set_tcb_field | get_tcb. */
	wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	m->m_pkthdr.len = m->m_len = wrlen;
	bzero(wr, wrlen);

	
	/* Set the ATOMIC flag to make sure that TP processes the following
	 * CPLs in an atomic manner and no wire segments can be interleaved.
	 */
	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
	req = (struct cpl_set_tcb_field *)(wr + 1);
	/* Both buffer tags live in one 64-bit TCB word: BUF0 low, BUF1 high. */
	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
			     V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
			     V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
			     V_TCB_RX_DDP_BUF0_TAG(tag0) |
			     V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
	req++;
	if (bufidx == 0) {
		/* Program buffer 0's length and mark it valid/inactive. */
		 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
			    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
			    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
		 req++;
		 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
			    V_TF_DDP_PUSH_DISABLE_0(1) |
			    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
			    V_TF_DDP_PUSH_DISABLE_0(0) |
			    V_TF_DDP_BUF0_VALID(1));
	} else {
		/* Program buffer 1's length and mark it valid/active. */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
			    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
			    V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
		req++;
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
			    V_TF_DDP_PUSH_DISABLE_1(1) |
			    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
			    V_TF_DDP_PUSH_DISABLE_1(0) |
			    V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
	}

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	/* Keep track of the number of oustanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE4(TIDTB(sk),
		  "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
		  "len %d",
		  bufidx, tag0, tag1, len);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/*
 * Sends a compound WR containing all the CPL messages needed to program the
 * two HW DDP buffers, namely optionally setting up the length and offset of
 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
 */
void
t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
		 unsigned int len1, unsigned int offset1,
		 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_set_tcb_field *req;

	CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
	    len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
	
#if 0
	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
#endif
	/* Size the WR for only the pieces actually requested. */
	wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
		(len1 ? sizeof(*req) : 0) +
		(modulate ? sizeof(struct cpl_rx_data_ack) : 0);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);
	
	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	req = (struct cpl_set_tcb_field *)(wr + 1);
	if (len0) {                  /* program buffer 0 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
			V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
			V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
			V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
			V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
		req++;
	}
	if (len1) {                  /* program buffer 1 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
			V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
			V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
			V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
			V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
		req++;
	}

	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
			     ddp_flags);

	if (modulate) {
		/* Piggy-back an RX_DATA_ACK returning accumulated credits. */
		mk_rx_data_ack_ulp(toep,
		    (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
		    toep->tp_copied_seq - toep->tp_rcv_wup);
		toep->tp_rcv_wup = toep->tp_copied_seq;
	}

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
		  "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
		  "modulate %d",
		  len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
		  modulate);
#endif

	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/*
 * Populate the WR-count lookup table (mbuf_wrs[i] = number of work requests
 * needed for an i-fragment send) from the firmware-reported WR length, and
 * record the WR length in bytes.  Idempotent.
 */
void
t3_init_wr_tab(unsigned int wr_len)
{
	int i;

	if (mbuf_wrs[1])     /* already initialized */
		return;

	for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
		/* Flits needed for an i-entry SGL plus the 3-flit WR header. */
		int sgl_len = (3 * i) / 2 + (i & 1);

		sgl_len += 3;
		mbuf_wrs[i] = sgl_len <= wr_len ?
			1 : 1 + (sgl_len - 2) / (wr_len - 1);
	}

	wrlen = wr_len * 8;
}

/*
 * One-time module initialization: register all CPL message handlers with the
 * TOM dispatcher.  Always returns 0.
 */
int
t3_init_cpl_io(void)
{
#ifdef notyet
	tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
	if (!tcphdr_skb) {
		log(LOG_ERR,
		       "Chelsio TCP offload: can't allocate sk_buff\n");
		return -1;
	}
	skb_put(tcphdr_skb, sizeof(struct tcphdr));
	tcphdr_skb->h.raw = tcphdr_skb->data;
	memset(tcphdr_skb->data, 0, tcphdr_skb->len);
#endif

	t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
	t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
	t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
	t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
	t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
	t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
	t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
	t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
	t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
	return (0);
}