cxgb_cpl_io.c revision 177530
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 177530 2008-03-23 22:34:16Z kmacy $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/socket.h> 43#include <sys/sysctl.h> 44#include <sys/syslog.h> 45#include <sys/socketvar.h> 46#include <sys/protosw.h> 47#include <sys/priv.h> 48 49#include <net/if.h> 50#include <net/route.h> 51 52#include <netinet/in.h> 53#include <netinet/in_pcb.h> 54#include <netinet/in_systm.h> 55#include <netinet/in_var.h> 56 57 58#include <dev/cxgb/cxgb_osdep.h> 59#include <dev/cxgb/sys/mbufq.h> 60 61#include <netinet/ip.h> 62#include <netinet/tcp_var.h> 63#include <netinet/tcp_fsm.h> 64#include <netinet/tcp_offload.h> 65#include <netinet/tcp_seq.h> 66#include <netinet/tcp_syncache.h> 67#include <netinet/tcp_timer.h> 68#include <net/route.h> 69 70#include <dev/cxgb/t3cdev.h> 71#include <dev/cxgb/common/cxgb_firmware_exports.h> 72#include <dev/cxgb/common/cxgb_t3_cpl.h> 73#include <dev/cxgb/common/cxgb_tcb.h> 74#include <dev/cxgb/common/cxgb_ctl_defs.h> 75#include <dev/cxgb/cxgb_l2t.h> 76#include <dev/cxgb/cxgb_offload.h> 77#include <vm/vm.h> 78#include <vm/pmap.h> 79#include <machine/bus.h> 80#include <dev/cxgb/sys/mvec.h> 81#include <dev/cxgb/ulp/toecore/cxgb_toedev.h> 82#include <dev/cxgb/ulp/tom/cxgb_defs.h> 83#include <dev/cxgb/ulp/tom/cxgb_tom.h> 84#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h> 85#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 86#include <dev/cxgb/ulp/tom/cxgb_tcp.h> 87 88/* 89 * For ULP connections HW may add headers, e.g., for digests, that aren't part 90 * of the messages sent by the host but that are part of the TCP payload and 91 * therefore consume TCP sequence space. 
Tx connection parameters that 92 * operate in TCP sequence space are affected by the HW additions and need to 93 * compensate for them to accurately track TCP sequence numbers. This array 94 * contains the compensating extra lengths for ULP packets. It is indexed by 95 * a packet's ULP submode. 96 */ 97const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 98 99#ifdef notyet 100/* 101 * This sk_buff holds a fake header-only TCP segment that we use whenever we 102 * need to exploit SW TCP functionality that expects TCP headers, such as 103 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple 104 * CPUs without locking. 105 */ 106static struct mbuf *tcphdr_mbuf __read_mostly; 107#endif 108 109/* 110 * Size of WRs in bytes. Note that we assume all devices we are handling have 111 * the same WR size. 112 */ 113static unsigned int wrlen __read_mostly; 114 115/* 116 * The number of WRs needed for an skb depends on the number of page fragments 117 * in the skb and whether it has any payload in its main body. This maps the 118 * length of the gather list represented by an skb into the # of necessary WRs. 119 */ 120static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; 121 122/* 123 * Max receive window supported by HW in bytes. Only a small part of it can 124 * be set through option0, the rest needs to be set through RX_DATA_ACK. 125 */ 126#define MAX_RCV_WND ((1U << 27) - 1) 127 128/* 129 * Min receive window. We want it to be large enough to accommodate receive 130 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
131 */ 132#define MIN_RCV_WND (24 * 1024U) 133#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) 134 135#define VALIDATE_SEQ 0 136#define VALIDATE_SOCK(so) 137#define DEBUG_WR 0 138 139extern int tcp_do_autorcvbuf; 140extern int tcp_do_autosndbuf; 141extern int tcp_autorcvbuf_max; 142extern int tcp_autosndbuf_max; 143 144static void t3_send_reset(struct toepcb *toep); 145static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status); 146static inline void free_atid(struct t3cdev *cdev, unsigned int tid); 147static void handle_syncache_event(int event, void *arg); 148 149static inline void 150SBAPPEND(struct sockbuf *sb, struct mbuf *n) 151{ 152 struct mbuf * m; 153 154 m = sb->sb_mb; 155 while (m) { 156 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 157 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 158 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 159 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 160 m->m_next, m->m_nextpkt, m->m_flags)); 161 m = m->m_next; 162 } 163 m = n; 164 while (m) { 165 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 166 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 167 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 168 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 169 m->m_next, m->m_nextpkt, m->m_flags)); 170 m = m->m_next; 171 } 172 sbappend_locked(sb, n); 173 m = sb->sb_mb; 174 while (m) { 175 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 176 m->m_next, m->m_nextpkt, m->m_flags)); 177 m = m->m_next; 178 } 179} 180 181static inline int 182is_t3a(const struct toedev *dev) 183{ 184 return (dev->tod_ttid == TOE_ID_CHELSIO_T3); 185} 186 187static void 188dump_toepcb(struct toepcb *toep) 189{ 190 DPRINTF("qset_idx=%d qset=%d 
ulp_mode=%d mtu_idx=%d tid=%d\n", 191 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, 192 toep->tp_mtu_idx, toep->tp_tid); 193 194 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", 195 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 196 toep->tp_mss_clamp, toep->tp_flags); 197} 198 199#ifndef RTALLOC2_DEFINED 200static struct rtentry * 201rtalloc2(struct sockaddr *dst, int report, u_long ignflags) 202{ 203 struct rtentry *rt = NULL; 204 205 if ((rt = rtalloc1(dst, report, ignflags)) != NULL) 206 RT_UNLOCK(rt); 207 208 return (rt); 209} 210#endif 211/* 212 * Determine whether to send a CPL message now or defer it. A message is 213 * deferred if the connection is in SYN_SENT since we don't know the TID yet. 214 * For connections in other states the message is sent immediately. 215 * If through_l2t is set the message is subject to ARP processing, otherwise 216 * it is sent directly. 217 */ 218static inline void 219send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t) 220{ 221 struct tcpcb *tp = toep->tp_tp; 222 223 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) { 224 inp_wlock(tp->t_inpcb); 225 mbufq_tail(&toep->out_of_order_queue, m); // defer 226 inp_wunlock(tp->t_inpcb); 227 } else if (through_l2t) 228 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T 229 else 230 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly 231} 232 233static inline unsigned int 234mkprio(unsigned int cntrl, const struct toepcb *toep) 235{ 236 return (cntrl); 237} 238 239/* 240 * Populate a TID_RELEASE WR. The skb must be already propely sized. 
241 */ 242static inline void 243mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid) 244{ 245 struct cpl_tid_release *req; 246 247 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep)); 248 m->m_pkthdr.len = m->m_len = sizeof(*req); 249 req = mtod(m, struct cpl_tid_release *); 250 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 251 req->wr.wr_lo = 0; 252 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); 253} 254 255static inline void 256make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) 257{ 258 struct tcpcb *tp = sototcpcb(so); 259 struct toepcb *toep = tp->t_toe; 260 struct tx_data_wr *req; 261 262 inp_wlock_assert(tp->t_inpcb); 263 264 req = mtod(m, struct tx_data_wr *); 265 m->m_len = sizeof(*req); 266 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 267 req->wr_lo = htonl(V_WR_TID(toep->tp_tid)); 268 /* len includes the length of any HW ULP additions */ 269 req->len = htonl(len); 270 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx)); 271 /* V_TX_ULP_SUBMODE sets both the mode and submode */ 272 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) | 273 V_TX_URG(/* skb_urgent(skb) */ 0 ) | 274 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) && 275 (tail ? 0 : 1)))); 276 req->sndseq = htonl(tp->snd_nxt); 277 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) { 278 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 279 V_TX_CPU_IDX(toep->tp_qset)); 280 281 /* Sendbuffer is in units of 32KB. 
282 */ 283 if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) 284 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15)); 285 else 286 req->param |= htonl(V_TX_SNDBUF(so->so_snd.sb_hiwat >> 15)); 287 toep->tp_flags |= TP_DATASENT; 288 } 289} 290 291#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */ 292 293int 294t3_push_frames(struct socket *so, int req_completion) 295{ 296 struct tcpcb *tp = sototcpcb(so); 297 struct toepcb *toep = tp->t_toe; 298 299 struct mbuf *tail, *m0, *last; 300 struct t3cdev *cdev; 301 struct tom_data *d; 302 int i, bytes, count, total_bytes; 303 bus_dma_segment_t segs[TX_MAX_SEGS], *segp; 304 305 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { 306 DPRINTF("tcp state=%d\n", tp->t_state); 307 return (0); 308 } 309 310 if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { 311 DPRINTF("disconnecting\n"); 312 313 return (0); 314 } 315 316 317 inp_wlock_assert(tp->t_inpcb); 318 SOCKBUF_LOCK(&so->so_snd); 319 d = TOM_DATA(TOE_DEV(so)); 320 cdev = d->cdev; 321 last = tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb; 322 total_bytes = 0; 323 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n", 324 toep->tp_wr_avail, tail, so->so_snd.sb_cc, toep->tp_m_last); 325 326 if (last && toep->tp_m_last == last && so->so_snd.sb_sndptroff != 0) { 327 KASSERT(tail, ("sbdrop error")); 328 last = tail = tail->m_next; 329 } 330 331 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { 332 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail); 333 SOCKBUF_UNLOCK(&so->so_snd); 334 return (0); 335 } 336 337 toep->tp_m_last = NULL; 338 while (toep->tp_wr_avail && (tail != NULL)) { 339 count = bytes = 0; 340 segp = segs; 341 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { 342 SOCKBUF_UNLOCK(&so->so_snd); 343 return (0); 344 } 345 /* 346 * If the data in tail fits as in-line, then 347 * make an immediate data wr. 
348 */ 349 if (tail->m_len <= IMM_LEN) { 350 count = 1; 351 bytes = tail->m_len; 352 last = tail; 353 tail = tail->m_next; 354 m_set_sgl(m0, NULL); 355 m_set_sgllen(m0, 0); 356 make_tx_data_wr(so, m0, bytes, tail); 357 m_append(m0, bytes, mtod(last, caddr_t)); 358 KASSERT(!m0->m_next, ("bad append")); 359 } else { 360 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) 361 && (tail != NULL) && (count < TX_MAX_SEGS-1)) { 362 bytes += tail->m_len; 363 last = tail; 364 count++; 365 /* 366 * technically an abuse to be using this for a VA 367 * but less gross than defining my own structure 368 * or calling pmap_kextract from here :-| 369 */ 370 segp->ds_addr = (bus_addr_t)tail->m_data; 371 segp->ds_len = tail->m_len; 372 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n", 373 count, mbuf_wrs[count], tail->m_data, tail->m_len); 374 segp++; 375 tail = tail->m_next; 376 } 377 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n", 378 toep->tp_wr_avail, count, mbuf_wrs[count], tail); 379 380 m_set_sgl(m0, segs); 381 m_set_sgllen(m0, count); 382 make_tx_data_wr(so, m0, bytes, tail); 383 } 384 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep)); 385 386 if (tail) { 387 so->so_snd.sb_sndptr = tail; 388 toep->tp_m_last = NULL; 389 } else 390 toep->tp_m_last = so->so_snd.sb_sndptr = last; 391 392 393 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last); 394 395 so->so_snd.sb_sndptroff += bytes; 396 total_bytes += bytes; 397 toep->tp_write_seq += bytes; 398 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d", 399 toep->tp_wr_avail, count, mbuf_wrs[count], tail, so->so_snd.sb_sndptr, so->so_snd.sb_sndptroff); 400 if (tail) 401 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x", 402 total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una); 403 else 404 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p snd_una=0x%08x", 405 total_bytes, toep->tp_m_last, tp->snd_una); 406 407 408 i = 0; 409 while (i < count && 
m_get_sgllen(m0)) { 410 if ((count - i) >= 3) { 411 CTR6(KTR_TOM, 412 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d pa=0x%zx len=%d", 413 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len, 414 segs[i + 2].ds_addr, segs[i + 2].ds_len); 415 i += 3; 416 } else if ((count - i) == 2) { 417 CTR4(KTR_TOM, 418 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d", 419 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len); 420 i += 2; 421 } else { 422 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d", 423 segs[i].ds_addr, segs[i].ds_len); 424 i++; 425 } 426 427 } 428 429 /* 430 * remember credits used 431 */ 432 m0->m_pkthdr.csum_data = mbuf_wrs[count]; 433 m0->m_pkthdr.len = bytes; 434 toep->tp_wr_avail -= mbuf_wrs[count]; 435 toep->tp_wr_unacked += mbuf_wrs[count]; 436 437 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) || 438 toep->tp_wr_unacked >= toep->tp_wr_max / 2) { 439 struct work_request_hdr *wr = cplhdr(m0); 440 441 wr->wr_hi |= htonl(F_WR_COMPL); 442 toep->tp_wr_unacked = 0; 443 } 444 KASSERT((m0->m_pkthdr.csum_data > 0) && 445 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d", 446 m0->m_pkthdr.csum_data)); 447 m0->m_type = MT_DONTFREE; 448 enqueue_wr(toep, m0); 449 DPRINTF("sending offload tx with %d bytes in %d segments\n", 450 bytes, count); 451 l2t_send(cdev, m0, toep->tp_l2t); 452 } 453 SOCKBUF_UNLOCK(&so->so_snd); 454 return (total_bytes); 455} 456 457/* 458 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail 459 * under any circumstances. We take the easy way out and always queue the 460 * message to the write_queue. We can optimize the case where the queue is 461 * already empty though the optimization is probably not worth it. 
462 */ 463static void 464close_conn(struct socket *so) 465{ 466 struct mbuf *m; 467 struct cpl_close_con_req *req; 468 struct tom_data *d; 469 struct inpcb *inp = sotoinpcb(so); 470 struct tcpcb *tp; 471 struct toepcb *toep; 472 unsigned int tid; 473 474 475 inp_wlock(inp); 476 tp = sototcpcb(so); 477 toep = tp->t_toe; 478 479 if (tp->t_state != TCPS_SYN_SENT) 480 t3_push_frames(so, 1); 481 482 if (toep->tp_flags & TP_FIN_SENT) { 483 inp_wunlock(inp); 484 return; 485 } 486 487 tid = toep->tp_tid; 488 489 d = TOM_DATA(toep->tp_toedev); 490 491 m = m_gethdr_nofail(sizeof(*req)); 492 493 toep->tp_flags |= TP_FIN_SENT; 494 req = mtod(m, struct cpl_close_con_req *); 495 496 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); 497 req->wr.wr_lo = htonl(V_WR_TID(tid)); 498 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 499 req->rsvd = htonl(toep->tp_write_seq); 500 inp_wunlock(inp); 501 /* 502 * XXX - need to defer shutdown while there is still data in the queue 503 * 504 */ 505 cxgb_ofld_send(d->cdev, m); 506 507} 508 509/* 510 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant 511 * and send it along. 512 */ 513static void 514abort_arp_failure(struct t3cdev *cdev, struct mbuf *m) 515{ 516 struct cpl_abort_req *req = cplhdr(m); 517 518 req->cmd = CPL_ABORT_NO_RST; 519 cxgb_ofld_send(cdev, m); 520} 521 522/* 523 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are 524 * permitted to return without sending the message in case we cannot allocate 525 * an sk_buff. Returns the number of credits sent. 
526 */ 527uint32_t 528t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail) 529{ 530 struct mbuf *m; 531 struct cpl_rx_data_ack *req; 532 struct toepcb *toep = tp->t_toe; 533 struct toedev *tdev = toep->tp_toedev; 534 535 m = m_gethdr_nofail(sizeof(*req)); 536 537 DPRINTF("returning %u credits to HW\n", credits); 538 539 req = mtod(m, struct cpl_rx_data_ack *); 540 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 541 req->wr.wr_lo = 0; 542 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 543 req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); 544 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep)); 545 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 546 return (credits); 547} 548 549/* 550 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled. 551 * This is only used in DDP mode, so we take the opportunity to also set the 552 * DACK mode and flush any Rx credits. 553 */ 554void 555t3_send_rx_modulate(struct toepcb *toep) 556{ 557 struct mbuf *m; 558 struct cpl_rx_data_ack *req; 559 560 m = m_gethdr_nofail(sizeof(*req)); 561 562 req = mtod(m, struct cpl_rx_data_ack *); 563 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 564 req->wr.wr_lo = 0; 565 m->m_pkthdr.len = m->m_len = sizeof(*req); 566 567 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 568 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 569 V_RX_DACK_MODE(1) | 570 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup)); 571 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 572 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 573 toep->tp_rcv_wup = toep->tp_copied_seq; 574} 575 576/* 577 * Handle receipt of an urgent pointer. 
578 */ 579static void 580handle_urg_ptr(struct socket *so, uint32_t urg_seq) 581{ 582#ifdef URGENT_DATA_SUPPORTED 583 struct tcpcb *tp = sototcpcb(so); 584 585 urg_seq--; /* initially points past the urgent data, per BSD */ 586 587 if (tp->urg_data && !after(urg_seq, tp->urg_seq)) 588 return; /* duplicate pointer */ 589 sk_send_sigurg(sk); 590 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 591 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) { 592 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 593 594 tp->copied_seq++; 595 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len) 596 tom_eat_skb(sk, skb, 0); 597 } 598 tp->urg_data = TCP_URG_NOTYET; 599 tp->urg_seq = urg_seq; 600#endif 601} 602 603/* 604 * Returns true if a socket cannot accept new Rx data. 605 */ 606static inline int 607so_no_receive(const struct socket *so) 608{ 609 return (so->so_state & (SS_ISDISCONNECTED|SS_ISDISCONNECTING)); 610} 611 612/* 613 * Process an urgent data notification. 614 */ 615static void 616rx_urg_notify(struct toepcb *toep, struct mbuf *m) 617{ 618 struct cpl_rx_urg_notify *hdr = cplhdr(m); 619 struct socket *so = toeptoso(toep); 620 621 VALIDATE_SOCK(so); 622 623 if (!so_no_receive(so)) 624 handle_urg_ptr(so, ntohl(hdr->seq)); 625 626 m_freem(m); 627} 628 629/* 630 * Handler for RX_URG_NOTIFY CPL messages. 631 */ 632static int 633do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx) 634{ 635 struct toepcb *toep = (struct toepcb *)ctx; 636 637 rx_urg_notify(toep, m); 638 return (0); 639} 640 641static __inline int 642is_delack_mode_valid(struct toedev *dev, struct toepcb *toep) 643{ 644 return (toep->tp_ulp_mode || 645 (toep->tp_ulp_mode == ULP_MODE_TCPDDP && 646 dev->tod_ttid >= TOE_ID_CHELSIO_T3)); 647} 648 649/* 650 * Set of states for which we should return RX credits. 651 */ 652#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 653 654/* 655 * Called after some received data has been read. 
It returns RX credits 656 * to the HW for the amount of data processed. 657 */ 658void 659t3_cleanup_rbuf(struct tcpcb *tp, int copied) 660{ 661 struct toepcb *toep = tp->t_toe; 662 struct socket *so; 663 struct toedev *dev; 664 int dack_mode, must_send, read; 665 u32 thres, credits, dack = 0; 666 667 so = tp->t_inpcb->inp_socket; 668 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 669 (tp->t_state == TCPS_FIN_WAIT_2))) { 670 if (copied) { 671 SOCKBUF_LOCK(&so->so_rcv); 672 toep->tp_copied_seq += copied; 673 SOCKBUF_UNLOCK(&so->so_rcv); 674 } 675 676 return; 677 } 678 679 inp_wlock_assert(tp->t_inpcb); 680 SOCKBUF_LOCK(&so->so_rcv); 681 if (copied) 682 toep->tp_copied_seq += copied; 683 else { 684 read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; 685 toep->tp_copied_seq += read; 686 } 687 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 688 toep->tp_enqueued_bytes = so->so_rcv.sb_cc; 689 SOCKBUF_UNLOCK(&so->so_rcv); 690 691 if (credits > so->so_rcv.sb_mbmax) { 692 printf("copied_seq=%u rcv_wup=%u credits=%u\n", 693 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 694 credits = so->so_rcv.sb_mbmax; 695 } 696 697 698 /* 699 * XXX this won't accurately reflect credit return - we need 700 * to look at the difference between the amount that has been 701 * put in the recv sockbuf and what is there now 702 */ 703 704 if (__predict_false(!credits)) 705 return; 706 707 dev = toep->tp_toedev; 708 thres = TOM_TUNABLE(dev, rx_credit_thres); 709 710 if (__predict_false(thres == 0)) 711 return; 712 713 if (is_delack_mode_valid(dev, toep)) { 714 dack_mode = TOM_TUNABLE(dev, delack); 715 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 716 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 717 718 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 719 dack = F_RX_DACK_CHANGE | 720 V_RX_DACK_MODE(dack_mode); 721 } 722 } else 723 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 724 725 /* 726 * For coalescing to work effectively ensure the receive 
window has 727 * at least 16KB left. 728 */ 729 must_send = credits + 16384 >= tp->rcv_wnd; 730 731 if (must_send || credits >= thres) 732 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 733} 734 735static int 736cxgb_toe_disconnect(struct tcpcb *tp) 737{ 738 struct socket *so; 739 740 DPRINTF("cxgb_toe_disconnect\n"); 741 742 so = tp->t_inpcb->inp_socket; 743 close_conn(so); 744 return (0); 745} 746 747static int 748cxgb_toe_reset(struct tcpcb *tp) 749{ 750 struct toepcb *toep = tp->t_toe; 751 752 t3_send_reset(toep); 753 754 /* 755 * unhook from socket 756 */ 757 tp->t_flags &= ~TF_TOE; 758 toep->tp_tp = NULL; 759 tp->t_toe = NULL; 760 return (0); 761} 762 763static int 764cxgb_toe_send(struct tcpcb *tp) 765{ 766 struct socket *so; 767 768 DPRINTF("cxgb_toe_send\n"); 769 dump_toepcb(tp->t_toe); 770 771 so = tp->t_inpcb->inp_socket; 772 t3_push_frames(so, 1); 773 return (0); 774} 775 776static int 777cxgb_toe_rcvd(struct tcpcb *tp) 778{ 779 780 inp_wlock_assert(tp->t_inpcb); 781 t3_cleanup_rbuf(tp, 0); 782 783 return (0); 784} 785 786static void 787cxgb_toe_detach(struct tcpcb *tp) 788{ 789 struct toepcb *toep; 790 791 /* 792 * XXX how do we handle teardown in the SYN_SENT state? 
793 * 794 */ 795 INP_INFO_WLOCK(&tcbinfo); 796 inp_wlock_assert(tp->t_inpcb); 797 toep = tp->t_toe; 798 toep->tp_tp = NULL; 799 800 /* 801 * unhook from socket 802 */ 803 tp->t_flags &= ~TF_TOE; 804 tp->t_toe = NULL; 805 INP_INFO_WUNLOCK(&tcbinfo); 806} 807 808 809static struct toe_usrreqs cxgb_toe_usrreqs = { 810 .tu_disconnect = cxgb_toe_disconnect, 811 .tu_reset = cxgb_toe_reset, 812 .tu_send = cxgb_toe_send, 813 .tu_rcvd = cxgb_toe_rcvd, 814 .tu_detach = cxgb_toe_detach, 815 .tu_detach = cxgb_toe_detach, 816 .tu_syncache_event = handle_syncache_event, 817}; 818 819 820static void 821__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 822 uint64_t mask, uint64_t val, int no_reply) 823{ 824 struct cpl_set_tcb_field *req; 825 826 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 827 toep->tp_tid, word, mask, val); 828 829 req = mtod(m, struct cpl_set_tcb_field *); 830 m->m_pkthdr.len = m->m_len = sizeof(*req); 831 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 832 req->wr.wr_lo = 0; 833 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 834 req->reply = V_NO_REPLY(no_reply); 835 req->cpu_idx = 0; 836 req->word = htons(word); 837 req->mask = htobe64(mask); 838 req->val = htobe64(val); 839 840 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 841 send_or_defer(toep, m, 0); 842} 843 844static void 845t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val) 846{ 847 struct mbuf *m; 848 struct tcpcb *tp = sototcpcb(so); 849 struct toepcb *toep = tp->t_toe; 850 851 if (toep == NULL) 852 return; 853 854 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 855 printf("not seting field\n"); 856 return; 857 } 858 859 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 860 861 __set_tcb_field(toep, m, word, mask, val, 1); 862} 863 864/* 865 * Set one of the t_flags bits in the TCB. 
866 */ 867static void 868set_tcb_tflag(struct socket *so, unsigned int bit_pos, int val) 869{ 870 t3_set_tcb_field(so, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); 871} 872 873/* 874 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. 875 */ 876static void 877t3_set_nagle(struct socket *so) 878{ 879 struct tcpcb *tp = sototcpcb(so); 880 881 set_tcb_tflag(so, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); 882} 883 884/* 885 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. 886 */ 887void 888t3_set_keepalive(struct socket *so, int on_off) 889{ 890 set_tcb_tflag(so, S_TF_KEEPALIVE, on_off); 891} 892 893void 894t3_set_rcv_coalesce_enable(struct socket *so, int on_off) 895{ 896 set_tcb_tflag(so, S_TF_RCV_COALESCE_ENABLE, on_off); 897} 898 899void 900t3_set_dack_mss(struct socket *so, int on_off) 901{ 902 set_tcb_tflag(so, S_TF_DACK_MSS, on_off); 903} 904 905/* 906 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. 907 */ 908static void 909t3_set_tos(struct socket *so) 910{ 911 t3_set_tcb_field(so, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), 912 V_TCB_TOS(SO_TOS(so))); 913} 914 915 916/* 917 * In DDP mode, TP fails to schedule a timer to push RX data to the host when 918 * DDP is disabled (data is delivered to freelist). [Note that, the peer should 919 * set the PSH bit in the last segment, which would trigger delivery.] 920 * We work around the issue by setting a DDP buffer in a partial placed state, 921 * which guarantees that TP will schedule a timer. 
922 */ 923#define TP_DDP_TIMER_WORKAROUND_MASK\ 924 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\ 925 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\ 926 V_TCB_RX_DDP_BUF0_LEN(3)) << 32)) 927#define TP_DDP_TIMER_WORKAROUND_VAL\ 928 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\ 929 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\ 930 32)) 931 932static void 933t3_enable_ddp(struct socket *so, int on) 934{ 935 if (on) { 936 937 t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), 938 V_TF_DDP_OFF(0)); 939 } else 940 t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, 941 V_TF_DDP_OFF(1) | 942 TP_DDP_TIMER_WORKAROUND_MASK, 943 V_TF_DDP_OFF(1) | 944 TP_DDP_TIMER_WORKAROUND_VAL); 945 946} 947 948void 949t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag_color) 950{ 951 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_TAG + buf_idx, 952 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), 953 tag_color); 954} 955 956void 957t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset, 958 unsigned int len) 959{ 960 if (buf_idx == 0) 961 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_OFFSET, 962 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 963 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 964 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | 965 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 966 else 967 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF1_OFFSET, 968 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 969 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), 970 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | 971 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32)); 972} 973 974static int 975t3_set_cong_control(struct socket *so, const char *name) 976{ 977#ifdef CONGESTION_CONTROL_SUPPORTED 978 int cong_algo; 979 980 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++) 981 if (!strcmp(name, t3_cong_ops[cong_algo].name)) 982 break; 983 984 if (cong_algo >= ARRAY_SIZE(t3_cong_ops)) 985 return -EINVAL; 986#endif 987 return 0; 
988} 989 990int 991t3_get_tcb(struct socket *so) 992{ 993 struct cpl_get_tcb *req; 994 struct tcpcb *tp = sototcpcb(so); 995 struct toepcb *toep = tp->t_toe; 996 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 997 998 if (!m) 999 return (ENOMEM); 1000 1001 inp_wlock_assert(tp->t_inpcb); 1002 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 1003 req = mtod(m, struct cpl_get_tcb *); 1004 m->m_pkthdr.len = m->m_len = sizeof(*req); 1005 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 1006 req->wr.wr_lo = 0; 1007 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); 1008 req->cpuno = htons(toep->tp_qset); 1009 req->rsvd = 0; 1010 if (sototcpcb(so)->t_state == TCPS_SYN_SENT) 1011 mbufq_tail(&toep->out_of_order_queue, m); // defer 1012 else 1013 cxgb_ofld_send(T3C_DEV(so), m); 1014 return 0; 1015} 1016 1017static inline void 1018so_insert_tid(struct tom_data *d, struct socket *so, unsigned int tid) 1019{ 1020 struct toepcb *toep = sototoep(so); 1021 toepcb_hold(toep); 1022 1023 cxgb_insert_tid(d->cdev, d->client, toep, tid); 1024} 1025 1026/** 1027 * find_best_mtu - find the entry in the MTU table closest to an MTU 1028 * @d: TOM state 1029 * @mtu: the target MTU 1030 * 1031 * Returns the index of the value in the MTU table that is closest to but 1032 * does not exceed the target MTU. 
1033 */ 1034static unsigned int 1035find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1036{ 1037 int i = 0; 1038 1039 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1040 ++i; 1041 return (i); 1042} 1043 1044static unsigned int 1045select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1046{ 1047 unsigned int idx; 1048 1049#ifdef notyet 1050 struct rtentry *dst = sotoinpcb(so)->inp_route.ro_rt; 1051#endif 1052 if (tp) { 1053 tp->t_maxseg = pmtu - 40; 1054 if (tp->t_maxseg < td->mtus[0] - 40) 1055 tp->t_maxseg = td->mtus[0] - 40; 1056 idx = find_best_mtu(td, tp->t_maxseg + 40); 1057 1058 tp->t_maxseg = td->mtus[idx] - 40; 1059 } else 1060 idx = find_best_mtu(td, pmtu); 1061 1062 return (idx); 1063} 1064 1065static inline void 1066free_atid(struct t3cdev *cdev, unsigned int tid) 1067{ 1068 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1069 1070 if (toep) 1071 toepcb_release(toep); 1072} 1073 1074/* 1075 * Release resources held by an offload connection (TID, L2T entry, etc.) 
 */
static void
t3_release_offload_resources(struct toepcb *toep)
{
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev;
	unsigned int tid = toep->tp_tid;

	/* Nothing to release if never bound to a TOE device. */
	if (!tdev)
		return;

	cdev = TOEP_T3C_DEV(toep);
	if (!cdev)
		return;

	toep->tp_qset = 0;
	t3_release_ddp_resources(toep);

#ifdef CTRL_SKB_CACHE
	kfree_skb(CTRL_SKB_CACHE(tp));
	CTRL_SKB_CACHE(tp) = NULL;
#endif

	/* Drop any work requests still awaiting hardware completion. */
	if (toep->tp_wr_avail != toep->tp_wr_max) {
		purge_wr_queue(toep);
		reset_wr_list(toep);
	}

	if (toep->tp_l2t) {
		l2t_release(L2DATA(cdev), toep->tp_l2t);
		toep->tp_l2t = NULL;
	}
	/* Sever the toepcb <-> tcpcb linkage in both directions. */
	toep->tp_tp = NULL;
	if (tp) {
		inp_wlock_assert(tp->t_inpcb);
		tp->t_toe = NULL;
		tp->t_flags &= ~TF_TOE;
	}

	/*
	 * A connection still in SYN_SENT owns an active-open TID (atid)
	 * rather than a real TID; release whichever one applies.
	 */
	if (toep->tp_state == TCPS_SYN_SENT) {
		free_atid(cdev, tid);
#ifdef notyet
		__skb_queue_purge(&tp->out_of_order_queue);
#endif
	} else {					// we have TID
		cxgb_remove_tid(cdev, toep, tid);
		toepcb_release(toep);
	}
#if 0
	log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
}

/*
 * Hook the TOE socket/usrreq operations into an offloaded socket and
 * mark the tcpcb as TOE-managed.
 */
static void
install_offload_ops(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	KASSERT(tp->t_toe != NULL, ("toepcb not set"));

	t3_install_socket_ops(so);
	tp->t_flags |= TF_TOE;
	tp->t_tu = &cxgb_toe_usrreqs;
}

/*
 * Determine the receive window scaling factor given a target max
 * receive window.
 */
static __inline int
select_rcv_wscale(int space)
{
	int wscale = 0;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	/* Only scale when RFC1323 window scaling is enabled. */
	if (tcp_do_rfc1323)
		for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;

	return (wscale);
}

/*
 * Determine the receive window size for a socket.
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
	struct tom_data *d = TOM_DATA(dev);
	unsigned int wnd;
	unsigned int max_rcv_wnd;

	if (tcp_do_autorcvbuf)
		wnd = tcp_autorcvbuf_max;
	else
		wnd = so->so_rcv.sb_hiwat;

	/* XXX
	 * For receive coalescing to work effectively we need a receive window
	 * that can accommodate a coalesced segment.
	 */
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	/* PR 5138 */
	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
	    (uint32_t)d->rx_page_size * 23 :
	    MAX_RCV_WND);

	return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields.  This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
	struct tcpcb *tp = sototcpcb(so);
	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);

	SOCK_LOCK_ASSERT(so);

	printf("initializing offload socket\n");
	/*
	 * We either need to fix push frames to work with sbcompress
	 * or we need to add this
	 */
	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	/* Cross-link the tcpcb and toepcb. */
	tp->t_toe = toep;
	toep->tp_tp = tp;
	toep->tp_toedev = dev;

	toep->tp_tid = tid;
	toep->tp_l2t = e;
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_delack_mode = 0;

	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
	/*
	 * XXX broken
	 *
	 */
	tp->rcv_wnd = select_rcv_wnd(dev, so);

	/*
	 * DDP is enabled only when tunable-enabled, allowed on the socket,
	 * and the receive window is large enough to be worthwhile.
	 */
	toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
	toep->tp_qset_idx = 0;

	reset_wr_list(toep);
	DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
	struct tcpcb *tp = sototcpcb(so);
	int wscale = select_rcv_wscale(tp->rcv_wnd);

	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
	    V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
	struct tcpcb *tp = sototcpcb(so);
	unsigned int val;

	val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
	    V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val);
	return (val);
}

/*
 * Compute the option 2 value: advertise a congestion-control flavor only
 * when the cong_alg tunable selects one.
 */
static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
	int flv_valid;

	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

	return (V_FLAVORS_VALID(flv_valid) |
	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
/* Debug helper: sum the WR credits of all mbufs still on the WR queue. */
static int
count_pending_wrs(const struct toepcb *toep)
{
	const struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)
		n += m->m_pkthdr.csum_data;
	return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

/*
 * Build a CPL_ACT_OPEN_REQ in @m for the 4-tuple of @so's inpcb, using
 * the given atid and L2T entry.
 */
static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
	struct cpl_act_open_req *req;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = TOE_DEV(so);

	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

	req = mtod(m, struct cpl_act_open_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	/* Ports/addresses are already in network byte order in the inpcb. */
	req->local_port = inp->inp_lport;
	req->peer_port = inp->inp_fport;
	memcpy(&req->local_ip, &inp->inp_laddr, 4);
	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
	    V_TX_CHANNEL(e->smt_idx));
	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
	req->params = 0;
	req->opt2 = htonl(calc_opt2(so, tdev));
}


/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return (ECONNREFUSED);
	case CPL_ERR_ARP_MISS:
		return (EHOSTUNREACH);
	case CPL_ERR_CONN_TIMEDOUT:
		return (ETIMEDOUT);
	case CPL_ERR_TCAM_FULL:
		return (ENOMEM);
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return (EADDRINUSE);
	default:
		return (EIO);
	}
}

/*
 * Tear down a failed active open: release all offload state and drop the
 * connection with the given errno.
 */
static void
fail_act_open(struct toepcb *toep, int errno)
{
	struct tcpcb *tp = toep->tp_tp;

	t3_release_offload_resources(toep);
	if (tp) {
		inp_wlock_assert(tp->t_inpcb);
		tcp_drop(tp, errno);
	}

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	struct inpcb *inp;

	INP_INFO_WLOCK(&tcbinfo);
	/* The tcpcb may already have been torn down underneath us. */
	if (toep->tp_tp == NULL)
		goto done;

	inp = toep->tp_tp->t_inpcb;
	inp_wlock(inp);

/*
 * Don't handle connection retry for now
 */
#ifdef notyet
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
			       jiffies + HZ / 2);
	} else
#endif
		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
	inp_wunlock(inp);
done:
	INP_INFO_WUNLOCK(&tcbinfo);
	m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int
act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	       status != CPL_ERR_ARP_MISS;
}
1407 1408/* 1409 * Process an ACT_OPEN_RPL CPL message. 1410 */ 1411static int 1412do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 1413{ 1414 struct toepcb *toep = (struct toepcb *)ctx; 1415 struct cpl_act_open_rpl *rpl = cplhdr(m); 1416 1417 if (cdev->type != T3A && act_open_has_tid(rpl->status)) 1418 cxgb_queue_tid_release(cdev, GET_TID(rpl)); 1419 1420 active_open_failed(toep, m); 1421 return (0); 1422} 1423 1424/* 1425 * Handle an ARP failure for an active open. XXX purge ofo queue 1426 * 1427 * XXX badly broken for crossed SYNs as the ATID is no longer valid. 1428 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should 1429 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't 1430 * free the atid. Hmm. 1431 */ 1432#ifdef notyet 1433static void 1434act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) 1435{ 1436 struct toepcb *toep = m_get_toep(m); 1437 struct tcpcb *tp = toep->tp_tp; 1438 struct inpcb *inp = tp->t_inpcb; 1439 struct socket *so = toeptoso(toep); 1440 1441 inp_wlock(inp); 1442 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { 1443 fail_act_open(so, EHOSTUNREACH); 1444 printf("freeing %p\n", m); 1445 1446 m_free(m); 1447 } 1448 inp_wunlock(inp); 1449} 1450#endif 1451/* 1452 * Send an active open request. 
1453 */ 1454int 1455t3_connect(struct toedev *tdev, struct socket *so, 1456 struct rtentry *rt, struct sockaddr *nam) 1457{ 1458 struct mbuf *m; 1459 struct l2t_entry *e; 1460 struct tom_data *d = TOM_DATA(tdev); 1461 struct inpcb *inp = sotoinpcb(so); 1462 struct tcpcb *tp = intotcpcb(inp); 1463 struct toepcb *toep; /* allocated by init_offload_socket */ 1464 1465 int atid; 1466 1467 toep = toepcb_alloc(); 1468 if (toep == NULL) 1469 goto out_err; 1470 1471 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1472 goto out_err; 1473 1474 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1475 if (!e) 1476 goto free_tid; 1477 1478 inp_wlock_assert(inp); 1479 m = m_gethdr(MT_DATA, M_WAITOK); 1480 1481#if 0 1482 m->m_toe.mt_toepcb = tp->t_toe; 1483 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1484#endif 1485 SOCK_LOCK(so); 1486 1487 init_offload_socket(so, tdev, atid, e, rt, toep); 1488 1489 install_offload_ops(so); 1490 1491 mk_act_open_req(so, m, atid, e); 1492 SOCK_UNLOCK(so); 1493 1494 soisconnecting(so); 1495 toep = tp->t_toe; 1496 m_set_toep(m, tp->t_toe); 1497 1498 toep->tp_state = TCPS_SYN_SENT; 1499 l2t_send(d->cdev, (struct mbuf *)m, e); 1500 1501 if (toep->tp_ulp_mode) 1502 t3_enable_ddp(so, 0); 1503 return (0); 1504 1505free_tid: 1506 printf("failing connect - free atid\n"); 1507 1508 free_atid(d->cdev, atid); 1509out_err: 1510 printf("return ENOMEM\n"); 1511 return (ENOMEM); 1512} 1513 1514/* 1515 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1516 * not send multiple ABORT_REQs for the same connection and also that we do 1517 * not try to send a message after the connection has closed. Returns 1 if 1518 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
 */
static void
t3_send_reset(struct toepcb *toep)
{

	struct cpl_abort_req *req;
	unsigned int tid = toep->tp_tid;
	int mode = CPL_ABORT_SEND_RST;
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct socket *so = NULL;
	struct mbuf *m;

	if (tp) {
		inp_wlock_assert(tp->t_inpcb);
		so = toeptoso(toep);
	}

	/* Already aborted, or never attached to a device: nothing to do. */
	if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
		tdev == NULL))
		return;
	toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);

	/* Purge the send queue so we don't send anything after an abort. */
	if (so)
		sbflush(&so->so_snd);
	if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
		mode |= CPL_ABORT_POST_CLOSE_REQ;

	m = m_gethdr_nofail(sizeof(*req));
	m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
	set_arp_failure_handler(m, abort_arp_failure);

	req = mtod(m, struct cpl_abort_req *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
	req->wr.wr_lo = htonl(V_WR_TID(tid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
	req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
	req->cmd = mode;
	/* In SYN_SENT the TID isn't live yet; defer like t3_get_tcb(). */
	if (tp && (tp->t_state == TCPS_SYN_SENT))
		mbufq_tail(&toep->out_of_order_queue, m);	// defer
	else
		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
}

/*
 * Handle the IP-level socket options this TOE intercepts (only IP_TOS).
 * IP_OPTIONS is rejected outright; everything else falls through to the
 * stack via EOPNOTSUPP (see t3_ctloutput()).
 */
static int
t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct inpcb *inp;
	int error, optval;

	if (sopt->sopt_name == IP_OPTIONS)
		return (ENOPROTOOPT);

	if (sopt->sopt_name != IP_TOS)
		return (EOPNOTSUPP);

	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);

	if (error)
		return (error);

	/* Setting a high-precedence TOS requires privilege. */
	if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread))
		return (EPERM);

	inp = sotoinpcb(so);
	inp->inp_ip_tos = optval;

	t3_set_tos(so);

	return (0);
}

/*
 * Handle the TCP-level socket options this TOE intercepts
 * (TCP_CONGESTION and TCP_NODELAY); others return EOPNOTSUPP so the
 * caller can fall back to the stack's handler.
 */
static int
t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int err = 0;
	size_t copied;

	if (sopt->sopt_name != TCP_CONGESTION &&
	    sopt->sopt_name != TCP_NODELAY)
		return (EOPNOTSUPP);

	if (sopt->sopt_name == TCP_CONGESTION) {
		char name[TCP_CA_NAME_MAX];
		int optlen = sopt->sopt_valsize;
		struct tcpcb *tp;

		if (optlen < 1)
			return (EINVAL);

		err = copyinstr(sopt->sopt_val, name,
		    min(TCP_CA_NAME_MAX - 1, optlen), &copied);
		if (err)
			return (err);
		if (copied < 1)
			return (EINVAL);

		tp = sototcpcb(so);
		/*
		 * XXX I need to revisit this
		 */
		if ((err = t3_set_cong_control(so, name)) == 0) {
#ifdef CONGESTION_CONTROL_SUPPORTED
			tp->t_cong_control = strdup(name, M_CXGB);
#endif
		} else
			return (err);
	} else {
		int optval, oldval;
		struct inpcb *inp;
		struct tcpcb *tp;

		err = sooptcopyin(sopt, &optval, sizeof optval,
		    sizeof optval);

		if (err)
			return (err);

		inp = sotoinpcb(so);
		tp = intotcpcb(inp);

		inp_wlock(inp);

		oldval = tp->t_flags;
		if (optval)
			tp->t_flags |= TF_NODELAY;
		else
			tp->t_flags &= ~TF_NODELAY;
		inp_wunlock(inp);

		/*
		 * NOTE(review): t_flags is re-read after the unlock, so this
		 * comparison is racy in principle -- confirm callers
		 * serialize setsockopt on this socket.
		 */
		if (oldval != tp->t_flags)
			t3_set_nagle(so);

	}

	return (0);
}

/*
 * ctloutput entry point: try the TOE-specific IP/TCP handlers first and
 * fall back to the stack's tcp_ctloutput() for anything they don't claim.
 */
static int
t3_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int err;

	if (sopt->sopt_level != IPPROTO_TCP)
		err = t3_ip_ctloutput(so, sopt);
	else
		err = t3_tcp_ctloutput(so, sopt);

	if (err != EOPNOTSUPP)
		return (err);

	return (tcp_ctloutput(so, sopt));
}

/*
 * Returns true if we need to explicitly request RST when we receive new data
 * on an RX-closed connection.
 */
static inline int
need_rst_on_excess_rx(const struct toepcb *toep)
{
	return (1);
}

/*
 * Handles Rx data that arrives in a state where the socket isn't accepting
 * new data.
 */
static void
handle_excess_rx(struct toepcb *toep, struct mbuf *m)
{

	if (need_rst_on_excess_rx(toep) && !(toep->tp_flags & TP_ABORT_SHUTDOWN))
		t3_send_reset(toep);
	m_freem(m);
}

/*
 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
 * by getting the DDP offset from the TCB.
 */
static void
tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct ddp_state *q = &toep->tp_ddp_state;
	struct ddp_buf_state *bsp;
	struct cpl_get_tcb_rpl *hdr;
	unsigned int ddp_offset;
	struct socket *so;
	struct tcpcb *tp;

	uint64_t t;
	__be64 *tcb;

	so = toeptoso(toep);
	tp = toep->tp_tp;

	inp_wlock_assert(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);

	/* Note that we only account for CPL_GET_TCB issued by the DDP code.
	 * We really need a cookie in order to dispatch the RPLs.
	 */
	q->get_tcb_count--;

	/* It is possible that a previous CPL already invalidated UBUF DDP
	 * and moved the cur_buf idx and hence no further processing of this
	 * skb is required.  However, the app might be sleeping on
	 * !q->get_tcb_count and we need to wake it up.
	 */
	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
		struct socket *so = toeptoso(toep);

		m_freem(m);
		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
			sorwakeup_locked(so);
		else
			SOCKBUF_UNLOCK(&so->so_rcv);
		return;
	}

	bsp = &q->buf_state[q->cur_buf];
	hdr = cplhdr(m);
	/* The raw TCB follows the CPL header; extract the DDP offset for
	 * whichever hardware buffer is current. */
	tcb = (__be64 *)(hdr + 1);
	if (q->cur_buf == 0) {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
	} else {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
	}
	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset = ddp_offset;
	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;

	CTR5(KTR_TOM,
	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
	KASSERT(ddp_offset >= m->m_cur_offset, ("ddp_offset=%u less than cur_offset=%u",
		ddp_offset, m->m_cur_offset));

#ifdef T3_TRACE
	T3_TRACE3(TIDTB(so),
		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u ddp_offset %u",
		  tp->rcv_nxt, q->cur_buf, ddp_offset);
#endif

#if 0
{
	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;

	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;

	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
	rcv_nxt = t >> S_TCB_RCV_NXT;
	rcv_nxt &= M_TCB_RCV_NXT;

	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;

	T3_TRACE2(TIDTB(sk),
		  "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
		  ddp_flags, rcv_nxt - rx_hdr_offset);
	T3_TRACE4(TB(q),
		  "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
		  tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
	T3_TRACE3(TB(q),
		  "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
		  rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
	T3_TRACE2(TB(q),
		  "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
		  q->buf_state[0].flags, q->buf_state[1].flags);

}
#endif
	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
		handle_excess_rx(toep, m);
		return;
	}

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
	}
#endif
	if (bsp->flags & DDP_BF_NOCOPY) {
#ifdef T3_TRACE
		T3_TRACE0(TB(q),
			  "tcb_rpl_as_ddp_complete: CANCEL UBUF");

		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
			printk("!cancel_ubuf");
			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
		}
#endif
		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
		q->cur_buf ^= 1;
	} else if (bsp->flags & DDP_BF_NOFLIP) {

		m->m_ddp_flags = 1;    /* always a kernel buffer */

		/* now HW buffer carries a user buffer */
		bsp->flags &= ~DDP_BF_NOFLIP;
		bsp->flags |= DDP_BF_NOCOPY;

		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
		 * any new data in which case we're done.  If in addition the
		 * offset is 0, then there wasn't a completion for the kbuf
		 * and we need to decrement the posted count.
		 */
		if (m->m_pkthdr.len == 0) {
			if (ddp_offset == 0) {
				q->kbuf_posted--;
				bsp->flags |= DDP_BF_NODATA;
			}
			SOCKBUF_UNLOCK(&so->so_rcv);

			m_free(m);
			return;
		}
	} else {
		SOCKBUF_UNLOCK(&so->so_rcv);
		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
		 * but it got here way late and nobody cares anymore.
		 */
		m_free(m);
		return;
	}

	/* Tag the mbuf as DDP data and advance the receive sequence. */
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
#ifdef T3_TRACE
	T3_TRACE3(TB(q),
		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
#endif
	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
	if (m->m_pkthdr.len == 0)
		q->user_ddp_pending = 0;
	else
		SBAPPEND(&so->so_rcv, m);
	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
 * in that case they are similar to DDP completions.
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* OK if socket doesn't exist */
	if (toep == NULL) {
		printf("null toep in do_get_tcb_rpl\n");
		return (CPL_RET_BUF_DONE);
	}

	inp_wlock(toep->tp_tp->t_inpcb);
	tcb_rpl_as_ddp_complete(toep, m);
	inp_wunlock(toep->tp_tp->t_inpcb);

	return (0);
}

/*
 * Account for data the hardware placed directly (DDP) ahead of this
 * RX_DATA message: the gap between tp->rcv_nxt and the CPL's sequence
 * number is DDP-placed payload, represented by repurposing @m.
 */
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data *hdr = cplhdr(m);
	unsigned int rcv_nxt = ntohl(hdr->seq);

	/* No gap means no DDP-placed data to account for. */
	if (tp->rcv_nxt == rcv_nxt)
		return;

	inp_wlock_assert(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);
	q = &toep->tp_ddp_state;
	bsp = &q->buf_state[q->cur_buf];
	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
		rcv_nxt, tp->rcv_nxt));
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "handle_ddp_data: neg len");
	}
#endif

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	/*
	 * For now, don't re-enable DDP after a connection fell out of DDP
	 * mode.
	 */
	q->ubuf_ddp_ready = 0;
	SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_data *hdr = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	int len = be16toh(hdr->len);

	inp_wlock(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		TRACE_EXIT;
		return;
	}

	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
		handle_ddp_data(toep, m);

	m->m_seq = ntohl(hdr->seq);
	m->m_ulp_mode = 0;                    /* for iSCSI */

#if VALIDATE_SEQ
	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
		log(LOG_ERR,
		       "%s: TID %u: Bad sequence number %u, expected %u\n",
		    TOE_DEV(toeptoso(toep))->name, toep->tp_tid, m->m_seq,
		       tp->rcv_nxt);
		m_freem(m);
		inp_wunlock(tp->t_inpcb);
		return;
	}
#endif
	/* Strip the CPL header; only payload goes to the socket buffer. */
	m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
	/*
	 * We don't handle urgent data yet
	 */
	if (__predict_false(hdr->urg))
		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
		     tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
							 tp->rcv_nxt];
#endif
	/* Track the delayed-ACK mode the hardware reported. */
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
		toep->tp_delack_mode = hdr->dack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

	if (len < m->m_pkthdr.len)
		m->m_pkthdr.len = m->m_len = len;

	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;
#ifdef T3_TRACE
	T3_TRACE2(TIDTB(sk),
	    "new_rx_data: seq 0x%x len %u",
	    m->m_seq, m->m_pkthdr.len);
#endif
	inp_wunlock(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sb_notify(&so->so_rcv))
		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);

	SBAPPEND(&so->so_rcv, m);

#ifdef notyet
	/*
	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
	 *
	 */
	KASSERT(so->so_rcv.sb_cc < (so->so_rcv.sb_mbmax << 1),

	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
		so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
#endif


	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
	    so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);

	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Handler for RX_DATA CPL messages.
 */
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);

	new_rx_data(toep, m);

	return (0);
}

/*
 * Process an RX_DATA_DDP message: account for hardware-placed payload,
 * tag @m as a DDP descriptor for it and append it to the receive buffer.
 */
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	struct socket *so = toeptoso(toep);
	int nomoredata = 0;
	unsigned int delack_mode;

	tp = sototcpcb(so);

	inp_wlock(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {

		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
		  "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
		  "hdr seq 0x%x len %u offset %u",
		  tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
		  ntohs(hdr->len), G_DDP_OFFSET(ddp_report));
	T3_TRACE1(TIDTB(sk),
		  "new_rx_data_ddp: ddp_report 0x%x",
		  ddp_report);
#endif
	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len.  We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	inp_wunlock(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
	    m->m_len, rcv_nxt, m->m_seq);
	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
	 * account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	SOCKBUF_LOCK(&so->so_rcv);
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
			m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));


	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;                     /* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
#endif
	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
		 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
		 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
		 F_DDP_INVALID_PPOD)

/*
 * Handler for
 * RX_DATA_DDP CPL messages.
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	/* Drop frames the hardware flagged with any DDP error. */
	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

/*
 * Process an RX_DDP_COMPLETE: the hardware finished placing data into a
 * DDP buffer; account for it, tag @m as its descriptor and append it to
 * the receive buffer.
 */
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;

	inp_wlock(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = sotoinpcb(so);

		handle_excess_rx(toep, m);
		inp_wunlock(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;


	SOCKBUF_LOCK(&so->so_rcv);
	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	/* The placed length is the advance of the DDP offset. */
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	inp_wunlock(tp->t_inpcb);

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR5(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report 0x%x offset %u, len %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report), m->m_len);

	bsp->cur_offset += m->m_len;

	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;                     /* flip buffers */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata=1;
	}

	CTR4(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report %u offset %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;


	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	inp_wlock_assert(tp->t_inpcb);
	/*
	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	     /* defeat recycling */
	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
	tcp_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */

	if (tp->rcv_nxt == rcv_nxt)			/* no data */
		return (0);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
2369 */ 2370 return (1); 2371 } 2372 2373 inp_wlock_assert(tp->t_inpcb); 2374 q = &toep->tp_ddp_state; 2375 SOCKBUF_LOCK(&so->so_rcv); 2376 bsp = &q->buf_state[q->cur_buf]; 2377 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; 2378 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2379 m->m_ddp_gl = (unsigned char *)bsp->gl; 2380 m->m_flags |= M_DDP; 2381 m->m_cur_offset = bsp->cur_offset; 2382 m->m_ddp_flags = 2383 DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; 2384 m->m_seq = tp->rcv_nxt; 2385 tp->rcv_nxt = rcv_nxt; 2386 bsp->cur_offset += m->m_pkthdr.len; 2387 if (!(bsp->flags & DDP_BF_NOFLIP)) 2388 q->cur_buf ^= 1; 2389#ifdef notyet 2390 skb_reset_transport_header(skb); 2391 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */ 2392#endif 2393 tp->t_rcvtime = ticks; 2394 SBAPPEND(&so->so_rcv, m); 2395 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 2396 sorwakeup_locked(so); 2397 else 2398 SOCKBUF_UNLOCK(&so->so_rcv); 2399 return (1); 2400} 2401 2402/* 2403 * Handle a peer FIN. 2404 */ 2405static void 2406do_peer_fin(struct socket *so, struct mbuf *m) 2407{ 2408 struct tcpcb *tp = sototcpcb(so); 2409 struct toepcb *toep = tp->t_toe; 2410 int keep = 0; 2411 DPRINTF("do_peer_fin state=%d\n", tp->t_state); 2412 2413#ifdef T3_TRACE 2414 T3_TRACE0(TIDTB(sk),"do_peer_fin:"); 2415#endif 2416 2417 if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { 2418 printf("abort_pending set\n"); 2419 2420 goto out; 2421 } 2422 INP_INFO_WLOCK(&tcbinfo); 2423 inp_wlock(tp->t_inpcb); 2424 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) { 2425 keep = handle_peer_close_data(so, m); 2426 if (keep < 0) { 2427 INP_INFO_WUNLOCK(&tcbinfo); 2428 inp_wunlock(tp->t_inpcb); 2429 return; 2430 } 2431 } 2432 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 2433 socantrcvmore(so); 2434 /* 2435 * If connection is half-synchronized 2436 * (ie NEEDSYN flag on) then delay ACK, 2437 * so it may be piggybacked when SYN is sent. 
2438 * Otherwise, since we received a FIN then no 2439 * more input can be expected, send ACK now. 2440 */ 2441 if (tp->t_flags & TF_NEEDSYN) 2442 tp->t_flags |= TF_DELACK; 2443 else 2444 tp->t_flags |= TF_ACKNOW; 2445 tp->rcv_nxt++; 2446 } 2447 2448 switch (tp->t_state) { 2449 case TCPS_SYN_RECEIVED: 2450 tp->t_starttime = ticks; 2451 /* FALLTHROUGH */ 2452 case TCPS_ESTABLISHED: 2453 tp->t_state = TCPS_CLOSE_WAIT; 2454 break; 2455 case TCPS_FIN_WAIT_1: 2456 tp->t_state = TCPS_CLOSING; 2457 break; 2458 case TCPS_FIN_WAIT_2: 2459 /* 2460 * If we've sent an abort_req we must have sent it too late, 2461 * HW will send us a reply telling us so, and this peer_close 2462 * is really the last message for this connection and needs to 2463 * be treated as an abort_rpl, i.e., transition the connection 2464 * to TCP_CLOSE (note that the host stack does this at the 2465 * time of generating the RST but we must wait for HW). 2466 * Otherwise we enter TIME_WAIT. 2467 */ 2468 t3_release_offload_resources(toep); 2469 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2470 tp = tcp_close(tp); 2471 } else { 2472 enter_timewait(so); 2473 } 2474 break; 2475 default: 2476 log(LOG_ERR, 2477 "%s: TID %u received PEER_CLOSE in bad state %d\n", 2478 TOE_DEV(so)->tod_name, toep->tp_tid, tp->t_state); 2479 } 2480 INP_INFO_WUNLOCK(&tcbinfo); 2481 if (tp) 2482 inp_wunlock(tp->t_inpcb); 2483 2484 DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags); 2485 2486#ifdef notyet 2487 /* Do not send POLL_HUP for half duplex close. */ 2488 if ((sk->sk_shutdown & SEND_SHUTDOWN) || 2489 sk->sk_state == TCP_CLOSE) 2490 sk_wake_async(so, 1, POLL_HUP); 2491 else 2492 sk_wake_async(so, 1, POLL_IN); 2493#endif 2494 2495out: 2496 if (!keep) 2497 m_free(m); 2498} 2499 2500/* 2501 * Handler for PEER_CLOSE CPL messages. 
2502 */ 2503static int 2504do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2505{ 2506 struct toepcb *toep = (struct toepcb *)ctx; 2507 struct socket *so = toeptoso(toep); 2508 2509 VALIDATE_SOCK(so); 2510 2511 do_peer_fin(so, m); 2512 return (0); 2513} 2514 2515static void 2516process_close_con_rpl(struct socket *so, struct mbuf *m) 2517{ 2518 struct tcpcb *tp = sototcpcb(so); 2519 struct cpl_close_con_rpl *rpl = cplhdr(m); 2520 struct toepcb *toep = tp->t_toe; 2521 2522 tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ 2523 2524 DPRINTF("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state, 2525 !!(so->so_state & SS_NOFDREF)); 2526 if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) 2527 goto out; 2528 2529 INP_INFO_WLOCK(&tcbinfo); 2530 inp_wlock(tp->t_inpcb); 2531 switch (tp->t_state) { 2532 case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */ 2533 t3_release_offload_resources(toep); 2534 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2535 tp = tcp_close(tp); 2536 2537 } else { 2538 enter_timewait(so); 2539 soisdisconnected(so); 2540 } 2541 break; 2542 case TCPS_LAST_ACK: 2543 /* 2544 * In this state we don't care about pending abort_rpl. 2545 * If we've sent abort_req it was post-close and was sent too 2546 * late, this close_con_rpl is the actual last message. 2547 */ 2548 t3_release_offload_resources(toep); 2549 tp = tcp_close(tp); 2550 break; 2551 case TCPS_FIN_WAIT_1: 2552 /* 2553 * If we can't receive any more 2554 * data, then closing user can proceed. 2555 * Starting the timer is contrary to the 2556 * specification, but if we don't get a FIN 2557 * we'll hang forever. 2558 * 2559 * XXXjl: 2560 * we should release the tp also, and use a 2561 * compressed state. 2562 */ 2563 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { 2564 int timeout; 2565 2566 soisdisconnected(so); 2567 timeout = (tcp_fast_finwait2_recycle) ? 
2568 tcp_finwait2_timeout : tcp_maxidle; 2569 tcp_timer_activate(tp, TT_2MSL, timeout); 2570 } 2571 tp->t_state = TCPS_FIN_WAIT_2; 2572 if ((so->so_options & SO_LINGER) && so->so_linger == 0 && 2573 (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) { 2574 tp = tcp_drop(tp, 0); 2575 } 2576 2577 break; 2578 default: 2579 log(LOG_ERR, 2580 "%s: TID %u received CLOSE_CON_RPL in bad state %d\n", 2581 TOE_DEV(so)->tod_name, toep->tp_tid, 2582 tp->t_state); 2583 } 2584 INP_INFO_WUNLOCK(&tcbinfo); 2585 if (tp) 2586 inp_wunlock(tp->t_inpcb); 2587out: 2588 m_freem(m); 2589} 2590 2591/* 2592 * Handler for CLOSE_CON_RPL CPL messages. 2593 */ 2594static int 2595do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, 2596 void *ctx) 2597{ 2598 struct toepcb *toep = (struct toepcb *)ctx; 2599 struct socket *so = toeptoso(toep); 2600 2601 VALIDATE_SOCK(so); 2602 2603 process_close_con_rpl(so, m); 2604 return (0); 2605} 2606 2607/* 2608 * Process abort replies. We only process these messages if we anticipate 2609 * them as the coordination between SW and HW in this area is somewhat lacking 2610 * and sometimes we get ABORT_RPLs after we are done with the connection that 2611 * originated the ABORT_REQ. 
2612 */ 2613static void 2614process_abort_rpl(struct socket *so, struct mbuf *m) 2615{ 2616 struct tcpcb *tp = sototcpcb(so); 2617 struct toepcb *toep = tp->t_toe; 2618 2619#ifdef T3_TRACE 2620 T3_TRACE1(TIDTB(sk), 2621 "process_abort_rpl: GTS rpl pending %d", 2622 sock_flag(sk, ABORT_RPL_PENDING)); 2623#endif 2624 2625 INP_INFO_WLOCK(&tcbinfo); 2626 inp_wlock(tp->t_inpcb); 2627 2628 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2629 /* 2630 * XXX panic on tcpdrop 2631 */ 2632 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(TOE_DEV(so))) 2633 toep->tp_flags |= TP_ABORT_RPL_RCVD; 2634 else { 2635 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); 2636 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || 2637 !is_t3a(TOE_DEV(so))) { 2638 if (toep->tp_flags & TP_ABORT_REQ_RCVD) 2639 panic("TP_ABORT_REQ_RCVD set"); 2640 t3_release_offload_resources(toep); 2641 tp = tcp_close(tp); 2642 } 2643 } 2644 } 2645 if (tp) 2646 inp_wunlock(tp->t_inpcb); 2647 INP_INFO_WUNLOCK(&tcbinfo); 2648 2649 m_free(m); 2650} 2651 2652/* 2653 * Handle an ABORT_RPL_RSS CPL message. 2654 */ 2655static int 2656do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2657{ 2658 struct socket *so; 2659 struct cpl_abort_rpl_rss *rpl = cplhdr(m); 2660 struct toepcb *toep; 2661 2662 /* 2663 * Ignore replies to post-close aborts indicating that the abort was 2664 * requested too late. These connections are terminated when we get 2665 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 2666 * arrives the TID is either no longer used or it has been recycled. 
2667 */ 2668 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2669discard: 2670 m_free(m); 2671 return (0); 2672 } 2673 2674 toep = (struct toepcb *)ctx; 2675 2676 /* 2677 * Sometimes we've already closed the socket, e.g., a post-close 2678 * abort races with ABORT_REQ_RSS, the latter frees the socket 2679 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2680 * but FW turns the ABORT_REQ into a regular one and so we get 2681 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2682 */ 2683 if (!toep) 2684 goto discard; 2685 2686 if (toep->tp_tp == NULL) { 2687 printf("removing tid for abort\n"); 2688 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2689 if (toep->tp_l2t) 2690 l2t_release(L2DATA(cdev), toep->tp_l2t); 2691 2692 toepcb_release(toep); 2693 goto discard; 2694 } 2695 2696 printf("toep=%p\n", toep); 2697 printf("tp=%p\n", toep->tp_tp); 2698 2699 so = toeptoso(toep); /* <- XXX panic */ 2700 toepcb_hold(toep); 2701 process_abort_rpl(so, m); 2702 toepcb_release(toep); 2703 return (0); 2704} 2705 2706/* 2707 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2708 * indicate whether RST should be sent in response. 2709 */ 2710static int 2711abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2712{ 2713 struct tcpcb *tp = sototcpcb(so); 2714 2715 switch (abort_reason) { 2716 case CPL_ERR_BAD_SYN: 2717#if 0 2718 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2719#endif 2720 case CPL_ERR_CONN_RESET: 2721 // XXX need to handle SYN_RECV due to crossed SYNs 2722 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2723 case CPL_ERR_XMIT_TIMEDOUT: 2724 case CPL_ERR_PERSIST_TIMEDOUT: 2725 case CPL_ERR_FINWAIT2_TIMEDOUT: 2726 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2727#if 0 2728 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2729#endif 2730 return (ETIMEDOUT); 2731 default: 2732 return (EIO); 2733 } 2734} 2735 2736static inline void 2737set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2738{ 2739 struct cpl_abort_rpl *rpl = cplhdr(m); 2740 2741 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2742 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2743 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2744 2745 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2746 rpl->cmd = cmd; 2747} 2748 2749static void 2750send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2751{ 2752 struct mbuf *reply_mbuf; 2753 struct cpl_abort_req_rss *req = cplhdr(m); 2754 2755 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2756 m_set_priority(m, CPL_PRIORITY_DATA); 2757 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2758 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2759 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2760 m_free(m); 2761} 2762 2763/* 2764 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2765 */ 2766static inline int 2767is_neg_adv_abort(unsigned int status) 2768{ 2769 return status == CPL_ERR_RTX_NEG_ADVICE || 2770 status == CPL_ERR_PERSIST_NEG_ADVICE; 2771} 2772 2773static void 2774send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2775{ 2776 struct mbuf *reply_mbuf; 2777 struct cpl_abort_req_rss *req = cplhdr(m); 2778 2779 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2780 2781 if (!reply_mbuf) { 2782 /* Defer the reply. Stick rst_status into req->cmd. 
*/ 2783 req->status = rst_status; 2784 t3_defer_reply(m, tdev, send_deferred_abort_rpl); 2785 return; 2786 } 2787 2788 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA); 2789 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status); 2790 m_free(m); 2791 2792 /* 2793 * XXX need to sync with ARP as for SYN_RECV connections we can send 2794 * these messages while ARP is pending. For other connection states 2795 * it's not a problem. 2796 */ 2797 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2798} 2799 2800#ifdef notyet 2801static void 2802cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) 2803{ 2804 CXGB_UNIMPLEMENTED(); 2805#ifdef notyet 2806 struct request_sock *req = child->sk_user_data; 2807 2808 inet_csk_reqsk_queue_removed(parent, req); 2809 synq_remove(tcp_sk(child)); 2810 __reqsk_free(req); 2811 child->sk_user_data = NULL; 2812#endif 2813} 2814 2815 2816/* 2817 * Performs the actual work to abort a SYN_RECV connection. 2818 */ 2819static void 2820do_abort_syn_rcv(struct socket *child, struct socket *parent) 2821{ 2822 struct tcpcb *parenttp = sototcpcb(parent); 2823 struct tcpcb *childtp = sototcpcb(child); 2824 2825 /* 2826 * If the server is still open we clean up the child connection, 2827 * otherwise the server already did the clean up as it was purging 2828 * its SYN queue and the skb was just sitting in its backlog. 2829 */ 2830 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { 2831 cleanup_syn_rcv_conn(child, parent); 2832 INP_INFO_WLOCK(&tcbinfo); 2833 inp_wlock(childtp->t_inpcb); 2834 t3_release_offload_resources(childtp->t_toe); 2835 childtp = tcp_close(childtp); 2836 INP_INFO_WUNLOCK(&tcbinfo); 2837 if (childtp) 2838 inp_wunlock(childtp->t_inpcb); 2839 } 2840} 2841#endif 2842 2843/* 2844 * Handle abort requests for a SYN_RECV connection. These need extra work 2845 * because the socket is on its parent's SYN queue. 
2846 */ 2847static int 2848abort_syn_rcv(struct socket *so, struct mbuf *m) 2849{ 2850 CXGB_UNIMPLEMENTED(); 2851#ifdef notyet 2852 struct socket *parent; 2853 struct toedev *tdev = TOE_DEV(so); 2854 struct t3cdev *cdev = TOM_DATA(tdev)->cdev; 2855 struct socket *oreq = so->so_incomp; 2856 struct t3c_tid_entry *t3c_stid; 2857 struct tid_info *t; 2858 2859 if (!oreq) 2860 return -1; /* somehow we are not on the SYN queue */ 2861 2862 t = &(T3C_DATA(cdev))->tid_maps; 2863 t3c_stid = lookup_stid(t, oreq->ts_recent); 2864 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; 2865 2866 SOCK_LOCK(parent); 2867 do_abort_syn_rcv(so, parent); 2868 send_abort_rpl(m, tdev, CPL_ABORT_NO_RST); 2869 SOCK_UNLOCK(parent); 2870#endif 2871 return (0); 2872} 2873 2874/* 2875 * Process abort requests. If we are waiting for an ABORT_RPL we ignore this 2876 * request except that we need to reply to it. 2877 */ 2878static void 2879process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev) 2880{ 2881 int rst_status = CPL_ABORT_NO_RST; 2882 const struct cpl_abort_req_rss *req = cplhdr(m); 2883 struct tcpcb *tp = sototcpcb(so); 2884 struct toepcb *toep = tp->t_toe; 2885 2886 inp_wlock(tp->t_inpcb); 2887 if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) { 2888 toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN); 2889 m_free(m); 2890 goto skip; 2891 } 2892 2893 toep->tp_flags &= ~TP_ABORT_REQ_RCVD; 2894 /* 2895 * Three cases to consider: 2896 * a) We haven't sent an abort_req; close the connection. 2897 * b) We have sent a post-close abort_req that will get to TP too late 2898 * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will 2899 * be ignored and the connection should be closed now. 2900 * c) We have sent a regular abort_req that will get to TP too late. 2901 * That will generate an abort_rpl with status 0, wait for it. 
2902 */ 2903 if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) || 2904 (is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) { 2905 so->so_error = abort_status_to_errno(so, req->status, 2906 &rst_status); 2907 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 2908 sorwakeup(so); 2909 /* 2910 * SYN_RECV needs special processing. If abort_syn_rcv() 2911 * returns 0 is has taken care of the abort. 2912 */ 2913 if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m)) 2914 goto skip; 2915 2916 t3_release_offload_resources(toep); 2917 tp = tcp_close(tp); 2918 } 2919 if (tp) 2920 inp_wunlock(tp->t_inpcb); 2921 send_abort_rpl(m, tdev, rst_status); 2922 return; 2923 2924skip: 2925 inp_wunlock(tp->t_inpcb); 2926} 2927 2928/* 2929 * Handle an ABORT_REQ_RSS CPL message. 2930 */ 2931static int 2932do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2933{ 2934 const struct cpl_abort_req_rss *req = cplhdr(m); 2935 struct toepcb *toep = (struct toepcb *)ctx; 2936 struct socket *so; 2937 struct inpcb *inp; 2938 2939 if (is_neg_adv_abort(req->status)) { 2940 m_free(m); 2941 return (0); 2942 } 2943 2944 printf("aborting tid=%d\n", toep->tp_tid); 2945 2946 if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) { 2947 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2948 toep->tp_flags |= TP_ABORT_REQ_RCVD; 2949 printf("sending abort rpl\n"); 2950 2951 send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST); 2952 printf("sent\n"); 2953 if (toep->tp_l2t) 2954 l2t_release(L2DATA(cdev), toep->tp_l2t); 2955 2956 /* 2957 * Unhook 2958 */ 2959 toep->tp_tp->t_toe = NULL; 2960 toep->tp_tp->t_flags &= ~TF_TOE; 2961 toep->tp_tp = NULL; 2962 /* 2963 * XXX need to call syncache_chkrst - but we don't 2964 * have a way of doing that yet 2965 */ 2966 toepcb_release(toep); 2967 printf("abort for unestablished connection :-(\n"); 2968 return (0); 2969 } 2970 if (toep->tp_tp == NULL) { 2971 printf("disconnected toepcb\n"); 2972 /* should be freed momentarily */ 
2973 return (0); 2974 } 2975 2976 so = toeptoso(toep); 2977 inp = sotoinpcb(so); 2978 2979 VALIDATE_SOCK(so); 2980 toepcb_hold(toep); 2981 INP_INFO_WLOCK(&tcbinfo); 2982 process_abort_req(so, m, TOE_DEV(so)); 2983 INP_INFO_WUNLOCK(&tcbinfo); 2984 toepcb_release(toep); 2985 return (0); 2986} 2987#ifdef notyet 2988static void 2989pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m) 2990{ 2991 struct toedev *tdev = TOE_DEV(parent); 2992 2993 do_abort_syn_rcv(child, parent); 2994 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) { 2995 struct cpl_pass_accept_rpl *rpl = cplhdr(m); 2996 2997 rpl->opt0h = htonl(F_TCAM_BYPASS); 2998 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 2999 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 3000 } else 3001 m_free(m); 3002} 3003#endif 3004static void 3005handle_pass_open_arp_failure(struct socket *so, struct mbuf *m) 3006{ 3007 CXGB_UNIMPLEMENTED(); 3008 3009#ifdef notyet 3010 struct t3cdev *cdev; 3011 struct socket *parent; 3012 struct socket *oreq; 3013 struct t3c_tid_entry *t3c_stid; 3014 struct tid_info *t; 3015 struct tcpcb *otp, *tp = sototcpcb(so); 3016 struct toepcb *toep = tp->t_toe; 3017 3018 /* 3019 * If the connection is being aborted due to the parent listening 3020 * socket going away there's nothing to do, the ABORT_REQ will close 3021 * the connection. 3022 */ 3023 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 3024 m_free(m); 3025 return; 3026 } 3027 3028 oreq = so->so_incomp; 3029 otp = sototcpcb(oreq); 3030 3031 cdev = T3C_DEV(so); 3032 t = &(T3C_DATA(cdev))->tid_maps; 3033 t3c_stid = lookup_stid(t, otp->ts_recent); 3034 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; 3035 3036 SOCK_LOCK(parent); 3037 pass_open_abort(so, parent, m); 3038 SOCK_UNLOCK(parent); 3039#endif 3040} 3041 3042/* 3043 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly 3044 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV 3045 * connection. 
3046 */ 3047static void 3048pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m) 3049{ 3050 3051#ifdef notyet 3052 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3053 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk); 3054#endif 3055 handle_pass_open_arp_failure(m_get_socket(m), m); 3056} 3057 3058/* 3059 * Populate a reject CPL_PASS_ACCEPT_RPL WR. 3060 */ 3061static void 3062mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf) 3063{ 3064 struct cpl_pass_accept_req *req = cplhdr(req_mbuf); 3065 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf); 3066 unsigned int tid = GET_TID(req); 3067 3068 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP); 3069 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3070 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3071 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet 3072 rpl->opt0h = htonl(F_TCAM_BYPASS); 3073 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 3074 rpl->opt2 = 0; 3075 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3076} 3077 3078/* 3079 * Send a deferred reject to an accept request. 
3080 */ 3081static void 3082reject_pass_request(struct toedev *tdev, struct mbuf *m) 3083{ 3084 struct mbuf *reply_mbuf; 3085 3086 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl)); 3087 mk_pass_accept_rpl(reply_mbuf, m); 3088 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 3089 m_free(m); 3090} 3091 3092static void 3093handle_syncache_event(int event, void *arg) 3094{ 3095 struct toepcb *toep = arg; 3096 3097 switch (event) { 3098 case TOE_SC_ENTRY_PRESENT: 3099 /* 3100 * entry already exists - free toepcb 3101 * and l2t 3102 */ 3103 printf("syncache entry present\n"); 3104 toepcb_release(toep); 3105 break; 3106 case TOE_SC_DROP: 3107 /* 3108 * The syncache has given up on this entry 3109 * either it timed out, or it was evicted 3110 * we need to explicitly release the tid 3111 */ 3112 printf("syncache entry dropped\n"); 3113 toepcb_release(toep); 3114 break; 3115 default: 3116 log(LOG_ERR, "unknown syncache event %d\n", event); 3117 break; 3118 } 3119} 3120 3121static void 3122syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep) 3123{ 3124 struct in_conninfo inc; 3125 struct tcpopt to; 3126 struct tcphdr th; 3127 struct inpcb *inp; 3128 int mss, wsf, sack, ts; 3129 uint32_t rcv_isn = ntohl(req->rcv_isn); 3130 3131 bzero(&to, sizeof(struct tcpopt)); 3132 inp = sotoinpcb(lso); 3133 3134 /* 3135 * Fill out information for entering us into the syncache 3136 */ 3137 inc.inc_fport = th.th_sport = req->peer_port; 3138 inc.inc_lport = th.th_dport = req->local_port; 3139 th.th_seq = req->rcv_isn; 3140 th.th_flags = TH_SYN; 3141 3142 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; 3143 3144 3145 inc.inc_isipv6 = 0; 3146 inc.inc_len = 0; 3147 inc.inc_faddr.s_addr = req->peer_ip; 3148 inc.inc_laddr.s_addr = req->local_ip; 3149 3150 DPRINTF("syncache add of %d:%d %d:%d\n", 3151 ntohl(req->local_ip), ntohs(req->local_port), 3152 ntohl(req->peer_ip), ntohs(req->peer_port)); 
3153 3154 mss = req->tcp_options.mss; 3155 wsf = req->tcp_options.wsf; 3156 ts = req->tcp_options.tstamp; 3157 sack = req->tcp_options.sack; 3158 to.to_mss = mss; 3159 to.to_wscale = wsf; 3160 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3161 INP_INFO_WLOCK(&tcbinfo); 3162 inp_wlock(inp); 3163 syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep); 3164} 3165 3166 3167/* 3168 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket 3169 * lock held. Note that the sock here is a listening socket that is not owned 3170 * by the TOE. 3171 */ 3172static void 3173process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, 3174 struct listen_ctx *lctx) 3175{ 3176 int rt_flags; 3177 struct l2t_entry *e; 3178 struct iff_mac tim; 3179 struct mbuf *reply_mbuf, *ddp_mbuf = NULL; 3180 struct cpl_pass_accept_rpl *rpl; 3181 struct cpl_pass_accept_req *req = cplhdr(m); 3182 unsigned int tid = GET_TID(req); 3183 struct tom_data *d = TOM_DATA(tdev); 3184 struct t3cdev *cdev = d->cdev; 3185 struct tcpcb *tp = sototcpcb(so); 3186 struct toepcb *newtoep; 3187 struct rtentry *dst; 3188 struct sockaddr_in nam; 3189 struct t3c_data *td = T3C_DATA(cdev); 3190 3191 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3192 if (__predict_false(reply_mbuf == NULL)) { 3193 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3194 t3_defer_reply(m, tdev, reject_pass_request); 3195 else { 3196 cxgb_queue_tid_release(cdev, tid); 3197 m_free(m); 3198 } 3199 DPRINTF("failed to get reply_mbuf\n"); 3200 3201 goto out; 3202 } 3203 3204 if (tp->t_state != TCPS_LISTEN) { 3205 DPRINTF("socket not in listen state\n"); 3206 3207 goto reject; 3208 } 3209 3210 tim.mac_addr = req->dst_mac; 3211 tim.vlan_tag = ntohs(req->vlan_tag); 3212 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { 3213 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n"); 3214 goto reject; 3215 } 3216 3217#ifdef notyet 3218 /* 3219 
* XXX do route lookup to confirm that we're still listening on this 3220 * address 3221 */ 3222 if (ip_route_input(skb, req->local_ip, req->peer_ip, 3223 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev)) 3224 goto reject; 3225 rt_flags = ((struct rtable *)skb->dst)->rt_flags & 3226 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); 3227 dst_release(skb->dst); // done with the input route, release it 3228 skb->dst = NULL; 3229 3230 if ((rt_flags & RTF_LOCAL) == 0) 3231 goto reject; 3232#endif 3233 /* 3234 * XXX 3235 */ 3236 rt_flags = RTF_LOCAL; 3237 if ((rt_flags & RTF_LOCAL) == 0) 3238 goto reject; 3239 3240 /* 3241 * Calculate values and add to syncache 3242 */ 3243 3244 newtoep = toepcb_alloc(); 3245 if (newtoep == NULL) 3246 goto reject; 3247 3248 bzero(&nam, sizeof(struct sockaddr_in)); 3249 3250 nam.sin_len = sizeof(struct sockaddr_in); 3251 nam.sin_family = AF_INET; 3252 nam.sin_addr.s_addr =req->peer_ip; 3253 dst = rtalloc2((struct sockaddr *)&nam, 1, 0); 3254 3255 if (dst == NULL) { 3256 printf("failed to find route\n"); 3257 goto reject; 3258 } 3259 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev, 3260 (struct sockaddr *)&nam); 3261 if (e == NULL) { 3262 DPRINTF("failed to get l2t\n"); 3263 } 3264 /* 3265 * Point to our listen socket until accept 3266 */ 3267 newtoep->tp_tp = tp; 3268 newtoep->tp_flags = TP_SYN_RCVD; 3269 newtoep->tp_tid = tid; 3270 newtoep->tp_toedev = tdev; 3271 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3272 3273 cxgb_insert_tid(cdev, d->client, newtoep, tid); 3274 SOCK_LOCK(so); 3275 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); 3276 SOCK_UNLOCK(so); 3277 3278 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so->so_options & SO_NO_DDP) && 3279 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; 3280 3281 if (newtoep->tp_ulp_mode) { 3282 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3283 3284 if (ddp_mbuf == NULL) 3285 newtoep->tp_ulp_mode = 0; 3286 } 3287 3288 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d", 3289 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); 3290 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); 3291 /* 3292 * XXX workaround for lack of syncache drop 3293 */ 3294 toepcb_hold(newtoep); 3295 syncache_add_accept_req(req, so, newtoep); 3296 3297 rpl = cplhdr(reply_mbuf); 3298 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl); 3299 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3300 rpl->wr.wr_lo = 0; 3301 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3302 rpl->opt2 = htonl(calc_opt2(so, tdev)); 3303 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3304 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten 3305 3306 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | 3307 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); 3308 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) | 3309 CPL_PASS_OPEN_ACCEPT); 3310 3311 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status); 3312 3313 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep)); 3314 3315 l2t_send(cdev, reply_mbuf, e); 3316 m_free(m); 3317 if (newtoep->tp_ulp_mode) { 3318 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS, 3319 V_TF_DDP_OFF(1) | 3320 TP_DDP_TIMER_WORKAROUND_MASK, 3321 V_TF_DDP_OFF(1) | 3322 TP_DDP_TIMER_WORKAROUND_VAL, 1); 3323 } else 3324 printf("not offloading\n"); 3325 3326 3327 3328 return; 3329reject: 3330 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3331 mk_pass_accept_rpl(reply_mbuf, m); 3332 else 3333 mk_tid_release(reply_mbuf, newtoep, tid); 3334 cxgb_ofld_send(cdev, reply_mbuf); 3335 m_free(m); 3336out: 3337#if 0 3338 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3339#else 3340 return; 3341#endif 3342} 3343 
/*
 * Handle a CPL_PASS_ACCEPT_REQ message.
 *
 * CPL dispatch callback for passive-open requests; ctx is the listen_ctx
 * installed when the listening socket was offloaded.  All real work is
 * delegated to process_pass_accept_req().  Returns 0 (message consumed).
 */
static int
do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
	struct socket *lso = listen_ctx->lso;
	struct tom_data *d = listen_ctx->tom_data;

#if VALIDATE_TID
	/*
	 * NOTE(review): this validation block still uses Linux idioms
	 * (printk, lsk, cdev->name) and would not compile if VALIDATE_TID
	 * were enabled; it appears to be retained from the Linux driver.
	 */
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	if (unlikely(!lsk)) {
		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
		       cdev->name,
		       (unsigned long)((union listen_entry *)ctx -
					t->stid_tab));
		return CPL_RET_BUF_DONE;
	}
	if (unlikely(tid >= t->ntids)) {
		printk(KERN_ERR "%s: passive open TID %u too large\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
	/*
	 * For T3A the current user of the TID may have closed but its last
	 * message(s) may have been backlogged so the TID appears to be still
	 * in use.  Just take the TID away, the connection can close at its
	 * own leisure.  For T3B this situation is a bug.
	 */
	if (!valid_new_tid(t, tid) &&
	    cdev->type != T3A) {
		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
		       cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
#endif

	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
	return (0);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 *
 * The inpcb write lock must be held; "opt" is the raw tcp_opt word from the
 * establish CPL.
 */
static void
assign_rxopt(struct socket *so, unsigned int opt)
{
	const struct t3c_data *td = T3C_DATA(T3C_DEV(so));
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	inp_wlock_assert(tp->t_inpcb);

	/*
	 * The chip reports the negotiated MSS as an index into the MTU
	 * table; the 40 subtracted bytes cover the fixed IP + TCP headers.
	 */
	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
	tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
	tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
	/* Window scaling takes effect only if both sides negotiated it. */
	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE|TF_REQ_SCALE))
		tp->rcv_scale = tp->request_r_scale;
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void
make_established(struct socket *so, u32 snd_isn, unsigned int opt)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/* Seed every send-side sequence variable from the post-SYN ISN. */
	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
	assign_rxopt(so, opt);
	so->so_proto->pr_ctloutput = t3_ctloutput;

#if 0
	inet_sk(sk)->id = tp->write_seq ^ jiffies;
#endif
	/*
	 * XXX not clear what rcv_wup maps to
	 */
	/*
	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
	 * pass through opt0.  opt0's RCV_BUFSIZ field saturates at
	 * M_RCV_BUFSIZ 1KB units; any excess window is returned here.
	 */
	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);

	dump_toepcb(toep);

#ifdef notyet
/*
 * no clean interface for marking ARP up to date
 */
	dst_confirm(sk->sk_dst_cache);
#endif
	tp->t_starttime = ticks;
	tp->t_state = TCPS_ESTABLISHED;
	soisconnected(so);
}

/*
 * Build a synthetic tcphdr/tcpopt/in_conninfo from a CPL_PASS_ESTABLISH
 * message and hand them to syncache_expand() so the stack creates the
 * connection's socket.  On success *so points at the new socket.
 *
 * NOTE(review): only the fields consulted by syncache_expand() are filled
 * in; the rest of "th" and "inc" is uninitialized stack data — confirm the
 * syncache never reads beyond what is set here.
 */
static int
syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
{

	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	int mss, wsf, sack, ts;
	struct mbuf *m = NULL;
	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
	unsigned int opt;

#ifdef MAC
#error	"no MAC support"
#endif

	opt = ntohs(req->tcp_opt);

	bzero(&to, sizeof(struct tcpopt));

	/*
	 * Fill out information for entering us into the syncache
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_ACK;

	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	/* Translate the HW-reported option word into a struct tcpopt. */
	mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	wsf = G_TCPOPT_WSCALE_OK(opt);
	ts = G_TCPOPT_TSTAMP(opt);
	sack = G_TCPOPT_SACK(opt);

	to.to_mss = mss;
	to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);

	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port),
	    mss, wsf, ts, sack);
	return syncache_expand(&inc, &to, &th, so, m);
}


/*
 * Process a CPL_PASS_ESTABLISH message.
XXX a lot of the locking doesn't work
 * if we are in TCP_SYN_RECV due to crossed SYNs
 */
static int
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_pass_establish *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp;
	struct socket *so, *lso;
	struct t3c_data *td = T3C_DATA(cdev);
	// Complete socket initialization now that we have the SND_ISN

	struct toedev *tdev;

	so = lso = toeptoso(toep);
	tdev = toep->tp_toedev;

	/* Take this connection off the listener's SYN queue. */
	SOCK_LOCK(so);
	LIST_REMOVE(toep, synq_entry);
	SOCK_UNLOCK(so);

	INP_INFO_WLOCK(&tcbinfo);
	if (!syncache_expand_establish_req(req, &so, toep)) {
		/*
		 * No entry
		 */
		CXGB_UNIMPLEMENTED();
	}
	if (so == NULL) {
		/*
		 * Couldn't create the socket
		 */
		CXGB_UNIMPLEMENTED();
	}

	/*
	 * XXX workaround for lack of syncache drop
	 * (drops the extra reference taken by toepcb_hold() in
	 * process_pass_accept_req)
	 */
	toepcb_release(toep);

	/* "so" now refers to the newly created connection's socket. */
	tp = sototcpcb(so);
	inp_wlock(tp->t_inpcb);

	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	/* Cross-link the toepcb and the new tcpcb and finish TOE setup. */
	toep->tp_tp = tp;
	toep->tp_flags = 0;
	tp->t_toe = toep;
	reset_wr_list(toep);
	tp->rcv_wnd = select_rcv_wnd(tdev, so);
	tp->rcv_nxt = toep->tp_copied_seq;
	install_offload_ops(so);

	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
	toep->tp_qset_idx = 0;
	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);

	/*
	 * XXX Cancel any keep alive timer
	 */

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
	INP_INFO_WUNLOCK(&tcbinfo);
	inp_wunlock(tp->t_inpcb);

	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
#ifdef notyet
	/*
	 * XXX not sure how these checks map to us
	 */
	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
		sk->sk_state_change(sk);
		sk_wake_async(so, 0, POLL_OUT);
	}
	/*
	 * The state for the new connection is now up to date.
	 * Next check if we should add the connection to the parent's
	 * accept queue.  When the parent closes it resets connections
	 * on its SYN queue, so check if we are being reset.  If so we
	 * don't need to do anything more, the coming ABORT_RPL will
	 * destroy this socket.  Otherwise move the connection to the
	 * accept queue.
	 *
	 * Note that we reset the synq before closing the server so if
	 * we are not being reset the stid is still open.
	 */
	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
		__kfree_skb(skb);
		goto unlock;
	}
#endif
	m_free(m);

	return (0);
}

/*
 * Fill in the right TID for CPL messages waiting in the out-of-order queue
 * and send them to the TOE.
 *
 * Called once the connection finally has a TID; inpcb write lock held.
 */
static void
fixup_and_send_ofo(struct socket *so)
{
	struct mbuf *m;
	struct toedev *tdev = TOE_DEV(so);
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	unsigned int tid = toep->tp_tid;

	/* NOTE(review): debug printf left in; consider CTR/DPRINTF. */
	printf("fixup_and_send_ofo\n");

	inp_wlock_assert(tp->t_inpcb);
	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
		/*
		 * A variety of messages can be waiting but the fields we'll
		 * be touching are common to all so any message type will do.
		 */
		struct cpl_close_con_req *p = cplhdr(m);

		p->wr.wr_lo = htonl(V_WR_TID(tid));
		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	}
}

/*
 * Updates socket state from an active establish CPL message.  Runs with the
 * socket lock held.
 */
static void
socket_act_establish(struct socket *so, struct mbuf *m)
{
	struct cpl_act_establish *req = cplhdr(m);
	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
		    toep->tp_tid, tp->t_state);

	tp->ts_recent_age = ticks;
	/*
	 * NOTE(review): rcv_wnd (a window size) is assigned the peer's ISN
	 * here, which looks like a copy-paste slip — confirm intended.
	 */
	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * Now that we finally have a TID send any CPL messages that we had to
	 * defer for lack of a TID.
	 */
	if (mbufq_len(&toep->out_of_order_queue))
		fixup_and_send_ofo(so);

	if (__predict_false(so->so_state & SS_NOFDREF)) {
		/*
		 * XXX does this even make sense?
		 */
		sorwakeup(so);
	}
	/* This function consumes the CPL mbuf. */
	m_free(m);
#ifdef notyet
/*
 * XXX assume no write requests permitted while socket connection is
 *     incomplete
 */
	/*
	 * Currently the send queue must be empty at this point because the
	 * socket layer does not send anything before a connection is
	 * established.  To be future proof though we handle the possibility
	 * that there are pending buffers to send (either TX_DATA or
	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
	 * buffers according to the just learned write_seq, and then we send
	 * them on their way.
	 */
	fixup_pending_writeq_buffers(sk);
	if (t3_push_frames(so, 1))
		sk->sk_write_space(sk);
#endif

	toep->tp_state = tp->t_state;
	tcpstat.tcps_connects++;

}

/*
 * Process a CPL_ACT_ESTABLISH message.
 *
 * Swaps the connection's provisional atid for the permanent tid assigned by
 * the hardware, inserts the socket into the TID table, and completes
 * establishment.  The CPL mbuf is consumed by socket_act_establish().
 */
static int
do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_act_establish *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct toedev *tdev;
	struct tom_data *d;

	/* Connection may already be gone; just return the atid. */
	if (tp == NULL) {
		free_atid(cdev, atid);
		return (0);
	}

	so = toeptoso(toep);
	tdev = TOE_DEV(so); /* blow up here if link was down */
	d = TOM_DATA(tdev);

	inp_wlock(tp->t_inpcb);

	/*
	 * It's OK if the TID is currently in use, the owning socket may have
	 * backlogged its last CPL message(s).  Just take it away.
	 */
	toep->tp_tid = tid;
	toep->tp_tp = tp;
	so_insert_tid(d, so, tid);
	free_atid(cdev, atid);
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));

	socket_act_establish(so, m);
	inp_wunlock(tp->t_inpcb);
	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);

	return (0);
}

/*
 * Process an acknowledgment of WR completion.  Advance snd_una and send the
 * next batch of work requests from the write queue.
3741 */ 3742static void 3743wr_ack(struct toepcb *toep, struct mbuf *m) 3744{ 3745 struct tcpcb *tp = toep->tp_tp; 3746 struct cpl_wr_ack *hdr = cplhdr(m); 3747 struct socket *so = toeptoso(toep); 3748 unsigned int credits = ntohs(hdr->credits); 3749 u32 snd_una = ntohl(hdr->snd_una); 3750 int bytes = 0; 3751 3752 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3753 3754 inp_wlock(tp->t_inpcb); 3755 3756 toep->tp_wr_avail += credits; 3757 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3758 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3759 3760 while (credits) { 3761 struct mbuf *p = peek_wr(toep); 3762 3763 if (__predict_false(!p)) { 3764 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3765 "nothing pending, state %u wr_avail=%u\n", 3766 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3767 break; 3768 } 3769 CTR2(KTR_TOM, 3770 "wr_ack: p->credits=%d p->bytes=%d", p->m_pkthdr.csum_data, p->m_pkthdr.len); 3771 3772 KASSERT(p->m_pkthdr.csum_data != 0, ("empty request still on list")); 3773 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3774 3775#if DEBUG_WR > 1 3776 struct tx_data_wr *w = cplhdr(p); 3777 log(LOG_ERR, 3778 "TID %u got %u WR credits, need %u, len %u, " 3779 "main body %u, frags %u, seq # %u, ACK una %u," 3780 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3781 toep->tp_tid, credits, p->csum, p->len, 3782 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3783 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3784 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3785#endif 3786 p->m_pkthdr.csum_data -= credits; 3787 break; 3788 } else { 3789 dequeue_wr(toep); 3790 credits -= p->m_pkthdr.csum_data; 3791 bytes += p->m_pkthdr.len; 3792 CTR3(KTR_TOM, 3793 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3794 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3795 3796 m_free(p); 3797 } 3798 } 3799 3800#if DEBUG_WR 3801 check_wr_invariants(tp); 3802#endif 3803 3804 if 
(__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3805#if VALIDATE_SEQ 3806 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3807 3808 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3809 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3810 toep->tp_tid, tp->snd_una); 3811#endif 3812 goto out_free; 3813 } 3814 3815 if (tp->snd_una != snd_una) { 3816 tp->snd_una = snd_una; 3817 tp->ts_recent_age = ticks; 3818#ifdef notyet 3819 /* 3820 * Keep ARP entry "minty fresh" 3821 */ 3822 dst_confirm(sk->sk_dst_cache); 3823#endif 3824 if (tp->snd_una == tp->snd_nxt) 3825 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3826 } 3827 if (bytes) { 3828 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3829 SOCKBUF_LOCK(&so->so_snd); 3830 sbdrop_locked(&so->so_snd, bytes); 3831 sowwakeup_locked(so); 3832 } 3833 3834 if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc) 3835 t3_push_frames(so, 0); 3836 3837out_free: 3838 inp_wunlock(tp->t_inpcb); 3839 m_free(m); 3840} 3841 3842/* 3843 * Handler for TX_DATA_ACK CPL messages. 3844 */ 3845static int 3846do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3847{ 3848 struct toepcb *toep = (struct toepcb *)ctx; 3849 3850 VALIDATE_SOCK(so); 3851 3852 wr_ack(toep, m); 3853 return 0; 3854} 3855 3856/* 3857 * Handler for TRACE_PKT CPL messages. Just sink these packets. 3858 */ 3859static int 3860do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 3861{ 3862 m_freem(m); 3863 return 0; 3864} 3865 3866/* 3867 * Reset a connection that is on a listener's SYN queue or accept queue, 3868 * i.e., one that has not had a struct socket associated with it. 3869 * Must be called from process context. 3870 * 3871 * Modeled after code in inet_csk_listen_stop(). 3872 */ 3873static void 3874t3_reset_listen_child(struct socket *child) 3875{ 3876 struct tcpcb *tp = sototcpcb(child); 3877 3878 t3_send_reset(tp->t_toe); 3879} 3880 3881/* 3882 * Disconnect offloaded established but not yet accepted connections sitting 3883 * on a server's accept_queue. 
We just send an ABORT_REQ at this point and 3884 * finish off the disconnect later as we may need to wait for the ABORT_RPL. 3885 */ 3886void 3887t3_disconnect_acceptq(struct socket *listen_so) 3888{ 3889 struct socket *so; 3890 struct tcpcb *tp; 3891 3892 TAILQ_FOREACH(so, &listen_so->so_comp, so_list) { 3893 tp = sototcpcb(so); 3894 3895 if (tp->t_flags & TF_TOE) { 3896 inp_wlock(tp->t_inpcb); 3897 t3_reset_listen_child(so); 3898 inp_wunlock(tp->t_inpcb); 3899 } 3900 } 3901} 3902 3903/* 3904 * Reset offloaded connections sitting on a server's syn queue. As above 3905 * we send ABORT_REQ and finish off when we get ABORT_RPL. 3906 */ 3907 3908void 3909t3_reset_synq(struct listen_ctx *lctx) 3910{ 3911 struct toepcb *toep; 3912 3913 SOCK_LOCK(lctx->lso); 3914 while (!LIST_EMPTY(&lctx->synq_head)) { 3915 toep = LIST_FIRST(&lctx->synq_head); 3916 LIST_REMOVE(toep, synq_entry); 3917 toep->tp_tp = NULL; 3918 t3_send_reset(toep); 3919 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); 3920 toepcb_release(toep); 3921 } 3922 SOCK_UNLOCK(lctx->lso); 3923} 3924 3925 3926int 3927t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl, 3928 unsigned int nppods, unsigned int tag, unsigned int maxoff, 3929 unsigned int pg_off, unsigned int color) 3930{ 3931 unsigned int i, j, pidx; 3932 struct pagepod *p; 3933 struct mbuf *m; 3934 struct ulp_mem_io *req; 3935 struct tcpcb *tp = sototcpcb(so); 3936 struct toepcb *toep = tp->t_toe; 3937 unsigned int tid = toep->tp_tid; 3938 const struct tom_data *td = TOM_DATA(TOE_DEV(so)); 3939 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; 3940 3941 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", 3942 gl, nppods, tag, maxoff, pg_off, color); 3943 3944 for (i = 0; i < nppods; ++i) { 3945 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); 3946 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 3947 req = mtod(m, struct ulp_mem_io *); 3948 m->m_pkthdr.len = m->m_len = sizeof(*req) + 
PPOD_SIZE; 3949 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 3950 req->wr.wr_lo = 0; 3951 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) | 3952 V_ULPTX_CMD(ULP_MEM_WRITE)); 3953 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) | 3954 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1)); 3955 3956 p = (struct pagepod *)(req + 1); 3957 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) { 3958 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); 3959 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) | 3960 V_PPOD_COLOR(color)); 3961 p->pp_max_offset = htonl(maxoff); 3962 p->pp_page_offset = htonl(pg_off); 3963 p->pp_rsvd = 0; 3964 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) 3965 p->pp_addr[j] = pidx < gl->dgl_nelem ? 3966 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0; 3967 } else 3968 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */ 3969 send_or_defer(toep, m, 0); 3970 ppod_addr += PPOD_SIZE; 3971 } 3972 return (0); 3973} 3974 3975/* 3976 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command. 3977 */ 3978static inline void 3979mk_cpl_barrier_ulp(struct cpl_barrier *b) 3980{ 3981 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b; 3982 3983 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3984 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8)); 3985 b->opcode = CPL_BARRIER; 3986} 3987 3988/* 3989 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command. 3990 */ 3991static inline void 3992mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno) 3993{ 3994 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 3995 3996 txpkt = (struct ulp_txpkt *)req; 3997 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3998 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 3999 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid)); 4000 req->cpuno = htons(cpuno); 4001} 4002 4003/* 4004 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command. 
 */
static inline void
mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
		     unsigned int word, uint64_t mask, uint64_t val)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
	    tid, word, mask, val);

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
	/* No completion requested; fire-and-forget TCB update. */
	req->reply = V_NO_REPLY(1);
	req->cpu_idx = 0;
	req->word = htons(word);
	req->mask = htobe64(mask);
	req->val = htobe64(val);
}

/*
 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
 */
static void
mk_rx_data_ack_ulp(struct socket *so, struct cpl_rx_data_ack *ack,
		   unsigned int tid, unsigned int credits)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
				 V_RX_DACK_MODE(TOM_TUNABLE(TOE_DEV(so), delack)) |
				 V_RX_CREDITS(credits));
}

/*
 * Cancel (invalidate) HW DDP buffer "bufidx" for this connection with a
 * compound WR: barrier, TCB flag update, GET_TCB (to learn how much data
 * landed in the buffer), barrier.  Caller holds the so_rcv sockbuf lock.
 */
void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_barrier *lock;
	struct cpl_set_tcb_field *req;
	struct cpl_get_tcb *getreq;
	struct ddp_state *p = &toep->tp_ddp_state;

	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
		sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	lock = (struct cpl_barrier *)(wr + 1);
	mk_cpl_barrier_ulp(lock);

	req = (struct cpl_set_tcb_field *)(lock + 1);

	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

	/* Hmmm, not sure if this actually a good thing: reactivating
	 * the other buffer might be an issue if it has been completed
	 * already. However, that is unlikely, since the fact that the UBUF
	 * is not completed indicates that there is no outstanding data.
	 */
	if (bufidx == 0)
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF0_VALID(1),
				     V_TF_DDP_ACTIVE_BUF(1));
	else
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
				     V_TF_DDP_ACTIVE_BUF(1) |
				     V_TF_DDP_BUF1_VALID(1), 0);

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/* Keep track of the number of outstanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
		  "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @sk: the socket associated with the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.
Read the TCB to 4115 * determine how made data was written into the buffer before the overlay 4116 * took place. 4117 */ 4118void 4119t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, 4120 unsigned int tag1, unsigned int len) 4121{ 4122 unsigned int wrlen; 4123 struct mbuf *m; 4124 struct work_request_hdr *wr; 4125 struct cpl_get_tcb *getreq; 4126 struct cpl_set_tcb_field *req; 4127 struct ddp_state *p = &toep->tp_ddp_state; 4128 4129 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", 4130 bufidx, tag0, tag1, len); 4131 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4132 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); 4133 m = m_gethdr_nofail(wrlen); 4134 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4135 wr = mtod(m, struct work_request_hdr *); 4136 m->m_pkthdr.len = m->m_len = wrlen; 4137 bzero(wr, wrlen); 4138 4139 4140 /* Set the ATOMIC flag to make sure that TP processes the following 4141 * CPLs in an atomic manner and no wire segments can be interleaved. 
4142 */ 4143 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); 4144 req = (struct cpl_set_tcb_field *)(wr + 1); 4145 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG, 4146 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) | 4147 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32, 4148 V_TCB_RX_DDP_BUF0_TAG(tag0) | 4149 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32); 4150 req++; 4151 if (bufidx == 0) { 4152 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN, 4153 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4154 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 4155 req++; 4156 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4157 V_TF_DDP_PUSH_DISABLE_0(1) | 4158 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4159 V_TF_DDP_PUSH_DISABLE_0(0) | 4160 V_TF_DDP_BUF0_VALID(1)); 4161 } else { 4162 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN, 4163 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN), 4164 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len)); 4165 req++; 4166 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4167 V_TF_DDP_PUSH_DISABLE_1(1) | 4168 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4169 V_TF_DDP_PUSH_DISABLE_1(0) | 4170 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1)); 4171 } 4172 4173 getreq = (struct cpl_get_tcb *)(req + 1); 4174 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4175 4176 /* Keep track of the number of oustanding CPL_GET_TCB requests 4177 */ 4178 p->get_tcb_count++; 4179 4180#ifdef T3_TRACE 4181 T3_TRACE4(TIDTB(sk), 4182 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u " 4183 "len %d", 4184 bufidx, tag0, tag1, len); 4185#endif 4186 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4187} 4188 4189/* 4190 * Sends a compound WR containing all the CPL messages needed to program the 4191 * two HW DDP buffers, namely optionally setting up the length and offset of 4192 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK. 
4193 */ 4194void 4195t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0, 4196 unsigned int len1, unsigned int offset1, 4197 uint64_t ddp_flags, uint64_t flag_mask, int modulate) 4198{ 4199 unsigned int wrlen; 4200 struct mbuf *m; 4201 struct work_request_hdr *wr; 4202 struct cpl_set_tcb_field *req; 4203 4204 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ", 4205 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff); 4206 4207 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4208 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) + 4209 (len1 ? sizeof(*req) : 0) + 4210 (modulate ? sizeof(struct cpl_rx_data_ack) : 0); 4211 m = m_gethdr_nofail(wrlen); 4212 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4213 wr = mtod(m, struct work_request_hdr *); 4214 bzero(wr, wrlen); 4215 4216 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4217 m->m_pkthdr.len = m->m_len = wrlen; 4218 4219 req = (struct cpl_set_tcb_field *)(wr + 1); 4220 if (len0) { /* program buffer 0 offset and length */ 4221 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET, 4222 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 4223 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4224 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) | 4225 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0)); 4226 req++; 4227 } 4228 if (len1) { /* program buffer 1 offset and length */ 4229 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET, 4230 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 4231 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32, 4232 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) | 4233 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32); 4234 req++; 4235 } 4236 4237 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask, 4238 ddp_flags); 4239 4240 if (modulate) { 4241 mk_rx_data_ack_ulp(toeptoso(toep), 4242 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid, 4243 toep->tp_copied_seq 
- toep->tp_rcv_wup); 4244 toep->tp_rcv_wup = toep->tp_copied_seq; 4245 } 4246 4247#ifdef T3_TRACE 4248 T3_TRACE5(TIDTB(sk), 4249 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x " 4250 "modulate %d", 4251 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff, 4252 modulate); 4253#endif 4254 4255 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4256} 4257 4258void 4259t3_init_wr_tab(unsigned int wr_len) 4260{ 4261 int i; 4262 4263 if (mbuf_wrs[1]) /* already initialized */ 4264 return; 4265 4266 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) { 4267 int sgl_len = (3 * i) / 2 + (i & 1); 4268 4269 sgl_len += 3; 4270 mbuf_wrs[i] = sgl_len <= wr_len ? 4271 1 : 1 + (sgl_len - 2) / (wr_len - 1); 4272 } 4273 4274 wrlen = wr_len * 8; 4275} 4276 4277int 4278t3_init_cpl_io(void) 4279{ 4280#ifdef notyet 4281 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); 4282 if (!tcphdr_skb) { 4283 log(LOG_ERR, 4284 "Chelsio TCP offload: can't allocate sk_buff\n"); 4285 return -1; 4286 } 4287 skb_put(tcphdr_skb, sizeof(struct tcphdr)); 4288 tcphdr_skb->h.raw = tcphdr_skb->data; 4289 memset(tcphdr_skb->data, 0, tcphdr_skb->len); 4290#endif 4291 4292 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); 4293 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); 4294 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack); 4295 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data); 4296 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 4297 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 4298 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish); 4299 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req); 4300 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 4301 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); 4302 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp); 4303 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); 4304 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, 
do_rx_urg_notify); 4305 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt); 4306 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); 4307 return (0); 4308} 4309 4310