cxgb_cpl_io.c revision 177340
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 177340 2008-03-18 03:55:12Z kmacy $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/socket.h> 43#include <sys/sysctl.h> 44#include <sys/syslog.h> 45#include <sys/socketvar.h> 46#include <sys/protosw.h> 47#include <sys/priv.h> 48 49#include <net/if.h> 50#include <net/route.h> 51 52#include <netinet/in.h> 53#include <netinet/in_pcb.h> 54#include <netinet/in_systm.h> 55#include <netinet/in_var.h> 56 57 58#include <dev/cxgb/cxgb_osdep.h> 59#include <dev/cxgb/sys/mbufq.h> 60 61#include <netinet/ip.h> 62#include <netinet/tcp_var.h> 63#include <netinet/tcp_fsm.h> 64#include <netinet/tcp_offload.h> 65#include <netinet/tcp_seq.h> 66#include <netinet/tcp_syncache.h> 67#include <netinet/tcp_timer.h> 68#include <net/route.h> 69 70#include <dev/cxgb/t3cdev.h> 71#include <dev/cxgb/common/cxgb_firmware_exports.h> 72#include <dev/cxgb/common/cxgb_t3_cpl.h> 73#include <dev/cxgb/common/cxgb_tcb.h> 74#include <dev/cxgb/common/cxgb_ctl_defs.h> 75#include <dev/cxgb/cxgb_l2t.h> 76#include <dev/cxgb/cxgb_offload.h> 77#include <vm/vm.h> 78#include <vm/pmap.h> 79#include <machine/bus.h> 80#include <dev/cxgb/sys/mvec.h> 81#include <dev/cxgb/ulp/toecore/cxgb_toedev.h> 82#include <dev/cxgb/ulp/tom/cxgb_defs.h> 83#include <dev/cxgb/ulp/tom/cxgb_tom.h> 84#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h> 85#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 86#include <dev/cxgb/ulp/tom/cxgb_tcp.h> 87 88/* 89 * For ULP connections HW may add headers, e.g., for digests, that aren't part 90 * of the messages sent by the host but that are part of the TCP payload and 91 * therefore consume TCP sequence space. 
Tx connection parameters that 92 * operate in TCP sequence space are affected by the HW additions and need to 93 * compensate for them to accurately track TCP sequence numbers. This array 94 * contains the compensating extra lengths for ULP packets. It is indexed by 95 * a packet's ULP submode. 96 */ 97const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 98 99#ifdef notyet 100/* 101 * This sk_buff holds a fake header-only TCP segment that we use whenever we 102 * need to exploit SW TCP functionality that expects TCP headers, such as 103 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple 104 * CPUs without locking. 105 */ 106static struct mbuf *tcphdr_mbuf __read_mostly; 107#endif 108 109/* 110 * Size of WRs in bytes. Note that we assume all devices we are handling have 111 * the same WR size. 112 */ 113static unsigned int wrlen __read_mostly; 114 115/* 116 * The number of WRs needed for an skb depends on the number of page fragments 117 * in the skb and whether it has any payload in its main body. This maps the 118 * length of the gather list represented by an skb into the # of necessary WRs. 119 */ 120static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; 121 122/* 123 * Max receive window supported by HW in bytes. Only a small part of it can 124 * be set through option0, the rest needs to be set through RX_DATA_ACK. 125 */ 126#define MAX_RCV_WND ((1U << 27) - 1) 127 128/* 129 * Min receive window. We want it to be large enough to accommodate receive 130 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
131 */ 132#define MIN_RCV_WND (24 * 1024U) 133#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) 134 135#define VALIDATE_SEQ 0 136#define VALIDATE_SOCK(so) 137#define DEBUG_WR 0 138 139extern int tcp_do_autorcvbuf; 140extern int tcp_do_autosndbuf; 141extern int tcp_autorcvbuf_max; 142extern int tcp_autosndbuf_max; 143 144static void t3_send_reset(struct toepcb *toep); 145static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status); 146static inline void free_atid(struct t3cdev *cdev, unsigned int tid); 147static void handle_syncache_event(int event, void *arg); 148 149static inline void 150SBAPPEND(struct sockbuf *sb, struct mbuf *n) 151{ 152 struct mbuf * m; 153 154 m = sb->sb_mb; 155 while (m) { 156 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 157 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 158 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 159 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 160 m->m_next, m->m_nextpkt, m->m_flags)); 161 m = m->m_next; 162 } 163 m = n; 164 while (m) { 165 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 166 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 167 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 168 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 169 m->m_next, m->m_nextpkt, m->m_flags)); 170 m = m->m_next; 171 } 172 sbappend_locked(sb, n); 173 m = sb->sb_mb; 174 while (m) { 175 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 176 m->m_next, m->m_nextpkt, m->m_flags)); 177 m = m->m_next; 178 } 179} 180 181static inline int 182is_t3a(const struct toedev *dev) 183{ 184 return (dev->tod_ttid == TOE_ID_CHELSIO_T3); 185} 186 187static void 188dump_toepcb(struct toepcb *toep) 189{ 190 DPRINTF("qset_idx=%d qset=%d 
ulp_mode=%d mtu_idx=%d tid=%d\n", 191 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, 192 toep->tp_mtu_idx, toep->tp_tid); 193 194 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", 195 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 196 toep->tp_mss_clamp, toep->tp_flags); 197} 198 199#ifndef RTALLOC2_DEFINED 200static struct rtentry * 201rtalloc2(struct sockaddr *dst, int report, u_long ignflags) 202{ 203 struct rtentry *rt = NULL; 204 205 if ((rt = rtalloc1(dst, report, ignflags)) != NULL) 206 RT_UNLOCK(rt); 207 208 return (rt); 209} 210#endif 211/* 212 * Determine whether to send a CPL message now or defer it. A message is 213 * deferred if the connection is in SYN_SENT since we don't know the TID yet. 214 * For connections in other states the message is sent immediately. 215 * If through_l2t is set the message is subject to ARP processing, otherwise 216 * it is sent directly. 217 */ 218static inline void 219send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t) 220{ 221 struct tcpcb *tp = toep->tp_tp; 222 223 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) { 224 INP_LOCK(tp->t_inpcb); 225 mbufq_tail(&toep->out_of_order_queue, m); // defer 226 INP_UNLOCK(tp->t_inpcb); 227 } else if (through_l2t) 228 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T 229 else 230 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly 231} 232 233static inline unsigned int 234mkprio(unsigned int cntrl, const struct toepcb *toep) 235{ 236 return (cntrl); 237} 238 239/* 240 * Populate a TID_RELEASE WR. The skb must be already propely sized. 
241 */ 242static inline void 243mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid) 244{ 245 struct cpl_tid_release *req; 246 247 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep)); 248 m->m_pkthdr.len = m->m_len = sizeof(*req); 249 req = mtod(m, struct cpl_tid_release *); 250 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 251 req->wr.wr_lo = 0; 252 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); 253} 254 255static inline void 256make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) 257{ 258 struct tcpcb *tp = sototcpcb(so); 259 struct toepcb *toep = tp->t_toe; 260 struct tx_data_wr *req; 261 262 INP_LOCK_ASSERT(tp->t_inpcb); 263 264 req = mtod(m, struct tx_data_wr *); 265 m->m_len = sizeof(*req); 266 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 267 req->wr_lo = htonl(V_WR_TID(toep->tp_tid)); 268 /* len includes the length of any HW ULP additions */ 269 req->len = htonl(len); 270 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx)); 271 /* V_TX_ULP_SUBMODE sets both the mode and submode */ 272 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) | 273 V_TX_URG(/* skb_urgent(skb) */ 0 ) | 274 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) && 275 (tail ? 0 : 1)))); 276 req->sndseq = htonl(tp->snd_nxt); 277 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) { 278 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 279 V_TX_CPU_IDX(toep->tp_qset)); 280 281 /* Sendbuffer is in units of 32KB. 
282 */ 283 if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) 284 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15)); 285 else 286 req->param |= htonl(V_TX_SNDBUF(so->so_snd.sb_hiwat >> 15)); 287 toep->tp_flags |= TP_DATASENT; 288 } 289} 290 291#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */ 292 293int 294t3_push_frames(struct socket *so, int req_completion) 295{ 296 struct tcpcb *tp = sototcpcb(so); 297 struct toepcb *toep = tp->t_toe; 298 299 struct mbuf *tail, *m0, *last; 300 struct t3cdev *cdev; 301 struct tom_data *d; 302 int i, bytes, count, total_bytes; 303 bus_dma_segment_t segs[TX_MAX_SEGS], *segp; 304 305 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { 306 DPRINTF("tcp state=%d\n", tp->t_state); 307 return (0); 308 } 309 310 if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { 311 DPRINTF("disconnecting\n"); 312 313 return (0); 314 } 315 316 317 INP_LOCK_ASSERT(tp->t_inpcb); 318 SOCKBUF_LOCK(&so->so_snd); 319 d = TOM_DATA(TOE_DEV(so)); 320 cdev = d->cdev; 321 last = tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb; 322 total_bytes = 0; 323 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n", 324 toep->tp_wr_avail, tail, so->so_snd.sb_cc, toep->tp_m_last); 325 326 if (last && toep->tp_m_last == last && so->so_snd.sb_sndptroff != 0) { 327 KASSERT(tail, ("sbdrop error")); 328 last = tail = tail->m_next; 329 } 330 331 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { 332 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail); 333 SOCKBUF_UNLOCK(&so->so_snd); 334 return (0); 335 } 336 337 toep->tp_m_last = NULL; 338 while (toep->tp_wr_avail && (tail != NULL)) { 339 count = bytes = 0; 340 segp = segs; 341 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { 342 SOCKBUF_UNLOCK(&so->so_snd); 343 return (0); 344 } 345 /* 346 * If the data in tail fits as in-line, then 347 * make an immediate data wr. 
348 */ 349 if (tail->m_len <= IMM_LEN) { 350 count = 1; 351 bytes = tail->m_len; 352 last = tail; 353 tail = tail->m_next; 354 m_set_sgl(m0, NULL); 355 m_set_sgllen(m0, 0); 356 make_tx_data_wr(so, m0, bytes, tail); 357 m_append(m0, bytes, mtod(last, caddr_t)); 358 KASSERT(!m0->m_next, ("bad append")); 359 } else { 360 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) 361 && (tail != NULL) && (count < TX_MAX_SEGS-1)) { 362 bytes += tail->m_len; 363 last = tail; 364 count++; 365 /* 366 * technically an abuse to be using this for a VA 367 * but less gross than defining my own structure 368 * or calling pmap_kextract from here :-| 369 */ 370 segp->ds_addr = (bus_addr_t)tail->m_data; 371 segp->ds_len = tail->m_len; 372 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n", 373 count, mbuf_wrs[count], tail->m_data, tail->m_len); 374 segp++; 375 tail = tail->m_next; 376 } 377 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n", 378 toep->tp_wr_avail, count, mbuf_wrs[count], tail); 379 380 m_set_sgl(m0, segs); 381 m_set_sgllen(m0, count); 382 make_tx_data_wr(so, m0, bytes, tail); 383 } 384 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep)); 385 386 if (tail) { 387 so->so_snd.sb_sndptr = tail; 388 toep->tp_m_last = NULL; 389 } else 390 toep->tp_m_last = so->so_snd.sb_sndptr = last; 391 392 393 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last); 394 395 so->so_snd.sb_sndptroff += bytes; 396 total_bytes += bytes; 397 toep->tp_write_seq += bytes; 398 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d", 399 toep->tp_wr_avail, count, mbuf_wrs[count], tail, so->so_snd.sb_sndptr, so->so_snd.sb_sndptroff); 400 if (tail) 401 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x", 402 total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una); 403 else 404 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p snd_una=0x%08x", 405 total_bytes, toep->tp_m_last, tp->snd_una); 406 407 408 i = 0; 409 while (i < count && 
m_get_sgllen(m0)) { 410 if ((count - i) >= 3) { 411 CTR6(KTR_TOM, 412 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d pa=0x%zx len=%d", 413 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len, 414 segs[i + 2].ds_addr, segs[i + 2].ds_len); 415 i += 3; 416 } else if ((count - i) == 2) { 417 CTR4(KTR_TOM, 418 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d", 419 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len); 420 i += 2; 421 } else { 422 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d", 423 segs[i].ds_addr, segs[i].ds_len); 424 i++; 425 } 426 427 } 428 429 /* 430 * remember credits used 431 */ 432 m0->m_pkthdr.csum_data = mbuf_wrs[count]; 433 m0->m_pkthdr.len = bytes; 434 toep->tp_wr_avail -= mbuf_wrs[count]; 435 toep->tp_wr_unacked += mbuf_wrs[count]; 436 437 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) || 438 toep->tp_wr_unacked >= toep->tp_wr_max / 2) { 439 struct work_request_hdr *wr = cplhdr(m0); 440 441 wr->wr_hi |= htonl(F_WR_COMPL); 442 toep->tp_wr_unacked = 0; 443 } 444 KASSERT((m0->m_pkthdr.csum_data > 0) && 445 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d", 446 m0->m_pkthdr.csum_data)); 447 m0->m_type = MT_DONTFREE; 448 enqueue_wr(toep, m0); 449 DPRINTF("sending offload tx with %d bytes in %d segments\n", 450 bytes, count); 451 l2t_send(cdev, m0, toep->tp_l2t); 452 } 453 SOCKBUF_UNLOCK(&so->so_snd); 454 return (total_bytes); 455} 456 457/* 458 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail 459 * under any circumstances. We take the easy way out and always queue the 460 * message to the write_queue. We can optimize the case where the queue is 461 * already empty though the optimization is probably not worth it. 
462 */ 463static void 464close_conn(struct socket *so) 465{ 466 struct mbuf *m; 467 struct cpl_close_con_req *req; 468 struct tom_data *d; 469 struct inpcb *inp = sotoinpcb(so); 470 struct tcpcb *tp; 471 struct toepcb *toep; 472 unsigned int tid; 473 474 475 INP_LOCK(inp); 476 tp = sototcpcb(so); 477 toep = tp->t_toe; 478 479 if (tp->t_state != TCPS_SYN_SENT) 480 t3_push_frames(so, 1); 481 482 if (toep->tp_flags & TP_FIN_SENT) { 483 INP_UNLOCK(inp); 484 return; 485 } 486 487 tid = toep->tp_tid; 488 489 d = TOM_DATA(toep->tp_toedev); 490 491 m = m_gethdr_nofail(sizeof(*req)); 492 493 toep->tp_flags |= TP_FIN_SENT; 494 req = mtod(m, struct cpl_close_con_req *); 495 496 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); 497 req->wr.wr_lo = htonl(V_WR_TID(tid)); 498 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 499 req->rsvd = htonl(toep->tp_write_seq); 500 INP_UNLOCK(inp); 501 /* 502 * XXX - need to defer shutdown while there is still data in the queue 503 * 504 */ 505 cxgb_ofld_send(d->cdev, m); 506 507} 508 509/* 510 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant 511 * and send it along. 512 */ 513static void 514abort_arp_failure(struct t3cdev *cdev, struct mbuf *m) 515{ 516 struct cpl_abort_req *req = cplhdr(m); 517 518 req->cmd = CPL_ABORT_NO_RST; 519 cxgb_ofld_send(cdev, m); 520} 521 522/* 523 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are 524 * permitted to return without sending the message in case we cannot allocate 525 * an sk_buff. Returns the number of credits sent. 
526 */ 527uint32_t 528t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail) 529{ 530 struct mbuf *m; 531 struct cpl_rx_data_ack *req; 532 struct toepcb *toep = tp->t_toe; 533 struct toedev *tdev = toep->tp_toedev; 534 535 m = m_gethdr_nofail(sizeof(*req)); 536 537 DPRINTF("returning %u credits to HW\n", credits); 538 539 req = mtod(m, struct cpl_rx_data_ack *); 540 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 541 req->wr.wr_lo = 0; 542 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 543 req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); 544 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep)); 545 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 546 return (credits); 547} 548 549/* 550 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled. 551 * This is only used in DDP mode, so we take the opportunity to also set the 552 * DACK mode and flush any Rx credits. 553 */ 554void 555t3_send_rx_modulate(struct toepcb *toep) 556{ 557 struct mbuf *m; 558 struct cpl_rx_data_ack *req; 559 560 m = m_gethdr_nofail(sizeof(*req)); 561 562 req = mtod(m, struct cpl_rx_data_ack *); 563 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 564 req->wr.wr_lo = 0; 565 m->m_pkthdr.len = m->m_len = sizeof(*req); 566 567 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 568 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 569 V_RX_DACK_MODE(1) | 570 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup)); 571 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 572 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 573 toep->tp_rcv_wup = toep->tp_copied_seq; 574} 575 576/* 577 * Handle receipt of an urgent pointer. 
578 */ 579static void 580handle_urg_ptr(struct socket *so, uint32_t urg_seq) 581{ 582#ifdef URGENT_DATA_SUPPORTED 583 struct tcpcb *tp = sototcpcb(so); 584 585 urg_seq--; /* initially points past the urgent data, per BSD */ 586 587 if (tp->urg_data && !after(urg_seq, tp->urg_seq)) 588 return; /* duplicate pointer */ 589 sk_send_sigurg(sk); 590 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 591 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) { 592 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 593 594 tp->copied_seq++; 595 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len) 596 tom_eat_skb(sk, skb, 0); 597 } 598 tp->urg_data = TCP_URG_NOTYET; 599 tp->urg_seq = urg_seq; 600#endif 601} 602 603/* 604 * Returns true if a socket cannot accept new Rx data. 605 */ 606static inline int 607so_no_receive(const struct socket *so) 608{ 609 return (so->so_state & (SS_ISDISCONNECTED|SS_ISDISCONNECTING)); 610} 611 612/* 613 * Process an urgent data notification. 614 */ 615static void 616rx_urg_notify(struct toepcb *toep, struct mbuf *m) 617{ 618 struct cpl_rx_urg_notify *hdr = cplhdr(m); 619 struct socket *so = toeptoso(toep); 620 621 VALIDATE_SOCK(so); 622 623 if (!so_no_receive(so)) 624 handle_urg_ptr(so, ntohl(hdr->seq)); 625 626 m_freem(m); 627} 628 629/* 630 * Handler for RX_URG_NOTIFY CPL messages. 631 */ 632static int 633do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx) 634{ 635 struct toepcb *toep = (struct toepcb *)ctx; 636 637 rx_urg_notify(toep, m); 638 return (0); 639} 640 641static __inline int 642is_delack_mode_valid(struct toedev *dev, struct toepcb *toep) 643{ 644 return (toep->tp_ulp_mode || 645 (toep->tp_ulp_mode == ULP_MODE_TCPDDP && 646 dev->tod_ttid >= TOE_ID_CHELSIO_T3)); 647} 648 649/* 650 * Set of states for which we should return RX credits. 651 */ 652#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 653 654/* 655 * Called after some received data has been read. 
It returns RX credits 656 * to the HW for the amount of data processed. 657 */ 658void 659t3_cleanup_rbuf(struct tcpcb *tp, int copied) 660{ 661 struct toepcb *toep = tp->t_toe; 662 struct socket *so; 663 struct toedev *dev; 664 int dack_mode, must_send, read; 665 u32 thres, credits, dack = 0; 666 667 so = tp->t_inpcb->inp_socket; 668 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 669 (tp->t_state == TCPS_FIN_WAIT_2))) { 670 if (copied) { 671 SOCKBUF_LOCK(&so->so_rcv); 672 toep->tp_copied_seq += copied; 673 SOCKBUF_UNLOCK(&so->so_rcv); 674 } 675 676 return; 677 } 678 679 INP_LOCK_ASSERT(tp->t_inpcb); 680 SOCKBUF_LOCK(&so->so_rcv); 681 if (copied) 682 toep->tp_copied_seq += copied; 683 else { 684 read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; 685 toep->tp_copied_seq += read; 686 } 687 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 688 toep->tp_enqueued_bytes = so->so_rcv.sb_cc; 689 SOCKBUF_UNLOCK(&so->so_rcv); 690 691 if (credits > so->so_rcv.sb_mbmax) { 692 printf("copied_seq=%u rcv_wup=%u credits=%u\n", 693 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 694 credits = so->so_rcv.sb_mbmax; 695 } 696 697 698 /* 699 * XXX this won't accurately reflect credit return - we need 700 * to look at the difference between the amount that has been 701 * put in the recv sockbuf and what is there now 702 */ 703 704 if (__predict_false(!credits)) 705 return; 706 707 dev = toep->tp_toedev; 708 thres = TOM_TUNABLE(dev, rx_credit_thres); 709 710 if (__predict_false(thres == 0)) 711 return; 712 713 if (is_delack_mode_valid(dev, toep)) { 714 dack_mode = TOM_TUNABLE(dev, delack); 715 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 716 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 717 718 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 719 dack = F_RX_DACK_CHANGE | 720 V_RX_DACK_MODE(dack_mode); 721 } 722 } else 723 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 724 725 /* 726 * For coalescing to work effectively ensure the receive 
window has 727 * at least 16KB left. 728 */ 729 must_send = credits + 16384 >= tp->rcv_wnd; 730 731 if (must_send || credits >= thres) 732 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 733} 734 735static int 736cxgb_toe_disconnect(struct tcpcb *tp) 737{ 738 struct socket *so; 739 740 DPRINTF("cxgb_toe_disconnect\n"); 741 742 so = tp->t_inpcb->inp_socket; 743 close_conn(so); 744 return (0); 745} 746 747static int 748cxgb_toe_reset(struct tcpcb *tp) 749{ 750 struct toepcb *toep = tp->t_toe; 751 752 753 t3_send_reset(toep); 754 755 /* 756 * unhook from socket 757 */ 758 tp->t_flags &= ~TF_TOE; 759 toep->tp_tp = NULL; 760 tp->t_toe = NULL; 761 return (0); 762} 763 764static int 765cxgb_toe_send(struct tcpcb *tp) 766{ 767 struct socket *so; 768 769 DPRINTF("cxgb_toe_send\n"); 770 dump_toepcb(tp->t_toe); 771 772 so = tp->t_inpcb->inp_socket; 773 t3_push_frames(so, 1); 774 return (0); 775} 776 777static int 778cxgb_toe_rcvd(struct tcpcb *tp) 779{ 780 INP_LOCK_ASSERT(tp->t_inpcb); 781 t3_cleanup_rbuf(tp, 0); 782 783 return (0); 784} 785 786static void 787cxgb_toe_detach(struct tcpcb *tp) 788{ 789 struct toepcb *toep; 790 /* 791 * XXX how do we handle teardown in the SYN_SENT state? 
792 * 793 */ 794 INP_INFO_WLOCK(&tcbinfo); 795 toep = tp->t_toe; 796 toep->tp_tp = NULL; 797 798 /* 799 * unhook from socket 800 */ 801 tp->t_flags &= ~TF_TOE; 802 tp->t_toe = NULL; 803 INP_INFO_WUNLOCK(&tcbinfo); 804} 805 806 807static struct toe_usrreqs cxgb_toe_usrreqs = { 808 .tu_disconnect = cxgb_toe_disconnect, 809 .tu_reset = cxgb_toe_reset, 810 .tu_send = cxgb_toe_send, 811 .tu_rcvd = cxgb_toe_rcvd, 812 .tu_detach = cxgb_toe_detach, 813 .tu_detach = cxgb_toe_detach, 814 .tu_syncache_event = handle_syncache_event, 815}; 816 817 818static void 819__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 820 uint64_t mask, uint64_t val, int no_reply) 821{ 822 struct cpl_set_tcb_field *req; 823 824 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 825 toep->tp_tid, word, mask, val); 826 827 req = mtod(m, struct cpl_set_tcb_field *); 828 m->m_pkthdr.len = m->m_len = sizeof(*req); 829 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 830 req->wr.wr_lo = 0; 831 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 832 req->reply = V_NO_REPLY(no_reply); 833 req->cpu_idx = 0; 834 req->word = htons(word); 835 req->mask = htobe64(mask); 836 req->val = htobe64(val); 837 838 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 839 send_or_defer(toep, m, 0); 840} 841 842static void 843t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val) 844{ 845 struct mbuf *m; 846 struct tcpcb *tp = sototcpcb(so); 847 struct toepcb *toep = tp->t_toe; 848 849 if (toep == NULL) 850 return; 851 852 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 853 printf("not seting field\n"); 854 return; 855 } 856 857 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 858 859 __set_tcb_field(toep, m, word, mask, val, 1); 860} 861 862/* 863 * Set one of the t_flags bits in the TCB. 
864 */ 865static void 866set_tcb_tflag(struct socket *so, unsigned int bit_pos, int val) 867{ 868 t3_set_tcb_field(so, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); 869} 870 871/* 872 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. 873 */ 874static void 875t3_set_nagle(struct socket *so) 876{ 877 struct tcpcb *tp = sototcpcb(so); 878 879 set_tcb_tflag(so, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); 880} 881 882/* 883 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. 884 */ 885void 886t3_set_keepalive(struct socket *so, int on_off) 887{ 888 set_tcb_tflag(so, S_TF_KEEPALIVE, on_off); 889} 890 891void 892t3_set_rcv_coalesce_enable(struct socket *so, int on_off) 893{ 894 set_tcb_tflag(so, S_TF_RCV_COALESCE_ENABLE, on_off); 895} 896 897void 898t3_set_dack_mss(struct socket *so, int on_off) 899{ 900 set_tcb_tflag(so, S_TF_DACK_MSS, on_off); 901} 902 903/* 904 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. 905 */ 906static void 907t3_set_tos(struct socket *so) 908{ 909 t3_set_tcb_field(so, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), 910 V_TCB_TOS(SO_TOS(so))); 911} 912 913 914/* 915 * In DDP mode, TP fails to schedule a timer to push RX data to the host when 916 * DDP is disabled (data is delivered to freelist). [Note that, the peer should 917 * set the PSH bit in the last segment, which would trigger delivery.] 918 * We work around the issue by setting a DDP buffer in a partial placed state, 919 * which guarantees that TP will schedule a timer. 
920 */ 921#define TP_DDP_TIMER_WORKAROUND_MASK\ 922 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\ 923 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\ 924 V_TCB_RX_DDP_BUF0_LEN(3)) << 32)) 925#define TP_DDP_TIMER_WORKAROUND_VAL\ 926 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\ 927 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\ 928 32)) 929 930static void 931t3_enable_ddp(struct socket *so, int on) 932{ 933 if (on) { 934 935 t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), 936 V_TF_DDP_OFF(0)); 937 } else 938 t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, 939 V_TF_DDP_OFF(1) | 940 TP_DDP_TIMER_WORKAROUND_MASK, 941 V_TF_DDP_OFF(1) | 942 TP_DDP_TIMER_WORKAROUND_VAL); 943 944} 945 946void 947t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag_color) 948{ 949 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_TAG + buf_idx, 950 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), 951 tag_color); 952} 953 954void 955t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset, 956 unsigned int len) 957{ 958 if (buf_idx == 0) 959 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_OFFSET, 960 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 961 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 962 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | 963 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 964 else 965 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF1_OFFSET, 966 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 967 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), 968 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | 969 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32)); 970} 971 972static int 973t3_set_cong_control(struct socket *so, const char *name) 974{ 975#ifdef CONGESTION_CONTROL_SUPPORTED 976 int cong_algo; 977 978 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++) 979 if (!strcmp(name, t3_cong_ops[cong_algo].name)) 980 break; 981 982 if (cong_algo >= ARRAY_SIZE(t3_cong_ops)) 983 return -EINVAL; 984#endif 985 return 0; 
986} 987 988int 989t3_get_tcb(struct socket *so) 990{ 991 struct cpl_get_tcb *req; 992 struct tcpcb *tp = sototcpcb(so); 993 struct toepcb *toep = tp->t_toe; 994 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 995 996 if (!m) 997 return (ENOMEM); 998 999 INP_LOCK_ASSERT(tp->t_inpcb); 1000 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 1001 req = mtod(m, struct cpl_get_tcb *); 1002 m->m_pkthdr.len = m->m_len = sizeof(*req); 1003 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 1004 req->wr.wr_lo = 0; 1005 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); 1006 req->cpuno = htons(toep->tp_qset); 1007 req->rsvd = 0; 1008 if (sototcpcb(so)->t_state == TCPS_SYN_SENT) 1009 mbufq_tail(&toep->out_of_order_queue, m); // defer 1010 else 1011 cxgb_ofld_send(T3C_DEV(so), m); 1012 return 0; 1013} 1014 1015static inline void 1016so_insert_tid(struct tom_data *d, struct socket *so, unsigned int tid) 1017{ 1018 struct toepcb *toep = sototoep(so); 1019 toepcb_hold(toep); 1020 1021 cxgb_insert_tid(d->cdev, d->client, toep, tid); 1022} 1023 1024/** 1025 * find_best_mtu - find the entry in the MTU table closest to an MTU 1026 * @d: TOM state 1027 * @mtu: the target MTU 1028 * 1029 * Returns the index of the value in the MTU table that is closest to but 1030 * does not exceed the target MTU. 
1031 */ 1032static unsigned int 1033find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1034{ 1035 int i = 0; 1036 1037 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1038 ++i; 1039 return (i); 1040} 1041 1042static unsigned int 1043select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1044{ 1045 unsigned int idx; 1046 1047#ifdef notyet 1048 struct rtentry *dst = sotoinpcb(so)->inp_route.ro_rt; 1049#endif 1050 if (tp) { 1051 tp->t_maxseg = pmtu - 40; 1052 if (tp->t_maxseg < td->mtus[0] - 40) 1053 tp->t_maxseg = td->mtus[0] - 40; 1054 idx = find_best_mtu(td, tp->t_maxseg + 40); 1055 1056 tp->t_maxseg = td->mtus[idx] - 40; 1057 } else 1058 idx = find_best_mtu(td, pmtu); 1059 1060 return (idx); 1061} 1062 1063static inline void 1064free_atid(struct t3cdev *cdev, unsigned int tid) 1065{ 1066 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1067 1068 if (toep) 1069 toepcb_release(toep); 1070} 1071 1072/* 1073 * Release resources held by an offload connection (TID, L2T entry, etc.) 
 */
static void
t3_release_offload_resources(struct toepcb *toep)
{
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev;
	unsigned int tid = toep->tp_tid;

	/* Nothing to release if the connection was never offloaded. */
	if (!tdev)
		return;

	cdev = TOEP_T3C_DEV(toep);
	if (!cdev)
		return;

	toep->tp_qset = 0;
	t3_release_ddp_resources(toep);

#ifdef CTRL_SKB_CACHE
	kfree_skb(CTRL_SKB_CACHE(tp));
	CTRL_SKB_CACHE(tp) = NULL;
#endif

	/* Outstanding work requests imply a non-empty WR list to purge. */
	if (toep->tp_wr_avail != toep->tp_wr_max) {
		purge_wr_queue(toep);
		reset_wr_list(toep);
	}

	if (toep->tp_l2t) {
		l2t_release(L2DATA(cdev), toep->tp_l2t);
		toep->tp_l2t = NULL;
	}
	/* Sever the tcpcb <-> toepcb linkage in both directions. */
	toep->tp_tp = NULL;
	if (tp) {
		INP_LOCK_ASSERT(tp->t_inpcb);
		tp->t_toe = NULL;
		tp->t_flags &= ~TF_TOE;
	}

	/*
	 * A connection still in SYN_SENT only holds an ATID; otherwise it
	 * owns a real TID whose table reference must be dropped as well.
	 */
	if (toep->tp_state == TCPS_SYN_SENT) {
		free_atid(cdev, tid);
#ifdef notyet
		__skb_queue_purge(&tp->out_of_order_queue);
#endif
	} else {		// we have TID
		cxgb_remove_tid(cdev, toep, tid);
		toepcb_release(toep);
	}
#if 0
	log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
}

/*
 * Switch the socket over to the offload protocol-switch operations and
 * mark the tcpcb as TOE-managed.  Caller guarantees tp->t_toe is set.
 */
static void
install_offload_ops(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	KASSERT(tp->t_toe != NULL, ("toepcb not set"));

	t3_install_socket_ops(so);
	tp->t_flags |= TF_TOE;
	tp->t_tu = &cxgb_toe_usrreqs;
}

/*
 * Determine the receive window scaling factor given a target max
 * receive window.
 */
static __inline int
select_rcv_wscale(int space)
{
	int wscale = 0;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	/* RFC 1323 window scaling: smallest shift that fits, capped at 14. */
	if (tcp_do_rfc1323)
		for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;

	return (wscale);
}

/*
 * Determine the receive window size for a socket.
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
	struct tom_data *d = TOM_DATA(dev);
	unsigned int wnd;
	unsigned int max_rcv_wnd;

	if (tcp_do_autorcvbuf)
		wnd = tcp_autorcvbuf_max;
	else
		wnd = so->so_rcv.sb_hiwat;

	/* XXX
	 * For receive coalescing to work effectively we need a receive window
	 * that can accommodate a coalesced segment.
	 */
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	/* PR 5138: pre-T3C parts cap the window at 23 RX pages. */
	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
	    (uint32_t)d->rx_page_size * 23 :
	    MAX_RCV_WND);

	return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields.  This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
	struct tcpcb *tp = sototcpcb(so);
	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);

	SOCK_LOCK_ASSERT(so);

	/* NOTE(review): debug printf left over from bring-up; consider
	 * demoting to DPRINTF.  Not removed here to preserve behavior. */
	printf("initializing offload socket\n");
	/*
	 * We either need to fix push frames to work with sbcompress
	 * or we need to add this
	 */
	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	/* Cross-link the tcpcb and the toepcb. */
	tp->t_toe = toep;
	toep->tp_tp = tp;
	toep->tp_toedev = dev;

	toep->tp_tid = tid;
	toep->tp_l2t = e;
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_delack_mode = 0;

	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
	/*
	 * XXX broken
	 *
	 */
	tp->rcv_wnd = select_rcv_wnd(dev, so);

	/* Use DDP only when enabled, permitted, and the window is large
	 * enough to make direct placement worthwhile. */
	toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
	toep->tp_qset_idx = 0;

	reset_wr_list(toep);
	DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 * (opt0h/opt0l are the high/low halves of the CPL_ACT_OPEN_REQ option 0
 * word programmed into the HW TCB.)
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
	struct tcpcb *tp = sototcpcb(so);
	int wscale = select_rcv_wscale(tp->rcv_wnd);

	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
	    V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
	struct tcpcb *tp = sototcpcb(so);
	unsigned int val;

	/* Receive buffer size is programmed in 1KB units. */
	val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
	       V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val);
	return (val);
}

/*
 * Calculate the option 2 value: selects the congestion-control flavor when
 * one has been configured via the cong_alg tunable.
 */
static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
	int flv_valid;

	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

	return (V_FLAVORS_VALID(flv_valid) |
	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
/*
 * Debug helper: sum the WR credits recorded (in csum_data) across all
 * mbufs still on the pending work-request list.
 */
static int
count_pending_wrs(const struct toepcb *toep)
{
	const struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)
		n += m->m_pkthdr.csum_data;
	return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

/*
 * Build a CPL_ACT_OPEN_REQ in @m for an active open of the connection
 * described by @so, using ATID @atid and L2 table entry @e.
 */
static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
	struct cpl_act_open_req *req;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = TOE_DEV(so);

	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

	req = mtod(m, struct cpl_act_open_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	/* inpcb ports/addresses are already in network byte order. */
	req->local_port = inp->inp_lport;
	req->peer_port = inp->inp_fport;
	memcpy(&req->local_ip, &inp->inp_laddr, 4);
	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
	    V_TX_CHANNEL(e->smt_idx));
	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
	req->params = 0;
	req->opt2 = htonl(calc_opt2(so, tdev));
}


/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return (ECONNREFUSED);
	case CPL_ERR_ARP_MISS:
		return (EHOSTUNREACH);
	case CPL_ERR_CONN_TIMEDOUT:
		return (ETIMEDOUT);
	case CPL_ERR_TCAM_FULL:
		return (ENOMEM);
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return (EADDRINUSE);
	default:
		return (EIO);
	}
}

/*
 * Tear down a failed active open: release offload resources and drop the
 * connection with @errno.  Caller holds the inpcb lock when tp is set.
 */
static void
fail_act_open(struct toepcb *toep, int errno)
{
	struct tcpcb *tp = toep->tp_tp;

	t3_release_offload_resources(toep);
	if (tp) {
		INP_LOCK_ASSERT(tp->t_inpcb);
		tcp_drop(tp, errno);
	}

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	struct inpcb *inp;

	/* tcbinfo write lock required because fail_act_open may tcp_drop. */
	INP_INFO_WLOCK(&tcbinfo);
	if (toep->tp_tp == NULL)
		goto done;

	inp = toep->tp_tp->t_inpcb;
	INP_LOCK(inp);

/*
 * Don't handle connection retry for now
 */
#ifdef notyet
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
			       jiffies + HZ / 2);
	} else
#endif
		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
	INP_UNLOCK(inp);
done:
	INP_INFO_WUNLOCK(&tcbinfo);

	m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 * (TCAM-full, connection-exists, and ARP-miss failures never got one).
 */
static inline int
act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	    status != CPL_ERR_ARP_MISS;
}

/*
 * Process an ACT_OPEN_RPL CPL message.
 */
static int
do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct cpl_act_open_rpl *rpl = cplhdr(m);

	/* On non-T3A parts a failed open may still have consumed a TID that
	 * must be queued for release. */
	if (cdev->type != T3A && act_open_has_tid(rpl->status))
		cxgb_queue_tid_release(cdev, GET_TID(rpl));

	active_open_failed(toep, m);
	return (0);
}

/*
 * Handle an ARP failure for an active open. XXX purge ofo queue
 *
 * XXX badly broken for crossed SYNs as the ATID is no longer valid.
 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't
 * free the atid. Hmm.
 *
 * NOTE(review): this disabled code passes a socket to fail_act_open(),
 * which takes a toepcb — fix before enabling.
 */
#ifdef notyet
static void
act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
{
	struct toepcb *toep = m_get_toep(m);
	struct tcpcb *tp = toep->tp_tp;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = toeptoso(toep);

	INP_LOCK(inp);
	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
		fail_act_open(so, EHOSTUNREACH);
		printf("freeing %p\n", m);

		m_free(m);
	}
	INP_UNLOCK(inp);
}
#endif
/*
 * Send an active open request.
1452 */ 1453int 1454t3_connect(struct toedev *tdev, struct socket *so, 1455 struct rtentry *rt, struct sockaddr *nam) 1456{ 1457 struct mbuf *m; 1458 struct l2t_entry *e; 1459 struct tom_data *d = TOM_DATA(tdev); 1460 struct inpcb *inp = sotoinpcb(so); 1461 struct tcpcb *tp = intotcpcb(inp); 1462 struct toepcb *toep; /* allocated by init_offload_socket */ 1463 1464 int atid; 1465 1466 toep = toepcb_alloc(); 1467 if (toep == NULL) 1468 goto out_err; 1469 1470 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1471 goto out_err; 1472 1473 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1474 if (!e) 1475 goto free_tid; 1476 1477 INP_LOCK_ASSERT(inp); 1478 m = m_gethdr(MT_DATA, M_WAITOK); 1479 1480#if 0 1481 m->m_toe.mt_toepcb = tp->t_toe; 1482 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1483#endif 1484 SOCK_LOCK(so); 1485 1486 init_offload_socket(so, tdev, atid, e, rt, toep); 1487 1488 install_offload_ops(so); 1489 1490 mk_act_open_req(so, m, atid, e); 1491 SOCK_UNLOCK(so); 1492 1493 soisconnecting(so); 1494 toep = tp->t_toe; 1495 m_set_toep(m, tp->t_toe); 1496 1497 toep->tp_state = TCPS_SYN_SENT; 1498 l2t_send(d->cdev, (struct mbuf *)m, e); 1499 1500 if (toep->tp_ulp_mode) 1501 t3_enable_ddp(so, 0); 1502 return (0); 1503 1504free_tid: 1505 printf("failing connect - free atid\n"); 1506 1507 free_atid(d->cdev, atid); 1508out_err: 1509 printf("return ENOMEM\n"); 1510 return (ENOMEM); 1511} 1512 1513/* 1514 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1515 * not send multiple ABORT_REQs for the same connection and also that we do 1516 * not try to send a message after the connection has closed. Returns 1 if 1517 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
1518 */ 1519static void 1520t3_send_reset(struct toepcb *toep) 1521{ 1522 1523 struct cpl_abort_req *req; 1524 unsigned int tid = toep->tp_tid; 1525 int mode = CPL_ABORT_SEND_RST; 1526 struct tcpcb *tp = toep->tp_tp; 1527 struct toedev *tdev = toep->tp_toedev; 1528 struct socket *so = NULL; 1529 struct mbuf *m; 1530 1531 if (tp) { 1532 INP_LOCK_ASSERT(tp->t_inpcb); 1533 so = toeptoso(toep); 1534 } 1535 1536 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) || 1537 tdev == NULL)) 1538 return; 1539 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN); 1540 1541 /* Purge the send queue so we don't send anything after an abort. */ 1542 if (so) 1543 sbflush(&so->so_snd); 1544 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev)) 1545 mode |= CPL_ABORT_POST_CLOSE_REQ; 1546 1547 m = m_gethdr_nofail(sizeof(*req)); 1548 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep)); 1549 set_arp_failure_handler(m, abort_arp_failure); 1550 1551 req = mtod(m, struct cpl_abort_req *); 1552 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); 1553 req->wr.wr_lo = htonl(V_WR_TID(tid)); 1554 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); 1555 req->rsvd0 = tp ? 
htonl(tp->snd_nxt) : 0; 1556 req->rsvd1 = !(toep->tp_flags & TP_DATASENT); 1557 req->cmd = mode; 1558 if (tp && (tp->t_state == TCPS_SYN_SENT)) 1559 mbufq_tail(&toep->out_of_order_queue, m); // defer 1560 else 1561 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); 1562} 1563 1564static int 1565t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) 1566{ 1567 struct inpcb *inp; 1568 int error, optval; 1569 1570 if (sopt->sopt_name == IP_OPTIONS) 1571 return (ENOPROTOOPT); 1572 1573 if (sopt->sopt_name != IP_TOS) 1574 return (EOPNOTSUPP); 1575 1576 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 1577 1578 if (error) 1579 return (error); 1580 1581 if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread)) 1582 return (EPERM); 1583 1584 inp = sotoinpcb(so); 1585 inp->inp_ip_tos = optval; 1586 1587 t3_set_tos(so); 1588 1589 return (0); 1590} 1591 1592static int 1593t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1594{ 1595 int err = 0; 1596 size_t copied; 1597 1598 if (sopt->sopt_name != TCP_CONGESTION && 1599 sopt->sopt_name != TCP_NODELAY) 1600 return (EOPNOTSUPP); 1601 1602 if (sopt->sopt_name == TCP_CONGESTION) { 1603 char name[TCP_CA_NAME_MAX]; 1604 int optlen = sopt->sopt_valsize; 1605 struct tcpcb *tp; 1606 1607 if (optlen < 1) 1608 return (EINVAL); 1609 1610 err = copyinstr(sopt->sopt_val, name, 1611 min(TCP_CA_NAME_MAX - 1, optlen), &copied); 1612 if (err) 1613 return (err); 1614 if (copied < 1) 1615 return (EINVAL); 1616 1617 tp = sototcpcb(so); 1618 /* 1619 * XXX I need to revisit this 1620 */ 1621 if ((err = t3_set_cong_control(so, name)) == 0) { 1622#ifdef CONGESTION_CONTROL_SUPPORTED 1623 tp->t_cong_control = strdup(name, M_CXGB); 1624#endif 1625 } else 1626 return (err); 1627 } else { 1628 int optval, oldval; 1629 struct inpcb *inp; 1630 struct tcpcb *tp; 1631 1632 err = sooptcopyin(sopt, &optval, sizeof optval, 1633 sizeof optval); 1634 1635 if (err) 1636 return (err); 1637 1638 inp = sotoinpcb(so); 1639 tp = intotcpcb(inp); 1640 
1641 INP_LOCK(inp); 1642 1643 oldval = tp->t_flags; 1644 if (optval) 1645 tp->t_flags |= TF_NODELAY; 1646 else 1647 tp->t_flags &= ~TF_NODELAY; 1648 INP_UNLOCK(inp); 1649 1650 if (oldval != tp->t_flags) 1651 t3_set_nagle(so); 1652 1653 } 1654 1655 return (0); 1656} 1657 1658static int 1659t3_ctloutput(struct socket *so, struct sockopt *sopt) 1660{ 1661 int err; 1662 1663 if (sopt->sopt_level != IPPROTO_TCP) 1664 err = t3_ip_ctloutput(so, sopt); 1665 else 1666 err = t3_tcp_ctloutput(so, sopt); 1667 1668 if (err != EOPNOTSUPP) 1669 return (err); 1670 1671 return (tcp_ctloutput(so, sopt)); 1672} 1673 1674/* 1675 * Returns true if we need to explicitly request RST when we receive new data 1676 * on an RX-closed connection. 1677 */ 1678static inline int 1679need_rst_on_excess_rx(const struct toepcb *toep) 1680{ 1681 return (1); 1682} 1683 1684/* 1685 * Handles Rx data that arrives in a state where the socket isn't accepting 1686 * new data. 1687 */ 1688static void 1689handle_excess_rx(struct toepcb *toep, struct mbuf *m) 1690{ 1691 1692 if (need_rst_on_excess_rx(toep) && !(toep->tp_flags & TP_ABORT_SHUTDOWN)) 1693 t3_send_reset(toep); 1694 m_freem(m); 1695} 1696 1697/* 1698 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE) 1699 * by getting the DDP offset from the TCB. 1700 */ 1701static void 1702tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) 1703{ 1704 struct ddp_state *q = &toep->tp_ddp_state; 1705 struct ddp_buf_state *bsp; 1706 struct cpl_get_tcb_rpl *hdr; 1707 unsigned int ddp_offset; 1708 struct socket *so; 1709 struct tcpcb *tp; 1710 1711 uint64_t t; 1712 __be64 *tcb; 1713 1714 so = toeptoso(toep); 1715 tp = toep->tp_tp; 1716 1717 INP_LOCK_ASSERT(tp->t_inpcb); 1718 SOCKBUF_LOCK(&so->so_rcv); 1719 1720 /* Note that we only accout for CPL_GET_TCB issued by the DDP code. We 1721 * really need a cookie in order to dispatch the RPLs. 
1722 */ 1723 q->get_tcb_count--; 1724 1725 /* It is a possible that a previous CPL already invalidated UBUF DDP 1726 * and moved the cur_buf idx and hence no further processing of this 1727 * skb is required. However, the app might be sleeping on 1728 * !q->get_tcb_count and we need to wake it up. 1729 */ 1730 if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) { 1731 struct socket *so = toeptoso(toep); 1732 1733 m_freem(m); 1734 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 1735 sorwakeup_locked(so); 1736 else 1737 SOCKBUF_UNLOCK(&so->so_rcv); 1738 return; 1739 } 1740 1741 bsp = &q->buf_state[q->cur_buf]; 1742 hdr = cplhdr(m); 1743 tcb = (__be64 *)(hdr + 1); 1744 if (q->cur_buf == 0) { 1745 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]); 1746 ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET); 1747 } else { 1748 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]); 1749 ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET; 1750 } 1751 ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET; 1752 m->m_cur_offset = bsp->cur_offset; 1753 bsp->cur_offset = ddp_offset; 1754 m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset; 1755 1756 CTR5(KTR_TOM, 1757 "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u", 1758 q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset); 1759 KASSERT(ddp_offset >= m->m_cur_offset, ("ddp_offset=%u less than cur_offset=%u", 1760 ddp_offset, m->m_cur_offset)); 1761 1762#ifdef T3_TRACE 1763 T3_TRACE3(TIDTB(so), 1764 "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u ddp_offset %u", 1765 tp->rcv_nxt, q->cur_buf, ddp_offset); 1766#endif 1767 1768#if 0 1769{ 1770 unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx; 1771 1772 t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]); 1773 ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS; 1774 1775 t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]); 1776 rcv_nxt = t >> S_TCB_RCV_NXT; 1777 rcv_nxt &= M_TCB_RCV_NXT; 1778 1779 t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]); 1780 
rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET); 1781 rx_hdr_offset &= M_TCB_RX_HDR_OFFSET; 1782 1783 T3_TRACE2(TIDTB(sk), 1784 "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x", 1785 ddp_flags, rcv_nxt - rx_hdr_offset); 1786 T3_TRACE4(TB(q), 1787 "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u", 1788 tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf); 1789 T3_TRACE3(TB(q), 1790 "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u", 1791 rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset); 1792 T3_TRACE2(TB(q), 1793 "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x", 1794 q->buf_state[0].flags, q->buf_state[1].flags); 1795 1796} 1797#endif 1798 if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) { 1799 handle_excess_rx(toep, m); 1800 return; 1801 } 1802 1803#ifdef T3_TRACE 1804 if ((int)m->m_pkthdr.len < 0) { 1805 t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len"); 1806 } 1807#endif 1808 if (bsp->flags & DDP_BF_NOCOPY) { 1809#ifdef T3_TRACE 1810 T3_TRACE0(TB(q), 1811 "tcb_rpl_as_ddp_complete: CANCEL UBUF"); 1812 1813 if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) { 1814 printk("!cancel_ubuf"); 1815 t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf"); 1816 } 1817#endif 1818 m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1; 1819 bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA); 1820 q->cur_buf ^= 1; 1821 } else if (bsp->flags & DDP_BF_NOFLIP) { 1822 1823 m->m_ddp_flags = 1; /* always a kernel buffer */ 1824 1825 /* now HW buffer carries a user buffer */ 1826 bsp->flags &= ~DDP_BF_NOFLIP; 1827 bsp->flags |= DDP_BF_NOCOPY; 1828 1829 /* It is possible that the CPL_GET_TCB_RPL doesn't indicate 1830 * any new data in which case we're done. If in addition the 1831 * offset is 0, then there wasn't a completion for the kbuf 1832 * and we need to decrement the posted count. 
1833 */ 1834 if (m->m_pkthdr.len == 0) { 1835 if (ddp_offset == 0) { 1836 q->kbuf_posted--; 1837 bsp->flags |= DDP_BF_NODATA; 1838 } 1839 SOCKBUF_UNLOCK(&so->so_rcv); 1840 1841 m_free(m); 1842 return; 1843 } 1844 } else { 1845 SOCKBUF_UNLOCK(&so->so_rcv); 1846 /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP, 1847 * but it got here way late and nobody cares anymore. 1848 */ 1849 m_free(m); 1850 return; 1851 } 1852 1853 m->m_ddp_gl = (unsigned char *)bsp->gl; 1854 m->m_flags |= M_DDP; 1855 m->m_seq = tp->rcv_nxt; 1856 tp->rcv_nxt += m->m_pkthdr.len; 1857 tp->t_rcvtime = ticks; 1858#ifdef T3_TRACE 1859 T3_TRACE3(TB(q), 1860 "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u", 1861 m->m_seq, q->cur_buf, m->m_pkthdr.len); 1862#endif 1863 CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u", 1864 m->m_seq, q->cur_buf, m->m_pkthdr.len); 1865 if (m->m_pkthdr.len == 0) 1866 q->user_ddp_pending = 0; 1867 else 1868 SBAPPEND(&so->so_rcv, m); 1869 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 1870 sorwakeup_locked(so); 1871 else 1872 SOCKBUF_UNLOCK(&so->so_rcv); 1873} 1874 1875/* 1876 * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code, 1877 * in that case they are similar to DDP completions. 
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* OK if socket doesn't exist */
	if (toep == NULL) {
		printf("null toep in do_get_tcb_rpl\n");
		return (CPL_RET_BUF_DONE);
	}

	INP_LOCK(toep->tp_tp->t_inpcb);
	tcb_rpl_as_ddp_complete(toep, m);
	INP_UNLOCK(toep->tp_tp->t_inpcb);

	return (0);
}

/*
 * Account for DDP-placed data that precedes the payload of a CPL_RX_DATA:
 * when the HW sequence number is ahead of rcv_nxt the gap was placed
 * directly into the current DDP buffer and is appended here as an M_DDP
 * mbuf covering that region.
 */
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data *hdr = cplhdr(m);
	unsigned int rcv_nxt = ntohl(hdr->seq);

	/* No gap: nothing was DDP-placed ahead of this segment. */
	if (tp->rcv_nxt == rcv_nxt)
		return;

	INP_LOCK_ASSERT(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);
	q = &toep->tp_ddp_state;
	bsp = &q->buf_state[q->cur_buf];
	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
		rcv_nxt, tp->rcv_nxt));
	/* m_len is repurposed to the number of DDP-placed bytes. */
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "handle_ddp_data: neg len");
	}
#endif

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	/*
	 * For now, don't re-enable DDP after a connection fell out of DDP
	 * mode.
	 */
	q->ubuf_ddp_ready = 0;
	SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_data *hdr = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	int len = be16toh(hdr->len);

	INP_LOCK(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		INP_UNLOCK(tp->t_inpcb);
		TRACE_EXIT;
		return;
	}

	/* In DDP mode some of this segment may already have been placed. */
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
		handle_ddp_data(toep, m);

	m->m_seq = ntohl(hdr->seq);
	m->m_ulp_mode = 0;                    /* for iSCSI */

#if VALIDATE_SEQ
	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
		log(LOG_ERR,
		    "%s: TID %u: Bad sequence number %u, expected %u\n",
		    TOE_DEV(toeptoso(toep))->name, toep->tp_tid, m->m_seq,
		    tp->rcv_nxt);
		m_freem(m);
		INP_UNLOCK(tp->t_inpcb);
		return;
	}
#endif
	/* Strip the CPL header, leaving only payload in the mbuf. */
	m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
	/*
	 * We don't handle urgent data yet
	 */
	if (__predict_false(hdr->urg))
		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
		    tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
		    tp->rcv_nxt];
#endif
	/* Track HW delayed-ACK mode changes reported in the CPL. */
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
		toep->tp_delack_mode = hdr->dack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

	/* Trust the CPL-reported length over the mbuf accounting. */
	if (len < m->m_pkthdr.len)
		m->m_pkthdr.len = m->m_len = len;

	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;
#ifdef T3_TRACE
	T3_TRACE2(TIDTB(sk),
	    "new_rx_data: seq 0x%x len %u",
	    m->m_seq, m->m_pkthdr.len);
#endif
	INP_UNLOCK(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sb_notify(&so->so_rcv))
		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);

	SBAPPEND(&so->so_rcv, m);

#ifdef notyet
	/*
	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
	 *
	 */
	KASSERT(so->so_rcv.sb_cc < (so->so_rcv.sb_mbmax << 1),

	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
		so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
#endif


	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
	    so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);

	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Handler for RX_DATA CPL messages.
 */
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);

	new_rx_data(toep, m);

	return (0);
}

/*
 * Process an RX_DATA_DDP notification: data was placed directly into a DDP
 * buffer by the HW; build an M_DDP mbuf describing the placed region and
 * append it to the receive buffer.
 */
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	struct socket *so = toeptoso(toep);
	int nomoredata = 0;
	unsigned int delack_mode;

	tp = sototcpcb(so);

	INP_LOCK(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {

		handle_excess_rx(toep, m);
		INP_UNLOCK(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
		  "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
		  "hdr seq 0x%x len %u offset %u",
		  tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
		  ntohs(hdr->len), G_DDP_OFFSET(ddp_report));
	T3_TRACE1(TIDTB(sk),
		  "new_rx_data_ddp: ddp_report 0x%x",
		  ddp_report);
#endif
	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len.  We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	INP_UNLOCK(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
	    m->m_len, rcv_nxt, m->m_seq);
	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
	 * account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	SOCKBUF_LOCK(&so->so_rcv);
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
			m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));


	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;                     /* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	/* nomoredata is never set in this function today; kept for parity
	 * with process_ddp_complete(). */
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
#endif
	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/* All DDP error conditions reported in an RX_DATA_DDP status word. */
#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
		 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
		 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
		 F_DDP_INVALID_PPOD)

/*
 * Handler for
 RX_DATA_DDP CPL messages.
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	/* Log and drop placements the HW flagged as erroneous. */
	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

/*
 * Process an RX_DDP_COMPLETE: a DDP buffer was filled (or closed out).
 * Build an M_DDP mbuf covering the bytes placed since the last report and
 * append it to the receive buffer.
 */
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;

	INP_LOCK(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = sotoinpcb(so);

		handle_excess_rx(toep, m);
		INP_UNLOCK(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;


	SOCKBUF_LOCK(&so->so_rcv);
	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	/* Bytes newly placed since the previous report for this buffer. */
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	INP_UNLOCK(tp->t_inpcb);

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR5(KTR_TOM,
		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
		  "ddp_report 0x%x offset %u, len %u",
		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
		   G_DDP_OFFSET(ddp_report), m->m_len);

	bsp->cur_offset += m->m_len;

	/* A completion short of the kernel buffer's full length means the
	 * HW has no more data for this buffer. */
	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;                     /* flip buffers */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata=1;
	}

	CTR4(KTR_TOM,
		  "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
		  "ddp_report %u offset %u",
		  tp->rcv_nxt, bsp->cur_offset, ddp_report,
		   G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	/* Bit 0 marks a completed buffer (see new_rx_data_ddp()). */
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;


	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	INP_LOCK_ASSERT(tp->t_inpcb);
	/*
	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	     /* defeat recycling */
	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
	tcp_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */

	if (tp->rcv_nxt == rcv_nxt)			/* no data */
		return (0);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
2368 */ 2369 return (1); 2370 } 2371 2372 INP_LOCK_ASSERT(tp->t_inpcb); 2373 q = &toep->tp_ddp_state; 2374 SOCKBUF_LOCK(&so->so_rcv); 2375 bsp = &q->buf_state[q->cur_buf]; 2376 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; 2377 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2378 m->m_ddp_gl = (unsigned char *)bsp->gl; 2379 m->m_flags |= M_DDP; 2380 m->m_cur_offset = bsp->cur_offset; 2381 m->m_ddp_flags = 2382 DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; 2383 m->m_seq = tp->rcv_nxt; 2384 tp->rcv_nxt = rcv_nxt; 2385 bsp->cur_offset += m->m_pkthdr.len; 2386 if (!(bsp->flags & DDP_BF_NOFLIP)) 2387 q->cur_buf ^= 1; 2388#ifdef notyet 2389 skb_reset_transport_header(skb); 2390 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */ 2391#endif 2392 tp->t_rcvtime = ticks; 2393 SBAPPEND(&so->so_rcv, m); 2394 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 2395 sorwakeup_locked(so); 2396 else 2397 SOCKBUF_UNLOCK(&so->so_rcv); 2398 return (1); 2399} 2400 2401/* 2402 * Handle a peer FIN. 2403 */ 2404static void 2405do_peer_fin(struct socket *so, struct mbuf *m) 2406{ 2407 struct tcpcb *tp = sototcpcb(so); 2408 struct toepcb *toep = tp->t_toe; 2409 int keep = 0; 2410 DPRINTF("do_peer_fin state=%d\n", tp->t_state); 2411 2412#ifdef T3_TRACE 2413 T3_TRACE0(TIDTB(sk),"do_peer_fin:"); 2414#endif 2415 2416 if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { 2417 printf("abort_pending set\n"); 2418 2419 goto out; 2420 } 2421 INP_INFO_WLOCK(&tcbinfo); 2422 INP_LOCK(tp->t_inpcb); 2423 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) { 2424 keep = handle_peer_close_data(so, m); 2425 if (keep < 0) { 2426 INP_INFO_WUNLOCK(&tcbinfo); 2427 INP_UNLOCK(tp->t_inpcb); 2428 return; 2429 } 2430 } 2431 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 2432 socantrcvmore(so); 2433 /* 2434 * If connection is half-synchronized 2435 * (ie NEEDSYN flag on) then delay ACK, 2436 * so it may be piggybacked when SYN is sent. 
2437 * Otherwise, since we received a FIN then no 2438 * more input can be expected, send ACK now. 2439 */ 2440 if (tp->t_flags & TF_NEEDSYN) 2441 tp->t_flags |= TF_DELACK; 2442 else 2443 tp->t_flags |= TF_ACKNOW; 2444 tp->rcv_nxt++; 2445 } 2446 2447 switch (tp->t_state) { 2448 case TCPS_SYN_RECEIVED: 2449 tp->t_starttime = ticks; 2450 /* FALLTHROUGH */ 2451 case TCPS_ESTABLISHED: 2452 tp->t_state = TCPS_CLOSE_WAIT; 2453 break; 2454 case TCPS_FIN_WAIT_1: 2455 tp->t_state = TCPS_CLOSING; 2456 break; 2457 case TCPS_FIN_WAIT_2: 2458 /* 2459 * If we've sent an abort_req we must have sent it too late, 2460 * HW will send us a reply telling us so, and this peer_close 2461 * is really the last message for this connection and needs to 2462 * be treated as an abort_rpl, i.e., transition the connection 2463 * to TCP_CLOSE (note that the host stack does this at the 2464 * time of generating the RST but we must wait for HW). 2465 * Otherwise we enter TIME_WAIT. 2466 */ 2467 t3_release_offload_resources(toep); 2468 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2469 tp = tcp_close(tp); 2470 } else { 2471 enter_timewait(so); 2472 } 2473 break; 2474 default: 2475 log(LOG_ERR, 2476 "%s: TID %u received PEER_CLOSE in bad state %d\n", 2477 TOE_DEV(so)->tod_name, toep->tp_tid, tp->t_state); 2478 } 2479 INP_INFO_WUNLOCK(&tcbinfo); 2480 if (tp) 2481 INP_UNLOCK(tp->t_inpcb); 2482 2483 DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags); 2484 2485#ifdef notyet 2486 /* Do not send POLL_HUP for half duplex close. */ 2487 if ((sk->sk_shutdown & SEND_SHUTDOWN) || 2488 sk->sk_state == TCP_CLOSE) 2489 sk_wake_async(so, 1, POLL_HUP); 2490 else 2491 sk_wake_async(so, 1, POLL_IN); 2492#endif 2493 2494out: 2495 if (!keep) 2496 m_free(m); 2497} 2498 2499/* 2500 * Handler for PEER_CLOSE CPL messages. 
 */
static int
do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);

	VALIDATE_SOCK(so);

	do_peer_fin(so, m);
	return (0);
}

/*
 * Process a CLOSE_CON_RPL: HW acknowledged our FIN.  snd_una is advanced to
 * the sequence number HW reports (excluding the FIN) and the connection is
 * moved through the local-close half of the TCP state machine.
 */
static void
process_close_con_rpl(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct cpl_close_con_rpl *rpl = cplhdr(m);
	struct toepcb *toep = tp->t_toe;

	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */

	DPRINTF("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state,
	    !!(so->so_state & SS_NOFDREF));
	/* With an abort outstanding (non-T3A), the abort path owns teardown. */
	if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING))
		goto out;

	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(tp->t_inpcb);
	switch (tp->t_state) {
	case TCPS_CLOSING:              /* see FIN_WAIT2 case in do_peer_fin */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			tp = tcp_close(tp);

		} else {
			/*
			 * NOTE(review): enter_timewait() ends in
			 * tcp_twstart(), which discards the tcpcb; 'tp' is
			 * then used below for INP_UNLOCK -- confirm lifetime
			 * against tcp_twstart() in this kernel revision.
			 */
			enter_timewait(so);
			soisdisconnected(so);
		}
		break;
	case TCPS_LAST_ACK:
		/*
		 * In this state we don't care about pending abort_rpl.
		 * If we've sent abort_req it was post-close and was sent too
		 * late, this close_con_rpl is the actual last message.
		 */
		t3_release_offload_resources(toep);
		tp = tcp_close(tp);
		break;
	case TCPS_FIN_WAIT_1:
		/*
		 * If we can't receive any more
		 * data, then closing user can proceed.
		 * Starting the timer is contrary to the
		 * specification, but if we don't get a FIN
		 * we'll hang forever.
		 *
		 * XXXjl:
		 * we should release the tp also, and use a
		 * compressed state.
		 */
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
			int timeout;

			soisdisconnected(so);
			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : tcp_maxidle;
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
		tp->t_state = TCPS_FIN_WAIT_2;
		if ((so->so_options & SO_LINGER) && so->so_linger == 0 &&
		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
			tp = tcp_drop(tp, 0);
		}

		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		    TOE_DEV(so)->tod_name, toep->tp_tid,
		    tp->t_state);
	}
	INP_INFO_WUNLOCK(&tcbinfo);
	if (tp)
		INP_UNLOCK(tp->t_inpcb);
out:
	m_freem(m);
}

/*
 * Handler for CLOSE_CON_RPL CPL messages.
 */
static int
do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
			    void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);

	VALIDATE_SOCK(so);

	process_close_con_rpl(so, m);
	return (0);
}

/*
 * Process abort replies.  We only process these messages if we anticipate
 * them as the coordination between SW and HW in this area is somewhat lacking
 * and sometimes we get ABORT_RPLs after we are done with the connection that
 * originated the ABORT_REQ.
2611 */ 2612static void 2613process_abort_rpl(struct socket *so, struct mbuf *m) 2614{ 2615 struct tcpcb *tp = sototcpcb(so); 2616 struct toepcb *toep = tp->t_toe; 2617 2618#ifdef T3_TRACE 2619 T3_TRACE1(TIDTB(sk), 2620 "process_abort_rpl: GTS rpl pending %d", 2621 sock_flag(sk, ABORT_RPL_PENDING)); 2622#endif 2623 2624 INP_INFO_WLOCK(&tcbinfo); 2625 INP_LOCK(tp->t_inpcb); 2626 2627 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2628 /* 2629 * XXX panic on tcpdrop 2630 */ 2631 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(TOE_DEV(so))) 2632 toep->tp_flags |= TP_ABORT_RPL_RCVD; 2633 else { 2634 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); 2635 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || 2636 !is_t3a(TOE_DEV(so))) { 2637 if (toep->tp_flags & TP_ABORT_REQ_RCVD) 2638 panic("TP_ABORT_REQ_RCVD set"); 2639 t3_release_offload_resources(toep); 2640 tp = tcp_close(tp); 2641 } 2642 } 2643 } 2644 if (tp) 2645 INP_UNLOCK(tp->t_inpcb); 2646 INP_INFO_WUNLOCK(&tcbinfo); 2647 2648 m_free(m); 2649} 2650 2651/* 2652 * Handle an ABORT_RPL_RSS CPL message. 2653 */ 2654static int 2655do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2656{ 2657 struct socket *so; 2658 struct cpl_abort_rpl_rss *rpl = cplhdr(m); 2659 struct toepcb *toep; 2660 2661 /* 2662 * Ignore replies to post-close aborts indicating that the abort was 2663 * requested too late. These connections are terminated when we get 2664 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 2665 * arrives the TID is either no longer used or it has been recycled. 
2666 */ 2667 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2668discard: 2669 m_free(m); 2670 return (0); 2671 } 2672 2673 toep = (struct toepcb *)ctx; 2674 2675 /* 2676 * Sometimes we've already closed the socket, e.g., a post-close 2677 * abort races with ABORT_REQ_RSS, the latter frees the socket 2678 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2679 * but FW turns the ABORT_REQ into a regular one and so we get 2680 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2681 */ 2682 if (!toep) 2683 goto discard; 2684 2685 if (toep->tp_tp == NULL) { 2686 printf("removing tid for abort\n"); 2687 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2688 if (toep->tp_l2t) 2689 l2t_release(L2DATA(cdev), toep->tp_l2t); 2690 2691 toepcb_release(toep); 2692 goto discard; 2693 } 2694 2695 printf("toep=%p\n", toep); 2696 printf("tp=%p\n", toep->tp_tp); 2697 2698 so = toeptoso(toep); /* <- XXX panic */ 2699 toepcb_hold(toep); 2700 process_abort_rpl(so, m); 2701 toepcb_release(toep); 2702 return (0); 2703} 2704 2705/* 2706 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2707 * indicate whether RST should be sent in response. 2708 */ 2709static int 2710abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2711{ 2712 struct tcpcb *tp = sototcpcb(so); 2713 2714 switch (abort_reason) { 2715 case CPL_ERR_BAD_SYN: 2716#if 0 2717 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2718#endif 2719 case CPL_ERR_CONN_RESET: 2720 // XXX need to handle SYN_RECV due to crossed SYNs 2721 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2722 case CPL_ERR_XMIT_TIMEDOUT: 2723 case CPL_ERR_PERSIST_TIMEDOUT: 2724 case CPL_ERR_FINWAIT2_TIMEDOUT: 2725 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2726#if 0 2727 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2728#endif 2729 return (ETIMEDOUT); 2730 default: 2731 return (EIO); 2732 } 2733} 2734 2735static inline void 2736set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2737{ 2738 struct cpl_abort_rpl *rpl = cplhdr(m); 2739 2740 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2741 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2742 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2743 2744 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2745 rpl->cmd = cmd; 2746} 2747 2748static void 2749send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2750{ 2751 struct mbuf *reply_mbuf; 2752 struct cpl_abort_req_rss *req = cplhdr(m); 2753 2754 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2755 m_set_priority(m, CPL_PRIORITY_DATA); 2756 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2757 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2758 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2759 m_free(m); 2760} 2761 2762/* 2763 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2764 */ 2765static inline int 2766is_neg_adv_abort(unsigned int status) 2767{ 2768 return status == CPL_ERR_RTX_NEG_ADVICE || 2769 status == CPL_ERR_PERSIST_NEG_ADVICE; 2770} 2771 2772static void 2773send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2774{ 2775 struct mbuf *reply_mbuf; 2776 struct cpl_abort_req_rss *req = cplhdr(m); 2777 2778 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2779 2780 if (!reply_mbuf) { 2781 /* Defer the reply. Stick rst_status into req->cmd. 
*/ 2782 req->status = rst_status; 2783 t3_defer_reply(m, tdev, send_deferred_abort_rpl); 2784 return; 2785 } 2786 2787 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA); 2788 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status); 2789 m_free(m); 2790 2791 /* 2792 * XXX need to sync with ARP as for SYN_RECV connections we can send 2793 * these messages while ARP is pending. For other connection states 2794 * it's not a problem. 2795 */ 2796 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2797} 2798 2799#ifdef notyet 2800static void 2801cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) 2802{ 2803 CXGB_UNIMPLEMENTED(); 2804#ifdef notyet 2805 struct request_sock *req = child->sk_user_data; 2806 2807 inet_csk_reqsk_queue_removed(parent, req); 2808 synq_remove(tcp_sk(child)); 2809 __reqsk_free(req); 2810 child->sk_user_data = NULL; 2811#endif 2812} 2813 2814 2815/* 2816 * Performs the actual work to abort a SYN_RECV connection. 2817 */ 2818static void 2819do_abort_syn_rcv(struct socket *child, struct socket *parent) 2820{ 2821 struct tcpcb *parenttp = sototcpcb(parent); 2822 struct tcpcb *childtp = sototcpcb(child); 2823 2824 /* 2825 * If the server is still open we clean up the child connection, 2826 * otherwise the server already did the clean up as it was purging 2827 * its SYN queue and the skb was just sitting in its backlog. 2828 */ 2829 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { 2830 cleanup_syn_rcv_conn(child, parent); 2831 INP_INFO_WLOCK(&tcbinfo); 2832 INP_LOCK(childtp->t_inpcb); 2833 t3_release_offload_resources(childtp->t_toe); 2834 childtp = tcp_close(childtp); 2835 INP_INFO_WUNLOCK(&tcbinfo); 2836 if (childtp) 2837 INP_UNLOCK(childtp->t_inpcb); 2838 } 2839} 2840#endif 2841 2842/* 2843 * Handle abort requests for a SYN_RECV connection. These need extra work 2844 * because the socket is on its parent's SYN queue. 
 */
static int
abort_syn_rcv(struct socket *so, struct mbuf *m)
{
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct socket *parent;
	struct toedev *tdev = TOE_DEV(so);
	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
	struct socket *oreq = so->so_incomp;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;

	if (!oreq)
		return -1;        /* somehow we are not on the SYN queue */

	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, oreq->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	SOCK_LOCK(parent);
	do_abort_syn_rcv(so, parent);
	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
	SOCK_UNLOCK(parent);
#endif
	return (0);
}

/*
 * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
 * request except that we need to reply to it.
 */
static void
process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev)
{
	int rst_status = CPL_ABORT_NO_RST;
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	INP_LOCK(tp->t_inpcb);
	/*
	 * NOTE(review): the first ABORT_REQ seen only records the event
	 * (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN) and frees the mbuf without
	 * replying; processing happens on a subsequent request.  Presumably
	 * this matches HW delivering the request twice -- confirm against the
	 * T3 CPL specification.
	 */
	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
		m_free(m);
		goto skip;
	}

	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
	/*
	 * Three cases to consider:
	 * a) We haven't sent an abort_req; close the connection.
	 * b) We have sent a post-close abort_req that will get to TP too late
	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
	 *    be ignored and the connection should be closed now.
	 * c) We have sent a regular abort_req that will get to TP too late.
	 *    That will generate an abort_rpl with status 0, wait for it.
	 */
	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
	    (is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
		so->so_error = abort_status_to_errno(so, req->status,
		    &rst_status);
		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
			sorwakeup(so);
		/*
		 * SYN_RECV needs special processing.  If abort_syn_rcv()
		 * returns 0 is has taken care of the abort.
		 */
		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
			goto skip;

		t3_release_offload_resources(toep);
		tp = tcp_close(tp);
	}
	if (tp)
		INP_UNLOCK(tp->t_inpcb);
	send_abort_rpl(m, tdev, rst_status);
	return;

skip:
	INP_UNLOCK(tp->t_inpcb);
}

/*
 * Handle an ABORT_REQ_RSS CPL message.
 */
static int
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so;
	struct inpcb *inp;

	/* Negative advice (retransmit/persist hints) is not a real abort. */
	if (is_neg_adv_abort(req->status)) {
		m_free(m);
		return (0);
	}

	/* XXX debug leftover -- candidate for removal or CTR conversion */
	printf("aborting tid=%d\n", toep->tp_tid);

	/* Connection aborted while still in SYN_RECV: tear down by hand. */
	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		toep->tp_flags |= TP_ABORT_REQ_RCVD;
		printf("sending abort rpl\n");

		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
		printf("sent\n");
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		/*
		 *  Unhook
		 */
		toep->tp_tp->t_toe = NULL;
		toep->tp_tp->t_flags &= ~TF_TOE;
		toep->tp_tp = NULL;
		/*
		 * XXX need to call syncache_chkrst - but we don't
		 * have a way of doing that yet
		 */
		toepcb_release(toep);
		printf("abort for unestablished connection :-(\n");
		return (0);
	}
	if (toep->tp_tp == NULL) {
		printf("disconnected toepcb\n");
		/* should be freed momentarily */
		return (0);
	}

	so = toeptoso(toep);
	inp = sotoinpcb(so);

	VALIDATE_SOCK(so);
	/* Hold the toepcb across processing in case the connection closes. */
	toepcb_hold(toep);
	INP_INFO_WLOCK(&tcbinfo);
	process_abort_req(so, m, TOE_DEV(so));
	INP_INFO_WUNLOCK(&tcbinfo);
	toepcb_release(toep);
	return (0);
}
#ifdef notyet
static void
pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
{
	struct toedev *tdev = TOE_DEV(parent);

	do_abort_syn_rcv(child, parent);
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
		struct cpl_pass_accept_rpl *rpl = cplhdr(m);

		rpl->opt0h = htonl(F_TCAM_BYPASS);
		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	} else
		m_free(m);
}
#endif
static void
handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
{
	CXGB_UNIMPLEMENTED();

#ifdef notyet
	struct t3cdev *cdev;
	struct socket *parent;
	struct socket *oreq;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;
	struct tcpcb *otp, *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		m_free(m);
		return;
	}

	oreq = so->so_incomp;
	otp = sototcpcb(oreq);

	cdev = T3C_DEV(so);
	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, otp->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	SOCK_LOCK(parent);
	pass_open_abort(so, parent, m);
	SOCK_UNLOCK(parent);
#endif
}

/*
 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
 * connection.
3045 */ 3046static void 3047pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m) 3048{ 3049 3050#ifdef notyet 3051 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3052 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk); 3053#endif 3054 handle_pass_open_arp_failure(m_get_socket(m), m); 3055} 3056 3057/* 3058 * Populate a reject CPL_PASS_ACCEPT_RPL WR. 3059 */ 3060static void 3061mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf) 3062{ 3063 struct cpl_pass_accept_req *req = cplhdr(req_mbuf); 3064 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf); 3065 unsigned int tid = GET_TID(req); 3066 3067 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP); 3068 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3069 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3070 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet 3071 rpl->opt0h = htonl(F_TCAM_BYPASS); 3072 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 3073 rpl->opt2 = 0; 3074 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3075} 3076 3077/* 3078 * Send a deferred reject to an accept request. 
 */
static void
reject_pass_request(struct toedev *tdev, struct mbuf *m)
{
	struct mbuf *reply_mbuf;

	reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
	mk_pass_accept_rpl(reply_mbuf, m);
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
	m_free(m);
}

/*
 * Callback invoked by the syncache for offloaded entries; both events mean
 * the toepcb reference taken at add time must be dropped.
 */
static void
handle_syncache_event(int event, void *arg)
{
	struct toepcb *toep = arg;

	switch (event) {
	case TOE_SC_ENTRY_PRESENT:
		/*
		 * entry already exists - free toepcb
		 * and l2t
		 */
		printf("syncache entry present\n");
		toepcb_release(toep);
		break;
	case TOE_SC_DROP:
		/*
		 * The syncache has given up on this entry
		 * either it timed out, or it was evicted
		 * we need to explicitly release the tid
		 */
		printf("syncache entry dropped\n");
		toepcb_release(toep);
		break;
	default:
		log(LOG_ERR, "unknown syncache event %d\n", event);
		break;
	}
}

/*
 * Translate a CPL_PASS_ACCEPT_REQ into a synthetic SYN and enter it into the
 * listening socket's syncache.
 */
static void
syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
{
	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	struct inpcb *inp;
	int mss, wsf, sack, ts;
	uint32_t rcv_isn = ntohl(req->rcv_isn);

	/*
	 * NOTE(review): only 'to' is zeroed; 'inc' and 'th' are assigned
	 * field by field and other members remain uninitialized stack
	 * garbage -- confirm syncache_offload_add() reads only the fields
	 * set here.
	 */
	bzero(&to, sizeof(struct tcpopt));
	inp = sotoinpcb(lso);

	/*
	 * Fill out information for entering us into the syncache
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_SYN;

	toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;


	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	DPRINTF("syncache add of %d:%d %d:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port));

	mss = req->tcp_options.mss;
	wsf = req->tcp_options.wsf;
	ts = req->tcp_options.tstamp;
	sack = req->tcp_options.sack;
	to.to_mss = mss;
	to.to_wscale = wsf;
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
	/*
	 * NOTE(review): tcbinfo and the inpcb are locked here but no unlock
	 * appears in this function -- presumably the caller or
	 * syncache_offload_add() releases them; verify the lock protocol.
	 */
	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(inp);
	syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep);
}


/*
 * Process a CPL_PASS_ACCEPT_REQ message.  Does the part that needs the socket
 * lock held.  Note that the sock here is a listening socket that is not owned
 * by the TOE.
 */
static void
process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
    struct listen_ctx *lctx)
{
	int rt_flags;
	struct l2t_entry *e;
	struct iff_mac tim;
	struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
	struct cpl_pass_accept_rpl *rpl;
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tom_data *d = TOM_DATA(tdev);
	struct t3cdev *cdev = d->cdev;
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *newtoep;
	struct rtentry *dst;
	struct sockaddr_in nam;
	struct t3c_data *td = T3C_DATA(cdev);

	reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
	if (__predict_false(reply_mbuf == NULL)) {
		if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
			t3_defer_reply(m, tdev, reject_pass_request);
		else {
			cxgb_queue_tid_release(cdev, tid);
			m_free(m);
		}
		DPRINTF("failed to get reply_mbuf\n");

		goto out;
	}

	/*
	 * NOTE(review): 'goto reject' taken here or from the two checks below
	 * reaches mk_tid_release(reply_mbuf, newtoep, tid) with 'newtoep'
	 * still uninitialized (it is only assigned by toepcb_alloc() further
	 * down) -- confirm mk_tid_release() ignores the toep argument or
	 * initialize newtoep to NULL.
	 */
	if (tp->t_state != TCPS_LISTEN) {
		DPRINTF("socket not in listen state\n");

		goto reject;
	}

	tim.mac_addr = req->dst_mac;
	tim.vlan_tag = ntohs(req->vlan_tag);
	if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
		DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
		goto reject;
	}

#ifdef notyet
	/*
	 * XXX do route lookup to confirm that we're still listening on this
	 * address
	 */
	if (ip_route_input(skb, req->local_ip, req->peer_ip,
			   G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
		goto reject;
	rt_flags = ((struct rtable *)skb->dst)->rt_flags &
		(RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
	dst_release(skb->dst);	// done with the input route, release it
	skb->dst = NULL;

	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;
#endif
	/*
	 * XXX
	 */
	rt_flags = RTF_LOCAL;
	if ((rt_flags & RTF_LOCAL) == 0)
		goto reject;

	/*
	 * Calculate values and add to syncache
	 */

	newtoep = toepcb_alloc();
	if (newtoep == NULL)
		goto reject;

	bzero(&nam, sizeof(struct sockaddr_in));

	nam.sin_len = sizeof(struct sockaddr_in);
	nam.sin_family = AF_INET;
	nam.sin_addr.s_addr =req->peer_ip;
	dst = rtalloc2((struct sockaddr *)&nam, 1, 0);

	if (dst == NULL) {
		printf("failed to find route\n");
		goto reject;
	}
	e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
	    (struct sockaddr *)&nam);
	if (e == NULL) {
		DPRINTF("failed to get l2t\n");
		/*
		 * NOTE(review): execution continues with e == NULL and later
		 * dereferences e->idx / e->smt_idx -- looks like this should
		 * goto reject; confirm.
		 */
	}
	/*
	 * Point to our listen socket until accept
	 */
	newtoep->tp_tp = tp;
	newtoep->tp_flags = TP_SYN_RCVD;
	newtoep->tp_tid = tid;
	newtoep->tp_toedev = tdev;
	tp->rcv_wnd = select_rcv_wnd(tdev, so);

	cxgb_insert_tid(cdev, d->client, newtoep, tid);
	SOCK_LOCK(so);
	LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
	SOCK_UNLOCK(so);

	/* DDP only if enabled, permitted on this socket, and window is big
	 * enough to be worth it. */
	newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so->so_options & SO_NO_DDP) &&
		       tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;

	if (newtoep->tp_ulp_mode) {
		ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);

		if (ddp_mbuf == NULL)
			newtoep->tp_ulp_mode = 0;
	}

	CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
	    TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
	set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
	/*
	 * XXX workaround for lack of syncache drop
	 */
	toepcb_hold(newtoep);
	syncache_add_accept_req(req, so, newtoep);

	rpl = cplhdr(reply_mbuf);
	reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	rpl->wr.wr_lo = 0;
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->opt2 = htonl(calc_opt2(so, tdev));
	rpl->rsvd = rpl->opt2;                /* workaround for HW bug */
	rpl->peer_ip = req->peer_ip;	// req->peer_ip is not overwritten

	rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
	    V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
	rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
				  CPL_PASS_OPEN_ACCEPT);

	DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);

	m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));

	l2t_send(cdev, reply_mbuf, e);
	m_free(m);
	if (newtoep->tp_ulp_mode) {
		__set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
				V_TF_DDP_OFF(1) |
				TP_DDP_TIMER_WORKAROUND_MASK,
				V_TF_DDP_OFF(1) |
			        TP_DDP_TIMER_WORKAROUND_VAL, 1);
	} else
		printf("not offloading\n");



	return;
reject:
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
		mk_pass_accept_rpl(reply_mbuf, m);
	else
		mk_tid_release(reply_mbuf, newtoep, tid);
	cxgb_ofld_send(cdev, reply_mbuf);
	m_free(m);
out:
#if 0
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#else
	return;
#endif
}

/*
 * Handle a CPL_PASS_ACCEPT_REQ message.
 *
 * CPL handler entry point: recovers the listening socket and tom_data from
 * the per-listener context and hands the request to
 * process_pass_accept_req().  Returns 0 (the mbuf is consumed downstream).
 */
static int
do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
	struct socket *lso = listen_ctx->lso;
	struct tom_data *d = listen_ctx->tom_data;

#if VALIDATE_TID
	/*
	 * NOTE(review): this validation block is Linux-derived (printk,
	 * unlikely, lsk) and will not compile if VALIDATE_TID is enabled --
	 * confirm before turning the option on.
	 */
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	if (unlikely(!lsk)) {
		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
		    cdev->name,
		    (unsigned long)((union listen_entry *)ctx -
		    t->stid_tab));
		return CPL_RET_BUF_DONE;
	}
	if (unlikely(tid >= t->ntids)) {
		printk(KERN_ERR "%s: passive open TID %u too large\n",
		    cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
	/*
	 * For T3A the current user of the TID may have closed but its last
	 * message(s) may have been backlogged so the TID appears to be still
	 * in use.  Just take the TID away, the connection can close at its
	 * own leisure.  For T3B this situation is a bug.
	 */
	if (!valid_new_tid(t, tid) &&
	    cdev->type != T3A) {
		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
		    cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
#endif

	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
	return (0);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 */
static void
assign_rxopt(struct socket *so, unsigned int opt)
{
	const struct t3c_data *td = T3C_DATA(T3C_DEV(so));
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	INP_LOCK_ASSERT(tp->t_inpcb);

	/* 40 = fixed IPv4 + TCP header bytes subtracted from the MTU index. */
	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
	tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
	tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
	/* Window scaling takes effect only if both sides negotiated it. */
	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE|TF_REQ_SCALE))
		tp->rcv_scale = tp->request_r_scale;
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void
make_established(struct socket *so, u32 snd_isn, unsigned int opt)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/* All send-side sequence state starts at the post-SYN ISN. */
	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
	assign_rxopt(so, opt);
	so->so_proto->pr_ctloutput = t3_ctloutput;

#if 0
	inet_sk(sk)->id = tp->write_seq ^ jiffies;
#endif
	/*
	 * XXX not clear what rcv_wup maps to
	 */
	/*
	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
	 * pass through opt0.
	 */
	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);

	dump_toepcb(toep);

#ifdef notyet
/*
 * no clean interface for marking ARP up to date
 */
	dst_confirm(sk->sk_dst_cache);
#endif
	tp->t_starttime = ticks;
	tp->t_state = TCPS_ESTABLISHED;
	soisconnected(so);
}

/*
 * Synthesize the tcphdr/tcpopt/in_conninfo triple that syncache_expand()
 * expects from the fields of a CPL_PASS_ESTABLISH message, then expand the
 * syncache entry into a full socket (*so).  Returns syncache_expand()'s
 * result (non-zero on success).
 */
static int
syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
{

	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	int mss, wsf, sack, ts;
	struct mbuf *m = NULL;
	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
	unsigned int opt;

#ifdef MAC
#error "no MAC support"
#endif

	opt = ntohs(req->tcp_opt);

	bzero(&to, sizeof(struct tcpopt));

	/*
	 * Fill out information for entering us into the syncache
	 * (ports/addresses are already in network byte order in the CPL).
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_ACK;

	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	wsf = G_TCPOPT_WSCALE_OK(opt);
	ts = G_TCPOPT_TSTAMP(opt);
	sack = G_TCPOPT_SACK(opt);

	to.to_mss = mss;
	to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);

	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port),
	    mss, wsf, ts, sack);
	return syncache_expand(&inc, &to, &th, so, m);
}


/*
 * Process a CPL_PASS_ESTABLISH message.
XXX a lot of the locking doesn't work
 * if we are in TCP_SYN_RECV due to crossed SYNs
 */
static int
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_pass_establish *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp;
	struct socket *so, *lso;
	struct t3c_data *td = T3C_DATA(cdev);
	// Complete socket initialization now that we have the SND_ISN

	struct toedev *tdev;

	so = lso = toeptoso(toep);
	tdev = toep->tp_toedev;

	/* Take the embryonic connection off the listener's SYN queue. */
	SOCK_LOCK(so);
	LIST_REMOVE(toep, synq_entry);
	SOCK_UNLOCK(so);

	INP_INFO_WLOCK(&tcbinfo);
	if (!syncache_expand_establish_req(req, &so, toep)) {
		/*
		 * No entry
		 */
		CXGB_UNIMPLEMENTED();
	}
	if (so == NULL) {
		/*
		 * Couldn't create the socket
		 */
		CXGB_UNIMPLEMENTED();
	}

	/*
	 * XXX workaround for lack of syncache drop
	 * (pairs with the toepcb_hold() taken before syncache_add_accept_req)
	 */
	toepcb_release(toep);

	tp = sototcpcb(so);
	INP_LOCK(tp->t_inpcb);

	/* Offloaded traffic must not be coalesced with other sockbuf data. */
	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	/* Cross-link the new tcpcb and the toepcb. */
	toep->tp_tp = tp;
	toep->tp_flags = 0;
	tp->t_toe = toep;
	reset_wr_list(toep);
	tp->rcv_wnd = select_rcv_wnd(tdev, so);
	tp->rcv_nxt = toep->tp_copied_seq;
	install_offload_ops(so);

	/* Initialize work-request credit accounting for this connection. */
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
	toep->tp_qset_idx = 0;
	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);

	/*
	 * XXX Cancel any keep alive timer
	 */

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
	INP_INFO_WUNLOCK(&tcbinfo);
	INP_UNLOCK(tp->t_inpcb);

	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
#ifdef notyet
	/*
	 * XXX not sure how these checks map to us
	 */
	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
		sk->sk_state_change(sk);
		sk_wake_async(so, 0, POLL_OUT);
	}
	/*
	 * The state for the new connection is now up to date.
	 * Next check if we should add the connection to the parent's
	 * accept queue.  When the parent closes it resets connections
	 * on its SYN queue, so check if we are being reset.  If so we
	 * don't need to do anything more, the coming ABORT_RPL will
	 * destroy this socket.  Otherwise move the connection to the
	 * accept queue.
	 *
	 * Note that we reset the synq before closing the server so if
	 * we are not being reset the stid is still open.
	 */
	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
		__kfree_skb(skb);
		goto unlock;
	}
#endif
	m_free(m);

	return (0);
}

/*
 * Fill in the right TID for CPL messages waiting in the out-of-order queue
 * and send them to the TOE.
 */
static void
fixup_and_send_ofo(struct socket *so)
{
	struct mbuf *m;
	struct toedev *tdev = TOE_DEV(so);
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	unsigned int tid = toep->tp_tid;

	printf("fixup_and_send_ofo\n");

	INP_LOCK_ASSERT(tp->t_inpcb);
	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
		/*
		 * A variety of messages can be waiting but the fields we'll
		 * be touching are common to all so any message type will do.
		 */
		struct cpl_close_con_req *p = cplhdr(m);

		p->wr.wr_lo = htonl(V_WR_TID(tid));
		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	}
}

/*
 * Updates socket state from an active establish CPL message.  Runs with the
 * socket lock held.
 */
static void
socket_act_establish(struct socket *so, struct mbuf *m)
{
	struct cpl_act_establish *req = cplhdr(m);
	u32 rcv_isn = ntohl(req->rcv_isn);	/* real RCV_ISN + 1 */
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	if (__predict_false(tp->t_state != TCPS_SYN_SENT))
		log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
		    toep->tp_tid, tp->t_state);

	tp->ts_recent_age = ticks;
	/*
	 * NOTE(review): rcv_wnd is assigned the ISN here.  rcv_wnd is a
	 * window size, not a sequence number, so this looks like a slip --
	 * confirm against make_established()'s subsequent use of rcv_wnd.
	 */
	tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
	toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));

	/*
	 * Now that we finally have a TID send any CPL messages that we had to
	 * defer for lack of a TID.
	 */
	if (mbufq_len(&toep->out_of_order_queue))
		fixup_and_send_ofo(so);

	if (__predict_false(so->so_state & SS_NOFDREF)) {
		/*
		 * XXX does this even make sense?
		 */
		sorwakeup(so);
	}
	m_free(m);
#ifdef notyet
/*
 * XXX assume no write requests permitted while socket connection is
 * incomplete
 */
	/*
	 * Currently the send queue must be empty at this point because the
	 * socket layer does not send anything before a connection is
	 * established.  To be future proof though we handle the possibility
	 * that there are pending buffers to send (either TX_DATA or
	 * CLOSE_CON_REQ).  First we need to adjust the sequence number of the
	 * buffers according to the just learned write_seq, and then we send
	 * them on their way.
	 */
	fixup_pending_writeq_buffers(sk);
	if (t3_push_frames(so, 1))
		sk->sk_write_space(sk);
#endif

	toep->tp_state = tp->t_state;
	tcpstat.tcps_connects++;

}

/*
 * Process a CPL_ACT_ESTABLISH message.
 */
static int
do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_act_establish *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct toedev *tdev;
	struct tom_data *d;

	/* Connection may already be gone; just return the active-open TID. */
	if (tp == NULL) {
		free_atid(cdev, atid);
		return (0);
	}

	so = toeptoso(toep);
	tdev = TOE_DEV(so); /* blow up here if link was down */
	d = TOM_DATA(tdev);

	INP_LOCK(tp->t_inpcb);

	/*
	 * It's OK if the TID is currently in use, the owning socket may have
	 * backlogged its last CPL message(s).  Just take it away.
	 */
	toep->tp_tid = tid;
	toep->tp_tp = tp;
	/* Swap the provisional atid for the real TID. */
	so_insert_tid(d, so, tid);
	free_atid(cdev, atid);
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));

	socket_act_establish(so, m);
	INP_UNLOCK(tp->t_inpcb);
	CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);

	return (0);
}

/*
 * Process an acknowledgment of WR completion.  Advance snd_una and send the
 * next batch of work requests from the write queue.
3740 */ 3741static void 3742wr_ack(struct toepcb *toep, struct mbuf *m) 3743{ 3744 struct tcpcb *tp = toep->tp_tp; 3745 struct cpl_wr_ack *hdr = cplhdr(m); 3746 struct socket *so = toeptoso(toep); 3747 unsigned int credits = ntohs(hdr->credits); 3748 u32 snd_una = ntohl(hdr->snd_una); 3749 int bytes = 0; 3750 3751 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3752 3753 INP_LOCK(tp->t_inpcb); 3754 3755 toep->tp_wr_avail += credits; 3756 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3757 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3758 3759 while (credits) { 3760 struct mbuf *p = peek_wr(toep); 3761 3762 if (__predict_false(!p)) { 3763 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3764 "nothing pending, state %u wr_avail=%u\n", 3765 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3766 break; 3767 } 3768 CTR2(KTR_TOM, 3769 "wr_ack: p->credits=%d p->bytes=%d", p->m_pkthdr.csum_data, p->m_pkthdr.len); 3770 3771 KASSERT(p->m_pkthdr.csum_data != 0, ("empty request still on list")); 3772 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3773 3774#if DEBUG_WR > 1 3775 struct tx_data_wr *w = cplhdr(p); 3776 log(LOG_ERR, 3777 "TID %u got %u WR credits, need %u, len %u, " 3778 "main body %u, frags %u, seq # %u, ACK una %u," 3779 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3780 toep->tp_tid, credits, p->csum, p->len, 3781 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3782 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3783 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3784#endif 3785 p->m_pkthdr.csum_data -= credits; 3786 break; 3787 } else { 3788 dequeue_wr(toep); 3789 credits -= p->m_pkthdr.csum_data; 3790 bytes += p->m_pkthdr.len; 3791 CTR3(KTR_TOM, 3792 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3793 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3794 3795 m_free(p); 3796 } 3797 } 3798 3799#if DEBUG_WR 3800 check_wr_invariants(tp); 3801#endif 3802 3803 if 
(__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3804#if VALIDATE_SEQ 3805 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3806 3807 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3808 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3809 toep->tp_tid, tp->snd_una); 3810#endif 3811 goto out_free; 3812 } 3813 3814 if (tp->snd_una != snd_una) { 3815 tp->snd_una = snd_una; 3816 tp->ts_recent_age = ticks; 3817#ifdef notyet 3818 /* 3819 * Keep ARP entry "minty fresh" 3820 */ 3821 dst_confirm(sk->sk_dst_cache); 3822#endif 3823 if (tp->snd_una == tp->snd_nxt) 3824 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3825 } 3826 if (bytes) { 3827 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3828 SOCKBUF_LOCK(&so->so_snd); 3829 sbdrop_locked(&so->so_snd, bytes); 3830 sowwakeup_locked(so); 3831 } 3832 3833 if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc) 3834 t3_push_frames(so, 0); 3835 3836out_free: 3837 INP_UNLOCK(tp->t_inpcb); 3838 m_free(m); 3839} 3840 3841/* 3842 * Handler for TX_DATA_ACK CPL messages. 3843 */ 3844static int 3845do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3846{ 3847 struct toepcb *toep = (struct toepcb *)ctx; 3848 3849 VALIDATE_SOCK(so); 3850 3851 wr_ack(toep, m); 3852 return 0; 3853} 3854 3855/* 3856 * Handler for TRACE_PKT CPL messages. Just sink these packets. 3857 */ 3858static int 3859do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 3860{ 3861 m_freem(m); 3862 return 0; 3863} 3864 3865/* 3866 * Reset a connection that is on a listener's SYN queue or accept queue, 3867 * i.e., one that has not had a struct socket associated with it. 3868 * Must be called from process context. 3869 * 3870 * Modeled after code in inet_csk_listen_stop(). 3871 */ 3872static void 3873t3_reset_listen_child(struct socket *child) 3874{ 3875 struct tcpcb *tp = sototcpcb(child); 3876 3877 t3_send_reset(tp->t_toe); 3878} 3879 3880/* 3881 * Disconnect offloaded established but not yet accepted connections sitting 3882 * on a server's accept_queue. 
We just send an ABORT_REQ at this point and 3883 * finish off the disconnect later as we may need to wait for the ABORT_RPL. 3884 */ 3885void 3886t3_disconnect_acceptq(struct socket *listen_so) 3887{ 3888 struct socket *so; 3889 struct tcpcb *tp; 3890 3891 TAILQ_FOREACH(so, &listen_so->so_comp, so_list) { 3892 tp = sototcpcb(so); 3893 3894 if (tp->t_flags & TF_TOE) { 3895 INP_LOCK(tp->t_inpcb); 3896 t3_reset_listen_child(so); 3897 INP_UNLOCK(tp->t_inpcb); 3898 } 3899 3900 } 3901} 3902 3903/* 3904 * Reset offloaded connections sitting on a server's syn queue. As above 3905 * we send ABORT_REQ and finish off when we get ABORT_RPL. 3906 */ 3907 3908void 3909t3_reset_synq(struct listen_ctx *lctx) 3910{ 3911 struct toepcb *toep; 3912 3913 SOCK_LOCK(lctx->lso); 3914 while (!LIST_EMPTY(&lctx->synq_head)) { 3915 toep = LIST_FIRST(&lctx->synq_head); 3916 LIST_REMOVE(toep, synq_entry); 3917 toep->tp_tp = NULL; 3918 t3_send_reset(toep); 3919 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); 3920 toepcb_release(toep); 3921 } 3922 SOCK_UNLOCK(lctx->lso); 3923} 3924 3925 3926int 3927t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl, 3928 unsigned int nppods, unsigned int tag, unsigned int maxoff, 3929 unsigned int pg_off, unsigned int color) 3930{ 3931 unsigned int i, j, pidx; 3932 struct pagepod *p; 3933 struct mbuf *m; 3934 struct ulp_mem_io *req; 3935 struct tcpcb *tp = sototcpcb(so); 3936 struct toepcb *toep = tp->t_toe; 3937 unsigned int tid = toep->tp_tid; 3938 const struct tom_data *td = TOM_DATA(TOE_DEV(so)); 3939 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; 3940 3941 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", 3942 gl, nppods, tag, maxoff, pg_off, color); 3943 3944 for (i = 0; i < nppods; ++i) { 3945 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); 3946 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 3947 req = mtod(m, struct ulp_mem_io *); 3948 m->m_pkthdr.len = m->m_len = sizeof(*req) 
+ PPOD_SIZE; 3949 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 3950 req->wr.wr_lo = 0; 3951 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) | 3952 V_ULPTX_CMD(ULP_MEM_WRITE)); 3953 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) | 3954 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1)); 3955 3956 p = (struct pagepod *)(req + 1); 3957 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) { 3958 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); 3959 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) | 3960 V_PPOD_COLOR(color)); 3961 p->pp_max_offset = htonl(maxoff); 3962 p->pp_page_offset = htonl(pg_off); 3963 p->pp_rsvd = 0; 3964 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) 3965 p->pp_addr[j] = pidx < gl->dgl_nelem ? 3966 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0; 3967 } else 3968 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */ 3969 send_or_defer(toep, m, 0); 3970 ppod_addr += PPOD_SIZE; 3971 } 3972 return (0); 3973} 3974 3975/* 3976 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command. 3977 */ 3978static inline void 3979mk_cpl_barrier_ulp(struct cpl_barrier *b) 3980{ 3981 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b; 3982 3983 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3984 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8)); 3985 b->opcode = CPL_BARRIER; 3986} 3987 3988/* 3989 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command. 3990 */ 3991static inline void 3992mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno) 3993{ 3994 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 3995 3996 txpkt = (struct ulp_txpkt *)req; 3997 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3998 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 3999 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid)); 4000 req->cpuno = htons(cpuno); 4001} 4002 4003/* 4004 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command. 
 */
static inline void
mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
		     unsigned int word, uint64_t mask, uint64_t val)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
	    tid, word, mask, val);

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
	req->reply = V_NO_REPLY(1);	/* suppress the SET_TCB_FIELD reply */
	req->cpu_idx = 0;
	req->word = htons(word);
	req->mask = htobe64(mask);
	req->val = htobe64(val);
}

/*
 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
 */
static void
mk_rx_data_ack_ulp(struct socket *so, struct cpl_rx_data_ack *ack,
    unsigned int tid, unsigned int credits)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
	    V_RX_DACK_MODE(TOM_TUNABLE(TOE_DEV(so), delack)) |
	    V_RX_CREDITS(credits));
}

/*
 * Cancel (invalidate) HW DDP buffer bufidx for the connection.  Builds a
 * compound bypass WR -- barrier, SET_TCB_FIELD, GET_TCB, barrier -- so the
 * flag update and the TCB read are not interleaved with wire traffic.
 * Caller must hold the receive sockbuf lock.
 */
void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_barrier *lock;
	struct cpl_set_tcb_field *req;
	struct cpl_get_tcb *getreq;
	struct ddp_state *p = &toep->tp_ddp_state;

	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	/* Room for: barrier + SET_TCB_FIELD + GET_TCB + barrier. */
	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
	    sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	lock = (struct cpl_barrier *)(wr + 1);
	mk_cpl_barrier_ulp(lock);

	req = (struct cpl_set_tcb_field *)(lock + 1);

	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

	/* Hmmm, not sure if this actually a good thing: reactivating
	 * the other buffer might be an issue if it has been completed
	 * already. However, that is unlikely, since the fact that the UBUF
	 * is not completed indicates that there is no oustanding data.
	 */
	if (bufidx == 0)
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF0_VALID(1),
		    V_TF_DDP_ACTIVE_BUF(1));
	else
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF1_VALID(1), 0);

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/* Keep track of the number of oustanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
	    "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @sk: the socket associated with the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.
Read the TCB to 4115 * determine how made data was written into the buffer before the overlay 4116 * took place. 4117 */ 4118void 4119t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, 4120 unsigned int tag1, unsigned int len) 4121{ 4122 unsigned int wrlen; 4123 struct mbuf *m; 4124 struct work_request_hdr *wr; 4125 struct cpl_get_tcb *getreq; 4126 struct cpl_set_tcb_field *req; 4127 struct ddp_state *p = &toep->tp_ddp_state; 4128 4129 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", 4130 bufidx, tag0, tag1, len); 4131 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4132 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); 4133 m = m_gethdr_nofail(wrlen); 4134 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4135 wr = mtod(m, struct work_request_hdr *); 4136 m->m_pkthdr.len = m->m_len = wrlen; 4137 bzero(wr, wrlen); 4138 4139 4140 /* Set the ATOMIC flag to make sure that TP processes the following 4141 * CPLs in an atomic manner and no wire segments can be interleaved. 
4142 */ 4143 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); 4144 req = (struct cpl_set_tcb_field *)(wr + 1); 4145 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG, 4146 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) | 4147 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32, 4148 V_TCB_RX_DDP_BUF0_TAG(tag0) | 4149 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32); 4150 req++; 4151 if (bufidx == 0) { 4152 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN, 4153 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4154 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 4155 req++; 4156 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4157 V_TF_DDP_PUSH_DISABLE_0(1) | 4158 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4159 V_TF_DDP_PUSH_DISABLE_0(0) | 4160 V_TF_DDP_BUF0_VALID(1)); 4161 } else { 4162 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN, 4163 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN), 4164 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len)); 4165 req++; 4166 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4167 V_TF_DDP_PUSH_DISABLE_1(1) | 4168 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4169 V_TF_DDP_PUSH_DISABLE_1(0) | 4170 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1)); 4171 } 4172 4173 getreq = (struct cpl_get_tcb *)(req + 1); 4174 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4175 4176 /* Keep track of the number of oustanding CPL_GET_TCB requests 4177 */ 4178 p->get_tcb_count++; 4179 4180#ifdef T3_TRACE 4181 T3_TRACE4(TIDTB(sk), 4182 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u " 4183 "len %d", 4184 bufidx, tag0, tag1, len); 4185#endif 4186 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4187} 4188 4189/* 4190 * Sends a compound WR containing all the CPL messages needed to program the 4191 * two HW DDP buffers, namely optionally setting up the length and offset of 4192 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK. 
 */
void
t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
		 unsigned int len1, unsigned int offset1,
		 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_set_tcb_field *req;

	CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
	    len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);

	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	/* Size the WR for only the pieces actually requested. */
	wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
	    (len1 ? sizeof(*req) : 0) +
	    (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	req = (struct cpl_set_tcb_field *)(wr + 1);
	if (len0) {	/* program buffer 0 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
		    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
		    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
		req++;
	}
	if (len1) {	/* program buffer 1 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
		    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
		    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
		    V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
		    V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
		req++;
	}

	/* Program the DDP flags last. */
	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
	    ddp_flags);

	if (modulate) {
		/* Piggy-back an RX_DATA_ACK returning consumed Rx credits. */
		mk_rx_data_ack_ulp(toeptoso(toep),
		    (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
		    toep->tp_copied_seq - toep->tp_rcv_wup);
		toep->tp_rcv_wup = toep->tp_copied_seq;
	}

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
	    "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
	    "modulate %d",
	    len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
	    modulate);
#endif

	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/*
 * Initialize the mbuf_wrs table: mbuf_wrs[i] is the number of work requests
 * needed for a message with i scatter/gather entries, given a WR size of
 * wr_len flits.  Idempotent -- only the first call has effect.
 */
void
t3_init_wr_tab(unsigned int wr_len)
{
	int i;

	if (mbuf_wrs[1])	/* already initialized */
		return;

	for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
		int sgl_len = (3 * i) / 2 + (i & 1);

		sgl_len += 3;
		mbuf_wrs[i] = sgl_len <= wr_len ?
		    1 : 1 + (sgl_len - 2) / (wr_len - 1);
	}

	wrlen = wr_len * 8;
}

/*
 * Register all TOE CPL message handlers with the t3 TOM dispatcher.
 * Returns 0.
 */
int
t3_init_cpl_io(void)
{
#ifdef notyet
	tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
	if (!tcphdr_skb) {
		log(LOG_ERR,
		    "Chelsio TCP offload: can't allocate sk_buff\n");
		return -1;
	}
	skb_put(tcphdr_skb, sizeof(struct tcphdr));
	tcphdr_skb->h.raw = tcphdr_skb->data;
	memset(tcphdr_skb->data, 0, tcphdr_skb->len);
#endif

	t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
	t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
	t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
	t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
	t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
	t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
	t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
	t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
	t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
	return (0);
}