cxgb_cpl_io.c revision 176472
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 176472 2008-02-23 01:06:17Z kmacy $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/socket.h> 43#include <sys/sysctl.h> 44#include <sys/syslog.h> 45#include <sys/socketvar.h> 46#include <sys/protosw.h> 47#include <sys/priv.h> 48 49#include <net/if.h> 50#include <net/route.h> 51 52#include <netinet/in.h> 53#include <netinet/in_pcb.h> 54#include <netinet/in_systm.h> 55#include <netinet/in_var.h> 56 57 58#include <dev/cxgb/cxgb_osdep.h> 59#include <dev/cxgb/sys/mbufq.h> 60 61#include <netinet/ip.h> 62#include <netinet/tcp_var.h> 63#include <netinet/tcp_fsm.h> 64#include <netinet/tcp_offload.h> 65#include <netinet/tcp_seq.h> 66#include <netinet/tcp_syncache.h> 67#include <netinet/tcp_timer.h> 68#include <net/route.h> 69 70#include <dev/cxgb/t3cdev.h> 71#include <dev/cxgb/common/cxgb_firmware_exports.h> 72#include <dev/cxgb/common/cxgb_t3_cpl.h> 73#include <dev/cxgb/common/cxgb_tcb.h> 74#include <dev/cxgb/common/cxgb_ctl_defs.h> 75#include <dev/cxgb/cxgb_l2t.h> 76#include <dev/cxgb/cxgb_offload.h> 77#include <vm/vm.h> 78#include <vm/pmap.h> 79#include <machine/bus.h> 80#include <dev/cxgb/sys/mvec.h> 81#include <dev/cxgb/ulp/toecore/cxgb_toedev.h> 82#include <dev/cxgb/ulp/tom/cxgb_defs.h> 83#include <dev/cxgb/ulp/tom/cxgb_tom.h> 84#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h> 85#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 86#include <dev/cxgb/ulp/tom/cxgb_tcp.h> 87 88/* 89 * For ULP connections HW may add headers, e.g., for digests, that aren't part 90 * of the messages sent by the host but that are part of the TCP payload and 91 * therefore consume TCP sequence space. 
Tx connection parameters that 92 * operate in TCP sequence space are affected by the HW additions and need to 93 * compensate for them to accurately track TCP sequence numbers. This array 94 * contains the compensating extra lengths for ULP packets. It is indexed by 95 * a packet's ULP submode. 96 */ 97const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 98 99#ifdef notyet 100/* 101 * This sk_buff holds a fake header-only TCP segment that we use whenever we 102 * need to exploit SW TCP functionality that expects TCP headers, such as 103 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple 104 * CPUs without locking. 105 */ 106static struct mbuf *tcphdr_mbuf __read_mostly; 107#endif 108 109/* 110 * Size of WRs in bytes. Note that we assume all devices we are handling have 111 * the same WR size. 112 */ 113static unsigned int wrlen __read_mostly; 114 115/* 116 * The number of WRs needed for an skb depends on the number of page fragments 117 * in the skb and whether it has any payload in its main body. This maps the 118 * length of the gather list represented by an skb into the # of necessary WRs. 119 */ 120static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; 121 122/* 123 * Max receive window supported by HW in bytes. Only a small part of it can 124 * be set through option0, the rest needs to be set through RX_DATA_ACK. 125 */ 126#define MAX_RCV_WND ((1U << 27) - 1) 127 128/* 129 * Min receive window. We want it to be large enough to accommodate receive 130 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
131 */ 132#define MIN_RCV_WND (24 * 1024U) 133#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) 134 135#define VALIDATE_SEQ 0 136#define VALIDATE_SOCK(so) 137#define DEBUG_WR 0 138 139extern int tcp_do_autorcvbuf; 140extern int tcp_do_autosndbuf; 141extern int tcp_autorcvbuf_max; 142extern int tcp_autosndbuf_max; 143 144static void t3_send_reset(struct toepcb *toep); 145static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status); 146static inline void free_atid(struct t3cdev *cdev, unsigned int tid); 147static void handle_syncache_event(int event, void *arg); 148 149static inline void 150SBAPPEND(struct sockbuf *sb, struct mbuf *n) 151{ 152 struct mbuf * m; 153 154 m = sb->sb_mb; 155 while (m) { 156 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 157 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 158 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 159 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 160 m->m_next, m->m_nextpkt, m->m_flags)); 161 m = m->m_next; 162 } 163 m = n; 164 while (m) { 165 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 166 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 167 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 168 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 169 m->m_next, m->m_nextpkt, m->m_flags)); 170 m = m->m_next; 171 } 172 sbappend_locked(sb, n); 173 m = sb->sb_mb; 174 while (m) { 175 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 176 m->m_next, m->m_nextpkt, m->m_flags)); 177 m = m->m_next; 178 } 179} 180 181static inline int 182is_t3a(const struct toedev *dev) 183{ 184 return (dev->tod_ttid == TOE_ID_CHELSIO_T3); 185} 186 187static void 188dump_toepcb(struct toepcb *toep) 189{ 190 DPRINTF("qset_idx=%d qset=%d 
ulp_mode=%d mtu_idx=%d tid=%d\n", 191 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, 192 toep->tp_mtu_idx, toep->tp_tid); 193 194 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", 195 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 196 toep->tp_mss_clamp, toep->tp_flags); 197} 198 199#ifndef RTALLOC2_DEFINED 200static struct rtentry * 201rtalloc2(struct sockaddr *dst, int report, u_long ignflags) 202{ 203 struct rtentry *rt = NULL; 204 205 if ((rt = rtalloc1(dst, report, ignflags)) != NULL) 206 RT_UNLOCK(rt); 207 208 return (rt); 209} 210#endif 211/* 212 * Determine whether to send a CPL message now or defer it. A message is 213 * deferred if the connection is in SYN_SENT since we don't know the TID yet. 214 * For connections in other states the message is sent immediately. 215 * If through_l2t is set the message is subject to ARP processing, otherwise 216 * it is sent directly. 217 */ 218static inline void 219send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t) 220{ 221 struct tcpcb *tp = toep->tp_tp; 222 223 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) { 224 INP_LOCK(tp->t_inpcb); 225 mbufq_tail(&toep->out_of_order_queue, m); // defer 226 INP_UNLOCK(tp->t_inpcb); 227 } else if (through_l2t) 228 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T 229 else 230 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly 231} 232 233static inline unsigned int 234mkprio(unsigned int cntrl, const struct toepcb *toep) 235{ 236 return (cntrl); 237} 238 239/* 240 * Populate a TID_RELEASE WR. The skb must be already propely sized. 
241 */ 242static inline void 243mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid) 244{ 245 struct cpl_tid_release *req; 246 247 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep)); 248 m->m_pkthdr.len = m->m_len = sizeof(*req); 249 req = mtod(m, struct cpl_tid_release *); 250 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 251 req->wr.wr_lo = 0; 252 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); 253} 254 255static inline void 256make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) 257{ 258 struct tcpcb *tp = sototcpcb(so); 259 struct toepcb *toep = tp->t_toe; 260 struct tx_data_wr *req; 261 262 INP_LOCK_ASSERT(tp->t_inpcb); 263 264 req = mtod(m, struct tx_data_wr *); 265 m->m_len = sizeof(*req); 266 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 267 req->wr_lo = htonl(V_WR_TID(toep->tp_tid)); 268 /* len includes the length of any HW ULP additions */ 269 req->len = htonl(len); 270 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx)); 271 /* V_TX_ULP_SUBMODE sets both the mode and submode */ 272 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) | 273 V_TX_URG(/* skb_urgent(skb) */ 0 ) | 274 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) && 275 (tail ? 0 : 1)))); 276 req->sndseq = htonl(tp->snd_nxt); 277 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) { 278 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 279 V_TX_CPU_IDX(toep->tp_qset)); 280 281 /* Sendbuffer is in units of 32KB. 
282 */ 283 if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) 284 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15)); 285 else 286 req->param |= htonl(V_TX_SNDBUF(so->so_snd.sb_hiwat >> 15)); 287 toep->tp_flags |= TP_DATASENT; 288 } 289} 290 291#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */ 292 293int 294t3_push_frames(struct socket *so, int req_completion) 295{ 296 struct tcpcb *tp = sototcpcb(so); 297 struct toepcb *toep = tp->t_toe; 298 299 struct mbuf *tail, *m0, *last; 300 struct t3cdev *cdev; 301 struct tom_data *d; 302 int i, bytes, count, total_bytes; 303 bus_dma_segment_t segs[TX_MAX_SEGS], *segp; 304 305 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { 306 DPRINTF("tcp state=%d\n", tp->t_state); 307 return (0); 308 } 309 310 if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { 311 DPRINTF("disconnecting\n"); 312 313 return (0); 314 } 315 316 317 INP_LOCK_ASSERT(tp->t_inpcb); 318 SOCKBUF_LOCK(&so->so_snd); 319 d = TOM_DATA(TOE_DEV(so)); 320 cdev = d->cdev; 321 last = tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb; 322 total_bytes = 0; 323 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n", 324 toep->tp_wr_avail, tail, so->so_snd.sb_cc, toep->tp_m_last); 325 326 if (last && toep->tp_m_last == last && so->so_snd.sb_sndptroff != 0) { 327 KASSERT(tail, ("sbdrop error")); 328 last = tail = tail->m_next; 329 } 330 331 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { 332 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail); 333 SOCKBUF_UNLOCK(&so->so_snd); 334 return (0); 335 } 336 337 toep->tp_m_last = NULL; 338 while (toep->tp_wr_avail && (tail != NULL)) { 339 count = bytes = 0; 340 segp = segs; 341 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { 342 SOCKBUF_UNLOCK(&so->so_snd); 343 return (0); 344 } 345 /* 346 * If the data in tail fits as in-line, then 347 * make an immediate data wr. 
348 */ 349 if (tail->m_len <= IMM_LEN) { 350 count = 1; 351 bytes = tail->m_len; 352 last = tail; 353 tail = tail->m_next; 354 m_set_sgl(m0, NULL); 355 m_set_sgllen(m0, 0); 356 make_tx_data_wr(so, m0, bytes, tail); 357 m_append(m0, bytes, mtod(last, caddr_t)); 358 KASSERT(!m0->m_next, ("bad append")); 359 } else { 360 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) 361 && (tail != NULL) && (count < TX_MAX_SEGS-1)) { 362 bytes += tail->m_len; 363 last = tail; 364 count++; 365 /* 366 * technically an abuse to be using this for a VA 367 * but less gross than defining my own structure 368 * or calling pmap_kextract from here :-| 369 */ 370 segp->ds_addr = (bus_addr_t)tail->m_data; 371 segp->ds_len = tail->m_len; 372 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n", 373 count, mbuf_wrs[count], tail->m_data, tail->m_len); 374 segp++; 375 tail = tail->m_next; 376 } 377 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n", 378 toep->tp_wr_avail, count, mbuf_wrs[count], tail); 379 380 m_set_sgl(m0, segs); 381 m_set_sgllen(m0, count); 382 make_tx_data_wr(so, m0, bytes, tail); 383 } 384 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep)); 385 386 if (tail) { 387 so->so_snd.sb_sndptr = tail; 388 toep->tp_m_last = NULL; 389 } else 390 toep->tp_m_last = so->so_snd.sb_sndptr = last; 391 392 393 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last); 394 395 so->so_snd.sb_sndptroff += bytes; 396 total_bytes += bytes; 397 toep->tp_write_seq += bytes; 398 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d", 399 toep->tp_wr_avail, count, mbuf_wrs[count], tail, so->so_snd.sb_sndptr, so->so_snd.sb_sndptroff); 400 if (tail) 401 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x", 402 total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una); 403 else 404 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p snd_una=0x%08x", 405 total_bytes, toep->tp_m_last, tp->snd_una); 406 407 408 i = 0; 409 while (i < count && 
m_get_sgllen(m0)) { 410 if ((count - i) >= 3) { 411 CTR6(KTR_TOM, 412 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d pa=0x%zx len=%d", 413 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len, 414 segs[i + 2].ds_addr, segs[i + 2].ds_len); 415 i += 3; 416 } else if ((count - i) == 2) { 417 CTR4(KTR_TOM, 418 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d", 419 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len); 420 i += 2; 421 } else { 422 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d", 423 segs[i].ds_addr, segs[i].ds_len); 424 i++; 425 } 426 427 } 428 429 /* 430 * remember credits used 431 */ 432 m0->m_pkthdr.csum_data = mbuf_wrs[count]; 433 m0->m_pkthdr.len = bytes; 434 toep->tp_wr_avail -= mbuf_wrs[count]; 435 toep->tp_wr_unacked += mbuf_wrs[count]; 436 437 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) || 438 toep->tp_wr_unacked >= toep->tp_wr_max / 2) { 439 struct work_request_hdr *wr = cplhdr(m0); 440 441 wr->wr_hi |= htonl(F_WR_COMPL); 442 toep->tp_wr_unacked = 0; 443 } 444 KASSERT((m0->m_pkthdr.csum_data > 0) && 445 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d", 446 m0->m_pkthdr.csum_data)); 447 m0->m_type = MT_DONTFREE; 448 enqueue_wr(toep, m0); 449 DPRINTF("sending offload tx with %d bytes in %d segments\n", 450 bytes, count); 451 l2t_send(cdev, m0, toep->tp_l2t); 452 } 453 SOCKBUF_UNLOCK(&so->so_snd); 454 return (total_bytes); 455} 456 457/* 458 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail 459 * under any circumstances. We take the easy way out and always queue the 460 * message to the write_queue. We can optimize the case where the queue is 461 * already empty though the optimization is probably not worth it. 
462 */ 463static void 464close_conn(struct socket *so) 465{ 466 struct mbuf *m; 467 struct cpl_close_con_req *req; 468 struct tom_data *d; 469 struct inpcb *inp = sotoinpcb(so); 470 struct tcpcb *tp; 471 struct toepcb *toep; 472 unsigned int tid; 473 474 475 INP_LOCK(inp); 476 tp = sototcpcb(so); 477 toep = tp->t_toe; 478 479 if (tp->t_state != TCPS_SYN_SENT) 480 t3_push_frames(so, 1); 481 482 if (toep->tp_flags & TP_FIN_SENT) { 483 INP_UNLOCK(inp); 484 return; 485 } 486 487 tid = toep->tp_tid; 488 489 d = TOM_DATA(toep->tp_toedev); 490 491 m = m_gethdr_nofail(sizeof(*req)); 492 493 toep->tp_flags |= TP_FIN_SENT; 494 req = mtod(m, struct cpl_close_con_req *); 495 496 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); 497 req->wr.wr_lo = htonl(V_WR_TID(tid)); 498 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 499 req->rsvd = htonl(toep->tp_write_seq); 500 INP_UNLOCK(inp); 501 /* 502 * XXX - need to defer shutdown while there is still data in the queue 503 * 504 */ 505 cxgb_ofld_send(d->cdev, m); 506 507} 508 509/* 510 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant 511 * and send it along. 512 */ 513static void 514abort_arp_failure(struct t3cdev *cdev, struct mbuf *m) 515{ 516 struct cpl_abort_req *req = cplhdr(m); 517 518 req->cmd = CPL_ABORT_NO_RST; 519 cxgb_ofld_send(cdev, m); 520} 521 522/* 523 * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are 524 * permitted to return without sending the message in case we cannot allocate 525 * an sk_buff. Returns the number of credits sent. 
526 */ 527uint32_t 528t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail) 529{ 530 struct mbuf *m; 531 struct cpl_rx_data_ack *req; 532 struct toepcb *toep = tp->t_toe; 533 struct toedev *tdev = toep->tp_toedev; 534 535 m = m_gethdr_nofail(sizeof(*req)); 536 537 DPRINTF("returning %u credits to HW\n", credits); 538 539 req = mtod(m, struct cpl_rx_data_ack *); 540 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 541 req->wr.wr_lo = 0; 542 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 543 req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); 544 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep)); 545 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 546 return (credits); 547} 548 549/* 550 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled. 551 * This is only used in DDP mode, so we take the opportunity to also set the 552 * DACK mode and flush any Rx credits. 553 */ 554void 555t3_send_rx_modulate(struct toepcb *toep) 556{ 557 struct mbuf *m; 558 struct cpl_rx_data_ack *req; 559 560 m = m_gethdr_nofail(sizeof(*req)); 561 562 req = mtod(m, struct cpl_rx_data_ack *); 563 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 564 req->wr.wr_lo = 0; 565 m->m_pkthdr.len = m->m_len = sizeof(*req); 566 567 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 568 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 569 V_RX_DACK_MODE(1) | 570 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup)); 571 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 572 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 573 toep->tp_rcv_wup = toep->tp_copied_seq; 574} 575 576/* 577 * Handle receipt of an urgent pointer. 
578 */ 579static void 580handle_urg_ptr(struct socket *so, uint32_t urg_seq) 581{ 582#ifdef URGENT_DATA_SUPPORTED 583 struct tcpcb *tp = sototcpcb(so); 584 585 urg_seq--; /* initially points past the urgent data, per BSD */ 586 587 if (tp->urg_data && !after(urg_seq, tp->urg_seq)) 588 return; /* duplicate pointer */ 589 sk_send_sigurg(sk); 590 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 591 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) { 592 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 593 594 tp->copied_seq++; 595 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len) 596 tom_eat_skb(sk, skb, 0); 597 } 598 tp->urg_data = TCP_URG_NOTYET; 599 tp->urg_seq = urg_seq; 600#endif 601} 602 603/* 604 * Returns true if a socket cannot accept new Rx data. 605 */ 606static inline int 607so_no_receive(const struct socket *so) 608{ 609 return (so->so_state & (SS_ISDISCONNECTED|SS_ISDISCONNECTING)); 610} 611 612/* 613 * Process an urgent data notification. 614 */ 615static void 616rx_urg_notify(struct toepcb *toep, struct mbuf *m) 617{ 618 struct cpl_rx_urg_notify *hdr = cplhdr(m); 619 struct socket *so = toeptoso(toep); 620 621 VALIDATE_SOCK(so); 622 623 if (!so_no_receive(so)) 624 handle_urg_ptr(so, ntohl(hdr->seq)); 625 626 m_freem(m); 627} 628 629/* 630 * Handler for RX_URG_NOTIFY CPL messages. 631 */ 632static int 633do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx) 634{ 635 struct toepcb *toep = (struct toepcb *)ctx; 636 637 rx_urg_notify(toep, m); 638 return (0); 639} 640 641/* 642 * Set of states for which we should return RX credits. 643 */ 644#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 645 646/* 647 * Called after some received data has been read. It returns RX credits 648 * to the HW for the amount of data processed. 
649 */ 650void 651t3_cleanup_rbuf(struct tcpcb *tp, int copied) 652{ 653 struct toepcb *toep = tp->t_toe; 654 struct socket *so; 655 struct toedev *dev; 656 int dack_mode, must_send, read; 657 u32 thres, credits, dack = 0; 658 659 so = tp->t_inpcb->inp_socket; 660 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 661 (tp->t_state == TCPS_FIN_WAIT_2))) { 662 if (copied) { 663 SOCKBUF_LOCK(&so->so_rcv); 664 toep->tp_copied_seq += copied; 665 SOCKBUF_UNLOCK(&so->so_rcv); 666 } 667 668 return; 669 } 670 671 INP_LOCK_ASSERT(tp->t_inpcb); 672 SOCKBUF_LOCK(&so->so_rcv); 673 if (copied) 674 toep->tp_copied_seq += copied; 675 else { 676 read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; 677 toep->tp_copied_seq += read; 678 } 679 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 680 toep->tp_enqueued_bytes = so->so_rcv.sb_cc; 681 SOCKBUF_UNLOCK(&so->so_rcv); 682 683 if (credits > so->so_rcv.sb_mbmax) { 684 printf("copied_seq=%u rcv_wup=%u credits=%u\n", 685 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 686 credits = so->so_rcv.sb_mbmax; 687 } 688 689 690 /* 691 * XXX this won't accurately reflect credit return - we need 692 * to look at the difference between the amount that has been 693 * put in the recv sockbuf and what is there now 694 */ 695 696 if (__predict_false(!credits)) 697 return; 698 699 dev = toep->tp_toedev; 700 thres = TOM_TUNABLE(dev, rx_credit_thres); 701 702 if (__predict_false(thres == 0)) 703 return; 704 705 if (toep->tp_ulp_mode) 706 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 707 else { 708 dack_mode = TOM_TUNABLE(dev, delack); 709 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 710 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 711 712 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 713 dack = F_RX_DACK_CHANGE | 714 V_RX_DACK_MODE(dack_mode); 715 } 716 } 717 718 /* 719 * For coalescing to work effectively ensure the receive window has 720 * at least 16KB left. 
721 */ 722 must_send = credits + 16384 >= tp->rcv_wnd; 723 724 if (must_send || credits >= thres) 725 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 726} 727 728static int 729cxgb_toe_disconnect(struct tcpcb *tp) 730{ 731 struct socket *so; 732 733 DPRINTF("cxgb_toe_disconnect\n"); 734 735 so = tp->t_inpcb->inp_socket; 736 close_conn(so); 737 return (0); 738} 739 740static int 741cxgb_toe_reset(struct tcpcb *tp) 742{ 743 struct toepcb *toep = tp->t_toe; 744 745 746 t3_send_reset(toep); 747 748 /* 749 * unhook from socket 750 */ 751 tp->t_flags &= ~TF_TOE; 752 toep->tp_tp = NULL; 753 tp->t_toe = NULL; 754 return (0); 755} 756 757static int 758cxgb_toe_send(struct tcpcb *tp) 759{ 760 struct socket *so; 761 762 DPRINTF("cxgb_toe_send\n"); 763 dump_toepcb(tp->t_toe); 764 765 so = tp->t_inpcb->inp_socket; 766 t3_push_frames(so, 1); 767 return (0); 768} 769 770static int 771cxgb_toe_rcvd(struct tcpcb *tp) 772{ 773 INP_LOCK_ASSERT(tp->t_inpcb); 774 t3_cleanup_rbuf(tp, 0); 775 776 return (0); 777} 778 779static void 780cxgb_toe_detach(struct tcpcb *tp) 781{ 782 struct toepcb *toep; 783 /* 784 * XXX how do we handle teardown in the SYN_SENT state? 
785 * 786 */ 787 INP_INFO_WLOCK(&tcbinfo); 788 toep = tp->t_toe; 789 toep->tp_tp = NULL; 790 791 /* 792 * unhook from socket 793 */ 794 tp->t_flags &= ~TF_TOE; 795 tp->t_toe = NULL; 796 INP_INFO_WUNLOCK(&tcbinfo); 797} 798 799 800static struct toe_usrreqs cxgb_toe_usrreqs = { 801 .tu_disconnect = cxgb_toe_disconnect, 802 .tu_reset = cxgb_toe_reset, 803 .tu_send = cxgb_toe_send, 804 .tu_rcvd = cxgb_toe_rcvd, 805 .tu_detach = cxgb_toe_detach, 806 .tu_detach = cxgb_toe_detach, 807 .tu_syncache_event = handle_syncache_event, 808}; 809 810 811static void 812__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 813 uint64_t mask, uint64_t val, int no_reply) 814{ 815 struct cpl_set_tcb_field *req; 816 817 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 818 toep->tp_tid, word, mask, val); 819 820 req = mtod(m, struct cpl_set_tcb_field *); 821 m->m_pkthdr.len = m->m_len = sizeof(*req); 822 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 823 req->wr.wr_lo = 0; 824 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 825 req->reply = V_NO_REPLY(no_reply); 826 req->cpu_idx = 0; 827 req->word = htons(word); 828 req->mask = htobe64(mask); 829 req->val = htobe64(val); 830 831 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 832 send_or_defer(toep, m, 0); 833} 834 835static void 836t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val) 837{ 838 struct mbuf *m; 839 struct tcpcb *tp = sototcpcb(so); 840 struct toepcb *toep = tp->t_toe; 841 842 if (toep == NULL) 843 return; 844 845 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 846 printf("not seting field\n"); 847 return; 848 } 849 850 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 851 852 __set_tcb_field(toep, m, word, mask, val, 1); 853} 854 855/* 856 * Set one of the t_flags bits in the TCB. 
857 */ 858static void 859set_tcb_tflag(struct socket *so, unsigned int bit_pos, int val) 860{ 861 t3_set_tcb_field(so, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); 862} 863 864/* 865 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. 866 */ 867static void 868t3_set_nagle(struct socket *so) 869{ 870 struct tcpcb *tp = sototcpcb(so); 871 872 set_tcb_tflag(so, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); 873} 874 875/* 876 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. 877 */ 878void 879t3_set_keepalive(struct socket *so, int on_off) 880{ 881 set_tcb_tflag(so, S_TF_KEEPALIVE, on_off); 882} 883 884void 885t3_set_rcv_coalesce_enable(struct socket *so, int on_off) 886{ 887 set_tcb_tflag(so, S_TF_RCV_COALESCE_ENABLE, on_off); 888} 889 890/* 891 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. 892 */ 893static void 894t3_set_tos(struct socket *so) 895{ 896 t3_set_tcb_field(so, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), 897 V_TCB_TOS(SO_TOS(so))); 898} 899 900 901/* 902 * In DDP mode, TP fails to schedule a timer to push RX data to the host when 903 * DDP is disabled (data is delivered to freelist). [Note that, the peer should 904 * set the PSH bit in the last segment, which would trigger delivery.] 905 * We work around the issue by setting a DDP buffer in a partial placed state, 906 * which guarantees that TP will schedule a timer. 
907 */ 908#define TP_DDP_TIMER_WORKAROUND_MASK\ 909 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\ 910 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\ 911 V_TCB_RX_DDP_BUF0_LEN(3)) << 32)) 912#define TP_DDP_TIMER_WORKAROUND_VAL\ 913 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\ 914 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\ 915 32)) 916 917static void 918t3_enable_ddp(struct socket *so, int on) 919{ 920 if (on) { 921 922 t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), 923 V_TF_DDP_OFF(0)); 924 } else 925 t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, 926 V_TF_DDP_OFF(1) | 927 TP_DDP_TIMER_WORKAROUND_MASK, 928 V_TF_DDP_OFF(1) | 929 TP_DDP_TIMER_WORKAROUND_VAL); 930 931} 932 933void 934t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag_color) 935{ 936 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_TAG + buf_idx, 937 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), 938 tag_color); 939} 940 941void 942t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset, 943 unsigned int len) 944{ 945 if (buf_idx == 0) 946 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_OFFSET, 947 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 948 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 949 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | 950 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 951 else 952 t3_set_tcb_field(so, W_TCB_RX_DDP_BUF1_OFFSET, 953 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 954 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), 955 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | 956 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32)); 957} 958 959static int 960t3_set_cong_control(struct socket *so, const char *name) 961{ 962#ifdef CONGESTION_CONTROL_SUPPORTED 963 int cong_algo; 964 965 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++) 966 if (!strcmp(name, t3_cong_ops[cong_algo].name)) 967 break; 968 969 if (cong_algo >= ARRAY_SIZE(t3_cong_ops)) 970 return -EINVAL; 971#endif 972 return 0; 
973} 974 975int 976t3_get_tcb(struct socket *so) 977{ 978 struct cpl_get_tcb *req; 979 struct tcpcb *tp = sototcpcb(so); 980 struct toepcb *toep = tp->t_toe; 981 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 982 983 if (!m) 984 return (ENOMEM); 985 986 INP_LOCK_ASSERT(tp->t_inpcb); 987 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 988 req = mtod(m, struct cpl_get_tcb *); 989 m->m_pkthdr.len = m->m_len = sizeof(*req); 990 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 991 req->wr.wr_lo = 0; 992 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); 993 req->cpuno = htons(toep->tp_qset); 994 req->rsvd = 0; 995 if (sototcpcb(so)->t_state == TCPS_SYN_SENT) 996 mbufq_tail(&toep->out_of_order_queue, m); // defer 997 else 998 cxgb_ofld_send(T3C_DEV(so), m); 999 return 0; 1000} 1001 1002static inline void 1003so_insert_tid(struct tom_data *d, struct socket *so, unsigned int tid) 1004{ 1005 struct toepcb *toep = sototoep(so); 1006 toepcb_hold(toep); 1007 1008 cxgb_insert_tid(d->cdev, d->client, toep, tid); 1009} 1010 1011/** 1012 * find_best_mtu - find the entry in the MTU table closest to an MTU 1013 * @d: TOM state 1014 * @mtu: the target MTU 1015 * 1016 * Returns the index of the value in the MTU table that is closest to but 1017 * does not exceed the target MTU. 
1018 */ 1019static unsigned int 1020find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1021{ 1022 int i = 0; 1023 1024 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1025 ++i; 1026 return (i); 1027} 1028 1029static unsigned int 1030select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1031{ 1032 unsigned int idx; 1033 1034#ifdef notyet 1035 struct rtentry *dst = sotoinpcb(so)->inp_route.ro_rt; 1036#endif 1037 if (tp) { 1038 tp->t_maxseg = pmtu - 40; 1039 if (tp->t_maxseg < td->mtus[0] - 40) 1040 tp->t_maxseg = td->mtus[0] - 40; 1041 idx = find_best_mtu(td, tp->t_maxseg + 40); 1042 1043 tp->t_maxseg = td->mtus[idx] - 40; 1044 } else 1045 idx = find_best_mtu(td, pmtu); 1046 1047 return (idx); 1048} 1049 1050static inline void 1051free_atid(struct t3cdev *cdev, unsigned int tid) 1052{ 1053 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1054 1055 if (toep) 1056 toepcb_release(toep); 1057} 1058 1059/* 1060 * Release resources held by an offload connection (TID, L2T entry, etc.) 
1061 */ 1062static void 1063t3_release_offload_resources(struct toepcb *toep) 1064{ 1065 struct tcpcb *tp = toep->tp_tp; 1066 struct toedev *tdev = toep->tp_toedev; 1067 struct t3cdev *cdev; 1068 unsigned int tid = toep->tp_tid; 1069 1070 if (!tdev) 1071 return; 1072 1073 cdev = TOEP_T3C_DEV(toep); 1074 if (!cdev) 1075 return; 1076 1077 toep->tp_qset = 0; 1078 t3_release_ddp_resources(toep); 1079 1080#ifdef CTRL_SKB_CACHE 1081 kfree_skb(CTRL_SKB_CACHE(tp)); 1082 CTRL_SKB_CACHE(tp) = NULL; 1083#endif 1084 1085 if (toep->tp_wr_avail != toep->tp_wr_max) { 1086 purge_wr_queue(toep); 1087 reset_wr_list(toep); 1088 } 1089 1090 if (toep->tp_l2t) { 1091 l2t_release(L2DATA(cdev), toep->tp_l2t); 1092 toep->tp_l2t = NULL; 1093 } 1094 toep->tp_tp = NULL; 1095 if (tp) { 1096 INP_LOCK_ASSERT(tp->t_inpcb); 1097 tp->t_toe = NULL; 1098 tp->t_flags &= ~TF_TOE; 1099 } 1100 1101 if (toep->tp_state == TCPS_SYN_SENT) { 1102 free_atid(cdev, tid); 1103#ifdef notyet 1104 __skb_queue_purge(&tp->out_of_order_queue); 1105#endif 1106 } else { // we have TID 1107 cxgb_remove_tid(cdev, toep, tid); 1108 toepcb_release(toep); 1109 } 1110#if 0 1111 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state); 1112#endif 1113} 1114 1115static void 1116install_offload_ops(struct socket *so) 1117{ 1118 struct tcpcb *tp = sototcpcb(so); 1119 1120 KASSERT(tp->t_toe != NULL, ("toepcb not set")); 1121 1122 t3_install_socket_ops(so); 1123 tp->t_flags |= TF_TOE; 1124 tp->t_tu = &cxgb_toe_usrreqs; 1125} 1126 1127/* 1128 * Determine the receive window scaling factor given a target max 1129 * receive window. 1130 */ 1131static __inline int 1132select_rcv_wscale(int space) 1133{ 1134 int wscale = 0; 1135 1136 if (space > MAX_RCV_WND) 1137 space = MAX_RCV_WND; 1138 1139 if (tcp_do_rfc1323) 1140 for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ; 1141 1142 return (wscale); 1143} 1144 1145/* 1146 * Determine the receive window size for a socket. 
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
	struct tom_data *d = TOM_DATA(dev);
	unsigned int wnd;
	unsigned int max_rcv_wnd;

	if (tcp_do_autorcvbuf)
		wnd = tcp_autorcvbuf_max;
	else
		wnd = so->so_rcv.sb_hiwat;



	/* XXX
	 * For receive coalescing to work effectively we need a receive window
	 * that can accomodate a coalesced segment.
	 */
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	/* PR 5138 */
	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
	    (uint32_t)d->rx_page_size * 23 :
	    MAX_RCV_WND);

	return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields.  This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
	struct tcpcb *tp = sototcpcb(so);
	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);

	SOCK_LOCK_ASSERT(so);

	printf("initializing offload socket\n");
	/*
	 * We either need to fix push frames to work with sbcompress
	 * or we need to add this
	 */
	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	/* Cross-link the tcpcb and toepcb and record connection identity. */
	tp->t_toe = toep;
	toep->tp_tp = tp;
	toep->tp_toedev = dev;

	toep->tp_tid = tid;
	toep->tp_l2t = e;
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_delack_mode = 0;

	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
	/*
	 * XXX broken
	 *
	 */
	tp->rcv_wnd = select_rcv_wnd(dev, so);

	/*
	 * DDP is used only when enabled by the tunable, not disabled on the
	 * socket, and the receive window is large enough to be worthwhile.
	 */
	toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
	toep->tp_qset_idx = 0;

	reset_wr_list(toep);
	DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
	struct tcpcb *tp = sototcpcb(so);
	int wscale = select_rcv_wscale(tp->rcv_wnd);

	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
	    V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
	struct tcpcb *tp = sototcpcb(so);
	unsigned int val;

	/* RCV_BUFSIZ is expressed in 1KB units, capped at M_RCV_BUFSIZ. */
	val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
	    V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val);
	return (val);
}

/*
 * Calculate the option 2 value, carrying the congestion-control flavor when
 * one has been configured via the cong_alg tunable.
 */
static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
	int flv_valid;

	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

	return (V_FLAVORS_VALID(flv_valid) |
	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
/*
 * Debug helper: sum the per-mbuf WR credit counts (stashed in csum_data)
 * across the outstanding work-request queue.
 */
static int
count_pending_wrs(const struct toepcb *toep)
{
	const struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)
		n += m->m_pkthdr.csum_data;
	return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

/*
 * Build a CPL_ACT_OPEN_REQ in mbuf 'm' for active-open TID 'atid' using the
 * connection's 4-tuple from the inpcb and the supplied L2T entry.
 */
static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
	struct cpl_act_open_req *req;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = TOE_DEV(so);

	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

	req = mtod(m, struct cpl_act_open_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	/* Ports/addresses in the inpcb are already in network byte order. */
	req->local_port = inp->inp_lport;
	req->peer_port = inp->inp_fport;
	memcpy(&req->local_ip, &inp->inp_laddr, 4);
	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
	    V_TX_CHANNEL(e->smt_idx));
	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
	req->params = 0;
	req->opt2 = htonl(calc_opt2(so, tdev));
}


/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return (ECONNREFUSED);
	case CPL_ERR_ARP_MISS:
		return (EHOSTUNREACH);
	case CPL_ERR_CONN_TIMEDOUT:
		return (ETIMEDOUT);
	case CPL_ERR_TCAM_FULL:
		return (ENOMEM);
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return (EADDRINUSE);
	default:
		return (EIO);
	}
}

/*
 * Tear down a failed active open: release offload resources and drop the
 * connection with 'errno'.  Caller must hold the inpcb lock when a tcpcb
 * is still attached (asserted below).
 */
static void
fail_act_open(struct toepcb *toep, int errno)
{
	struct tcpcb *tp = toep->tp_tp;

	t3_release_offload_resources(toep);
	if (tp) {
		INP_LOCK_ASSERT(tp->t_inpcb);
		tcp_drop(tp, errno);
	}

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	struct inpcb *inp;

	/* Lock order: tcbinfo before the individual inpcb. */
	INP_INFO_WLOCK(&tcbinfo);
	if (toep->tp_tp == NULL)
		goto done;

	inp = toep->tp_tp->t_inpcb;
	INP_LOCK(inp);

/*
 * Don't handle connection retry for now
 */
#ifdef notyet
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
			       jiffies + HZ / 2);
	} else
#endif
		fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
	INP_UNLOCK(inp);
done:
	INP_INFO_WUNLOCK(&tcbinfo);

	m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int
act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	       status != CPL_ERR_ARP_MISS;
}
1393 1394/* 1395 * Process an ACT_OPEN_RPL CPL message. 1396 */ 1397static int 1398do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 1399{ 1400 struct toepcb *toep = (struct toepcb *)ctx; 1401 struct cpl_act_open_rpl *rpl = cplhdr(m); 1402 1403 if (cdev->type != T3A && act_open_has_tid(rpl->status)) 1404 cxgb_queue_tid_release(cdev, GET_TID(rpl)); 1405 1406 active_open_failed(toep, m); 1407 return (0); 1408} 1409 1410/* 1411 * Handle an ARP failure for an active open. XXX purge ofo queue 1412 * 1413 * XXX badly broken for crossed SYNs as the ATID is no longer valid. 1414 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should 1415 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't 1416 * free the atid. Hmm. 1417 */ 1418#ifdef notyet 1419static void 1420act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) 1421{ 1422 struct toepcb *toep = m_get_toep(m); 1423 struct tcpcb *tp = toep->tp_tp; 1424 struct inpcb *inp = tp->t_inpcb; 1425 struct socket *so = toeptoso(toep); 1426 1427 INP_LOCK(inp); 1428 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { 1429 fail_act_open(so, EHOSTUNREACH); 1430 printf("freeing %p\n", m); 1431 1432 m_free(m); 1433 } 1434 INP_UNLOCK(inp); 1435} 1436#endif 1437/* 1438 * Send an active open request. 
1439 */ 1440int 1441t3_connect(struct toedev *tdev, struct socket *so, 1442 struct rtentry *rt, struct sockaddr *nam) 1443{ 1444 struct mbuf *m; 1445 struct l2t_entry *e; 1446 struct tom_data *d = TOM_DATA(tdev); 1447 struct inpcb *inp = sotoinpcb(so); 1448 struct tcpcb *tp = intotcpcb(inp); 1449 struct toepcb *toep; /* allocated by init_offload_socket */ 1450 1451 int atid; 1452 1453 toep = toepcb_alloc(); 1454 if (toep == NULL) 1455 goto out_err; 1456 1457 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1458 goto out_err; 1459 1460 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1461 if (!e) 1462 goto free_tid; 1463 1464 INP_LOCK_ASSERT(inp); 1465 m = m_gethdr(MT_DATA, M_WAITOK); 1466 1467#if 0 1468 m->m_toe.mt_toepcb = tp->t_toe; 1469 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1470#endif 1471 SOCK_LOCK(so); 1472 1473 init_offload_socket(so, tdev, atid, e, rt, toep); 1474 1475 install_offload_ops(so); 1476 1477 mk_act_open_req(so, m, atid, e); 1478 SOCK_UNLOCK(so); 1479 1480 soisconnecting(so); 1481 toep = tp->t_toe; 1482 m_set_toep(m, tp->t_toe); 1483 1484 toep->tp_state = TCPS_SYN_SENT; 1485 l2t_send(d->cdev, (struct mbuf *)m, e); 1486 1487 if (toep->tp_ulp_mode) 1488 t3_enable_ddp(so, 0); 1489 return (0); 1490 1491free_tid: 1492 printf("failing connect - free atid\n"); 1493 1494 free_atid(d->cdev, atid); 1495out_err: 1496 printf("return ENOMEM\n"); 1497 return (ENOMEM); 1498} 1499 1500/* 1501 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1502 * not send multiple ABORT_REQs for the same connection and also that we do 1503 * not try to send a message after the connection has closed. Returns 1 if 1504 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
 */
static void
t3_send_reset(struct toepcb *toep)
{

	struct cpl_abort_req *req;
	unsigned int tid = toep->tp_tid;
	int mode = CPL_ABORT_SEND_RST;
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct socket *so = NULL;
	struct mbuf *m;

	if (tp) {
		INP_LOCK_ASSERT(tp->t_inpcb);
		so = toeptoso(toep);
	}

	/* Never send a second abort, or anything after shutdown. */
	if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
		tdev == NULL))
		return;
	toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);

	/* Purge the send queue so we don't send anything after an abort. */
	if (so)
		sbflush(&so->so_snd);
	if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
		mode |= CPL_ABORT_POST_CLOSE_REQ;

	m = m_gethdr_nofail(sizeof(*req));
	m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
	set_arp_failure_handler(m, abort_arp_failure);

	req = mtod(m, struct cpl_abort_req *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
	req->wr.wr_lo = htonl(V_WR_TID(tid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
	req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
	req->cmd = mode;
	if (tp && (tp->t_state == TCPS_SYN_SENT))
		mbufq_tail(&toep->out_of_order_queue, m);	// defer
	else
		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
}

/*
 * IPPROTO_IP socket-option handler for offloaded connections; only IP_TOS
 * is supported and is pushed to the hardware via t3_set_tos().
 */
static int
t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct inpcb *inp;
	int error, optval;

	if (sopt->sopt_name == IP_OPTIONS)
		return (ENOPROTOOPT);

	if (sopt->sopt_name != IP_TOS)
		return (EOPNOTSUPP);

	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);

	if (error)
		return (error);

	if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread))
		return (EPERM);

	inp = sotoinpcb(so);
	inp->inp_ip_tos = optval;

	t3_set_tos(so);

	return (0);
}

/*
 * IPPROTO_TCP socket-option handler for offloaded connections; supports
 * TCP_CONGESTION (hardware congestion-control flavor) and TCP_NODELAY.
 */
static int
t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int err = 0;
	size_t copied;

	if (sopt->sopt_name != TCP_CONGESTION &&
	    sopt->sopt_name != TCP_NODELAY)
		return (EOPNOTSUPP);

	if (sopt->sopt_name == TCP_CONGESTION) {
		char name[TCP_CA_NAME_MAX];
		int optlen = sopt->sopt_valsize;
		struct tcpcb *tp;

		if (optlen < 1)
			return (EINVAL);

		err = copyinstr(sopt->sopt_val, name,
		    min(TCP_CA_NAME_MAX - 1, optlen), &copied);
		if (err)
			return (err);
		if (copied < 1)
			return (EINVAL);

		tp = sototcpcb(so);
		/*
		 * XXX I need to revisit this
		 */
		if ((err = t3_set_cong_control(so, name)) == 0) {
#ifdef CONGESTION_CONTROL_SUPPORTED
			tp->t_cong_control = strdup(name, M_CXGB);
#endif
		} else
			return (err);
	} else {
		int optval, oldval;
		struct inpcb *inp;
		struct tcpcb *tp;

		err = sooptcopyin(sopt, &optval, sizeof optval,
		    sizeof optval);

		if (err)
			return (err);

		inp = sotoinpcb(so);
		tp = intotcpcb(inp);

		INP_LOCK(inp);

		oldval = tp->t_flags;
		if (optval)
			tp->t_flags |= TF_NODELAY;
		else
			tp->t_flags &= ~TF_NODELAY;
		INP_UNLOCK(inp);

		/* Only poke the hardware if the Nagle setting changed. */
		if (oldval != tp->t_flags)
			t3_set_nagle(so);

	}

	return (0);
}

/*
 * Socket-option entry point: try the offload-specific handlers first and
 * fall back to the stack's tcp_ctloutput() for unsupported options.
 */
static int
t3_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int err;

	if (sopt->sopt_level != IPPROTO_TCP)
		err = t3_ip_ctloutput(so, sopt);
	else
		err = t3_tcp_ctloutput(so, sopt);

	if (err != EOPNOTSUPP)
		return (err);

	return (tcp_ctloutput(so, sopt));
}

/*
 * Returns true if we need to explicitly request RST when we receive new data
 * on an RX-closed connection.
 */
static inline int
need_rst_on_excess_rx(const struct toepcb *toep)
{
	return (1);
}

/*
 * Handles Rx data that arrives in a state where the socket isn't accepting
 * new data.
 */
static void
handle_excess_rx(struct toepcb *toep, struct mbuf *m)
{

	if (need_rst_on_excess_rx(toep) && !(toep->tp_flags & TP_ABORT_SHUTDOWN))
		t3_send_reset(toep);
	m_freem(m);
}

/*
 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
 * by getting the DDP offset from the TCB.
 */
static void
tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct ddp_state *q = &toep->tp_ddp_state;
	struct ddp_buf_state *bsp;
	struct cpl_get_tcb_rpl *hdr;
	unsigned int ddp_offset;
	struct socket *so;
	struct tcpcb *tp;

	uint64_t t;
	__be64 *tcb;

	so = toeptoso(toep);
	tp = toep->tp_tp;

	INP_LOCK_ASSERT(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);

	/* Note that we only accout for CPL_GET_TCB issued by the DDP code. We
	 * really need a cookie in order to dispatch the RPLs.
	 */
	q->get_tcb_count--;

	/* It is a possible that a previous CPL already invalidated UBUF DDP
	 * and moved the cur_buf idx and hence no further processing of this
	 * skb is required. However, the app might be sleeping on
	 * !q->get_tcb_count and we need to wake it up.
	 */
	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
		struct socket *so = toeptoso(toep);

		m_freem(m);
		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
			sorwakeup_locked(so);
		else
			SOCKBUF_UNLOCK(&so->so_rcv);
		return;
	}

	bsp = &q->buf_state[q->cur_buf];
	hdr = cplhdr(m);
	/* The raw TCB words follow the CPL header in the reply. */
	tcb = (__be64 *)(hdr + 1);
	if (q->cur_buf == 0) {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
	} else {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
	}
	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
	/* The amount newly DMAed is the offset delta since the last report. */
	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset = ddp_offset;
	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;

	CTR5(KTR_TOM,
	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
	KASSERT(ddp_offset >= m->m_cur_offset, ("ddp_offset=%u less than cur_offset=%u",
		ddp_offset, m->m_cur_offset));

#ifdef T3_TRACE
	T3_TRACE3(TIDTB(so),
		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u ddp_offset %u",
		  tp->rcv_nxt, q->cur_buf, ddp_offset);
#endif

#if 0
{
	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;

	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;

	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
	rcv_nxt = t >> S_TCB_RCV_NXT;
	rcv_nxt &= M_TCB_RCV_NXT;

	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;

	T3_TRACE2(TIDTB(sk),
		  "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
		  ddp_flags, rcv_nxt - rx_hdr_offset);
	T3_TRACE4(TB(q),
		  "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
		  tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
	T3_TRACE3(TB(q),
		  "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
		  rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
	T3_TRACE2(TB(q),
		  "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
		  q->buf_state[0].flags, q->buf_state[1].flags);

}
#endif
	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
		handle_excess_rx(toep, m);
		return;
	}

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
	}
#endif
	if (bsp->flags & DDP_BF_NOCOPY) {
#ifdef T3_TRACE
		T3_TRACE0(TB(q),
			  "tcb_rpl_as_ddp_complete: CANCEL UBUF");

		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
			printk("!cancel_ubuf");
			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
		}
#endif
		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
		q->cur_buf ^= 1;
	} else if (bsp->flags & DDP_BF_NOFLIP) {

		m->m_ddp_flags = 1;    /* always a kernel buffer */

		/* now HW buffer carries a user buffer */
		bsp->flags &= ~DDP_BF_NOFLIP;
		bsp->flags |= DDP_BF_NOCOPY;

		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
		 * any new data in which case we're done. If in addition the
		 * offset is 0, then there wasn't a completion for the kbuf
		 * and we need to decrement the posted count.
		 */
		if (m->m_pkthdr.len == 0) {
			if (ddp_offset == 0) {
				q->kbuf_posted--;
				bsp->flags |= DDP_BF_NODATA;
			}
			SOCKBUF_UNLOCK(&so->so_rcv);

			m_free(m);
			return;
		}
	} else {
		SOCKBUF_UNLOCK(&so->so_rcv);
		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
		 * but it got here way late and nobody cares anymore.
		 */
		m_free(m);
		return;
	}

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
#ifdef T3_TRACE
	T3_TRACE3(TB(q),
		  "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
#endif
	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
		  m->m_seq, q->cur_buf, m->m_pkthdr.len);
	if (m->m_pkthdr.len == 0)
		q->user_ddp_pending = 0;
	else
		SBAPPEND(&so->so_rcv, m);
	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
 * in that case they are similar to DDP completions.
1865 */ 1866static int 1867do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 1868{ 1869 struct toepcb *toep = (struct toepcb *)ctx; 1870 1871 /* OK if socket doesn't exist */ 1872 if (toep == NULL) { 1873 printf("null toep in do_get_tcb_rpl\n"); 1874 return (CPL_RET_BUF_DONE); 1875 } 1876 1877 INP_LOCK(toep->tp_tp->t_inpcb); 1878 tcb_rpl_as_ddp_complete(toep, m); 1879 INP_UNLOCK(toep->tp_tp->t_inpcb); 1880 1881 return (0); 1882} 1883 1884static void 1885handle_ddp_data(struct toepcb *toep, struct mbuf *m) 1886{ 1887 struct tcpcb *tp = toep->tp_tp; 1888 struct socket *so = toeptoso(toep); 1889 struct ddp_state *q; 1890 struct ddp_buf_state *bsp; 1891 struct cpl_rx_data *hdr = cplhdr(m); 1892 unsigned int rcv_nxt = ntohl(hdr->seq); 1893 1894 if (tp->rcv_nxt == rcv_nxt) 1895 return; 1896 1897 INP_LOCK_ASSERT(tp->t_inpcb); 1898 SOCKBUF_LOCK(&so->so_rcv); 1899 q = &toep->tp_ddp_state; 1900 bsp = &q->buf_state[q->cur_buf]; 1901 KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x", 1902 rcv_nxt, tp->rcv_nxt)); 1903 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; 1904 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 1905 CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d", 1906 rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len); 1907 1908#ifdef T3_TRACE 1909 if ((int)m->m_pkthdr.len < 0) { 1910 t3_ddp_error(so, "handle_ddp_data: neg len"); 1911 } 1912#endif 1913 1914 m->m_ddp_gl = (unsigned char *)bsp->gl; 1915 m->m_flags |= M_DDP; 1916 m->m_cur_offset = bsp->cur_offset; 1917 m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; 1918 if (bsp->flags & DDP_BF_NOCOPY) 1919 bsp->flags &= ~DDP_BF_NOCOPY; 1920 1921 m->m_seq = tp->rcv_nxt; 1922 tp->rcv_nxt = rcv_nxt; 1923 bsp->cur_offset += m->m_pkthdr.len; 1924 if (!(bsp->flags & DDP_BF_NOFLIP)) 1925 q->cur_buf ^= 1; 1926 /* 1927 * For now, don't re-enable DDP after a connection fell out of DDP 1928 * mode. 
1929 */ 1930 q->ubuf_ddp_ready = 0; 1931 SOCKBUF_UNLOCK(&so->so_rcv); 1932} 1933 1934/* 1935 * Process new data received for a connection. 1936 */ 1937static void 1938new_rx_data(struct toepcb *toep, struct mbuf *m) 1939{ 1940 struct cpl_rx_data *hdr = cplhdr(m); 1941 struct tcpcb *tp = toep->tp_tp; 1942 struct socket *so = toeptoso(toep); 1943 int len = be16toh(hdr->len); 1944 1945 INP_LOCK(tp->t_inpcb); 1946 1947 if (__predict_false(so_no_receive(so))) { 1948 handle_excess_rx(toep, m); 1949 INP_UNLOCK(tp->t_inpcb); 1950 TRACE_EXIT; 1951 return; 1952 } 1953 1954 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) 1955 handle_ddp_data(toep, m); 1956 1957 m->m_seq = ntohl(hdr->seq); 1958 m->m_ulp_mode = 0; /* for iSCSI */ 1959 1960#if VALIDATE_SEQ 1961 if (__predict_false(m->m_seq != tp->rcv_nxt)) { 1962 log(LOG_ERR, 1963 "%s: TID %u: Bad sequence number %u, expected %u\n", 1964 TOE_DEV(toeptoso(toep))->name, toep->tp_tid, m->m_seq, 1965 tp->rcv_nxt); 1966 m_freem(m); 1967 INP_UNLOCK(tp->t_inpcb); 1968 return; 1969 } 1970#endif 1971 m_adj(m, sizeof(*hdr)); 1972 1973#ifdef URGENT_DATA_SUPPORTED 1974 /* 1975 * We don't handle urgent data yet 1976 */ 1977 if (__predict_false(hdr->urg)) 1978 handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg)); 1979 if (__predict_false(tp->urg_data == TCP_URG_NOTYET && 1980 tp->urg_seq - tp->rcv_nxt < skb->len)) 1981 tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq - 1982 tp->rcv_nxt]; 1983#endif 1984 if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) { 1985 toep->tp_delack_mode = hdr->dack_mode; 1986 toep->tp_delack_seq = tp->rcv_nxt; 1987 } 1988 CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d", 1989 m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes); 1990 1991 if (len < m->m_pkthdr.len) 1992 m->m_pkthdr.len = m->m_len = len; 1993 1994 tp->rcv_nxt += m->m_pkthdr.len; 1995 tp->t_rcvtime = ticks; 1996 toep->tp_enqueued_bytes += m->m_pkthdr.len; 1997#ifdef T3_TRACE 1998 
T3_TRACE2(TIDTB(sk), 1999 "new_rx_data: seq 0x%x len %u", 2000 m->m_seq, m->m_pkthdr.len); 2001#endif 2002 INP_UNLOCK(tp->t_inpcb); 2003 SOCKBUF_LOCK(&so->so_rcv); 2004 if (sb_notify(&so->so_rcv)) 2005 DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len); 2006 2007 SBAPPEND(&so->so_rcv, m); 2008 2009#ifdef notyet 2010 /* 2011 * We're giving too many credits to the card - but disable this check so we can keep on moving :-| 2012 * 2013 */ 2014 KASSERT(so->so_rcv.sb_cc < (so->so_rcv.sb_mbmax << 1), 2015 2016 ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d", 2017 so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax)); 2018#endif 2019 2020 2021 CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d", 2022 so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt); 2023 2024 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 2025 sorwakeup_locked(so); 2026 else 2027 SOCKBUF_UNLOCK(&so->so_rcv); 2028} 2029 2030/* 2031 * Handler for RX_DATA CPL messages. 2032 */ 2033static int 2034do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2035{ 2036 struct toepcb *toep = (struct toepcb *)ctx; 2037 2038 DPRINTF("rx_data len=%d\n", m->m_pkthdr.len); 2039 2040 new_rx_data(toep, m); 2041 2042 return (0); 2043} 2044 2045static void 2046new_rx_data_ddp(struct toepcb *toep, struct mbuf *m) 2047{ 2048 struct tcpcb *tp; 2049 struct ddp_state *q; 2050 struct ddp_buf_state *bsp; 2051 struct cpl_rx_data_ddp *hdr; 2052 unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx; 2053 struct socket *so = toeptoso(toep); 2054 int nomoredata = 0; 2055 2056 tp = sototcpcb(so); 2057 2058 INP_LOCK(tp->t_inpcb); 2059 if (__predict_false(so_no_receive(so))) { 2060 2061 handle_excess_rx(toep, m); 2062 INP_UNLOCK(tp->t_inpcb); 2063 return; 2064 } 2065 2066 q = &toep->tp_ddp_state; 2067 hdr = cplhdr(m); 2068 ddp_report = ntohl(hdr->u.ddp_report); 2069 buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1; 2070 bsp = &q->buf_state[buf_idx]; 2071 2072#ifdef T3_TRACE 2073 T3_TRACE5(TIDTB(sk), 2074 
"new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u " 2075 "hdr seq 0x%x len %u offset %u", 2076 tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq), 2077 ntohs(hdr->len), G_DDP_OFFSET(ddp_report)); 2078 T3_TRACE1(TIDTB(sk), 2079 "new_rx_data_ddp: ddp_report 0x%x", 2080 ddp_report); 2081#endif 2082 CTR4(KTR_TOM, 2083 "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u " 2084 "hdr seq 0x%x len %u", 2085 tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq), 2086 ntohs(hdr->len)); 2087 CTR3(KTR_TOM, 2088 "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d", 2089 G_DDP_OFFSET(ddp_report), ddp_report, buf_idx); 2090 2091 ddp_len = ntohs(hdr->len); 2092 rcv_nxt = ntohl(hdr->seq) + ddp_len; 2093 2094 m->m_seq = tp->rcv_nxt; 2095 tp->rcv_nxt = rcv_nxt; 2096 2097 tp->t_rcvtime = ticks; 2098 /* 2099 * Store the length in m->m_len. We are changing the meaning of 2100 * m->m_len here, we need to be very careful that nothing from now on 2101 * interprets ->len of this packet the usual way. 2102 */ 2103 m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq; 2104 INP_UNLOCK(tp->t_inpcb); 2105 CTR3(KTR_TOM, 2106 "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ", 2107 m->m_len, rcv_nxt, m->m_seq); 2108 /* 2109 * Figure out where the new data was placed in the buffer and store it 2110 * in when. Assumes the buffer offset starts at 0, consumer needs to 2111 * account for page pod's pg_offset. 
2112 */ 2113 end_offset = G_DDP_OFFSET(ddp_report) + ddp_len; 2114 m->m_cur_offset = end_offset - m->m_pkthdr.len; 2115 2116 SOCKBUF_LOCK(&so->so_rcv); 2117 m->m_ddp_gl = (unsigned char *)bsp->gl; 2118 m->m_flags |= M_DDP; 2119 bsp->cur_offset = end_offset; 2120 toep->tp_enqueued_bytes += m->m_pkthdr.len; 2121 2122 /* 2123 * Length is only meaningful for kbuf 2124 */ 2125 if (!(bsp->flags & DDP_BF_NOCOPY)) 2126 KASSERT(m->m_len <= bsp->gl->dgl_length, 2127 ("length received exceeds ddp pages: len=%d dgl_length=%d", 2128 m->m_len, bsp->gl->dgl_length)); 2129 2130 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2131 KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next)); 2132 2133 2134 /* 2135 * Bit 0 of flags stores whether the DDP buffer is completed. 2136 * Note that other parts of the code depend on this being in bit 0. 2137 */ 2138 if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) { 2139 panic("spurious ddp completion"); 2140 } else { 2141 m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE); 2142 if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP)) 2143 q->cur_buf ^= 1; /* flip buffers */ 2144 } 2145 2146 if (bsp->flags & DDP_BF_NOCOPY) { 2147 m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY); 2148 bsp->flags &= ~DDP_BF_NOCOPY; 2149 } 2150 2151 if (ddp_report & F_DDP_PSH) 2152 m->m_ddp_flags |= DDP_BF_PSH; 2153 if (nomoredata) 2154 m->m_ddp_flags |= DDP_BF_NODATA; 2155 2156 if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) { 2157 toep->tp_delack_mode = G_DDP_DACK_MODE(ddp_report); 2158 toep->tp_delack_seq = tp->rcv_nxt; 2159 } 2160 2161 SBAPPEND(&so->so_rcv, m); 2162 2163 if ((so->so_state & SS_NOFDREF) == 0) 2164 sorwakeup_locked(so); 2165 else 2166 SOCKBUF_UNLOCK(&so->so_rcv); 2167} 2168 2169#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\ 2170 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\ 2171 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\ 2172 
F_DDP_INVALID_PPOD) 2173 2174/* 2175 * Handler for RX_DATA_DDP CPL messages. 2176 */ 2177static int 2178do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2179{ 2180 struct toepcb *toep = ctx; 2181 const struct cpl_rx_data_ddp *hdr = cplhdr(m); 2182 2183 VALIDATE_SOCK(so); 2184 2185 if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) { 2186 log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n", 2187 GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status))); 2188 return (CPL_RET_BUF_DONE); 2189 } 2190#if 0 2191 skb->h.th = tcphdr_skb->h.th; 2192#endif 2193 new_rx_data_ddp(toep, m); 2194 return (0); 2195} 2196 2197static void 2198process_ddp_complete(struct toepcb *toep, struct mbuf *m) 2199{ 2200 struct tcpcb *tp = toep->tp_tp; 2201 struct socket *so = toeptoso(toep); 2202 struct ddp_state *q; 2203 struct ddp_buf_state *bsp; 2204 struct cpl_rx_ddp_complete *hdr; 2205 unsigned int ddp_report, buf_idx, when; 2206 int nomoredata = 0; 2207 2208 INP_LOCK(tp->t_inpcb); 2209 if (__predict_false(so_no_receive(so))) { 2210 struct inpcb *inp = sotoinpcb(so); 2211 2212 handle_excess_rx(toep, m); 2213 INP_UNLOCK(inp); 2214 return; 2215 } 2216 q = &toep->tp_ddp_state; 2217 hdr = cplhdr(m); 2218 ddp_report = ntohl(hdr->ddp_report); 2219 buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1; 2220 m->m_pkthdr.csum_data = tp->rcv_nxt; 2221 2222 2223 SOCKBUF_LOCK(&so->so_rcv); 2224 bsp = &q->buf_state[buf_idx]; 2225 when = bsp->cur_offset; 2226 m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when; 2227 tp->rcv_nxt += m->m_len; 2228 tp->t_rcvtime = ticks; 2229 INP_UNLOCK(tp->t_inpcb); 2230 2231 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2232#ifdef T3_TRACE 2233 T3_TRACE5(TIDTB(sk), 2234 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u " 2235 "ddp_report 0x%x offset %u, len %u", 2236 tp->rcv_nxt, bsp->cur_offset, ddp_report, 2237 G_DDP_OFFSET(ddp_report), skb->len); 2238#endif 2239 CTR5(KTR_TOM, 2240 "process_ddp_complete: tp->rcv_nxt 0x%x 
cur_offset %u " 2241 "ddp_report 0x%x offset %u, len %u", 2242 tp->rcv_nxt, bsp->cur_offset, ddp_report, 2243 G_DDP_OFFSET(ddp_report), m->m_len); 2244 2245 bsp->cur_offset += m->m_len; 2246 2247 if (!(bsp->flags & DDP_BF_NOFLIP)) { 2248 q->cur_buf ^= 1; /* flip buffers */ 2249 if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length) 2250 nomoredata=1; 2251 } 2252 2253#ifdef T3_TRACE 2254 T3_TRACE4(TIDTB(sk), 2255 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u " 2256 "ddp_report %u offset %u", 2257 tp->rcv_nxt, bsp->cur_offset, ddp_report, 2258 G_DDP_OFFSET(ddp_report)); 2259#endif 2260 CTR4(KTR_TOM, 2261 "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u " 2262 "ddp_report %u offset %u", 2263 tp->rcv_nxt, bsp->cur_offset, ddp_report, 2264 G_DDP_OFFSET(ddp_report)); 2265 2266 m->m_ddp_gl = (unsigned char *)bsp->gl; 2267 m->m_flags |= M_DDP; 2268 m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1; 2269 if (bsp->flags & DDP_BF_NOCOPY) 2270 bsp->flags &= ~DDP_BF_NOCOPY; 2271 if (nomoredata) 2272 m->m_ddp_flags |= DDP_BF_NODATA; 2273 2274 SBAPPEND(&so->so_rcv, m); 2275 2276 if ((so->so_state & SS_NOFDREF) == 0) 2277 sorwakeup_locked(so); 2278 else 2279 SOCKBUF_UNLOCK(&so->so_rcv); 2280} 2281 2282/* 2283 * Handler for RX_DDP_COMPLETE CPL messages. 2284 */ 2285static int 2286do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2287{ 2288 struct toepcb *toep = ctx; 2289 2290 VALIDATE_SOCK(so); 2291#if 0 2292 skb->h.th = tcphdr_skb->h.th; 2293#endif 2294 process_ddp_complete(toep, m); 2295 return (0); 2296} 2297 2298/* 2299 * Move a socket to TIME_WAIT state. We need to make some adjustments to the 2300 * socket state before calling tcp_time_wait to comply with its expectations. 2301 */ 2302static void 2303enter_timewait(struct socket *so) 2304{ 2305 struct tcpcb *tp = sototcpcb(so); 2306 2307 INP_LOCK_ASSERT(tp->t_inpcb); 2308 /* 2309 * Bump rcv_nxt for the peer FIN. 
	 * We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	/* defeat recycling */
	tp->t_srtt = 0;		/* defeat tcp_update_metrics */
	tcp_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */

	if (tp->rcv_nxt == rcv_nxt)	/* no data */
		return (0);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
		 */
		return (1);
	}

	INP_LOCK_ASSERT(tp->t_inpcb);
	q = &toep->tp_ddp_state;
	SOCKBUF_LOCK(&so->so_rcv);
	bsp = &q->buf_state[q->cur_buf];
	/* The data that rode along with the FIN, per the CPL report. */
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags =
	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	tp->t_rcvtime = ticks;
	SBAPPEND(&so->so_rcv, m);
	/* sorwakeup_locked() drops the sockbuf lock. */
	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (1);
}

/*
 * Handle a peer FIN: run the FIN-related parts of the TCP state machine for
 * an offloaded connection and free the message unless
 * handle_peer_close_data() consumed it (keep != 0).
 */
static void
do_peer_fin(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	int keep = 0;
	DPRINTF("do_peer_fin state=%d\n", tp->t_state);

#ifdef T3_TRACE
	T3_TRACE0(TIDTB(sk),"do_peer_fin:");
#endif

	/* On T3B+, a pending abort_rpl means this PEER_CLOSE is stale. */
	if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		printf("abort_pending set\n");

		goto out;
	}
	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(tp->t_inpcb);
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
		keep = handle_peer_close_data(so, m);
		if (keep < 0) {
			INP_INFO_WUNLOCK(&tcbinfo);
			INP_UNLOCK(tp->t_inpcb);
			return;
		}
	}
	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		socantrcvmore(so);
		/*
		 * If connection is half-synchronized
		 * (ie NEEDSYN flag on) then delay ACK,
		 * so it may be piggybacked when SYN is sent.
		 * Otherwise, since we received a FIN then no
		 * more input can be expected, send ACK now.
		 */
		if (tp->t_flags & TF_NEEDSYN)
			tp->t_flags |= TF_DELACK;
		else
			tp->t_flags |= TF_ACKNOW;
		tp->rcv_nxt++;
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;
	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;
	case TCPS_FIN_WAIT_2:
		/*
		 * If we've sent an abort_req we must have sent it too late,
		 * HW will send us a reply telling us so, and this peer_close
		 * is really the last message for this connection and needs to
		 * be treated as an abort_rpl, i.e., transition the connection
		 * to TCP_CLOSE (note that the host stack does this at the
		 * time of generating the RST but we must wait for HW).
		 * Otherwise we enter TIME_WAIT.
		 */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			/* tcp_close() may return NULL; guarded below. */
			tp = tcp_close(tp);
		} else {
			enter_timewait(so);
		}
		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
		    TOE_DEV(so)->tod_name, toep->tp_tid, tp->t_state);
	}
	INP_INFO_WUNLOCK(&tcbinfo);
	if (tp)
		INP_UNLOCK(tp->t_inpcb);

	DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags);

#ifdef notyet
	/* Do not send POLL_HUP for half duplex close. */
	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
	    sk->sk_state == TCP_CLOSE)
		sk_wake_async(so, 1, POLL_HUP);
	else
		sk_wake_async(so, 1, POLL_IN);
#endif

out:
	if (!keep)
		m_free(m);
}

/*
 * Handler for PEER_CLOSE CPL messages.
 */
static int
do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);

	VALIDATE_SOCK(so);

	do_peer_fin(so, m);
	return (0);
}

/*
 * Process a CLOSE_CON_RPL: the HW's acknowledgement of our FIN.  Advances
 * snd_una past the FIN and runs the close-side TCP state transitions.
 * Always frees the message.
 */
static void
process_close_con_rpl(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct cpl_close_con_rpl *rpl = cplhdr(m);
	struct toepcb *toep = tp->t_toe;

	/* NOTE(review): snd_una is updated before the inpcb lock is taken
	 * below -- verify this is safe against concurrent readers. */
	tp->snd_una = ntohl(rpl->snd_nxt) - 1;	/* exclude FIN */

	DPRINTF("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state,
	    !!(so->so_state & SS_NOFDREF));
	if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING))
		goto out;

	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(tp->t_inpcb);
	switch (tp->t_state) {
	case TCPS_CLOSING:	/* see FIN_WAIT2 case in do_peer_fin */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			tp = tcp_close(tp);

		} else {
			enter_timewait(so);
			soisdisconnected(so);
		}
		break;
	case TCPS_LAST_ACK:
		/*
		 * In this state we don't care about pending abort_rpl.
		 * If we've sent abort_req it was post-close and was sent too
		 * late, this close_con_rpl is the actual last message.
		 */
		t3_release_offload_resources(toep);
		tp = tcp_close(tp);
		break;
	case TCPS_FIN_WAIT_1:
		/*
		 * If we can't receive any more
		 * data, then closing user can proceed.
		 * Starting the timer is contrary to the
		 * specification, but if we don't get a FIN
		 * we'll hang forever.
		 *
		 * XXXjl:
		 * we should release the tp also, and use a
		 * compressed state.
		 */
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
			int timeout;

			soisdisconnected(so);
			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : tcp_maxidle;
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
		tp->t_state = TCPS_FIN_WAIT_2;
		if ((so->so_options & SO_LINGER) && so->so_linger == 0 &&
		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
			tp = tcp_drop(tp, 0);
		}

		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		    TOE_DEV(so)->tod_name, toep->tp_tid,
		    tp->t_state);
	}
	INP_INFO_WUNLOCK(&tcbinfo);
	if (tp)
		INP_UNLOCK(tp->t_inpcb);
out:
	m_freem(m);
}

/*
 * Handler for CLOSE_CON_RPL CPL messages.
 */
static int
do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
    void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so = toeptoso(toep);

	VALIDATE_SOCK(so);

	process_close_con_rpl(so, m);
	return (0);
}

/*
 * Process abort replies.  We only process these messages if we anticipate
 * them as the coordination between SW and HW in this area is somewhat lacking
 * and sometimes we get ABORT_RPLs after we are done with the connection that
 * originated the ABORT_REQ.
2591 */ 2592static void 2593process_abort_rpl(struct socket *so, struct mbuf *m) 2594{ 2595 struct tcpcb *tp = sototcpcb(so); 2596 struct toepcb *toep = tp->t_toe; 2597 2598#ifdef T3_TRACE 2599 T3_TRACE1(TIDTB(sk), 2600 "process_abort_rpl: GTS rpl pending %d", 2601 sock_flag(sk, ABORT_RPL_PENDING)); 2602#endif 2603 2604 INP_INFO_WLOCK(&tcbinfo); 2605 INP_LOCK(tp->t_inpcb); 2606 2607 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2608 /* 2609 * XXX panic on tcpdrop 2610 */ 2611 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(TOE_DEV(so))) 2612 toep->tp_flags |= TP_ABORT_RPL_RCVD; 2613 else { 2614 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); 2615 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || 2616 !is_t3a(TOE_DEV(so))) { 2617 if (toep->tp_flags & TP_ABORT_REQ_RCVD) 2618 panic("TP_ABORT_REQ_RCVD set"); 2619 t3_release_offload_resources(toep); 2620 tp = tcp_close(tp); 2621 } 2622 } 2623 } 2624 if (tp) 2625 INP_UNLOCK(tp->t_inpcb); 2626 INP_INFO_WUNLOCK(&tcbinfo); 2627 2628 m_free(m); 2629} 2630 2631/* 2632 * Handle an ABORT_RPL_RSS CPL message. 2633 */ 2634static int 2635do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2636{ 2637 struct socket *so; 2638 struct cpl_abort_rpl_rss *rpl = cplhdr(m); 2639 struct toepcb *toep; 2640 2641 /* 2642 * Ignore replies to post-close aborts indicating that the abort was 2643 * requested too late. These connections are terminated when we get 2644 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 2645 * arrives the TID is either no longer used or it has been recycled. 
2646 */ 2647 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2648discard: 2649 m_free(m); 2650 return (0); 2651 } 2652 2653 toep = (struct toepcb *)ctx; 2654 2655 /* 2656 * Sometimes we've already closed the socket, e.g., a post-close 2657 * abort races with ABORT_REQ_RSS, the latter frees the socket 2658 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2659 * but FW turns the ABORT_REQ into a regular one and so we get 2660 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2661 */ 2662 if (!toep) 2663 goto discard; 2664 2665 if (toep->tp_tp == NULL) { 2666 printf("removing tid for abort\n"); 2667 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2668 if (toep->tp_l2t) 2669 l2t_release(L2DATA(cdev), toep->tp_l2t); 2670 2671 toepcb_release(toep); 2672 goto discard; 2673 } 2674 2675 printf("toep=%p\n", toep); 2676 printf("tp=%p\n", toep->tp_tp); 2677 2678 so = toeptoso(toep); /* <- XXX panic */ 2679 toepcb_hold(toep); 2680 process_abort_rpl(so, m); 2681 toepcb_release(toep); 2682 return (0); 2683} 2684 2685/* 2686 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2687 * indicate whether RST should be sent in response. 2688 */ 2689static int 2690abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2691{ 2692 struct tcpcb *tp = sototcpcb(so); 2693 2694 switch (abort_reason) { 2695 case CPL_ERR_BAD_SYN: 2696#if 0 2697 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2698#endif 2699 case CPL_ERR_CONN_RESET: 2700 // XXX need to handle SYN_RECV due to crossed SYNs 2701 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2702 case CPL_ERR_XMIT_TIMEDOUT: 2703 case CPL_ERR_PERSIST_TIMEDOUT: 2704 case CPL_ERR_FINWAIT2_TIMEDOUT: 2705 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2706#if 0 2707 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2708#endif 2709 return (ETIMEDOUT); 2710 default: 2711 return (EIO); 2712 } 2713} 2714 2715static inline void 2716set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2717{ 2718 struct cpl_abort_rpl *rpl = cplhdr(m); 2719 2720 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2721 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2722 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2723 2724 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2725 rpl->cmd = cmd; 2726} 2727 2728static void 2729send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2730{ 2731 struct mbuf *reply_mbuf; 2732 struct cpl_abort_req_rss *req = cplhdr(m); 2733 2734 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2735 m_set_priority(m, CPL_PRIORITY_DATA); 2736 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2737 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2738 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2739 m_free(m); 2740} 2741 2742/* 2743 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2744 */ 2745static inline int 2746is_neg_adv_abort(unsigned int status) 2747{ 2748 return status == CPL_ERR_RTX_NEG_ADVICE || 2749 status == CPL_ERR_PERSIST_NEG_ADVICE; 2750} 2751 2752static void 2753send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2754{ 2755 struct mbuf *reply_mbuf; 2756 struct cpl_abort_req_rss *req = cplhdr(m); 2757 2758 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2759 2760 if (!reply_mbuf) { 2761 /* Defer the reply. Stick rst_status into req->cmd. 
		 */
		req->status = rst_status;
		t3_defer_reply(m, tdev, send_deferred_abort_rpl);
		return;
	}

	m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
	set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
	m_free(m);

	/*
	 * XXX need to sync with ARP as for SYN_RECV connections we can send
	 * these messages while ARP is pending.  For other connection states
	 * it's not a problem.
	 */
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
}

#ifdef notyet
static void
cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
{
	UNIMPLEMENTED();
#ifdef notyet
	struct request_sock *req = child->sk_user_data;

	inet_csk_reqsk_queue_removed(parent, req);
	synq_remove(tcp_sk(child));
	__reqsk_free(req);
	child->sk_user_data = NULL;
#endif
}


/*
 * Performs the actual work to abort a SYN_RECV connection.
 */
static void
do_abort_syn_rcv(struct socket *child, struct socket *parent)
{
	struct tcpcb *parenttp = sototcpcb(parent);
	struct tcpcb *childtp = sototcpcb(child);

	/*
	 * If the server is still open we clean up the child connection,
	 * otherwise the server already did the clean up as it was purging
	 * its SYN queue and the skb was just sitting in its backlog.
	 */
	if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
		cleanup_syn_rcv_conn(child, parent);
		INP_INFO_WLOCK(&tcbinfo);
		INP_LOCK(childtp->t_inpcb);
		t3_release_offload_resources(childtp->t_toe);
		childtp = tcp_close(childtp);
		INP_INFO_WUNLOCK(&tcbinfo);
		if (childtp)
			INP_UNLOCK(childtp->t_inpcb);
	}
}
#endif

/*
 * Handle abort requests for a SYN_RECV connection.  These need extra work
 * because the socket is on its parent's SYN queue.
 */
static int
abort_syn_rcv(struct socket *so, struct mbuf *m)
{
	/* Not implemented yet; the Linux-derived body below is disabled. */
	UNIMPLEMENTED();
#ifdef notyet
	struct socket *parent;
	struct toedev *tdev = TOE_DEV(so);
	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
	struct socket *oreq = so->so_incomp;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;

	if (!oreq)
		return -1;	/* somehow we are not on the SYN queue */

	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, oreq->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	SOCK_LOCK(parent);
	do_abort_syn_rcv(so, parent);
	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
	SOCK_UNLOCK(parent);
#endif
	return (0);
}

/*
 * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
 * request except that we need to reply to it.
 */
static void
process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev)
{
	int rst_status = CPL_ABORT_NO_RST;
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	INP_LOCK(tp->t_inpcb);
	/*
	 * HW delivers the ABORT_REQ twice; record the first copy and act
	 * only on the second (TP_ABORT_REQ_RCVD already set).
	 */
	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
		m_free(m);
		goto skip;
	}

	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
	/*
	 * Three cases to consider:
	 * a) We haven't sent an abort_req; close the connection.
	 * b) We have sent a post-close abort_req that will get to TP too late
	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
	 *    be ignored and the connection should be closed now.
	 * c) We have sent a regular abort_req that will get to TP too late.
	 *    That will generate an abort_rpl with status 0, wait for it.
	 */
	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
	    (is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
		so->so_error = abort_status_to_errno(so, req->status,
		    &rst_status);
		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
			sorwakeup(so);
		/*
		 * SYN_RECV needs special processing.  If abort_syn_rcv()
		 * returns 0 it has taken care of the abort.
		 */
		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
			goto skip;

		t3_release_offload_resources(toep);
		tp = tcp_close(tp);
	}
	if (tp)
		INP_UNLOCK(tp->t_inpcb);
	send_abort_rpl(m, tdev, rst_status);
	return;

skip:
	INP_UNLOCK(tp->t_inpcb);
}

/*
 * Handle an ABORT_REQ_RSS CPL message.
 */
static int
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct socket *so;
	struct inpcb *inp;

	if (is_neg_adv_abort(req->status)) {
		m_free(m);
		return (0);
	}

	printf("aborting tid=%d\n", toep->tp_tid);

	/* Connection aborted while still in SYN_RCVD: tear down the
	 * embryonic state directly, there is no full socket yet. */
	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		toep->tp_flags |= TP_ABORT_REQ_RCVD;
		printf("sending abort rpl\n");

		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
		printf("sent\n");
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		/*
		 * Unhook
		 */
		toep->tp_tp->t_toe = NULL;
		toep->tp_tp->t_flags &= ~TF_TOE;
		toep->tp_tp = NULL;
		/*
		 * XXX need to call syncache_chkrst - but we don't
		 * have a way of doing that yet
		 */
		toepcb_release(toep);
		printf("abort for unestablished connection :-(\n");
		return (0);
	}
	if (toep->tp_tp == NULL) {
		printf("disconnected toepcb\n");
		/* should be freed momentarily
		 */
		return (0);
	}

	so = toeptoso(toep);
	inp = sotoinpcb(so);

	VALIDATE_SOCK(so);
	/* Hold the toepcb across processing so it can't vanish under us. */
	toepcb_hold(toep);
	INP_INFO_WLOCK(&tcbinfo);
	process_abort_req(so, m, TOE_DEV(so));
	INP_INFO_WUNLOCK(&tcbinfo);
	toepcb_release(toep);
	return (0);
}
#ifdef notyet
static void
pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
{
	struct toedev *tdev = TOE_DEV(parent);

	do_abort_syn_rcv(child, parent);
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
		struct cpl_pass_accept_rpl *rpl = cplhdr(m);

		rpl->opt0h = htonl(F_TCAM_BYPASS);
		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	} else
		m_free(m);
}
#endif
/*
 * Tear down a SYN_RECV connection whose CPL_PASS_ACCEPT_RPL could not be
 * delivered due to an ARP failure.  Currently unimplemented.
 */
static void
handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
{
	UNIMPLEMENTED();

#ifdef notyet
	struct t3cdev *cdev;
	struct socket *parent;
	struct socket *oreq;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;
	struct tcpcb *otp, *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		m_free(m);
		return;
	}

	oreq = so->so_incomp;
	otp = sototcpcb(oreq);

	cdev = T3C_DEV(so);
	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, otp->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	SOCK_LOCK(parent);
	pass_open_abort(so, parent, m);
	SOCK_UNLOCK(parent);
#endif
}

/*
 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
 * connection.
 */
static void
pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
#endif
	handle_pass_open_arp_failure(m_get_socket(m), m);
}

/*
 * Populate a reject CPL_PASS_ACCEPT_RPL WR.
 */
static void
mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
{
	struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
	struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
	unsigned int tid = GET_TID(req);

	m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
	rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	/* NOTE(review): wr.wr_lo is not initialized here -- presumably
	 * ignored by HW for FORWARD WRs; confirm against the CPL spec. */
	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
	rpl->peer_ip = req->peer_ip;	// req->peer_ip not overwritten yet
	rpl->opt0h = htonl(F_TCAM_BYPASS);
	rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
	rpl->opt2 = 0;
	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
}

/*
 * Send a deferred reject to an accept request.
3059 */ 3060static void 3061reject_pass_request(struct toedev *tdev, struct mbuf *m) 3062{ 3063 struct mbuf *reply_mbuf; 3064 3065 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl)); 3066 mk_pass_accept_rpl(reply_mbuf, m); 3067 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 3068 m_free(m); 3069} 3070 3071static void 3072handle_syncache_event(int event, void *arg) 3073{ 3074 struct toepcb *toep = arg; 3075 3076 switch (event) { 3077 case TOE_SC_ENTRY_PRESENT: 3078 /* 3079 * entry already exists - free toepcb 3080 * and l2t 3081 */ 3082 printf("syncache entry present\n"); 3083 toepcb_release(toep); 3084 break; 3085 case TOE_SC_DROP: 3086 /* 3087 * The syncache has given up on this entry 3088 * either it timed out, or it was evicted 3089 * we need to explicitly release the tid 3090 */ 3091 printf("syncache entry dropped\n"); 3092 toepcb_release(toep); 3093 break; 3094 default: 3095 log(LOG_ERR, "unknown syncache event %d\n", event); 3096 break; 3097 } 3098} 3099 3100static void 3101syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep) 3102{ 3103 struct in_conninfo inc; 3104 struct tcpopt to; 3105 struct tcphdr th; 3106 struct inpcb *inp; 3107 int mss, wsf, sack, ts; 3108 uint32_t rcv_isn = ntohl(req->rcv_isn); 3109 3110 bzero(&to, sizeof(struct tcpopt)); 3111 inp = sotoinpcb(lso); 3112 3113 /* 3114 * Fill out information for entering us into the syncache 3115 */ 3116 inc.inc_fport = th.th_sport = req->peer_port; 3117 inc.inc_lport = th.th_dport = req->local_port; 3118 th.th_seq = req->rcv_isn; 3119 th.th_flags = TH_SYN; 3120 3121 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; 3122 3123 3124 inc.inc_isipv6 = 0; 3125 inc.inc_len = 0; 3126 inc.inc_faddr.s_addr = req->peer_ip; 3127 inc.inc_laddr.s_addr = req->local_ip; 3128 3129 DPRINTF("syncache add of %d:%d %d:%d\n", 3130 ntohl(req->local_ip), ntohs(req->local_port), 3131 ntohl(req->peer_ip), ntohs(req->peer_port)); 
3132 3133 mss = req->tcp_options.mss; 3134 wsf = req->tcp_options.wsf; 3135 ts = req->tcp_options.tstamp; 3136 sack = req->tcp_options.sack; 3137 to.to_mss = mss; 3138 to.to_wscale = wsf; 3139 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3140 INP_INFO_WLOCK(&tcbinfo); 3141 INP_LOCK(inp); 3142 syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep); 3143} 3144 3145 3146/* 3147 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket 3148 * lock held. Note that the sock here is a listening socket that is not owned 3149 * by the TOE. 3150 */ 3151static void 3152process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, 3153 struct listen_ctx *lctx) 3154{ 3155 int rt_flags; 3156 struct l2t_entry *e; 3157 struct iff_mac tim; 3158 struct mbuf *reply_mbuf, *ddp_mbuf = NULL; 3159 struct cpl_pass_accept_rpl *rpl; 3160 struct cpl_pass_accept_req *req = cplhdr(m); 3161 unsigned int tid = GET_TID(req); 3162 struct tom_data *d = TOM_DATA(tdev); 3163 struct t3cdev *cdev = d->cdev; 3164 struct tcpcb *tp = sototcpcb(so); 3165 struct toepcb *newtoep; 3166 struct rtentry *dst; 3167 struct sockaddr_in nam; 3168 struct t3c_data *td = T3C_DATA(cdev); 3169 3170 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3171 if (__predict_false(reply_mbuf == NULL)) { 3172 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3173 t3_defer_reply(m, tdev, reject_pass_request); 3174 else { 3175 cxgb_queue_tid_release(cdev, tid); 3176 m_free(m); 3177 } 3178 DPRINTF("failed to get reply_mbuf\n"); 3179 3180 goto out; 3181 } 3182 3183 if (tp->t_state != TCPS_LISTEN) { 3184 DPRINTF("socket not in listen state\n"); 3185 3186 goto reject; 3187 } 3188 3189 tim.mac_addr = req->dst_mac; 3190 tim.vlan_tag = ntohs(req->vlan_tag); 3191 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { 3192 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n"); 3193 goto reject; 3194 } 3195 3196#ifdef notyet 3197 /* 3198 
* XXX do route lookup to confirm that we're still listening on this 3199 * address 3200 */ 3201 if (ip_route_input(skb, req->local_ip, req->peer_ip, 3202 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev)) 3203 goto reject; 3204 rt_flags = ((struct rtable *)skb->dst)->rt_flags & 3205 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); 3206 dst_release(skb->dst); // done with the input route, release it 3207 skb->dst = NULL; 3208 3209 if ((rt_flags & RTF_LOCAL) == 0) 3210 goto reject; 3211#endif 3212 /* 3213 * XXX 3214 */ 3215 rt_flags = RTF_LOCAL; 3216 if ((rt_flags & RTF_LOCAL) == 0) 3217 goto reject; 3218 3219 /* 3220 * Calculate values and add to syncache 3221 */ 3222 3223 newtoep = toepcb_alloc(); 3224 if (newtoep == NULL) 3225 goto reject; 3226 3227 bzero(&nam, sizeof(struct sockaddr_in)); 3228 3229 nam.sin_len = sizeof(struct sockaddr_in); 3230 nam.sin_family = AF_INET; 3231 nam.sin_addr.s_addr =req->peer_ip; 3232 dst = rtalloc2((struct sockaddr *)&nam, 1, 0); 3233 3234 if (dst == NULL) { 3235 printf("failed to find route\n"); 3236 goto reject; 3237 } 3238 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev, 3239 (struct sockaddr *)&nam); 3240 if (e == NULL) { 3241 DPRINTF("failed to get l2t\n"); 3242 } 3243 /* 3244 * Point to our listen socket until accept 3245 */ 3246 newtoep->tp_tp = tp; 3247 newtoep->tp_flags = TP_SYN_RCVD; 3248 newtoep->tp_tid = tid; 3249 newtoep->tp_toedev = tdev; 3250 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3251 3252 cxgb_insert_tid(cdev, d->client, newtoep, tid); 3253 SOCK_LOCK(so); 3254 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); 3255 SOCK_UNLOCK(so); 3256 3257 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so->so_options & SO_NO_DDP) && 3258 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; 3259 3260 if (newtoep->tp_ulp_mode) { 3261 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3262 3263 if (ddp_mbuf == NULL) 3264 newtoep->tp_ulp_mode = 0; 3265 } 3266 3267 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d", 3268 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); 3269 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); 3270 /* 3271 * XXX workaround for lack of syncache drop 3272 */ 3273 toepcb_hold(newtoep); 3274 syncache_add_accept_req(req, so, newtoep); 3275 3276 rpl = cplhdr(reply_mbuf); 3277 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl); 3278 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3279 rpl->wr.wr_lo = 0; 3280 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3281 rpl->opt2 = htonl(calc_opt2(so, tdev)); 3282 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3283 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten 3284 3285 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | 3286 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); 3287 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) | 3288 CPL_PASS_OPEN_ACCEPT); 3289 3290 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status); 3291 3292 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep)); 3293 3294 l2t_send(cdev, reply_mbuf, e); 3295 m_free(m); 3296 if (newtoep->tp_ulp_mode) { 3297 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS, 3298 V_TF_DDP_OFF(1) | 3299 TP_DDP_TIMER_WORKAROUND_MASK, 3300 V_TF_DDP_OFF(1) | 3301 TP_DDP_TIMER_WORKAROUND_VAL, 1); 3302 } else 3303 printf("not offloading\n"); 3304 3305 3306 3307 return; 3308reject: 3309 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3310 mk_pass_accept_rpl(reply_mbuf, m); 3311 else 3312 mk_tid_release(reply_mbuf, newtoep, tid); 3313 cxgb_ofld_send(cdev, reply_mbuf); 3314 m_free(m); 3315out: 3316#if 0 3317 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3318#else 3319 return; 3320#endif 3321} 3322 
3323/* 3324 * Handle a CPL_PASS_ACCEPT_REQ message. 3325 */ 3326static int 3327do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3328{ 3329 struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx; 3330 struct socket *lso = listen_ctx->lso; 3331 struct tom_data *d = listen_ctx->tom_data; 3332 3333#if VALIDATE_TID 3334 struct cpl_pass_accept_req *req = cplhdr(m); 3335 unsigned int tid = GET_TID(req); 3336 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps; 3337 3338 if (unlikely(!lsk)) { 3339 printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n", 3340 cdev->name, 3341 (unsigned long)((union listen_entry *)ctx - 3342 t->stid_tab)); 3343 return CPL_RET_BUF_DONE; 3344 } 3345 if (unlikely(tid >= t->ntids)) { 3346 printk(KERN_ERR "%s: passive open TID %u too large\n", 3347 cdev->name, tid); 3348 return CPL_RET_BUF_DONE; 3349 } 3350 /* 3351 * For T3A the current user of the TID may have closed but its last 3352 * message(s) may have been backlogged so the TID appears to be still 3353 * in use. Just take the TID away, the connection can close at its 3354 * own leisure. For T3B this situation is a bug. 3355 */ 3356 if (!valid_new_tid(t, tid) && 3357 cdev->type != T3A) { 3358 printk(KERN_ERR "%s: passive open uses existing TID %u\n", 3359 cdev->name, tid); 3360 return CPL_RET_BUF_DONE; 3361 } 3362#endif 3363 3364 process_pass_accept_req(lso, m, &d->tdev, listen_ctx); 3365 return (0); 3366} 3367 3368/* 3369 * Called when a connection is established to translate the TCP options 3370 * reported by HW to FreeBSD's native format. 3371 */ 3372static void 3373assign_rxopt(struct socket *so, unsigned int opt) 3374{ 3375 const struct t3c_data *td = T3C_DATA(T3C_DEV(so)); 3376 struct tcpcb *tp = sototcpcb(so); 3377 struct toepcb *toep = tp->t_toe; 3378 3379 INP_LOCK_ASSERT(tp->t_inpcb); 3380 3381 toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3382 tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0; 3383 tp->t_flags |= G_TCPOPT_SACK(opt) ? 
TF_SACK_PERMIT : 0; 3384 tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0; 3385 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 3386 (TF_RCVD_SCALE|TF_REQ_SCALE)) 3387 tp->rcv_scale = tp->request_r_scale; 3388} 3389 3390/* 3391 * Completes some final bits of initialization for just established connections 3392 * and changes their state to TCP_ESTABLISHED. 3393 * 3394 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1. 3395 */ 3396static void 3397make_established(struct socket *so, u32 snd_isn, unsigned int opt) 3398{ 3399 struct tcpcb *tp = sototcpcb(so); 3400 struct toepcb *toep = tp->t_toe; 3401 3402 toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn; 3403 assign_rxopt(so, opt); 3404 so->so_proto->pr_ctloutput = t3_ctloutput; 3405 3406#if 0 3407 inet_sk(sk)->id = tp->write_seq ^ jiffies; 3408#endif 3409 /* 3410 * XXX not clear what rcv_wup maps to 3411 */ 3412 /* 3413 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't 3414 * pass through opt0. 
3415 */ 3416 if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10)) 3417 toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10); 3418 3419 dump_toepcb(toep); 3420 3421#ifdef notyet 3422/* 3423 * no clean interface for marking ARP up to date 3424 */ 3425 dst_confirm(sk->sk_dst_cache); 3426#endif 3427 tp->t_starttime = ticks; 3428 tp->t_state = TCPS_ESTABLISHED; 3429 soisconnected(so); 3430} 3431 3432static int 3433syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep) 3434{ 3435 3436 struct in_conninfo inc; 3437 struct tcpopt to; 3438 struct tcphdr th; 3439 int mss, wsf, sack, ts; 3440 struct mbuf *m = NULL; 3441 const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev); 3442 unsigned int opt; 3443 3444#ifdef MAC 3445#error "no MAC support" 3446#endif 3447 3448 opt = ntohs(req->tcp_opt); 3449 3450 bzero(&to, sizeof(struct tcpopt)); 3451 3452 /* 3453 * Fill out information for entering us into the syncache 3454 */ 3455 inc.inc_fport = th.th_sport = req->peer_port; 3456 inc.inc_lport = th.th_dport = req->local_port; 3457 th.th_seq = req->rcv_isn; 3458 th.th_flags = TH_ACK; 3459 3460 inc.inc_isipv6 = 0; 3461 inc.inc_len = 0; 3462 inc.inc_faddr.s_addr = req->peer_ip; 3463 inc.inc_laddr.s_addr = req->local_ip; 3464 3465 mss = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3466 wsf = G_TCPOPT_WSCALE_OK(opt); 3467 ts = G_TCPOPT_TSTAMP(opt); 3468 sack = G_TCPOPT_SACK(opt); 3469 3470 to.to_mss = mss; 3471 to.to_wscale = G_TCPOPT_SND_WSCALE(opt); 3472 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3473 3474 DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n", 3475 ntohl(req->local_ip), ntohs(req->local_port), 3476 ntohl(req->peer_ip), ntohs(req->peer_port), 3477 mss, wsf, ts, sack); 3478 return syncache_expand(&inc, &to, &th, so, m); 3479} 3480 3481 3482/* 3483 * Process a CPL_PASS_ESTABLISH message. 
XXX a lot of the locking doesn't work 3484 * if we are in TCP_SYN_RECV due to crossed SYNs 3485 */ 3486static int 3487do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3488{ 3489 struct cpl_pass_establish *req = cplhdr(m); 3490 struct toepcb *toep = (struct toepcb *)ctx; 3491 struct tcpcb *tp; 3492 struct socket *so, *lso; 3493 struct t3c_data *td = T3C_DATA(cdev); 3494 // Complete socket initialization now that we have the SND_ISN 3495 3496 struct toedev *tdev; 3497 3498 so = lso = toeptoso(toep); 3499 tdev = toep->tp_toedev; 3500 3501 SOCK_LOCK(so); 3502 LIST_REMOVE(toep, synq_entry); 3503 SOCK_UNLOCK(so); 3504 3505 INP_INFO_WLOCK(&tcbinfo); 3506 if (!syncache_expand_establish_req(req, &so, toep)) { 3507 /* 3508 * No entry 3509 */ 3510 UNIMPLEMENTED(); 3511 } 3512 if (so == NULL) { 3513 /* 3514 * Couldn't create the socket 3515 */ 3516 UNIMPLEMENTED(); 3517 } 3518 3519 /* 3520 * XXX workaround for lack of syncache drop 3521 */ 3522 toepcb_release(toep); 3523 3524 tp = sototcpcb(so); 3525 INP_LOCK(tp->t_inpcb); 3526 3527 so->so_snd.sb_flags |= SB_NOCOALESCE; 3528 so->so_rcv.sb_flags |= SB_NOCOALESCE; 3529 3530 toep->tp_tp = tp; 3531 toep->tp_flags = 0; 3532 tp->t_toe = toep; 3533 reset_wr_list(toep); 3534 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3535 tp->rcv_nxt = toep->tp_copied_seq; 3536 install_offload_ops(so); 3537 3538 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs); 3539 toep->tp_wr_unacked = 0; 3540 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); 3541 toep->tp_qset_idx = 0; 3542 toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu); 3543 3544 /* 3545 * XXX Cancel any keep alive timer 3546 */ 3547 3548 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); 3549 INP_INFO_WUNLOCK(&tcbinfo); 3550 INP_UNLOCK(tp->t_inpcb); 3551 3552 CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid); 3553 cxgb_log_tcb(cdev->adapter, toep->tp_tid); 3554#ifdef notyet 3555 /* 3556 * XXX not sure how these 
checks map to us 3557 */ 3558 if (unlikely(sk->sk_socket)) { // simultaneous opens only 3559 sk->sk_state_change(sk); 3560 sk_wake_async(so, 0, POLL_OUT); 3561 } 3562 /* 3563 * The state for the new connection is now up to date. 3564 * Next check if we should add the connection to the parent's 3565 * accept queue. When the parent closes it resets connections 3566 * on its SYN queue, so check if we are being reset. If so we 3567 * don't need to do anything more, the coming ABORT_RPL will 3568 * destroy this socket. Otherwise move the connection to the 3569 * accept queue. 3570 * 3571 * Note that we reset the synq before closing the server so if 3572 * we are not being reset the stid is still open. 3573 */ 3574 if (unlikely(!tp->forward_skb_hint)) { // removed from synq 3575 __kfree_skb(skb); 3576 goto unlock; 3577 } 3578#endif 3579 m_free(m); 3580 3581 return (0); 3582} 3583 3584/* 3585 * Fill in the right TID for CPL messages waiting in the out-of-order queue 3586 * and send them to the TOE. 3587 */ 3588static void 3589fixup_and_send_ofo(struct socket *so) 3590{ 3591 struct mbuf *m; 3592 struct toedev *tdev = TOE_DEV(so); 3593 struct tcpcb *tp = sototcpcb(so); 3594 struct toepcb *toep = tp->t_toe; 3595 unsigned int tid = toep->tp_tid; 3596 3597 printf("fixup_and_send_ofo\n"); 3598 3599 INP_LOCK_ASSERT(tp->t_inpcb); 3600 while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) { 3601 /* 3602 * A variety of messages can be waiting but the fields we'll 3603 * be touching are common to all so any message type will do. 3604 */ 3605 struct cpl_close_con_req *p = cplhdr(m); 3606 3607 p->wr.wr_lo = htonl(V_WR_TID(tid)); 3608 OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid)); 3609 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 3610 } 3611} 3612 3613/* 3614 * Updates socket state from an active establish CPL message. Runs with the 3615 * socket lock held. 
3616 */ 3617static void 3618socket_act_establish(struct socket *so, struct mbuf *m) 3619{ 3620 struct cpl_act_establish *req = cplhdr(m); 3621 u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ 3622 struct tcpcb *tp = sototcpcb(so); 3623 struct toepcb *toep = tp->t_toe; 3624 3625 if (__predict_false(tp->t_state != TCPS_SYN_SENT)) 3626 log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n", 3627 toep->tp_tid, tp->t_state); 3628 3629 tp->ts_recent_age = ticks; 3630 tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn; 3631 toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs; 3632 3633 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); 3634 3635 /* 3636 * Now that we finally have a TID send any CPL messages that we had to 3637 * defer for lack of a TID. 3638 */ 3639 if (mbufq_len(&toep->out_of_order_queue)) 3640 fixup_and_send_ofo(so); 3641 3642 if (__predict_false(so->so_state & SS_NOFDREF)) { 3643 /* 3644 * XXX does this even make sense? 3645 */ 3646 sorwakeup(so); 3647 } 3648 m_free(m); 3649#ifdef notyet 3650/* 3651 * XXX assume no write requests permitted while socket connection is 3652 * incomplete 3653 */ 3654 /* 3655 * Currently the send queue must be empty at this point because the 3656 * socket layer does not send anything before a connection is 3657 * established. To be future proof though we handle the possibility 3658 * that there are pending buffers to send (either TX_DATA or 3659 * CLOSE_CON_REQ). First we need to adjust the sequence number of the 3660 * buffers according to the just learned write_seq, and then we send 3661 * them on their way. 3662 */ 3663 fixup_pending_writeq_buffers(sk); 3664 if (t3_push_frames(so, 1)) 3665 sk->sk_write_space(sk); 3666#endif 3667 3668 toep->tp_state = tp->t_state; 3669 tcpstat.tcps_connects++; 3670 3671} 3672 3673/* 3674 * Process a CPL_ACT_ESTABLISH message. 
3675 */ 3676static int 3677do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3678{ 3679 struct cpl_act_establish *req = cplhdr(m); 3680 unsigned int tid = GET_TID(req); 3681 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); 3682 struct toepcb *toep = (struct toepcb *)ctx; 3683 struct tcpcb *tp = toep->tp_tp; 3684 struct socket *so; 3685 struct toedev *tdev; 3686 struct tom_data *d; 3687 3688 if (tp == NULL) { 3689 free_atid(cdev, atid); 3690 return (0); 3691 } 3692 3693 so = toeptoso(toep); 3694 tdev = TOE_DEV(so); /* blow up here if link was down */ 3695 d = TOM_DATA(tdev); 3696 3697 INP_LOCK(tp->t_inpcb); 3698 3699 /* 3700 * It's OK if the TID is currently in use, the owning socket may have 3701 * backlogged its last CPL message(s). Just take it away. 3702 */ 3703 toep->tp_tid = tid; 3704 toep->tp_tp = tp; 3705 so_insert_tid(d, so, tid); 3706 free_atid(cdev, atid); 3707 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); 3708 3709 socket_act_establish(so, m); 3710 INP_UNLOCK(tp->t_inpcb); 3711 CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid); 3712 cxgb_log_tcb(cdev->adapter, toep->tp_tid); 3713 3714 return (0); 3715} 3716 3717/* 3718 * Process an acknowledgment of WR completion. Advance snd_una and send the 3719 * next batch of work requests from the write queue. 
3720 */ 3721static void 3722wr_ack(struct toepcb *toep, struct mbuf *m) 3723{ 3724 struct tcpcb *tp = toep->tp_tp; 3725 struct cpl_wr_ack *hdr = cplhdr(m); 3726 struct socket *so = toeptoso(toep); 3727 unsigned int credits = ntohs(hdr->credits); 3728 u32 snd_una = ntohl(hdr->snd_una); 3729 int bytes = 0; 3730 3731 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3732 3733 INP_LOCK(tp->t_inpcb); 3734 3735 toep->tp_wr_avail += credits; 3736 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3737 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3738 3739 while (credits) { 3740 struct mbuf *p = peek_wr(toep); 3741 3742 if (__predict_false(!p)) { 3743 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3744 "nothing pending, state %u wr_avail=%u\n", 3745 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3746 break; 3747 } 3748 CTR2(KTR_TOM, 3749 "wr_ack: p->credits=%d p->bytes=%d", p->m_pkthdr.csum_data, p->m_pkthdr.len); 3750 3751 KASSERT(p->m_pkthdr.csum_data != 0, ("empty request still on list")); 3752 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3753 3754#if DEBUG_WR > 1 3755 struct tx_data_wr *w = cplhdr(p); 3756 log(LOG_ERR, 3757 "TID %u got %u WR credits, need %u, len %u, " 3758 "main body %u, frags %u, seq # %u, ACK una %u," 3759 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3760 toep->tp_tid, credits, p->csum, p->len, 3761 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3762 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3763 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3764#endif 3765 p->m_pkthdr.csum_data -= credits; 3766 break; 3767 } else { 3768 dequeue_wr(toep); 3769 credits -= p->m_pkthdr.csum_data; 3770 bytes += p->m_pkthdr.len; 3771 CTR3(KTR_TOM, 3772 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3773 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3774 3775 m_free(p); 3776 } 3777 } 3778 3779#if DEBUG_WR 3780 check_wr_invariants(tp); 3781#endif 3782 3783 if 
(__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3784#if VALIDATE_SEQ 3785 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3786 3787 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3788 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3789 toep->tp_tid, tp->snd_una); 3790#endif 3791 goto out_free; 3792 } 3793 3794 if (tp->snd_una != snd_una) { 3795 tp->snd_una = snd_una; 3796 tp->ts_recent_age = ticks; 3797#ifdef notyet 3798 /* 3799 * Keep ARP entry "minty fresh" 3800 */ 3801 dst_confirm(sk->sk_dst_cache); 3802#endif 3803 if (tp->snd_una == tp->snd_nxt) 3804 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3805 } 3806 if (bytes) { 3807 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3808 SOCKBUF_LOCK(&so->so_snd); 3809 sbdrop_locked(&so->so_snd, bytes); 3810 sowwakeup_locked(so); 3811 } 3812 3813 if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc) 3814 t3_push_frames(so, 0); 3815 3816out_free: 3817 INP_UNLOCK(tp->t_inpcb); 3818 m_free(m); 3819} 3820 3821/* 3822 * Handler for TX_DATA_ACK CPL messages. 3823 */ 3824static int 3825do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3826{ 3827 struct toepcb *toep = (struct toepcb *)ctx; 3828 3829 VALIDATE_SOCK(so); 3830 3831 wr_ack(toep, m); 3832 return 0; 3833} 3834 3835/* 3836 * Handler for TRACE_PKT CPL messages. Just sink these packets. 3837 */ 3838static int 3839do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 3840{ 3841 m_freem(m); 3842 return 0; 3843} 3844 3845/* 3846 * Reset a connection that is on a listener's SYN queue or accept queue, 3847 * i.e., one that has not had a struct socket associated with it. 3848 * Must be called from process context. 3849 * 3850 * Modeled after code in inet_csk_listen_stop(). 3851 */ 3852static void 3853t3_reset_listen_child(struct socket *child) 3854{ 3855 struct tcpcb *tp = sototcpcb(child); 3856 3857 t3_send_reset(tp->t_toe); 3858} 3859 3860/* 3861 * Disconnect offloaded established but not yet accepted connections sitting 3862 * on a server's accept_queue. 
We just send an ABORT_REQ at this point and 3863 * finish off the disconnect later as we may need to wait for the ABORT_RPL. 3864 */ 3865void 3866t3_disconnect_acceptq(struct socket *listen_so) 3867{ 3868 struct socket *so; 3869 struct tcpcb *tp; 3870 3871 TAILQ_FOREACH(so, &listen_so->so_comp, so_list) { 3872 tp = sototcpcb(so); 3873 3874 if (tp->t_flags & TF_TOE) { 3875 INP_LOCK(tp->t_inpcb); 3876 t3_reset_listen_child(so); 3877 INP_UNLOCK(tp->t_inpcb); 3878 } 3879 3880 } 3881} 3882 3883/* 3884 * Reset offloaded connections sitting on a server's syn queue. As above 3885 * we send ABORT_REQ and finish off when we get ABORT_RPL. 3886 */ 3887 3888void 3889t3_reset_synq(struct listen_ctx *lctx) 3890{ 3891 struct toepcb *toep; 3892 3893 SOCK_LOCK(lctx->lso); 3894 while (!LIST_EMPTY(&lctx->synq_head)) { 3895 toep = LIST_FIRST(&lctx->synq_head); 3896 LIST_REMOVE(toep, synq_entry); 3897 toep->tp_tp = NULL; 3898 t3_send_reset(toep); 3899 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); 3900 toepcb_release(toep); 3901 } 3902 SOCK_UNLOCK(lctx->lso); 3903} 3904 3905 3906int 3907t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl, 3908 unsigned int nppods, unsigned int tag, unsigned int maxoff, 3909 unsigned int pg_off, unsigned int color) 3910{ 3911 unsigned int i, j, pidx; 3912 struct pagepod *p; 3913 struct mbuf *m; 3914 struct ulp_mem_io *req; 3915 struct tcpcb *tp = sototcpcb(so); 3916 struct toepcb *toep = tp->t_toe; 3917 unsigned int tid = toep->tp_tid; 3918 const struct tom_data *td = TOM_DATA(TOE_DEV(so)); 3919 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; 3920 3921 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", 3922 gl, nppods, tag, maxoff, pg_off, color); 3923 3924 for (i = 0; i < nppods; ++i) { 3925 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); 3926 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 3927 req = mtod(m, struct ulp_mem_io *); 3928 m->m_pkthdr.len = m->m_len = sizeof(*req) 
+ PPOD_SIZE; 3929 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 3930 req->wr.wr_lo = 0; 3931 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) | 3932 V_ULPTX_CMD(ULP_MEM_WRITE)); 3933 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) | 3934 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1)); 3935 3936 p = (struct pagepod *)(req + 1); 3937 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) { 3938 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); 3939 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) | 3940 V_PPOD_COLOR(color)); 3941 p->pp_max_offset = htonl(maxoff); 3942 p->pp_page_offset = htonl(pg_off); 3943 p->pp_rsvd = 0; 3944 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) 3945 p->pp_addr[j] = pidx < gl->dgl_nelem ? 3946 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0; 3947 } else 3948 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */ 3949 send_or_defer(toep, m, 0); 3950 ppod_addr += PPOD_SIZE; 3951 } 3952 return (0); 3953} 3954 3955/* 3956 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command. 3957 */ 3958static inline void 3959mk_cpl_barrier_ulp(struct cpl_barrier *b) 3960{ 3961 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b; 3962 3963 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3964 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8)); 3965 b->opcode = CPL_BARRIER; 3966} 3967 3968/* 3969 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command. 3970 */ 3971static inline void 3972mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno) 3973{ 3974 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 3975 3976 txpkt = (struct ulp_txpkt *)req; 3977 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3978 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 3979 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid)); 3980 req->cpuno = htons(cpuno); 3981} 3982 3983/* 3984 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command. 
 */
static inline void
mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
    unsigned int word, uint64_t mask, uint64_t val)
{
        struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

        CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
            tid, word, mask, val);

        txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
        txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
        /* No completion CPL wanted for this field write. */
        req->reply = V_NO_REPLY(1);
        req->cpu_idx = 0;
        req->word = htons(word);
        req->mask = htobe64(mask);
        req->val = htobe64(val);
}

/*
 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
 */
static void
mk_rx_data_ack_ulp(struct cpl_rx_data_ack *ack, unsigned int tid, unsigned int credits)
{
        struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

        txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
        txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
        OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
        ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
            V_RX_DACK_MODE(1) | V_RX_CREDITS(credits));
}

/*
 * Cancel (invalidate) one of the two HW DDP buffers.  Sends a single
 * compound work request containing, in order: barrier, SET_TCB_FIELD to
 * flip the DDP flags, GET_TCB to learn how much data landed in the buffer,
 * and a trailing barrier.  The layout must match `wrlen` exactly.
 */
void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
        unsigned int wrlen;
        struct mbuf *m;
        struct work_request_hdr *wr;
        struct cpl_barrier *lock;
        struct cpl_set_tcb_field *req;
        struct cpl_get_tcb *getreq;
        struct ddp_state *p = &toep->tp_ddp_state;

        SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
        /* wr header + one field-set + two barriers + one GET_TCB */
        wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
            sizeof(*getreq);
        m = m_gethdr_nofail(wrlen);
        m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
        wr = mtod(m, struct work_request_hdr *);
        bzero(wr, wrlen);

        wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
        m->m_pkthdr.len = m->m_len = wrlen;

        lock = (struct cpl_barrier *)(wr + 1);
        mk_cpl_barrier_ulp(lock);

        req = (struct cpl_set_tcb_field *)(lock + 1);

        CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

        /* Hmmm, not sure if this actually a good thing: reactivating
         * the other buffer might be an issue if it has been completed
         * already.  However, that is unlikely, since the fact that the UBUF
         * is not completed indicates that there is no oustanding data.
         */
        if (bufidx == 0)
                /* Invalidate buf 0 and make buf 1 the active buffer. */
                mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
                    V_TF_DDP_ACTIVE_BUF(1) |
                    V_TF_DDP_BUF0_VALID(1),
                    V_TF_DDP_ACTIVE_BUF(1));
        else
                /* Invalidate buf 1 and make buf 0 the active buffer. */
                mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
                    V_TF_DDP_ACTIVE_BUF(1) |
                    V_TF_DDP_BUF1_VALID(1), 0);

        getreq = (struct cpl_get_tcb *)(req + 1);
        mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

        mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

        /* Keep track of the number of oustanding CPL_GET_TCB requests
         */
        p->get_tcb_count++;

#ifdef T3_TRACE
        T3_TRACE1(TIDTB(so),
            "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
        cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 *      t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 *      @sk: the socket associated with the buffers
 *      @bufidx: index of HW DDP buffer (0 or 1)
 *      @tag0: new tag for HW buffer 0
 *      @tag1: new tag for HW buffer 1
 *      @len: new length for HW buf @bufidx
 *
 *      Sends a compound WR to overlay a new DDP buffer on top of an existing
 *      buffer by changing the buffer tag and length and setting the valid and
 *      active flag accordingly.  The caller must ensure the new buffer is at
 *      least as big as the existing one.  Since we typically reprogram both HW
 *      buffers this function sets both tags for convenience.  Read the TCB to
 *      determine how much data was written into the buffer before the overlay
 *      took place.
4095 */ 4096void 4097t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, 4098 unsigned int tag1, unsigned int len) 4099{ 4100 unsigned int wrlen; 4101 struct mbuf *m; 4102 struct work_request_hdr *wr; 4103 struct cpl_get_tcb *getreq; 4104 struct cpl_set_tcb_field *req; 4105 struct ddp_state *p = &toep->tp_ddp_state; 4106 4107 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", 4108 bufidx, tag0, tag1, len); 4109 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4110 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); 4111 m = m_gethdr_nofail(wrlen); 4112 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4113 wr = mtod(m, struct work_request_hdr *); 4114 m->m_pkthdr.len = m->m_len = wrlen; 4115 bzero(wr, wrlen); 4116 4117 4118 /* Set the ATOMIC flag to make sure that TP processes the following 4119 * CPLs in an atomic manner and no wire segments can be interleaved. 4120 */ 4121 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); 4122 req = (struct cpl_set_tcb_field *)(wr + 1); 4123 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG, 4124 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) | 4125 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32, 4126 V_TCB_RX_DDP_BUF0_TAG(tag0) | 4127 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32); 4128 req++; 4129 if (bufidx == 0) { 4130 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN, 4131 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4132 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 4133 req++; 4134 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4135 V_TF_DDP_PUSH_DISABLE_0(1) | 4136 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4137 V_TF_DDP_PUSH_DISABLE_0(0) | 4138 V_TF_DDP_BUF0_VALID(1)); 4139 } else { 4140 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN, 4141 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN), 4142 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len)); 4143 req++; 4144 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4145 
V_TF_DDP_PUSH_DISABLE_1(1) | 4146 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4147 V_TF_DDP_PUSH_DISABLE_1(0) | 4148 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1)); 4149 } 4150 4151 getreq = (struct cpl_get_tcb *)(req + 1); 4152 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4153 4154 /* Keep track of the number of oustanding CPL_GET_TCB requests 4155 */ 4156 p->get_tcb_count++; 4157 4158#ifdef T3_TRACE 4159 T3_TRACE4(TIDTB(sk), 4160 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u " 4161 "len %d", 4162 bufidx, tag0, tag1, len); 4163#endif 4164 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4165} 4166 4167/* 4168 * Sends a compound WR containing all the CPL messages needed to program the 4169 * two HW DDP buffers, namely optionally setting up the length and offset of 4170 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK. 4171 */ 4172void 4173t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0, 4174 unsigned int len1, unsigned int offset1, 4175 uint64_t ddp_flags, uint64_t flag_mask, int modulate) 4176{ 4177 unsigned int wrlen; 4178 struct mbuf *m; 4179 struct work_request_hdr *wr; 4180 struct cpl_set_tcb_field *req; 4181 4182 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ", 4183 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff); 4184 4185 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4186 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) + 4187 (len1 ? sizeof(*req) : 0) + 4188 (modulate ? 
sizeof(struct cpl_rx_data_ack) : 0); 4189 m = m_gethdr_nofail(wrlen); 4190 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4191 wr = mtod(m, struct work_request_hdr *); 4192 bzero(wr, wrlen); 4193 4194 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4195 m->m_pkthdr.len = m->m_len = wrlen; 4196 4197 req = (struct cpl_set_tcb_field *)(wr + 1); 4198 if (len0) { /* program buffer 0 offset and length */ 4199 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET, 4200 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 4201 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4202 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) | 4203 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0)); 4204 req++; 4205 } 4206 if (len1) { /* program buffer 1 offset and length */ 4207 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET, 4208 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 4209 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32, 4210 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) | 4211 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32); 4212 req++; 4213 } 4214 4215 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask, 4216 ddp_flags); 4217 4218 if (modulate) { 4219 mk_rx_data_ack_ulp((struct cpl_rx_data_ack *)(req + 1), toep->tp_tid, 4220 toep->tp_copied_seq - toep->tp_rcv_wup); 4221 toep->tp_rcv_wup = toep->tp_copied_seq; 4222 } 4223 4224#ifdef T3_TRACE 4225 T3_TRACE5(TIDTB(sk), 4226 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x " 4227 "modulate %d", 4228 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff, 4229 modulate); 4230#endif 4231 4232 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4233} 4234 4235void 4236t3_init_wr_tab(unsigned int wr_len) 4237{ 4238 int i; 4239 4240 if (mbuf_wrs[1]) /* already initialized */ 4241 return; 4242 4243 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) { 4244 int sgl_len = (3 * i) / 2 + (i & 1); 4245 4246 sgl_len += 3; 4247 mbuf_wrs[i] = sgl_len <= wr_len ? 
4248 1 : 1 + (sgl_len - 2) / (wr_len - 1); 4249 } 4250 4251 wrlen = wr_len * 8; 4252} 4253 4254int 4255t3_init_cpl_io(void) 4256{ 4257#ifdef notyet 4258 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); 4259 if (!tcphdr_skb) { 4260 log(LOG_ERR, 4261 "Chelsio TCP offload: can't allocate sk_buff\n"); 4262 return -1; 4263 } 4264 skb_put(tcphdr_skb, sizeof(struct tcphdr)); 4265 tcphdr_skb->h.raw = tcphdr_skb->data; 4266 memset(tcphdr_skb->data, 0, tcphdr_skb->len); 4267#endif 4268 4269 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); 4270 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); 4271 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack); 4272 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data); 4273 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 4274 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 4275 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish); 4276 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req); 4277 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 4278 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); 4279 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp); 4280 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); 4281 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify); 4282 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt); 4283 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); 4284 return (0); 4285} 4286 4287