cxgb_cpl_io.c revision 177540
1/************************************************************************** 2 3Copyright (c) 2007, Chelsio Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 
27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 177540 2008-03-24 05:21:10Z kmacy $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/fcntl.h> 36#include <sys/kernel.h> 37#include <sys/limits.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mbuf.h> 41#include <sys/mutex.h> 42#include <sys/socket.h> 43#include <sys/sysctl.h> 44#include <sys/syslog.h> 45#include <sys/socketvar.h> 46#include <sys/protosw.h> 47#include <sys/priv.h> 48 49#include <net/if.h> 50#include <net/route.h> 51 52#include <netinet/in.h> 53#include <netinet/in_pcb.h> 54#include <netinet/in_systm.h> 55#include <netinet/in_var.h> 56 57 58#include <dev/cxgb/cxgb_osdep.h> 59#include <dev/cxgb/sys/mbufq.h> 60 61#include <netinet/ip.h> 62#include <netinet/tcp_var.h> 63#include <netinet/tcp_fsm.h> 64#include <netinet/tcp_offload.h> 65#include <netinet/tcp_seq.h> 66#include <netinet/tcp_syncache.h> 67#include <netinet/tcp_timer.h> 68#include <net/route.h> 69 70#include <dev/cxgb/t3cdev.h> 71#include <dev/cxgb/common/cxgb_firmware_exports.h> 72#include <dev/cxgb/common/cxgb_t3_cpl.h> 73#include <dev/cxgb/common/cxgb_tcb.h> 74#include <dev/cxgb/common/cxgb_ctl_defs.h> 75#include <dev/cxgb/cxgb_l2t.h> 76#include <dev/cxgb/cxgb_offload.h> 77#include <vm/vm.h> 78#include <vm/pmap.h> 79#include <machine/bus.h> 80#include <dev/cxgb/sys/mvec.h> 81#include <dev/cxgb/ulp/toecore/cxgb_toedev.h> 82#include <dev/cxgb/ulp/tom/cxgb_defs.h> 83#include <dev/cxgb/ulp/tom/cxgb_tom.h> 84#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h> 85#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 86#include <dev/cxgb/ulp/tom/cxgb_tcp.h> 87 88/* 89 * For ULP connections HW may add headers, e.g., for digests, that aren't part 90 * of the messages sent by the host but that are part of the TCP payload and 91 * therefore consume TCP sequence space. 
Tx connection parameters that 92 * operate in TCP sequence space are affected by the HW additions and need to 93 * compensate for them to accurately track TCP sequence numbers. This array 94 * contains the compensating extra lengths for ULP packets. It is indexed by 95 * a packet's ULP submode. 96 */ 97const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 98 99#ifdef notyet 100/* 101 * This sk_buff holds a fake header-only TCP segment that we use whenever we 102 * need to exploit SW TCP functionality that expects TCP headers, such as 103 * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple 104 * CPUs without locking. 105 */ 106static struct mbuf *tcphdr_mbuf __read_mostly; 107#endif 108 109/* 110 * Size of WRs in bytes. Note that we assume all devices we are handling have 111 * the same WR size. 112 */ 113static unsigned int wrlen __read_mostly; 114 115/* 116 * The number of WRs needed for an skb depends on the number of page fragments 117 * in the skb and whether it has any payload in its main body. This maps the 118 * length of the gather list represented by an skb into the # of necessary WRs. 119 */ 120static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; 121 122/* 123 * Max receive window supported by HW in bytes. Only a small part of it can 124 * be set through option0, the rest needs to be set through RX_DATA_ACK. 125 */ 126#define MAX_RCV_WND ((1U << 27) - 1) 127 128/* 129 * Min receive window. We want it to be large enough to accommodate receive 130 * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
131 */ 132#define MIN_RCV_WND (24 * 1024U) 133#define SO_TOS(so) ((sotoinpcb(so)->inp_ip_tos >> 2) & M_TOS) 134 135#define VALIDATE_SEQ 0 136#define VALIDATE_SOCK(so) 137#define DEBUG_WR 0 138 139extern int tcp_do_autorcvbuf; 140extern int tcp_do_autosndbuf; 141extern int tcp_autorcvbuf_max; 142extern int tcp_autosndbuf_max; 143 144static void t3_send_reset(struct toepcb *toep); 145static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status); 146static inline void free_atid(struct t3cdev *cdev, unsigned int tid); 147static void handle_syncache_event(int event, void *arg); 148 149static inline void 150SBAPPEND(struct sockbuf *sb, struct mbuf *n) 151{ 152 struct mbuf * m; 153 154 m = sb->sb_mb; 155 while (m) { 156 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 157 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 158 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 159 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 160 m->m_next, m->m_nextpkt, m->m_flags)); 161 m = m->m_next; 162 } 163 m = n; 164 while (m) { 165 KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 166 !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 167 !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 168 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 169 m->m_next, m->m_nextpkt, m->m_flags)); 170 m = m->m_next; 171 } 172 sbappend_locked(sb, n); 173 m = sb->sb_mb; 174 while (m) { 175 KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 176 m->m_next, m->m_nextpkt, m->m_flags)); 177 m = m->m_next; 178 } 179} 180 181static inline int 182is_t3a(const struct toedev *dev) 183{ 184 return (dev->tod_ttid == TOE_ID_CHELSIO_T3); 185} 186 187static void 188dump_toepcb(struct toepcb *toep) 189{ 190 DPRINTF("qset_idx=%d qset=%d 
ulp_mode=%d mtu_idx=%d tid=%d\n", 191 toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, 192 toep->tp_mtu_idx, toep->tp_tid); 193 194 DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", 195 toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 196 toep->tp_mss_clamp, toep->tp_flags); 197} 198 199#ifndef RTALLOC2_DEFINED 200static struct rtentry * 201rtalloc2(struct sockaddr *dst, int report, u_long ignflags) 202{ 203 struct rtentry *rt = NULL; 204 205 if ((rt = rtalloc1(dst, report, ignflags)) != NULL) 206 RT_UNLOCK(rt); 207 208 return (rt); 209} 210#endif 211/* 212 * Determine whether to send a CPL message now or defer it. A message is 213 * deferred if the connection is in SYN_SENT since we don't know the TID yet. 214 * For connections in other states the message is sent immediately. 215 * If through_l2t is set the message is subject to ARP processing, otherwise 216 * it is sent directly. 217 */ 218static inline void 219send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t) 220{ 221 struct tcpcb *tp = toep->tp_tp; 222 223 if (__predict_false(tp->t_state == TCPS_SYN_SENT)) { 224 inp_wlock(tp->t_inpcb); 225 mbufq_tail(&toep->out_of_order_queue, m); // defer 226 inp_wunlock(tp->t_inpcb); 227 } else if (through_l2t) 228 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T 229 else 230 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly 231} 232 233static inline unsigned int 234mkprio(unsigned int cntrl, const struct toepcb *toep) 235{ 236 return (cntrl); 237} 238 239/* 240 * Populate a TID_RELEASE WR. The skb must be already propely sized. 
241 */ 242static inline void 243mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid) 244{ 245 struct cpl_tid_release *req; 246 247 m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep)); 248 m->m_pkthdr.len = m->m_len = sizeof(*req); 249 req = mtod(m, struct cpl_tid_release *); 250 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 251 req->wr.wr_lo = 0; 252 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); 253} 254 255static inline void 256make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail) 257{ 258 struct tcpcb *tp = sototcpcb(so); 259 struct toepcb *toep = tp->t_toe; 260 struct tx_data_wr *req; 261 262 inp_wlock_assert(tp->t_inpcb); 263 264 req = mtod(m, struct tx_data_wr *); 265 m->m_len = sizeof(*req); 266 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 267 req->wr_lo = htonl(V_WR_TID(toep->tp_tid)); 268 /* len includes the length of any HW ULP additions */ 269 req->len = htonl(len); 270 req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx)); 271 /* V_TX_ULP_SUBMODE sets both the mode and submode */ 272 req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) | 273 V_TX_URG(/* skb_urgent(skb) */ 0 ) | 274 V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) && 275 (tail ? 0 : 1)))); 276 req->sndseq = htonl(tp->snd_nxt); 277 if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) { 278 req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 279 V_TX_CPU_IDX(toep->tp_qset)); 280 281 /* Sendbuffer is in units of 32KB. 
282 */ 283 if (tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) 284 req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15)); 285 else 286 req->param |= htonl(V_TX_SNDBUF(so->so_snd.sb_hiwat >> 15)); 287 toep->tp_flags |= TP_DATASENT; 288 } 289} 290 291#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */ 292 293int 294t3_push_frames(struct socket *so, int req_completion) 295{ 296 struct tcpcb *tp = sototcpcb(so); 297 struct toepcb *toep = tp->t_toe; 298 299 struct mbuf *tail, *m0, *last; 300 struct t3cdev *cdev; 301 struct tom_data *d; 302 int i, bytes, count, total_bytes; 303 bus_dma_segment_t segs[TX_MAX_SEGS], *segp; 304 305 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) { 306 DPRINTF("tcp state=%d\n", tp->t_state); 307 return (0); 308 } 309 310 if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) { 311 DPRINTF("disconnecting\n"); 312 313 return (0); 314 } 315 316 317 inp_wlock_assert(tp->t_inpcb); 318 SOCKBUF_LOCK(&so->so_snd); 319 d = TOM_DATA(TOE_DEV(so)); 320 cdev = d->cdev; 321 last = tail = so->so_snd.sb_sndptr ? so->so_snd.sb_sndptr : so->so_snd.sb_mb; 322 total_bytes = 0; 323 DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n", 324 toep->tp_wr_avail, tail, so->so_snd.sb_cc, toep->tp_m_last); 325 326 if (last && toep->tp_m_last == last && so->so_snd.sb_sndptroff != 0) { 327 KASSERT(tail, ("sbdrop error")); 328 last = tail = tail->m_next; 329 } 330 331 if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) { 332 DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail); 333 SOCKBUF_UNLOCK(&so->so_snd); 334 return (0); 335 } 336 337 toep->tp_m_last = NULL; 338 while (toep->tp_wr_avail && (tail != NULL)) { 339 count = bytes = 0; 340 segp = segs; 341 if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) { 342 SOCKBUF_UNLOCK(&so->so_snd); 343 return (0); 344 } 345 /* 346 * If the data in tail fits as in-line, then 347 * make an immediate data wr. 
348 */ 349 if (tail->m_len <= IMM_LEN) { 350 count = 1; 351 bytes = tail->m_len; 352 last = tail; 353 tail = tail->m_next; 354 m_set_sgl(m0, NULL); 355 m_set_sgllen(m0, 0); 356 make_tx_data_wr(so, m0, bytes, tail); 357 m_append(m0, bytes, mtod(last, caddr_t)); 358 KASSERT(!m0->m_next, ("bad append")); 359 } else { 360 while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail) 361 && (tail != NULL) && (count < TX_MAX_SEGS-1)) { 362 bytes += tail->m_len; 363 last = tail; 364 count++; 365 /* 366 * technically an abuse to be using this for a VA 367 * but less gross than defining my own structure 368 * or calling pmap_kextract from here :-| 369 */ 370 segp->ds_addr = (bus_addr_t)tail->m_data; 371 segp->ds_len = tail->m_len; 372 DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n", 373 count, mbuf_wrs[count], tail->m_data, tail->m_len); 374 segp++; 375 tail = tail->m_next; 376 } 377 DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n", 378 toep->tp_wr_avail, count, mbuf_wrs[count], tail); 379 380 m_set_sgl(m0, segs); 381 m_set_sgllen(m0, count); 382 make_tx_data_wr(so, m0, bytes, tail); 383 } 384 m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep)); 385 386 if (tail) { 387 so->so_snd.sb_sndptr = tail; 388 toep->tp_m_last = NULL; 389 } else 390 toep->tp_m_last = so->so_snd.sb_sndptr = last; 391 392 393 DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last); 394 395 so->so_snd.sb_sndptroff += bytes; 396 total_bytes += bytes; 397 toep->tp_write_seq += bytes; 398 CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d tail=%p sndptr=%p sndptroff=%d", 399 toep->tp_wr_avail, count, mbuf_wrs[count], tail, so->so_snd.sb_sndptr, so->so_snd.sb_sndptroff); 400 if (tail) 401 CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p tailbuf=%p snd_una=0x%08x", 402 total_bytes, toep->tp_m_last, tail->m_data, tp->snd_una); 403 else 404 CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d tp_m_last=%p snd_una=0x%08x", 405 total_bytes, toep->tp_m_last, tp->snd_una); 406 407 408 i = 0; 409 while (i < count && 
m_get_sgllen(m0)) { 410 if ((count - i) >= 3) { 411 CTR6(KTR_TOM, 412 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d pa=0x%zx len=%d", 413 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len, 414 segs[i + 2].ds_addr, segs[i + 2].ds_len); 415 i += 3; 416 } else if ((count - i) == 2) { 417 CTR4(KTR_TOM, 418 "t3_push_frames: pa=0x%zx len=%d pa=0x%zx len=%d", 419 segs[i].ds_addr, segs[i].ds_len, segs[i + 1].ds_addr, segs[i + 1].ds_len); 420 i += 2; 421 } else { 422 CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d", 423 segs[i].ds_addr, segs[i].ds_len); 424 i++; 425 } 426 427 } 428 429 /* 430 * remember credits used 431 */ 432 m0->m_pkthdr.csum_data = mbuf_wrs[count]; 433 m0->m_pkthdr.len = bytes; 434 toep->tp_wr_avail -= mbuf_wrs[count]; 435 toep->tp_wr_unacked += mbuf_wrs[count]; 436 437 if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) || 438 toep->tp_wr_unacked >= toep->tp_wr_max / 2) { 439 struct work_request_hdr *wr = cplhdr(m0); 440 441 wr->wr_hi |= htonl(F_WR_COMPL); 442 toep->tp_wr_unacked = 0; 443 } 444 KASSERT((m0->m_pkthdr.csum_data > 0) && 445 (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d", 446 m0->m_pkthdr.csum_data)); 447 m0->m_type = MT_DONTFREE; 448 enqueue_wr(toep, m0); 449 DPRINTF("sending offload tx with %d bytes in %d segments\n", 450 bytes, count); 451 l2t_send(cdev, m0, toep->tp_l2t); 452 } 453 SOCKBUF_UNLOCK(&so->so_snd); 454 return (total_bytes); 455} 456 457/* 458 * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail 459 * under any circumstances. We take the easy way out and always queue the 460 * message to the write_queue. We can optimize the case where the queue is 461 * already empty though the optimization is probably not worth it. 
 */
static void
close_conn(struct socket *so)
{
	struct mbuf *m;
	struct cpl_close_con_req *req;
	struct tom_data *d;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	unsigned int tid;


	inp_wlock(inp);
	tp = sototcpcb(so);
	toep = tp->t_toe;

	/* Flush any queued payload first so the FIN sequences after it. */
	if (tp->t_state != TCPS_SYN_SENT)
		t3_push_frames(so, 1);

	/* Only ever send one CLOSE_CON_REQ per connection. */
	if (toep->tp_flags & TP_FIN_SENT) {
		inp_wunlock(inp);
		return;
	}

	tid = toep->tp_tid;

	d = TOM_DATA(toep->tp_toedev);

	/* "nofail" allocation: the close request must always go out. */
	m = m_gethdr_nofail(sizeof(*req));

	toep->tp_flags |= TP_FIN_SENT;
	req = mtod(m, struct cpl_close_con_req *);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
	req->wr.wr_lo = htonl(V_WR_TID(tid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
	/* rsvd carries the sequence number the FIN occupies. */
	req->rsvd = htonl(toep->tp_write_seq);
	inp_wunlock(inp);
	/*
	 * XXX - need to defer shutdown while there is still data in the queue
	 *
	 */
	cxgb_ofld_send(d->cdev, m);

}

/*
 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 * and send it along.
 */
static void
abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{
	struct cpl_abort_req *req = cplhdr(m);

	/* Peer is unreachable; don't ask HW to emit an RST it can't deliver. */
	req->cmd = CPL_ABORT_NO_RST;
	cxgb_ofld_send(cdev, m);
}

/*
 * Send RX credits through an RX_DATA_ACK CPL message.  If nofail is 0 we are
 * permitted to return without sending the message in case we cannot allocate
 * an sk_buff.  Returns the number of credits sent.
526 */ 527uint32_t 528t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail) 529{ 530 struct mbuf *m; 531 struct cpl_rx_data_ack *req; 532 struct toepcb *toep = tp->t_toe; 533 struct toedev *tdev = toep->tp_toedev; 534 535 m = m_gethdr_nofail(sizeof(*req)); 536 537 DPRINTF("returning %u credits to HW\n", credits); 538 539 req = mtod(m, struct cpl_rx_data_ack *); 540 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 541 req->wr.wr_lo = 0; 542 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 543 req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); 544 m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep)); 545 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 546 return (credits); 547} 548 549/* 550 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled. 551 * This is only used in DDP mode, so we take the opportunity to also set the 552 * DACK mode and flush any Rx credits. 553 */ 554void 555t3_send_rx_modulate(struct toepcb *toep) 556{ 557 struct mbuf *m; 558 struct cpl_rx_data_ack *req; 559 560 m = m_gethdr_nofail(sizeof(*req)); 561 562 req = mtod(m, struct cpl_rx_data_ack *); 563 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 564 req->wr.wr_lo = 0; 565 m->m_pkthdr.len = m->m_len = sizeof(*req); 566 567 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 568 req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 569 V_RX_DACK_MODE(1) | 570 V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup)); 571 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 572 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 573 toep->tp_rcv_wup = toep->tp_copied_seq; 574} 575 576/* 577 * Handle receipt of an urgent pointer. 
578 */ 579static void 580handle_urg_ptr(struct socket *so, uint32_t urg_seq) 581{ 582#ifdef URGENT_DATA_SUPPORTED 583 struct tcpcb *tp = sototcpcb(so); 584 585 urg_seq--; /* initially points past the urgent data, per BSD */ 586 587 if (tp->urg_data && !after(urg_seq, tp->urg_seq)) 588 return; /* duplicate pointer */ 589 sk_send_sigurg(sk); 590 if (tp->urg_seq == tp->copied_seq && tp->urg_data && 591 !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) { 592 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 593 594 tp->copied_seq++; 595 if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len) 596 tom_eat_skb(sk, skb, 0); 597 } 598 tp->urg_data = TCP_URG_NOTYET; 599 tp->urg_seq = urg_seq; 600#endif 601} 602 603/* 604 * Returns true if a socket cannot accept new Rx data. 605 */ 606static inline int 607so_no_receive(const struct socket *so) 608{ 609 return (so->so_state & (SS_ISDISCONNECTED|SS_ISDISCONNECTING)); 610} 611 612/* 613 * Process an urgent data notification. 614 */ 615static void 616rx_urg_notify(struct toepcb *toep, struct mbuf *m) 617{ 618 struct cpl_rx_urg_notify *hdr = cplhdr(m); 619 struct socket *so = toeptoso(toep); 620 621 VALIDATE_SOCK(so); 622 623 if (!so_no_receive(so)) 624 handle_urg_ptr(so, ntohl(hdr->seq)); 625 626 m_freem(m); 627} 628 629/* 630 * Handler for RX_URG_NOTIFY CPL messages. 631 */ 632static int 633do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx) 634{ 635 struct toepcb *toep = (struct toepcb *)ctx; 636 637 rx_urg_notify(toep, m); 638 return (0); 639} 640 641static __inline int 642is_delack_mode_valid(struct toedev *dev, struct toepcb *toep) 643{ 644 return (toep->tp_ulp_mode || 645 (toep->tp_ulp_mode == ULP_MODE_TCPDDP && 646 dev->tod_ttid >= TOE_ID_CHELSIO_T3)); 647} 648 649/* 650 * Set of states for which we should return RX credits. 651 */ 652#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 653 654/* 655 * Called after some received data has been read. 
It returns RX credits 656 * to the HW for the amount of data processed. 657 */ 658void 659t3_cleanup_rbuf(struct tcpcb *tp, int copied) 660{ 661 struct toepcb *toep = tp->t_toe; 662 struct socket *so; 663 struct toedev *dev; 664 int dack_mode, must_send, read; 665 u32 thres, credits, dack = 0; 666 667 so = tp->t_inpcb->inp_socket; 668 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 669 (tp->t_state == TCPS_FIN_WAIT_2))) { 670 if (copied) { 671 SOCKBUF_LOCK(&so->so_rcv); 672 toep->tp_copied_seq += copied; 673 SOCKBUF_UNLOCK(&so->so_rcv); 674 } 675 676 return; 677 } 678 679 inp_wlock_assert(tp->t_inpcb); 680 SOCKBUF_LOCK(&so->so_rcv); 681 if (copied) 682 toep->tp_copied_seq += copied; 683 else { 684 read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc; 685 toep->tp_copied_seq += read; 686 } 687 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 688 toep->tp_enqueued_bytes = so->so_rcv.sb_cc; 689 SOCKBUF_UNLOCK(&so->so_rcv); 690 691 if (credits > so->so_rcv.sb_mbmax) { 692 printf("copied_seq=%u rcv_wup=%u credits=%u\n", 693 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 694 credits = so->so_rcv.sb_mbmax; 695 } 696 697 698 /* 699 * XXX this won't accurately reflect credit return - we need 700 * to look at the difference between the amount that has been 701 * put in the recv sockbuf and what is there now 702 */ 703 704 if (__predict_false(!credits)) 705 return; 706 707 dev = toep->tp_toedev; 708 thres = TOM_TUNABLE(dev, rx_credit_thres); 709 710 if (__predict_false(thres == 0)) 711 return; 712 713 if (is_delack_mode_valid(dev, toep)) { 714 dack_mode = TOM_TUNABLE(dev, delack); 715 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 716 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 717 718 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 719 dack = F_RX_DACK_CHANGE | 720 V_RX_DACK_MODE(dack_mode); 721 } 722 } else 723 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 724 725 /* 726 * For coalescing to work effectively ensure the receive 
window has 727 * at least 16KB left. 728 */ 729 must_send = credits + 16384 >= tp->rcv_wnd; 730 731 if (must_send || credits >= thres) 732 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 733} 734 735static int 736cxgb_toe_disconnect(struct tcpcb *tp) 737{ 738 struct socket *so; 739 740 DPRINTF("cxgb_toe_disconnect\n"); 741 742 so = tp->t_inpcb->inp_socket; 743 close_conn(so); 744 return (0); 745} 746 747static int 748cxgb_toe_reset(struct tcpcb *tp) 749{ 750 struct toepcb *toep = tp->t_toe; 751 752 t3_send_reset(toep); 753 754 /* 755 * unhook from socket 756 */ 757 tp->t_flags &= ~TF_TOE; 758 toep->tp_tp = NULL; 759 tp->t_toe = NULL; 760 return (0); 761} 762 763static int 764cxgb_toe_send(struct tcpcb *tp) 765{ 766 struct socket *so; 767 768 DPRINTF("cxgb_toe_send\n"); 769 dump_toepcb(tp->t_toe); 770 771 so = tp->t_inpcb->inp_socket; 772 t3_push_frames(so, 1); 773 return (0); 774} 775 776static int 777cxgb_toe_rcvd(struct tcpcb *tp) 778{ 779 780 inp_wlock_assert(tp->t_inpcb); 781 t3_cleanup_rbuf(tp, 0); 782 783 return (0); 784} 785 786static void 787cxgb_toe_detach(struct tcpcb *tp) 788{ 789 struct toepcb *toep; 790 791 /* 792 * XXX how do we handle teardown in the SYN_SENT state? 
793 * 794 */ 795 inp_wlock_assert(tp->t_inpcb); 796 toep = tp->t_toe; 797 toep->tp_tp = NULL; 798 799 /* 800 * unhook from socket 801 */ 802 tp->t_flags &= ~TF_TOE; 803 tp->t_toe = NULL; 804} 805 806 807static struct toe_usrreqs cxgb_toe_usrreqs = { 808 .tu_disconnect = cxgb_toe_disconnect, 809 .tu_reset = cxgb_toe_reset, 810 .tu_send = cxgb_toe_send, 811 .tu_rcvd = cxgb_toe_rcvd, 812 .tu_detach = cxgb_toe_detach, 813 .tu_detach = cxgb_toe_detach, 814 .tu_syncache_event = handle_syncache_event, 815}; 816 817 818static void 819__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 820 uint64_t mask, uint64_t val, int no_reply) 821{ 822 struct cpl_set_tcb_field *req; 823 824 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 825 toep->tp_tid, word, mask, val); 826 827 req = mtod(m, struct cpl_set_tcb_field *); 828 m->m_pkthdr.len = m->m_len = sizeof(*req); 829 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 830 req->wr.wr_lo = 0; 831 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 832 req->reply = V_NO_REPLY(no_reply); 833 req->cpu_idx = 0; 834 req->word = htons(word); 835 req->mask = htobe64(mask); 836 req->val = htobe64(val); 837 838 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 839 send_or_defer(toep, m, 0); 840} 841 842static void 843t3_set_tcb_field(struct socket *so, uint16_t word, uint64_t mask, uint64_t val) 844{ 845 struct mbuf *m; 846 struct tcpcb *tp = sototcpcb(so); 847 struct toepcb *toep = tp->t_toe; 848 849 if (toep == NULL) 850 return; 851 852 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 853 printf("not seting field\n"); 854 return; 855 } 856 857 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 858 859 __set_tcb_field(toep, m, word, mask, val, 1); 860} 861 862/* 863 * Set one of the t_flags bits in the TCB. 
 */
static void
set_tcb_tflag(struct socket *so, unsigned int bit_pos, int val)
{
	/*
	 * NOTE(review): 'val' is an int; for bit_pos >= 31 the shift
	 * 'val << bit_pos' would overflow before widening.  Callers below
	 * only pass small bit positions — confirm before reusing for high bits.
	 */
	t3_set_tcb_field(so, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
 */
static void
t3_set_nagle(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	set_tcb_tflag(so, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
 */
void
t3_set_keepalive(struct socket *so, int on_off)
{
	set_tcb_tflag(so, S_TF_KEEPALIVE, on_off);
}

/* Enable/disable HW receive coalescing for the connection. */
void
t3_set_rcv_coalesce_enable(struct socket *so, int on_off)
{
	set_tcb_tflag(so, S_TF_RCV_COALESCE_ENABLE, on_off);
}

/* Enable/disable MSS-based delayed-ACK for the connection. */
void
t3_set_dack_mss(struct socket *so, int on_off)
{
	set_tcb_tflag(so, S_TF_DACK_MSS, on_off);
}

/*
 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
 */
static void
t3_set_tos(struct socket *so)
{
	t3_set_tcb_field(so, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
	    V_TCB_TOS(SO_TOS(so)));
}


/*
 * In DDP mode, TP fails to schedule a timer to push RX data to the host when
 * DDP is disabled (data is delivered to freelist). [Note that, the peer should
 * set the PSH bit in the last segment, which would trigger delivery.]
 * We work around the issue by setting a DDP buffer in a partial placed state,
 * which guarantees that TP will schedule a timer.
 */
#define TP_DDP_TIMER_WORKAROUND_MASK\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
       V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
#define TP_DDP_TIMER_WORKAROUND_VAL\
    (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
     ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
      32))

/* Turn DDP on or off; off applies the partial-placement timer workaround. */
static void
t3_enable_ddp(struct socket *so, int on)
{
	if (on) {

		t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
		    V_TF_DDP_OFF(0));
	} else
		t3_set_tcb_field(so, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_OFF(1) |
		    TP_DDP_TIMER_WORKAROUND_MASK,
		    V_TF_DDP_OFF(1) |
		    TP_DDP_TIMER_WORKAROUND_VAL);

}

/* Program the tag/color of DDP buffer buf_idx (0 or 1) in the TCB. */
void
t3_set_ddp_tag(struct socket *so, int buf_idx, unsigned int tag_color)
{
	t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
	    V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
	    tag_color);
}

/* Program offset/length of DDP buffer buf_idx (0 or 1) in the TCB. */
void
t3_set_ddp_buf(struct socket *so, int buf_idx, unsigned int offset,
    unsigned int len)
{
	if (buf_idx == 0)
		t3_set_tcb_field(so, W_TCB_RX_DDP_BUF0_OFFSET,
		    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
		    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
	else
		/*
		 * NOTE(review): the mask uses M_TCB_RX_DDP_BUF1_LEN << 32
		 * without a uint64_t cast (unlike the value expression below
		 * and the buf0 case) — if the M_ constant is a plain int the
		 * shift overflows; verify against cxgb_tcb.h.
		 */
		t3_set_tcb_field(so, W_TCB_RX_DDP_BUF1_OFFSET,
		    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
		    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
		    V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
		    V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
}

/* Look up a congestion-control algorithm by name (stubbed out for now). */
static int
t3_set_cong_control(struct socket *so, const char *name)
{
#ifdef CONGESTION_CONTROL_SUPPORTED
	int cong_algo;

	for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
		if (!strcmp(name, t3_cong_ops[cong_algo].name))
			break;

	if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
		return -EINVAL;
#endif
	return 0;
}

/*
 * Ask the hardware for a copy of the connection's TCB via CPL_GET_TCB.
 * Deferred while in SYN_SENT (no TID yet).  Returns 0 or ENOMEM.
 */
int
t3_get_tcb(struct socket *so)
{
	struct cpl_get_tcb *req;
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);

	if (!m)
		return (ENOMEM);

	inp_wlock_assert(tp->t_inpcb);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	req = mtod(m, struct cpl_get_tcb *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
	req->cpuno = htons(toep->tp_qset);
	req->rsvd = 0;
	if (sototcpcb(so)->t_state == TCPS_SYN_SENT)
		mbufq_tail(&toep->out_of_order_queue, m);	// defer
	else
		cxgb_ofld_send(T3C_DEV(so), m);
	return 0;
}

/*
 * Register the connection's toepcb in the TID table, taking an extra
 * reference that the table now owns.
 */
static inline void
so_insert_tid(struct tom_data *d, struct socket *so, unsigned int tid)
{
	struct toepcb *toep = sototoep(so);
	toepcb_hold(toep);

	cxgb_insert_tid(d->cdev, d->client, toep, tid);
}

/**
 * find_best_mtu - find the entry in the MTU table closest to an MTU
 * @d: TOM state
 * @mtu: the target MTU
 *
 * Returns the index of the value in the MTU table that is closest to but
 * does not exceed the target MTU.
1031 */ 1032static unsigned int 1033find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1034{ 1035 int i = 0; 1036 1037 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1038 ++i; 1039 return (i); 1040} 1041 1042static unsigned int 1043select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1044{ 1045 unsigned int idx; 1046 1047#ifdef notyet 1048 struct rtentry *dst = sotoinpcb(so)->inp_route.ro_rt; 1049#endif 1050 if (tp) { 1051 tp->t_maxseg = pmtu - 40; 1052 if (tp->t_maxseg < td->mtus[0] - 40) 1053 tp->t_maxseg = td->mtus[0] - 40; 1054 idx = find_best_mtu(td, tp->t_maxseg + 40); 1055 1056 tp->t_maxseg = td->mtus[idx] - 40; 1057 } else 1058 idx = find_best_mtu(td, pmtu); 1059 1060 return (idx); 1061} 1062 1063static inline void 1064free_atid(struct t3cdev *cdev, unsigned int tid) 1065{ 1066 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1067 1068 if (toep) 1069 toepcb_release(toep); 1070} 1071 1072/* 1073 * Release resources held by an offload connection (TID, L2T entry, etc.) 
 */
static void
t3_release_offload_resources(struct toepcb *toep)
{
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev;
	unsigned int tid = toep->tp_tid;

	/* Nothing to release if the connection was never bound to a device. */
	if (!tdev)
		return;

	cdev = TOEP_T3C_DEV(toep);
	if (!cdev)
		return;

	toep->tp_qset = 0;
	t3_release_ddp_resources(toep);

#ifdef CTRL_SKB_CACHE
	kfree_skb(CTRL_SKB_CACHE(tp));
	CTRL_SKB_CACHE(tp) = NULL;
#endif

	/* Any outstanding work requests must be purged before teardown. */
	if (toep->tp_wr_avail != toep->tp_wr_max) {
		purge_wr_queue(toep);
		reset_wr_list(toep);
	}

	if (toep->tp_l2t) {
		l2t_release(L2DATA(cdev), toep->tp_l2t);
		toep->tp_l2t = NULL;
	}
	/* Detach the toepcb and the tcpcb from each other. */
	toep->tp_tp = NULL;
	if (tp) {
		inp_wlock_assert(tp->t_inpcb);
		tp->t_toe = NULL;
		tp->t_flags &= ~TF_TOE;
	}

	/*
	 * In SYN_SENT only an atid was allocated; otherwise a full TID is
	 * registered and holds a toepcb reference that must be dropped.
	 */
	if (toep->tp_state == TCPS_SYN_SENT) {
		free_atid(cdev, tid);
#ifdef notyet
		__skb_queue_purge(&tp->out_of_order_queue);
#endif
	} else {		// we have TID
		cxgb_remove_tid(cdev, toep, tid);
		toepcb_release(toep);
	}
#if 0
	log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
}

/*
 * Switch a connected socket over to the TOE usrreqs so subsequent socket
 * operations are routed through the offload path.
 */
static void
install_offload_ops(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	KASSERT(tp->t_toe != NULL, ("toepcb not set"));

	t3_install_socket_ops(so);
	tp->t_flags |= TF_TOE;
	tp->t_tu = &cxgb_toe_usrreqs;
}

/*
 * Determine the receive window scaling factor given a target max
 * receive window.
 */
static __inline int
select_rcv_wscale(int space)
{
	int wscale = 0;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	/* RFC 1323 limits the shift to 14; only scale when enabled. */
	if (tcp_do_rfc1323)
		for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;

	return (wscale);
}

/*
 * Determine the receive window size for a socket.
 */
static unsigned long
select_rcv_wnd(struct toedev *dev, struct socket *so)
{
	struct tom_data *d = TOM_DATA(dev);
	unsigned int wnd;
	unsigned int max_rcv_wnd;

	if (tcp_do_autorcvbuf)
		wnd = tcp_autorcvbuf_max;
	else
		wnd = so->so_rcv.sb_hiwat;

	/* XXX
	 * For receive coalescing to work effectively we need a receive window
	 * that can accomodate a coalesced segment.
	 */
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	/* PR 5138: pre-T3C parts cap the window at 23 RX pages. */
	max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
	    (uint32_t)d->rx_page_size * 23 :
	    MAX_RCV_WND);

	return min(wnd, max_rcv_wnd);
}

/*
 * Assign offload parameters to some socket fields.  This code is used by
 * both active and passive opens.
 */
static inline void
init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
    struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
{
	struct tcpcb *tp = sototcpcb(so);
	struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);

	SOCK_LOCK_ASSERT(so);

	/* NOTE(review): debug chatter left in by the author. */
	printf("initializing offload socket\n");
	/*
	 * We either need to fix push frames to work with sbcompress
	 * or we need to add this
	 */
	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	/* Cross-link the tcpcb and the toepcb. */
	tp->t_toe = toep;
	toep->tp_tp = tp;
	toep->tp_toedev = dev;

	toep->tp_tid = tid;
	toep->tp_l2t = e;
	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
	toep->tp_wr_unacked = 0;
	toep->tp_delack_mode = 0;

	toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
	/*
	 * XXX broken
	 *
	 */
	tp->rcv_wnd = select_rcv_wnd(dev, so);

	/* Enable DDP only when tuned on, not opted out, and window is big enough. */
	toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
	    tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
	toep->tp_qset_idx = 0;

	reset_wr_list(toep);
	DPRINTF("initialization done\n");
}

/*
 * The next two functions calculate the option 0 value for a socket.
 */
static inline unsigned int
calc_opt0h(struct socket *so, int mtu_idx)
{
	struct tcpcb *tp = sototcpcb(so);
	int wscale = select_rcv_wscale(tp->rcv_wnd);

	return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
	    V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
	    V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}

static inline unsigned int
calc_opt0l(struct socket *so, int ulp_mode)
{
	struct tcpcb *tp = sototcpcb(so);
	unsigned int val;

	/* Receive buffer size is encoded in 1KB units, capped by the field width. */
	val = V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
	    V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));

	DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", SO_TOS(so), tp->rcv_wnd, val);
	return (val);
}

/*
 * Compute the option 2 value: selects a congestion-control flavor when one
 * is configured via the cong_alg tunable.
 */
static inline unsigned int
calc_opt2(const struct socket *so, struct toedev *dev)
{
	int flv_valid;

	flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);

	return (V_FLAVORS_VALID(flv_valid) |
	    V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
}

#if DEBUG_WR > 1
/* Debug helper: sum the WR credits of all mbufs still on the WR queue. */
static int
count_pending_wrs(const struct toepcb *toep)
{
	const struct mbuf *m;
	int n = 0;

	wr_queue_walk(toep, m)
		n += m->m_pkthdr.csum_data;
	return (n);
}
#endif

#if 0
(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
#endif

/*
 * Fill in a CPL_ACT_OPEN_REQ (active open) message in @m for the given
 * atid and L2T entry, using the socket's 4-tuple and computed options.
 */
static void
mk_act_open_req(struct socket *so, struct mbuf *m,
    unsigned int atid, const struct l2t_entry *e)
{
	struct cpl_act_open_req *req;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = TOE_DEV(so);

	m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));

	req = mtod(m, struct cpl_act_open_req *);
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
	/* Ports/addresses are already in network byte order in the inpcb. */
	req->local_port = inp->inp_lport;
	req->peer_port = inp->inp_fport;
	memcpy(&req->local_ip, &inp->inp_laddr, 4);
	memcpy(&req->peer_ip, &inp->inp_faddr, 4);
	req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
	    V_TX_CHANNEL(e->smt_idx));
	req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
	req->params = 0;
	req->opt2 = htonl(calc_opt2(so, tdev));
}


/*
 * Convert an ACT_OPEN_RPL status to an errno.
 */
static int
act_open_rpl_status_to_errno(int status)
{
	switch (status) {
	case CPL_ERR_CONN_RESET:
		return (ECONNREFUSED);
	case CPL_ERR_ARP_MISS:
		return (EHOSTUNREACH);
	case CPL_ERR_CONN_TIMEDOUT:
		return (ETIMEDOUT);
	case CPL_ERR_TCAM_FULL:
		return (ENOMEM);
	case CPL_ERR_CONN_EXIST:
		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
		return (EADDRINUSE);
	default:
		return (EIO);
	}
}

/*
 * Tear down a failed active open: release offload resources and, if the
 * tcpcb still exists, drop the connection with @errno.
 */
static void
fail_act_open(struct toepcb *toep, int errno)
{
	struct tcpcb *tp = toep->tp_tp;

	t3_release_offload_resources(toep);
	if (tp) {
		inp_wlock_assert(tp->t_inpcb);
		tcp_drop(tp, errno);
	}

#ifdef notyet
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
#endif
}

/*
 * Handle active open failures.
 */
static void
active_open_failed(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_act_open_rpl *rpl = cplhdr(m);
	struct inpcb *inp;

	/* The tcpcb may already be gone; just free the message. */
	if (toep->tp_tp == NULL)
		goto done;

	inp = toep->tp_tp->t_inpcb;
	inp_wlock(inp);

/*
 * Don't handle connection retry for now
 */
#ifdef notyet
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (rpl->status == CPL_ERR_CONN_EXIST &&
	    icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
		icsk->icsk_retransmit_timer.function = act_open_retry_timer;
		sk_reset_timer(so, &icsk->icsk_retransmit_timer,
		    jiffies + HZ / 2);
	} else
#endif
	fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
	inp_wunlock(inp);
done:
	m_free(m);
}

/*
 * Return whether a failed active open has allocated a TID
 */
static inline int
act_open_has_tid(int status)
{
	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
	    status != CPL_ERR_ARP_MISS;
}

/*
 * Process an ACT_OPEN_RPL CPL message.
 */
static int
do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;
	struct cpl_act_open_rpl *rpl = cplhdr(m);

	/*
	 * On non-T3A parts a failed open may still have consumed a TID;
	 * queue it for release before reporting the failure.
	 */
	if (cdev->type != T3A && act_open_has_tid(rpl->status))
		cxgb_queue_tid_release(cdev, GET_TID(rpl));

	active_open_failed(toep, m);
	return (0);
}

/*
 * Handle an ARP failure for an active open.   XXX purge ofo queue
 *
 * XXX badly broken for crossed SYNs as the ATID is no longer valid.
 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
 * check SOCK_DEAD or sk->sk_sock.  Or maybe generate the error here but don't
 * free the atid.  Hmm.
 */
#ifdef notyet
static void
act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
{
	struct toepcb *toep = m_get_toep(m);
	struct tcpcb *tp = toep->tp_tp;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = toeptoso(toep);

	inp_wlock(inp);
	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
		fail_act_open(so, EHOSTUNREACH);
		printf("freeing %p\n", m);

		m_free(m);
	}
	inp_wunlock(inp);
}
#endif
/*
 * Send an active open request.
1449 */ 1450int 1451t3_connect(struct toedev *tdev, struct socket *so, 1452 struct rtentry *rt, struct sockaddr *nam) 1453{ 1454 struct mbuf *m; 1455 struct l2t_entry *e; 1456 struct tom_data *d = TOM_DATA(tdev); 1457 struct inpcb *inp = sotoinpcb(so); 1458 struct tcpcb *tp = intotcpcb(inp); 1459 struct toepcb *toep; /* allocated by init_offload_socket */ 1460 1461 int atid; 1462 1463 toep = toepcb_alloc(); 1464 if (toep == NULL) 1465 goto out_err; 1466 1467 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1468 goto out_err; 1469 1470 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1471 if (!e) 1472 goto free_tid; 1473 1474 inp_wlock_assert(inp); 1475 m = m_gethdr(MT_DATA, M_WAITOK); 1476 1477#if 0 1478 m->m_toe.mt_toepcb = tp->t_toe; 1479 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1480#endif 1481 SOCK_LOCK(so); 1482 1483 init_offload_socket(so, tdev, atid, e, rt, toep); 1484 1485 install_offload_ops(so); 1486 1487 mk_act_open_req(so, m, atid, e); 1488 SOCK_UNLOCK(so); 1489 1490 soisconnecting(so); 1491 toep = tp->t_toe; 1492 m_set_toep(m, tp->t_toe); 1493 1494 toep->tp_state = TCPS_SYN_SENT; 1495 l2t_send(d->cdev, (struct mbuf *)m, e); 1496 1497 if (toep->tp_ulp_mode) 1498 t3_enable_ddp(so, 0); 1499 return (0); 1500 1501free_tid: 1502 printf("failing connect - free atid\n"); 1503 1504 free_atid(d->cdev, atid); 1505out_err: 1506 printf("return ENOMEM\n"); 1507 return (ENOMEM); 1508} 1509 1510/* 1511 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1512 * not send multiple ABORT_REQs for the same connection and also that we do 1513 * not try to send a message after the connection has closed. Returns 1 if 1514 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
 */
static void
t3_send_reset(struct toepcb *toep)
{
	/*
	 * NOTE(review): despite the comment above, this function is void;
	 * the "Returns 1 / 0" sentence is stale.
	 */
	struct cpl_abort_req *req;
	unsigned int tid = toep->tp_tid;
	int mode = CPL_ABORT_SEND_RST;
	struct tcpcb *tp = toep->tp_tp;
	struct toedev *tdev = toep->tp_toedev;
	struct socket *so = NULL;
	struct mbuf *m;

	if (tp) {
		inp_wlock_assert(tp->t_inpcb);
		so = toeptoso(toep);
	}

	/* Already aborted, or never attached to a device: nothing to do. */
	if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
	    tdev == NULL))
		return;
	toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);

	/* Purge the send queue so we don't send anything after an abort. */
	if (so)
		sbflush(&so->so_snd);
	if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
		mode |= CPL_ABORT_POST_CLOSE_REQ;

	m = m_gethdr_nofail(sizeof(*req));
	m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
	set_arp_failure_handler(m, abort_arp_failure);

	req = mtod(m, struct cpl_abort_req *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
	req->wr.wr_lo = htonl(V_WR_TID(tid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
	req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
	req->cmd = mode;
	/* In SYN_SENT the TID isn't live yet, so defer the abort. */
	if (tp && (tp->t_state == TCPS_SYN_SENT))
		mbufq_tail(&toep->out_of_order_queue, m);	// defer
	else
		l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
}

/*
 * IPPROTO_IP socket-option handler for offloaded sockets.  Only IP_TOS is
 * supported; the new TOS is pushed to the hardware via t3_set_tos().
 */
static int
t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct inpcb *inp;
	int error, optval;

	if (sopt->sopt_name == IP_OPTIONS)
		return (ENOPROTOOPT);

	if (sopt->sopt_name != IP_TOS)
		return (EOPNOTSUPP);

	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);

	if (error)
		return (error);

	/* High-precedence TOS values require privilege. */
	if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread))
		return (EPERM);

	inp = sotoinpcb(so);
	inp->inp_ip_tos = optval;

	t3_set_tos(so);

	return (0);
}

/*
 * IPPROTO_TCP socket-option handler for offloaded sockets.  Supports
 * TCP_CONGESTION (forwarded to the hardware) and TCP_NODELAY.
 */
static int
t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int err = 0;
	size_t copied;

	if (sopt->sopt_name != TCP_CONGESTION &&
	    sopt->sopt_name != TCP_NODELAY)
		return (EOPNOTSUPP);

	if (sopt->sopt_name == TCP_CONGESTION) {
		char name[TCP_CA_NAME_MAX];
		int optlen = sopt->sopt_valsize;
		struct tcpcb *tp;

		if (optlen < 1)
			return (EINVAL);

		err = copyinstr(sopt->sopt_val, name,
		    min(TCP_CA_NAME_MAX - 1, optlen), &copied);
		if (err)
			return (err);
		if (copied < 1)
			return (EINVAL);

		tp = sototcpcb(so);
		/*
		 * XXX I need to revisit this
		 */
		if ((err = t3_set_cong_control(so, name)) == 0) {
#ifdef CONGESTION_CONTROL_SUPPORTED
			tp->t_cong_control = strdup(name, M_CXGB);
#endif
		} else
			return (err);
	} else {
		int optval, oldval;
		struct inpcb *inp;
		struct tcpcb *tp;

		err = sooptcopyin(sopt, &optval, sizeof optval,
		    sizeof optval);

		if (err)
			return (err);

		inp = sotoinpcb(so);
		tp = intotcpcb(inp);

		inp_wlock(inp);

		oldval = tp->t_flags;
		if (optval)
			tp->t_flags |= TF_NODELAY;
		else
			tp->t_flags &= ~TF_NODELAY;
		inp_wunlock(inp);

		/* Only poke the hardware when the flag actually changed. */
		if (oldval != tp->t_flags)
			t3_set_nagle(so);

	}

	return (0);
}

/*
 * Top-level ctloutput for offloaded sockets: dispatch to the IP or TCP
 * handler, falling back to the stock tcp_ctloutput for unsupported options.
 */
static int
t3_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int err;

	if (sopt->sopt_level != IPPROTO_TCP)
		err = t3_ip_ctloutput(so, sopt);
	else
		err = t3_tcp_ctloutput(so, sopt);

	if (err != EOPNOTSUPP)
		return (err);

	return (tcp_ctloutput(so, sopt));
}

/*
 * Returns true if we need to explicitly request RST when we receive new data
 * on an RX-closed connection.
 */
static inline int
need_rst_on_excess_rx(const struct toepcb *toep)
{
	return (1);
}

/*
 * Handles Rx data that arrives in a state where the socket isn't accepting
 * new data.
 */
static void
handle_excess_rx(struct toepcb *toep, struct mbuf *m)
{

	if (need_rst_on_excess_rx(toep) && !(toep->tp_flags & TP_ABORT_SHUTDOWN))
		t3_send_reset(toep);
	m_freem(m);
}

/*
 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
 * by getting the DDP offset from the TCB.
 */
static void
tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct ddp_state *q = &toep->tp_ddp_state;
	struct ddp_buf_state *bsp;
	struct cpl_get_tcb_rpl *hdr;
	unsigned int ddp_offset;
	struct socket *so;
	struct tcpcb *tp;

	uint64_t t;
	__be64 *tcb;

	so = toeptoso(toep);
	tp = toep->tp_tp;

	inp_wlock_assert(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);

	/* Note that we only accout for CPL_GET_TCB issued by the DDP code. We
	 * really need a cookie in order to dispatch the RPLs.
	 */
	q->get_tcb_count--;

	/* It is a possible that a previous CPL already invalidated UBUF DDP
	 * and moved the cur_buf idx and hence no further processing of this
	 * skb is required. However, the app might be sleeping on
	 * !q->get_tcb_count and we need to wake it up.
	 */
	if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
		struct socket *so = toeptoso(toep);

		m_freem(m);
		if (__predict_true((so->so_state & SS_NOFDREF) == 0))
			sorwakeup_locked(so);
		else
			SOCKBUF_UNLOCK(&so->so_rcv);
		return;
	}

	/* Extract the current DDP placement offset from the raw TCB words. */
	bsp = &q->buf_state[q->cur_buf];
	hdr = cplhdr(m);
	tcb = (__be64 *)(hdr + 1);
	if (q->cur_buf == 0) {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
		ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
	} else {
		t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
		ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
	}
	ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
	/* The bytes placed since our last snapshot become this mbuf's length. */
	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset = ddp_offset;
	m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;

	CTR5(KTR_TOM,
	    "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
	    q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
	KASSERT(ddp_offset >= m->m_cur_offset, ("ddp_offset=%u less than cur_offset=%u",
		ddp_offset, m->m_cur_offset));

#ifdef T3_TRACE
	T3_TRACE3(TIDTB(so),
	    "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u ddp_offset %u",
	    tp->rcv_nxt, q->cur_buf, ddp_offset);
#endif

#if 0
{
	unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;

	t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
	ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;

	t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
	rcv_nxt = t >> S_TCB_RCV_NXT;
	rcv_nxt &= M_TCB_RCV_NXT;

	t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
	rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
	rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;

	T3_TRACE2(TIDTB(sk),
	    "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
	    ddp_flags, rcv_nxt - rx_hdr_offset);
	T3_TRACE4(TB(q),
	    "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
	    tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
	T3_TRACE3(TB(q),
	    "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
	    rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
	T3_TRACE2(TB(q),
	    "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
	    q->buf_state[0].flags, q->buf_state[1].flags);

}
#endif
	if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
		handle_excess_rx(toep, m);
		return;
	}

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
	}
#endif
	if (bsp->flags & DDP_BF_NOCOPY) {
#ifdef T3_TRACE
		T3_TRACE0(TB(q),
		    "tcb_rpl_as_ddp_complete: CANCEL UBUF");

		if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
			printk("!cancel_ubuf");
			t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
		}
#endif
		m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
		bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
		q->cur_buf ^= 1;
	} else if (bsp->flags & DDP_BF_NOFLIP) {

		m->m_ddp_flags = 1;    /* always a kernel buffer */

		/* now HW buffer carries a user buffer */
		bsp->flags &= ~DDP_BF_NOFLIP;
		bsp->flags |= DDP_BF_NOCOPY;

		/* It is possible that the CPL_GET_TCB_RPL doesn't indicate
		 * any new data in which case we're done. If in addition the
		 * offset is 0, then there wasn't a completion for the kbuf
		 * and we need to decrement the posted count.
		 */
		if (m->m_pkthdr.len == 0) {
			if (ddp_offset == 0) {
				q->kbuf_posted--;
				bsp->flags |= DDP_BF_NODATA;
			}
			SOCKBUF_UNLOCK(&so->so_rcv);

			m_free(m);
			return;
		}
	} else {
		SOCKBUF_UNLOCK(&so->so_rcv);
		/* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
		 * but it got here way late and nobody cares anymore.
		 */
		m_free(m);
		return;
	}

	/* Account the placed bytes to the connection and hand them up. */
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
#ifdef T3_TRACE
	T3_TRACE3(TB(q),
	    "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u lskb->len %u",
	    m->m_seq, q->cur_buf, m->m_pkthdr.len);
#endif
	CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
	    m->m_seq, q->cur_buf, m->m_pkthdr.len);
	if (m->m_pkthdr.len == 0)
		q->user_ddp_pending = 0;
	else
		SBAPPEND(&so->so_rcv, m);
	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process a CPL_GET_TCB_RPL.  These can also be generated by the DDP code,
 * in that case they are similar to DDP completions.
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* OK if socket doesn't exist */
	if (toep == NULL) {
		printf("null toep in do_get_tcb_rpl\n");
		return (CPL_RET_BUF_DONE);
	}

	inp_wlock(toep->tp_tp->t_inpcb);
	tcb_rpl_as_ddp_complete(toep, m);
	inp_wunlock(toep->tp_tp->t_inpcb);

	return (0);
}

/*
 * Account DDP-placed bytes that are implied by a CPL_RX_DATA whose sequence
 * number is ahead of rcv_nxt, and hand the placed range up to the socket
 * buffer bookkeeping.  No-op when the CPL carries no new placement.
 */
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data *hdr = cplhdr(m);
	unsigned int rcv_nxt = ntohl(hdr->seq);

	/* Nothing was placed by DDP ahead of this segment. */
	if (tp->rcv_nxt == rcv_nxt)
		return;

	inp_wlock_assert(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);
	q = &toep->tp_ddp_state;
	bsp = &q->buf_state[q->cur_buf];
	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
		rcv_nxt, tp->rcv_nxt));
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "handle_ddp_data: neg len");
	}
#endif

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	/*
	 * For now, don't re-enable DDP after a connection fell out of  DDP
	 * mode.
	 */
	q->ubuf_ddp_ready = 0;
	SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_data *hdr = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	int len = be16toh(hdr->len);

	inp_wlock(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		TRACE_EXIT;
		return;
	}

	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
		handle_ddp_data(toep, m);

	m->m_seq = ntohl(hdr->seq);
	m->m_ulp_mode = 0;                  /* for iSCSI */

#if VALIDATE_SEQ
	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
		log(LOG_ERR,
		    "%s: TID %u: Bad sequence number %u, expected %u\n",
		    TOE_DEV(toeptoso(toep))->name, toep->tp_tid, m->m_seq,
		    tp->rcv_nxt);
		m_freem(m);
		inp_wunlock(tp->t_inpcb);
		return;
	}
#endif
	/* Strip the CPL header; only payload goes to the socket buffer. */
	m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
	/*
	 * We don't handle urgent data yet
	 */
	if (__predict_false(hdr->urg))
		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
		    tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
		    tp->rcv_nxt];
#endif
	/* Track delayed-ACK mode changes reported by the hardware. */
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
		toep->tp_delack_mode = hdr->dack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

	if (len < m->m_pkthdr.len)
		m->m_pkthdr.len = m->m_len = len;

	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;
#ifdef T3_TRACE
	T3_TRACE2(TIDTB(sk),
	    "new_rx_data: seq 0x%x len %u",
	    m->m_seq, m->m_pkthdr.len);
#endif
	inp_wunlock(tp->t_inpcb);
	SOCKBUF_LOCK(&so->so_rcv);
	if (sb_notify(&so->so_rcv))
		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);

	SBAPPEND(&so->so_rcv, m);

#ifdef notyet
	/*
	 * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
	 *
	 */
	KASSERT(so->so_rcv.sb_cc < (so->so_rcv.sb_mbmax << 1),

	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
		so, so->so_rcv.sb_cc, so->so_rcv.sb_mbmax));
#endif


	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
	    so->so_rcv.sb_cc, so->so_rcv.sb_mbcnt);

	if (__predict_true((so->so_state & SS_NOFDREF) == 0))
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Handler for RX_DATA CPL messages.
 */
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);

	new_rx_data(toep, m);

	return (0);
}

/*
 * Process a CPL_RX_DATA_DDP: data has been placed directly into a DDP
 * buffer.  Converts the DDP report into an mbuf describing the placed
 * range and appends it to the socket's receive buffer.
 */
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	struct socket *so = toeptoso(toep);
	int nomoredata = 0;
	unsigned int delack_mode;

	tp = sototcpcb(so);

	inp_wlock(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {

		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u offset %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len), G_DDP_OFFSET(ddp_report));
	T3_TRACE1(TIDTB(sk),
	    "new_rx_data_ddp: ddp_report 0x%x",
	    ddp_report);
#endif
	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len.  We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	inp_wunlock(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
	    m->m_len, rcv_nxt, m->m_seq);
	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
	 * account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	SOCKBUF_LOCK(&so->so_rcv);
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
			m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));


	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;                     /* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
#endif
	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
		 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
		 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
		 F_DDP_INVALID_PPOD)

/*
 * Handler for
 RX_DATA_DDP CPL messages.
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	/* Drop the message on any hardware-reported DDP error. */
	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

/*
 * Process a CPL_RX_DDP_COMPLETE: a DDP buffer filled up.  Turn the report
 * into an mbuf describing the newly placed bytes and append it to the
 * socket's receive buffer.
 */
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so = toeptoso(toep);
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;

	inp_wlock(tp->t_inpcb);
	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = sotoinpcb(so);

		handle_excess_rx(toep, m);
		inp_wunlock(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;


	SOCKBUF_LOCK(&so->so_rcv);
	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	/* New bytes = reported end offset minus our previous snapshot. */
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	inp_wunlock(tp->t_inpcb);

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR5(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report 0x%x offset %u, len %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report), m->m_len);

	bsp->cur_offset += m->m_len;

	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;                     /* flip buffers */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata=1;
	}

	CTR4(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report %u offset %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;


	SBAPPEND(&so->so_rcv, m);

	if ((so->so_state & SS_NOFDREF) == 0)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);

	inp_wlock_assert(tp->t_inpcb);
	/*
	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	     /* defeat recycling */
	tp->t_srtt = 0;                        /* defeat tcp_update_metrics */
	tcp_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */

	if (tp->rcv_nxt == rcv_nxt)			/* no data */
		return (0);

	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
2365 */ 2366 return (1); 2367 } 2368 2369 inp_wlock_assert(tp->t_inpcb); 2370 q = &toep->tp_ddp_state; 2371 SOCKBUF_LOCK(&so->so_rcv); 2372 bsp = &q->buf_state[q->cur_buf]; 2373 m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt; 2374 KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); 2375 m->m_ddp_gl = (unsigned char *)bsp->gl; 2376 m->m_flags |= M_DDP; 2377 m->m_cur_offset = bsp->cur_offset; 2378 m->m_ddp_flags = 2379 DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1; 2380 m->m_seq = tp->rcv_nxt; 2381 tp->rcv_nxt = rcv_nxt; 2382 bsp->cur_offset += m->m_pkthdr.len; 2383 if (!(bsp->flags & DDP_BF_NOFLIP)) 2384 q->cur_buf ^= 1; 2385#ifdef notyet 2386 skb_reset_transport_header(skb); 2387 tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */ 2388#endif 2389 tp->t_rcvtime = ticks; 2390 SBAPPEND(&so->so_rcv, m); 2391 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 2392 sorwakeup_locked(so); 2393 else 2394 SOCKBUF_UNLOCK(&so->so_rcv); 2395 return (1); 2396} 2397 2398/* 2399 * Handle a peer FIN. 2400 */ 2401static void 2402do_peer_fin(struct socket *so, struct mbuf *m) 2403{ 2404 struct tcpcb *tp = sototcpcb(so); 2405 struct toepcb *toep = tp->t_toe; 2406 int keep = 0; 2407 DPRINTF("do_peer_fin state=%d\n", tp->t_state); 2408 2409#ifdef T3_TRACE 2410 T3_TRACE0(TIDTB(sk),"do_peer_fin:"); 2411#endif 2412 2413 if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) { 2414 printf("abort_pending set\n"); 2415 2416 goto out; 2417 } 2418 INP_INFO_WLOCK(&tcbinfo); 2419 inp_wlock(tp->t_inpcb); 2420 if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) { 2421 keep = handle_peer_close_data(so, m); 2422 if (keep < 0) { 2423 INP_INFO_WUNLOCK(&tcbinfo); 2424 inp_wunlock(tp->t_inpcb); 2425 return; 2426 } 2427 } 2428 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { 2429 socantrcvmore(so); 2430 /* 2431 * If connection is half-synchronized 2432 * (ie NEEDSYN flag on) then delay ACK, 2433 * so it may be piggybacked when SYN is sent. 
2434 * Otherwise, since we received a FIN then no 2435 * more input can be expected, send ACK now. 2436 */ 2437 if (tp->t_flags & TF_NEEDSYN) 2438 tp->t_flags |= TF_DELACK; 2439 else 2440 tp->t_flags |= TF_ACKNOW; 2441 tp->rcv_nxt++; 2442 } 2443 2444 switch (tp->t_state) { 2445 case TCPS_SYN_RECEIVED: 2446 tp->t_starttime = ticks; 2447 /* FALLTHROUGH */ 2448 case TCPS_ESTABLISHED: 2449 tp->t_state = TCPS_CLOSE_WAIT; 2450 break; 2451 case TCPS_FIN_WAIT_1: 2452 tp->t_state = TCPS_CLOSING; 2453 break; 2454 case TCPS_FIN_WAIT_2: 2455 /* 2456 * If we've sent an abort_req we must have sent it too late, 2457 * HW will send us a reply telling us so, and this peer_close 2458 * is really the last message for this connection and needs to 2459 * be treated as an abort_rpl, i.e., transition the connection 2460 * to TCP_CLOSE (note that the host stack does this at the 2461 * time of generating the RST but we must wait for HW). 2462 * Otherwise we enter TIME_WAIT. 2463 */ 2464 t3_release_offload_resources(toep); 2465 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2466 tp = tcp_close(tp); 2467 } else { 2468 enter_timewait(so); 2469 tp = NULL; 2470 } 2471 break; 2472 default: 2473 log(LOG_ERR, 2474 "%s: TID %u received PEER_CLOSE in bad state %d\n", 2475 TOE_DEV(so)->tod_name, toep->tp_tid, tp->t_state); 2476 } 2477 INP_INFO_WUNLOCK(&tcbinfo); 2478 if (tp) 2479 inp_wunlock(tp->t_inpcb); 2480 2481 DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags); 2482 2483#ifdef notyet 2484 /* Do not send POLL_HUP for half duplex close. */ 2485 if ((sk->sk_shutdown & SEND_SHUTDOWN) || 2486 sk->sk_state == TCP_CLOSE) 2487 sk_wake_async(so, 1, POLL_HUP); 2488 else 2489 sk_wake_async(so, 1, POLL_IN); 2490#endif 2491 2492out: 2493 if (!keep) 2494 m_free(m); 2495} 2496 2497/* 2498 * Handler for PEER_CLOSE CPL messages. 
2499 */ 2500static int 2501do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2502{ 2503 struct toepcb *toep = (struct toepcb *)ctx; 2504 struct socket *so = toeptoso(toep); 2505 2506 VALIDATE_SOCK(so); 2507 2508 do_peer_fin(so, m); 2509 return (0); 2510} 2511 2512static void 2513process_close_con_rpl(struct socket *so, struct mbuf *m) 2514{ 2515 struct tcpcb *tp = sototcpcb(so); 2516 struct cpl_close_con_rpl *rpl = cplhdr(m); 2517 struct toepcb *toep = tp->t_toe; 2518 2519 tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ 2520 2521 DPRINTF("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state, 2522 !!(so->so_state & SS_NOFDREF)); 2523 if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) 2524 goto out; 2525 2526 INP_INFO_WLOCK(&tcbinfo); 2527 inp_wlock(tp->t_inpcb); 2528 switch (tp->t_state) { 2529 case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */ 2530 t3_release_offload_resources(toep); 2531 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2532 tp = tcp_close(tp); 2533 2534 } else { 2535 enter_timewait(so); 2536 tp = NULL; 2537 soisdisconnected(so); 2538 } 2539 break; 2540 case TCPS_LAST_ACK: 2541 /* 2542 * In this state we don't care about pending abort_rpl. 2543 * If we've sent abort_req it was post-close and was sent too 2544 * late, this close_con_rpl is the actual last message. 2545 */ 2546 t3_release_offload_resources(toep); 2547 tp = tcp_close(tp); 2548 break; 2549 case TCPS_FIN_WAIT_1: 2550 /* 2551 * If we can't receive any more 2552 * data, then closing user can proceed. 2553 * Starting the timer is contrary to the 2554 * specification, but if we don't get a FIN 2555 * we'll hang forever. 2556 * 2557 * XXXjl: 2558 * we should release the tp also, and use a 2559 * compressed state. 2560 */ 2561 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { 2562 int timeout; 2563 2564 soisdisconnected(so); 2565 timeout = (tcp_fast_finwait2_recycle) ? 
2566 tcp_finwait2_timeout : tcp_maxidle; 2567 tcp_timer_activate(tp, TT_2MSL, timeout); 2568 } 2569 tp->t_state = TCPS_FIN_WAIT_2; 2570 if ((so->so_options & SO_LINGER) && so->so_linger == 0 && 2571 (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) { 2572 tp = tcp_drop(tp, 0); 2573 } 2574 2575 break; 2576 default: 2577 log(LOG_ERR, 2578 "%s: TID %u received CLOSE_CON_RPL in bad state %d\n", 2579 TOE_DEV(so)->tod_name, toep->tp_tid, 2580 tp->t_state); 2581 } 2582 INP_INFO_WUNLOCK(&tcbinfo); 2583 if (tp) 2584 inp_wunlock(tp->t_inpcb); 2585out: 2586 m_freem(m); 2587} 2588 2589/* 2590 * Handler for CLOSE_CON_RPL CPL messages. 2591 */ 2592static int 2593do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m, 2594 void *ctx) 2595{ 2596 struct toepcb *toep = (struct toepcb *)ctx; 2597 struct socket *so = toeptoso(toep); 2598 2599 VALIDATE_SOCK(so); 2600 2601 process_close_con_rpl(so, m); 2602 return (0); 2603} 2604 2605/* 2606 * Process abort replies. We only process these messages if we anticipate 2607 * them as the coordination between SW and HW in this area is somewhat lacking 2608 * and sometimes we get ABORT_RPLs after we are done with the connection that 2609 * originated the ABORT_REQ. 
2610 */ 2611static void 2612process_abort_rpl(struct socket *so, struct mbuf *m) 2613{ 2614 struct tcpcb *tp = sototcpcb(so); 2615 struct toepcb *toep = tp->t_toe; 2616 2617#ifdef T3_TRACE 2618 T3_TRACE1(TIDTB(sk), 2619 "process_abort_rpl: GTS rpl pending %d", 2620 sock_flag(sk, ABORT_RPL_PENDING)); 2621#endif 2622 2623 INP_INFO_WLOCK(&tcbinfo); 2624 inp_wlock(tp->t_inpcb); 2625 2626 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2627 /* 2628 * XXX panic on tcpdrop 2629 */ 2630 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(TOE_DEV(so))) 2631 toep->tp_flags |= TP_ABORT_RPL_RCVD; 2632 else { 2633 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); 2634 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || 2635 !is_t3a(TOE_DEV(so))) { 2636 if (toep->tp_flags & TP_ABORT_REQ_RCVD) 2637 panic("TP_ABORT_REQ_RCVD set"); 2638 t3_release_offload_resources(toep); 2639 tp = tcp_close(tp); 2640 } 2641 } 2642 } 2643 if (tp) 2644 inp_wunlock(tp->t_inpcb); 2645 INP_INFO_WUNLOCK(&tcbinfo); 2646 2647 m_free(m); 2648} 2649 2650/* 2651 * Handle an ABORT_RPL_RSS CPL message. 2652 */ 2653static int 2654do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2655{ 2656 struct socket *so; 2657 struct cpl_abort_rpl_rss *rpl = cplhdr(m); 2658 struct toepcb *toep; 2659 2660 /* 2661 * Ignore replies to post-close aborts indicating that the abort was 2662 * requested too late. These connections are terminated when we get 2663 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 2664 * arrives the TID is either no longer used or it has been recycled. 
2665 */ 2666 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2667discard: 2668 m_free(m); 2669 return (0); 2670 } 2671 2672 toep = (struct toepcb *)ctx; 2673 2674 /* 2675 * Sometimes we've already closed the socket, e.g., a post-close 2676 * abort races with ABORT_REQ_RSS, the latter frees the socket 2677 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2678 * but FW turns the ABORT_REQ into a regular one and so we get 2679 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2680 */ 2681 if (!toep) 2682 goto discard; 2683 2684 if (toep->tp_tp == NULL) { 2685 printf("removing tid for abort\n"); 2686 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2687 if (toep->tp_l2t) 2688 l2t_release(L2DATA(cdev), toep->tp_l2t); 2689 2690 toepcb_release(toep); 2691 goto discard; 2692 } 2693 2694 printf("toep=%p\n", toep); 2695 printf("tp=%p\n", toep->tp_tp); 2696 2697 so = toeptoso(toep); /* <- XXX panic */ 2698 toepcb_hold(toep); 2699 process_abort_rpl(so, m); 2700 toepcb_release(toep); 2701 return (0); 2702} 2703 2704/* 2705 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2706 * indicate whether RST should be sent in response. 2707 */ 2708static int 2709abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2710{ 2711 struct tcpcb *tp = sototcpcb(so); 2712 2713 switch (abort_reason) { 2714 case CPL_ERR_BAD_SYN: 2715#if 0 2716 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2717#endif 2718 case CPL_ERR_CONN_RESET: 2719 // XXX need to handle SYN_RECV due to crossed SYNs 2720 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2721 case CPL_ERR_XMIT_TIMEDOUT: 2722 case CPL_ERR_PERSIST_TIMEDOUT: 2723 case CPL_ERR_FINWAIT2_TIMEDOUT: 2724 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2725#if 0 2726 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2727#endif 2728 return (ETIMEDOUT); 2729 default: 2730 return (EIO); 2731 } 2732} 2733 2734static inline void 2735set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2736{ 2737 struct cpl_abort_rpl *rpl = cplhdr(m); 2738 2739 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2740 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2741 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2742 2743 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2744 rpl->cmd = cmd; 2745} 2746 2747static void 2748send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2749{ 2750 struct mbuf *reply_mbuf; 2751 struct cpl_abort_req_rss *req = cplhdr(m); 2752 2753 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2754 m_set_priority(m, CPL_PRIORITY_DATA); 2755 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2756 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2757 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2758 m_free(m); 2759} 2760 2761/* 2762 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2763 */ 2764static inline int 2765is_neg_adv_abort(unsigned int status) 2766{ 2767 return status == CPL_ERR_RTX_NEG_ADVICE || 2768 status == CPL_ERR_PERSIST_NEG_ADVICE; 2769} 2770 2771static void 2772send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2773{ 2774 struct mbuf *reply_mbuf; 2775 struct cpl_abort_req_rss *req = cplhdr(m); 2776 2777 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2778 2779 if (!reply_mbuf) { 2780 /* Defer the reply. Stick rst_status into req->cmd. 
*/ 2781 req->status = rst_status; 2782 t3_defer_reply(m, tdev, send_deferred_abort_rpl); 2783 return; 2784 } 2785 2786 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA); 2787 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status); 2788 m_free(m); 2789 2790 /* 2791 * XXX need to sync with ARP as for SYN_RECV connections we can send 2792 * these messages while ARP is pending. For other connection states 2793 * it's not a problem. 2794 */ 2795 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2796} 2797 2798#ifdef notyet 2799static void 2800cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) 2801{ 2802 CXGB_UNIMPLEMENTED(); 2803#ifdef notyet 2804 struct request_sock *req = child->sk_user_data; 2805 2806 inet_csk_reqsk_queue_removed(parent, req); 2807 synq_remove(tcp_sk(child)); 2808 __reqsk_free(req); 2809 child->sk_user_data = NULL; 2810#endif 2811} 2812 2813 2814/* 2815 * Performs the actual work to abort a SYN_RECV connection. 2816 */ 2817static void 2818do_abort_syn_rcv(struct socket *child, struct socket *parent) 2819{ 2820 struct tcpcb *parenttp = sototcpcb(parent); 2821 struct tcpcb *childtp = sototcpcb(child); 2822 2823 /* 2824 * If the server is still open we clean up the child connection, 2825 * otherwise the server already did the clean up as it was purging 2826 * its SYN queue and the skb was just sitting in its backlog. 2827 */ 2828 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { 2829 cleanup_syn_rcv_conn(child, parent); 2830 INP_INFO_WLOCK(&tcbinfo); 2831 inp_wlock(childtp->t_inpcb); 2832 t3_release_offload_resources(childtp->t_toe); 2833 childtp = tcp_close(childtp); 2834 INP_INFO_WUNLOCK(&tcbinfo); 2835 if (childtp) 2836 inp_wunlock(childtp->t_inpcb); 2837 } 2838} 2839#endif 2840 2841/* 2842 * Handle abort requests for a SYN_RECV connection. These need extra work 2843 * because the socket is on its parent's SYN queue. 
2844 */ 2845static int 2846abort_syn_rcv(struct socket *so, struct mbuf *m) 2847{ 2848 CXGB_UNIMPLEMENTED(); 2849#ifdef notyet 2850 struct socket *parent; 2851 struct toedev *tdev = TOE_DEV(so); 2852 struct t3cdev *cdev = TOM_DATA(tdev)->cdev; 2853 struct socket *oreq = so->so_incomp; 2854 struct t3c_tid_entry *t3c_stid; 2855 struct tid_info *t; 2856 2857 if (!oreq) 2858 return -1; /* somehow we are not on the SYN queue */ 2859 2860 t = &(T3C_DATA(cdev))->tid_maps; 2861 t3c_stid = lookup_stid(t, oreq->ts_recent); 2862 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; 2863 2864 SOCK_LOCK(parent); 2865 do_abort_syn_rcv(so, parent); 2866 send_abort_rpl(m, tdev, CPL_ABORT_NO_RST); 2867 SOCK_UNLOCK(parent); 2868#endif 2869 return (0); 2870} 2871 2872/* 2873 * Process abort requests. If we are waiting for an ABORT_RPL we ignore this 2874 * request except that we need to reply to it. 2875 */ 2876static void 2877process_abort_req(struct socket *so, struct mbuf *m, struct toedev *tdev) 2878{ 2879 int rst_status = CPL_ABORT_NO_RST; 2880 const struct cpl_abort_req_rss *req = cplhdr(m); 2881 struct tcpcb *tp = sototcpcb(so); 2882 struct toepcb *toep = tp->t_toe; 2883 2884 inp_wlock(tp->t_inpcb); 2885 if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) { 2886 toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN); 2887 m_free(m); 2888 goto skip; 2889 } 2890 2891 toep->tp_flags &= ~TP_ABORT_REQ_RCVD; 2892 /* 2893 * Three cases to consider: 2894 * a) We haven't sent an abort_req; close the connection. 2895 * b) We have sent a post-close abort_req that will get to TP too late 2896 * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will 2897 * be ignored and the connection should be closed now. 2898 * c) We have sent a regular abort_req that will get to TP too late. 2899 * That will generate an abort_rpl with status 0, wait for it. 
2900 */ 2901 if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) || 2902 (is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) { 2903 so->so_error = abort_status_to_errno(so, req->status, 2904 &rst_status); 2905 if (__predict_true((so->so_state & SS_NOFDREF) == 0)) 2906 sorwakeup(so); 2907 /* 2908 * SYN_RECV needs special processing. If abort_syn_rcv() 2909 * returns 0 is has taken care of the abort. 2910 */ 2911 if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m)) 2912 goto skip; 2913 2914 t3_release_offload_resources(toep); 2915 tp = tcp_close(tp); 2916 } 2917 if (tp) 2918 inp_wunlock(tp->t_inpcb); 2919 send_abort_rpl(m, tdev, rst_status); 2920 return; 2921 2922skip: 2923 inp_wunlock(tp->t_inpcb); 2924} 2925 2926/* 2927 * Handle an ABORT_REQ_RSS CPL message. 2928 */ 2929static int 2930do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2931{ 2932 const struct cpl_abort_req_rss *req = cplhdr(m); 2933 struct toepcb *toep = (struct toepcb *)ctx; 2934 struct socket *so; 2935 struct inpcb *inp; 2936 2937 if (is_neg_adv_abort(req->status)) { 2938 m_free(m); 2939 return (0); 2940 } 2941 2942 printf("aborting tid=%d\n", toep->tp_tid); 2943 2944 if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) { 2945 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2946 toep->tp_flags |= TP_ABORT_REQ_RCVD; 2947 printf("sending abort rpl\n"); 2948 2949 send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST); 2950 printf("sent\n"); 2951 if (toep->tp_l2t) 2952 l2t_release(L2DATA(cdev), toep->tp_l2t); 2953 2954 /* 2955 * Unhook 2956 */ 2957 toep->tp_tp->t_toe = NULL; 2958 toep->tp_tp->t_flags &= ~TF_TOE; 2959 toep->tp_tp = NULL; 2960 /* 2961 * XXX need to call syncache_chkrst - but we don't 2962 * have a way of doing that yet 2963 */ 2964 toepcb_release(toep); 2965 printf("abort for unestablished connection :-(\n"); 2966 return (0); 2967 } 2968 if (toep->tp_tp == NULL) { 2969 printf("disconnected toepcb\n"); 2970 /* should be freed momentarily */ 
2971 return (0); 2972 } 2973 2974 so = toeptoso(toep); 2975 inp = sotoinpcb(so); 2976 2977 VALIDATE_SOCK(so); 2978 toepcb_hold(toep); 2979 INP_INFO_WLOCK(&tcbinfo); 2980 process_abort_req(so, m, TOE_DEV(so)); 2981 INP_INFO_WUNLOCK(&tcbinfo); 2982 toepcb_release(toep); 2983 return (0); 2984} 2985#ifdef notyet 2986static void 2987pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m) 2988{ 2989 struct toedev *tdev = TOE_DEV(parent); 2990 2991 do_abort_syn_rcv(child, parent); 2992 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) { 2993 struct cpl_pass_accept_rpl *rpl = cplhdr(m); 2994 2995 rpl->opt0h = htonl(F_TCAM_BYPASS); 2996 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 2997 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 2998 } else 2999 m_free(m); 3000} 3001#endif 3002static void 3003handle_pass_open_arp_failure(struct socket *so, struct mbuf *m) 3004{ 3005 CXGB_UNIMPLEMENTED(); 3006 3007#ifdef notyet 3008 struct t3cdev *cdev; 3009 struct socket *parent; 3010 struct socket *oreq; 3011 struct t3c_tid_entry *t3c_stid; 3012 struct tid_info *t; 3013 struct tcpcb *otp, *tp = sototcpcb(so); 3014 struct toepcb *toep = tp->t_toe; 3015 3016 /* 3017 * If the connection is being aborted due to the parent listening 3018 * socket going away there's nothing to do, the ABORT_REQ will close 3019 * the connection. 3020 */ 3021 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 3022 m_free(m); 3023 return; 3024 } 3025 3026 oreq = so->so_incomp; 3027 otp = sototcpcb(oreq); 3028 3029 cdev = T3C_DEV(so); 3030 t = &(T3C_DATA(cdev))->tid_maps; 3031 t3c_stid = lookup_stid(t, otp->ts_recent); 3032 parent = ((struct listen_ctx *)t3c_stid->ctx)->lso; 3033 3034 SOCK_LOCK(parent); 3035 pass_open_abort(so, parent, m); 3036 SOCK_UNLOCK(parent); 3037#endif 3038} 3039 3040/* 3041 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly 3042 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV 3043 * connection. 
3044 */ 3045static void 3046pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m) 3047{ 3048 3049#ifdef notyet 3050 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3051 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk); 3052#endif 3053 handle_pass_open_arp_failure(m_get_socket(m), m); 3054} 3055 3056/* 3057 * Populate a reject CPL_PASS_ACCEPT_RPL WR. 3058 */ 3059static void 3060mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf) 3061{ 3062 struct cpl_pass_accept_req *req = cplhdr(req_mbuf); 3063 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf); 3064 unsigned int tid = GET_TID(req); 3065 3066 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP); 3067 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3068 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3069 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet 3070 rpl->opt0h = htonl(F_TCAM_BYPASS); 3071 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 3072 rpl->opt2 = 0; 3073 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3074} 3075 3076/* 3077 * Send a deferred reject to an accept request. 
3078 */ 3079static void 3080reject_pass_request(struct toedev *tdev, struct mbuf *m) 3081{ 3082 struct mbuf *reply_mbuf; 3083 3084 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl)); 3085 mk_pass_accept_rpl(reply_mbuf, m); 3086 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 3087 m_free(m); 3088} 3089 3090static void 3091handle_syncache_event(int event, void *arg) 3092{ 3093 struct toepcb *toep = arg; 3094 3095 switch (event) { 3096 case TOE_SC_ENTRY_PRESENT: 3097 /* 3098 * entry already exists - free toepcb 3099 * and l2t 3100 */ 3101 printf("syncache entry present\n"); 3102 toepcb_release(toep); 3103 break; 3104 case TOE_SC_DROP: 3105 /* 3106 * The syncache has given up on this entry 3107 * either it timed out, or it was evicted 3108 * we need to explicitly release the tid 3109 */ 3110 printf("syncache entry dropped\n"); 3111 toepcb_release(toep); 3112 break; 3113 default: 3114 log(LOG_ERR, "unknown syncache event %d\n", event); 3115 break; 3116 } 3117} 3118 3119static void 3120syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep) 3121{ 3122 struct in_conninfo inc; 3123 struct tcpopt to; 3124 struct tcphdr th; 3125 struct inpcb *inp; 3126 int mss, wsf, sack, ts; 3127 uint32_t rcv_isn = ntohl(req->rcv_isn); 3128 3129 bzero(&to, sizeof(struct tcpopt)); 3130 inp = sotoinpcb(lso); 3131 3132 /* 3133 * Fill out information for entering us into the syncache 3134 */ 3135 inc.inc_fport = th.th_sport = req->peer_port; 3136 inc.inc_lport = th.th_dport = req->local_port; 3137 th.th_seq = req->rcv_isn; 3138 th.th_flags = TH_SYN; 3139 3140 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; 3141 3142 3143 inc.inc_isipv6 = 0; 3144 inc.inc_len = 0; 3145 inc.inc_faddr.s_addr = req->peer_ip; 3146 inc.inc_laddr.s_addr = req->local_ip; 3147 3148 DPRINTF("syncache add of %d:%d %d:%d\n", 3149 ntohl(req->local_ip), ntohs(req->local_port), 3150 ntohl(req->peer_ip), ntohs(req->peer_port)); 
3151 3152 mss = req->tcp_options.mss; 3153 wsf = req->tcp_options.wsf; 3154 ts = req->tcp_options.tstamp; 3155 sack = req->tcp_options.sack; 3156 to.to_mss = mss; 3157 to.to_wscale = wsf; 3158 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3159 INP_INFO_WLOCK(&tcbinfo); 3160 inp_wlock(inp); 3161 syncache_offload_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep); 3162} 3163 3164 3165/* 3166 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket 3167 * lock held. Note that the sock here is a listening socket that is not owned 3168 * by the TOE. 3169 */ 3170static void 3171process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, 3172 struct listen_ctx *lctx) 3173{ 3174 int rt_flags; 3175 struct l2t_entry *e; 3176 struct iff_mac tim; 3177 struct mbuf *reply_mbuf, *ddp_mbuf = NULL; 3178 struct cpl_pass_accept_rpl *rpl; 3179 struct cpl_pass_accept_req *req = cplhdr(m); 3180 unsigned int tid = GET_TID(req); 3181 struct tom_data *d = TOM_DATA(tdev); 3182 struct t3cdev *cdev = d->cdev; 3183 struct tcpcb *tp = sototcpcb(so); 3184 struct toepcb *newtoep; 3185 struct rtentry *dst; 3186 struct sockaddr_in nam; 3187 struct t3c_data *td = T3C_DATA(cdev); 3188 3189 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3190 if (__predict_false(reply_mbuf == NULL)) { 3191 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3192 t3_defer_reply(m, tdev, reject_pass_request); 3193 else { 3194 cxgb_queue_tid_release(cdev, tid); 3195 m_free(m); 3196 } 3197 DPRINTF("failed to get reply_mbuf\n"); 3198 3199 goto out; 3200 } 3201 3202 if (tp->t_state != TCPS_LISTEN) { 3203 DPRINTF("socket not in listen state\n"); 3204 3205 goto reject; 3206 } 3207 3208 tim.mac_addr = req->dst_mac; 3209 tim.vlan_tag = ntohs(req->vlan_tag); 3210 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { 3211 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n"); 3212 goto reject; 3213 } 3214 3215#ifdef notyet 3216 /* 3217 
* XXX do route lookup to confirm that we're still listening on this 3218 * address 3219 */ 3220 if (ip_route_input(skb, req->local_ip, req->peer_ip, 3221 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev)) 3222 goto reject; 3223 rt_flags = ((struct rtable *)skb->dst)->rt_flags & 3224 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); 3225 dst_release(skb->dst); // done with the input route, release it 3226 skb->dst = NULL; 3227 3228 if ((rt_flags & RTF_LOCAL) == 0) 3229 goto reject; 3230#endif 3231 /* 3232 * XXX 3233 */ 3234 rt_flags = RTF_LOCAL; 3235 if ((rt_flags & RTF_LOCAL) == 0) 3236 goto reject; 3237 3238 /* 3239 * Calculate values and add to syncache 3240 */ 3241 3242 newtoep = toepcb_alloc(); 3243 if (newtoep == NULL) 3244 goto reject; 3245 3246 bzero(&nam, sizeof(struct sockaddr_in)); 3247 3248 nam.sin_len = sizeof(struct sockaddr_in); 3249 nam.sin_family = AF_INET; 3250 nam.sin_addr.s_addr =req->peer_ip; 3251 dst = rtalloc2((struct sockaddr *)&nam, 1, 0); 3252 3253 if (dst == NULL) { 3254 printf("failed to find route\n"); 3255 goto reject; 3256 } 3257 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev, 3258 (struct sockaddr *)&nam); 3259 if (e == NULL) { 3260 DPRINTF("failed to get l2t\n"); 3261 } 3262 /* 3263 * Point to our listen socket until accept 3264 */ 3265 newtoep->tp_tp = tp; 3266 newtoep->tp_flags = TP_SYN_RCVD; 3267 newtoep->tp_tid = tid; 3268 newtoep->tp_toedev = tdev; 3269 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3270 3271 cxgb_insert_tid(cdev, d->client, newtoep, tid); 3272 SOCK_LOCK(so); 3273 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); 3274 SOCK_UNLOCK(so); 3275 3276 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so->so_options & SO_NO_DDP) && 3277 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; 3278 3279 if (newtoep->tp_ulp_mode) { 3280 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3281 3282 if (ddp_mbuf == NULL) 3283 newtoep->tp_ulp_mode = 0; 3284 } 3285 3286 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d", 3287 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); 3288 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); 3289 /* 3290 * XXX workaround for lack of syncache drop 3291 */ 3292 toepcb_hold(newtoep); 3293 syncache_add_accept_req(req, so, newtoep); 3294 3295 rpl = cplhdr(reply_mbuf); 3296 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl); 3297 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3298 rpl->wr.wr_lo = 0; 3299 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3300 rpl->opt2 = htonl(calc_opt2(so, tdev)); 3301 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3302 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten 3303 3304 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | 3305 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); 3306 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) | 3307 CPL_PASS_OPEN_ACCEPT); 3308 3309 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status); 3310 3311 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep)); 3312 3313 l2t_send(cdev, reply_mbuf, e); 3314 m_free(m); 3315 if (newtoep->tp_ulp_mode) { 3316 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS, 3317 V_TF_DDP_OFF(1) | 3318 TP_DDP_TIMER_WORKAROUND_MASK, 3319 V_TF_DDP_OFF(1) | 3320 TP_DDP_TIMER_WORKAROUND_VAL, 1); 3321 } else 3322 printf("not offloading\n"); 3323 3324 3325 3326 return; 3327reject: 3328 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3329 mk_pass_accept_rpl(reply_mbuf, m); 3330 else 3331 mk_tid_release(reply_mbuf, newtoep, tid); 3332 cxgb_ofld_send(cdev, reply_mbuf); 3333 m_free(m); 3334out: 3335#if 0 3336 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3337#else 3338 return; 3339#endif 3340} 3341 
/*
 * Handle a CPL_PASS_ACCEPT_REQ message: a SYN arrived on an offloaded
 * listening socket.  Validation of the TID is optional (VALIDATE_TID) and
 * the guts of the work are done by process_pass_accept_req().
 *
 * NOTE(review): the VALIDATE_TID block below is unported Linux code
 * (printk/unlikely/lsk) and is normally compiled out — confirm before
 * enabling VALIDATE_TID.
 */
static int
do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
	struct socket *lso = listen_ctx->lso;
	struct tom_data *d = listen_ctx->tom_data;

#if VALIDATE_TID
	struct cpl_pass_accept_req *req = cplhdr(m);
	unsigned int tid = GET_TID(req);
	struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;

	if (unlikely(!lsk)) {
		printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
		    cdev->name,
		    (unsigned long)((union listen_entry *)ctx -
			t->stid_tab));
		return CPL_RET_BUF_DONE;
	}
	if (unlikely(tid >= t->ntids)) {
		printk(KERN_ERR "%s: passive open TID %u too large\n",
		    cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
	/*
	 * For T3A the current user of the TID may have closed but its last
	 * message(s) may have been backlogged so the TID appears to be still
	 * in use.  Just take the TID away, the connection can close at its
	 * own leisure.  For T3B this situation is a bug.
	 */
	if (!valid_new_tid(t, tid) &&
	    cdev->type != T3A) {
		printk(KERN_ERR "%s: passive open uses existing TID %u\n",
		    cdev->name, tid);
		return CPL_RET_BUF_DONE;
	}
#endif

	process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
	return (0);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.  The inpcb write lock must be
 * held; 'opt' is the raw tcp_opt field from the CPL message.
 */
static void
assign_rxopt(struct socket *so, unsigned int opt)
{
	const struct t3c_data *td = T3C_DATA(T3C_DEV(so));
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	inp_wlock_assert(tp->t_inpcb);

	/* 40 is presumably fixed IP + TCP header overhead — TODO confirm */
	toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
	tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
	tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
	/* Window scaling is in effect only if both sides negotiated it. */
	if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE|TF_REQ_SCALE))
		tp->rcv_scale = tp->request_r_scale;
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void
make_established(struct socket *so, u32 snd_isn, unsigned int opt)
{
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/* Seed all send-side sequence state from the hardware's ISN. */
	toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
	assign_rxopt(so, opt);
	/* Route socket-option processing through the TOE-aware handler. */
	so->so_proto->pr_ctloutput = t3_ctloutput;

#if 0
	inet_sk(sk)->id = tp->write_seq ^ jiffies;
#endif
	/*
	 * XXX not clear what rcv_wup maps to
	 */
	/*
	 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
	 * pass through opt0.
	 */
	if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
		toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);

	dump_toepcb(toep);

#ifdef notyet
/*
 * no clean interface for marking ARP up to date
 */
	dst_confirm(sk->sk_dst_cache);
#endif
	tp->t_starttime = ticks;
	tp->t_state = TCPS_ESTABLISHED;
	soisconnected(so);
}

/*
 * Expand the syncache entry created for this passive connection from a
 * CPL_PASS_ESTABLISH message.  Builds a minimal in_conninfo/tcpopt/tcphdr
 * from the CPL fields and hands them to syncache_expand().
 *
 * NOTE(review): 'th' is only partially initialized (sport/dport/seq/flags);
 * syncache_expand() presumably reads only those fields — confirm.  'm' is
 * deliberately NULL since there is no wire packet behind this call.
 */
static int
syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
{

	struct in_conninfo inc;
	struct tcpopt to;
	struct tcphdr th;
	int mss, wsf, sack, ts;
	struct mbuf *m = NULL;
	const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
	unsigned int opt;

#ifdef MAC
#error	"no MAC support"
#endif

	opt = ntohs(req->tcp_opt);

	bzero(&to, sizeof(struct tcpopt));

	/*
	 * Fill out information for entering us into the syncache
	 */
	inc.inc_fport = th.th_sport = req->peer_port;
	inc.inc_lport = th.th_dport = req->local_port;
	th.th_seq = req->rcv_isn;
	th.th_flags = TH_ACK;

	inc.inc_isipv6 = 0;
	inc.inc_len = 0;
	inc.inc_faddr.s_addr = req->peer_ip;
	inc.inc_laddr.s_addr = req->local_ip;

	/* Decode the options HW negotiated (same -40 overhead as assign_rxopt). */
	mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
	wsf = G_TCPOPT_WSCALE_OK(opt);
	ts = G_TCPOPT_TSTAMP(opt);
	sack = G_TCPOPT_SACK(opt);

	to.to_mss = mss;
	to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
	to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);

	DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
	    ntohl(req->local_ip), ntohs(req->local_port),
	    ntohl(req->peer_ip), ntohs(req->peer_port),
	    mss, wsf, ts, sack);
	return syncache_expand(&inc, &to, &th, so, m);
}


/*
 * Process a CPL_PASS_ESTABLISH message.
 * XXX a lot of the locking doesn't work
 * if we are in TCP_SYN_RECV due to crossed SYNs
 */
static int
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct cpl_pass_establish *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;
	struct tcpcb *tp;
	struct socket *so, *lso;
	struct t3c_data *td = T3C_DATA(cdev);
	/* Complete socket initialization now that we have the SND_ISN. */

	struct toedev *tdev;

	so = lso = toeptoso(toep);
	tdev = toep->tp_toedev;

	/* Take this embryonic connection off the listener's SYN queue. */
	SOCK_LOCK(so);
	LIST_REMOVE(toep, synq_entry);
	SOCK_UNLOCK(so);

	INP_INFO_WLOCK(&tcbinfo);
	if (!syncache_expand_establish_req(req, &so, toep)) {
		/*
		 * No entry
		 */
		CXGB_UNIMPLEMENTED();
	}
	if (so == NULL) {
		/*
		 * Couldn't create the socket
		 */
		CXGB_UNIMPLEMENTED();
	}

	/*
	 * XXX workaround for lack of syncache drop
	 * (drops the extra reference taken when the toepcb was put on the
	 * SYN queue; the original reference is still held below)
	 */
	toepcb_release(toep);

	/* 'so' now refers to the newly created child socket. */
	tp = sototcpcb(so);
	inp_wlock(tp->t_inpcb);

	so->so_snd.sb_flags |= SB_NOCOALESCE;
	so->so_rcv.sb_flags |= SB_NOCOALESCE;

	/* Cross-link the toepcb and the new tcpcb and set up WR bookkeeping. */
	toep->tp_tp = tp;
	toep->tp_flags = 0;
	tp->t_toe = toep;
	reset_wr_list(toep);
	tp->rcv_wnd = select_rcv_wnd(tdev, so);
	tp->rcv_nxt = toep->tp_copied_seq;
	install_offload_ops(so);

	toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
	toep->tp_wr_unacked = 0;
	/* The RSS qset is carried in csum_data by the CPL demux. */
	toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
	toep->tp_qset_idx = 0;
	toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);

	/*
	 * XXX Cancel any keep alive timer
	 */

	make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
	INP_INFO_WUNLOCK(&tcbinfo);
	inp_wunlock(tp->t_inpcb);

	CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
	cxgb_log_tcb(cdev->adapter, toep->tp_tid);
#ifdef notyet
	/*
	 * XXX not sure how these checks map to us
	 */
	if (unlikely(sk->sk_socket)) {   // simultaneous opens only
		sk->sk_state_change(sk);
		sk_wake_async(so, 0, POLL_OUT);
	}
	/*
	 * The state for the new connection is now up to date.
	 * Next check if we should add the connection to the parent's
	 * accept queue.  When the parent closes it resets connections
	 * on its SYN queue, so check if we are being reset.  If so we
	 * don't need to do anything more, the coming ABORT_RPL will
	 * destroy this socket.  Otherwise move the connection to the
	 * accept queue.
	 *
	 * Note that we reset the synq before closing the server so if
	 * we are not being reset the stid is still open.
	 */
	if (unlikely(!tp->forward_skb_hint)) { // removed from synq
		__kfree_skb(skb);
		goto unlock;
	}
#endif
	m_free(m);

	return (0);
}

/*
 * Fill in the right TID for CPL messages waiting in the out-of-order queue
 * and send them to the TOE.  Called once the connection finally has a TID
 * assigned; the inpcb write lock must be held.
 */
static void
fixup_and_send_ofo(struct socket *so)
{
	struct mbuf *m;
	struct toedev *tdev = TOE_DEV(so);
	struct tcpcb *tp = sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	unsigned int tid = toep->tp_tid;

	/* NOTE(review): leftover debug printf — consider removing/CTR'ing. */
	printf("fixup_and_send_ofo\n");

	inp_wlock_assert(tp->t_inpcb);
	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
		/*
		 * A variety of messages can be waiting but the fields we'll
		 * be touching are common to all so any message type will do.
		 */
		struct cpl_close_con_req *p = cplhdr(m);

		p->wr.wr_lo = htonl(V_WR_TID(tid));
		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	}
}

/*
 * Updates socket state from an active establish CPL message.  Runs with the
 * socket lock held.
3635 */ 3636static void 3637socket_act_establish(struct socket *so, struct mbuf *m) 3638{ 3639 struct cpl_act_establish *req = cplhdr(m); 3640 u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ 3641 struct tcpcb *tp = sototcpcb(so); 3642 struct toepcb *toep = tp->t_toe; 3643 3644 if (__predict_false(tp->t_state != TCPS_SYN_SENT)) 3645 log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n", 3646 toep->tp_tid, tp->t_state); 3647 3648 tp->ts_recent_age = ticks; 3649 tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn; 3650 toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs; 3651 3652 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); 3653 3654 /* 3655 * Now that we finally have a TID send any CPL messages that we had to 3656 * defer for lack of a TID. 3657 */ 3658 if (mbufq_len(&toep->out_of_order_queue)) 3659 fixup_and_send_ofo(so); 3660 3661 if (__predict_false(so->so_state & SS_NOFDREF)) { 3662 /* 3663 * XXX does this even make sense? 3664 */ 3665 sorwakeup(so); 3666 } 3667 m_free(m); 3668#ifdef notyet 3669/* 3670 * XXX assume no write requests permitted while socket connection is 3671 * incomplete 3672 */ 3673 /* 3674 * Currently the send queue must be empty at this point because the 3675 * socket layer does not send anything before a connection is 3676 * established. To be future proof though we handle the possibility 3677 * that there are pending buffers to send (either TX_DATA or 3678 * CLOSE_CON_REQ). First we need to adjust the sequence number of the 3679 * buffers according to the just learned write_seq, and then we send 3680 * them on their way. 3681 */ 3682 fixup_pending_writeq_buffers(sk); 3683 if (t3_push_frames(so, 1)) 3684 sk->sk_write_space(sk); 3685#endif 3686 3687 toep->tp_state = tp->t_state; 3688 tcpstat.tcps_connects++; 3689 3690} 3691 3692/* 3693 * Process a CPL_ACT_ESTABLISH message. 
3694 */ 3695static int 3696do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3697{ 3698 struct cpl_act_establish *req = cplhdr(m); 3699 unsigned int tid = GET_TID(req); 3700 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); 3701 struct toepcb *toep = (struct toepcb *)ctx; 3702 struct tcpcb *tp = toep->tp_tp; 3703 struct socket *so; 3704 struct toedev *tdev; 3705 struct tom_data *d; 3706 3707 if (tp == NULL) { 3708 free_atid(cdev, atid); 3709 return (0); 3710 } 3711 3712 so = toeptoso(toep); 3713 tdev = TOE_DEV(so); /* blow up here if link was down */ 3714 d = TOM_DATA(tdev); 3715 3716 inp_wlock(tp->t_inpcb); 3717 3718 /* 3719 * It's OK if the TID is currently in use, the owning socket may have 3720 * backlogged its last CPL message(s). Just take it away. 3721 */ 3722 toep->tp_tid = tid; 3723 toep->tp_tp = tp; 3724 so_insert_tid(d, so, tid); 3725 free_atid(cdev, atid); 3726 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); 3727 3728 socket_act_establish(so, m); 3729 inp_wunlock(tp->t_inpcb); 3730 CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid); 3731 cxgb_log_tcb(cdev->adapter, toep->tp_tid); 3732 3733 return (0); 3734} 3735 3736/* 3737 * Process an acknowledgment of WR completion. Advance snd_una and send the 3738 * next batch of work requests from the write queue. 
3739 */ 3740static void 3741wr_ack(struct toepcb *toep, struct mbuf *m) 3742{ 3743 struct tcpcb *tp = toep->tp_tp; 3744 struct cpl_wr_ack *hdr = cplhdr(m); 3745 struct socket *so = toeptoso(toep); 3746 unsigned int credits = ntohs(hdr->credits); 3747 u32 snd_una = ntohl(hdr->snd_una); 3748 int bytes = 0; 3749 3750 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3751 3752 inp_wlock(tp->t_inpcb); 3753 3754 toep->tp_wr_avail += credits; 3755 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3756 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3757 3758 while (credits) { 3759 struct mbuf *p = peek_wr(toep); 3760 3761 if (__predict_false(!p)) { 3762 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3763 "nothing pending, state %u wr_avail=%u\n", 3764 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3765 break; 3766 } 3767 CTR2(KTR_TOM, 3768 "wr_ack: p->credits=%d p->bytes=%d", p->m_pkthdr.csum_data, p->m_pkthdr.len); 3769 3770 KASSERT(p->m_pkthdr.csum_data != 0, ("empty request still on list")); 3771 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3772 3773#if DEBUG_WR > 1 3774 struct tx_data_wr *w = cplhdr(p); 3775 log(LOG_ERR, 3776 "TID %u got %u WR credits, need %u, len %u, " 3777 "main body %u, frags %u, seq # %u, ACK una %u," 3778 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3779 toep->tp_tid, credits, p->csum, p->len, 3780 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3781 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3782 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3783#endif 3784 p->m_pkthdr.csum_data -= credits; 3785 break; 3786 } else { 3787 dequeue_wr(toep); 3788 credits -= p->m_pkthdr.csum_data; 3789 bytes += p->m_pkthdr.len; 3790 CTR3(KTR_TOM, 3791 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3792 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3793 3794 m_free(p); 3795 } 3796 } 3797 3798#if DEBUG_WR 3799 check_wr_invariants(tp); 3800#endif 3801 3802 if 
(__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3803#if VALIDATE_SEQ 3804 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3805 3806 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3807 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3808 toep->tp_tid, tp->snd_una); 3809#endif 3810 goto out_free; 3811 } 3812 3813 if (tp->snd_una != snd_una) { 3814 tp->snd_una = snd_una; 3815 tp->ts_recent_age = ticks; 3816#ifdef notyet 3817 /* 3818 * Keep ARP entry "minty fresh" 3819 */ 3820 dst_confirm(sk->sk_dst_cache); 3821#endif 3822 if (tp->snd_una == tp->snd_nxt) 3823 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3824 } 3825 if (bytes) { 3826 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3827 SOCKBUF_LOCK(&so->so_snd); 3828 sbdrop_locked(&so->so_snd, bytes); 3829 sowwakeup_locked(so); 3830 } 3831 3832 if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc) 3833 t3_push_frames(so, 0); 3834 3835out_free: 3836 inp_wunlock(tp->t_inpcb); 3837 m_free(m); 3838} 3839 3840/* 3841 * Handler for TX_DATA_ACK CPL messages. 3842 */ 3843static int 3844do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3845{ 3846 struct toepcb *toep = (struct toepcb *)ctx; 3847 3848 VALIDATE_SOCK(so); 3849 3850 wr_ack(toep, m); 3851 return 0; 3852} 3853 3854/* 3855 * Handler for TRACE_PKT CPL messages. Just sink these packets. 3856 */ 3857static int 3858do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 3859{ 3860 m_freem(m); 3861 return 0; 3862} 3863 3864/* 3865 * Reset a connection that is on a listener's SYN queue or accept queue, 3866 * i.e., one that has not had a struct socket associated with it. 3867 * Must be called from process context. 3868 * 3869 * Modeled after code in inet_csk_listen_stop(). 3870 */ 3871static void 3872t3_reset_listen_child(struct socket *child) 3873{ 3874 struct tcpcb *tp = sototcpcb(child); 3875 3876 t3_send_reset(tp->t_toe); 3877} 3878 3879/* 3880 * Disconnect offloaded established but not yet accepted connections sitting 3881 * on a server's accept_queue. 
We just send an ABORT_REQ at this point and 3882 * finish off the disconnect later as we may need to wait for the ABORT_RPL. 3883 */ 3884void 3885t3_disconnect_acceptq(struct socket *listen_so) 3886{ 3887 struct socket *so; 3888 struct tcpcb *tp; 3889 3890 TAILQ_FOREACH(so, &listen_so->so_comp, so_list) { 3891 tp = sototcpcb(so); 3892 3893 if (tp->t_flags & TF_TOE) { 3894 inp_wlock(tp->t_inpcb); 3895 t3_reset_listen_child(so); 3896 inp_wunlock(tp->t_inpcb); 3897 } 3898 } 3899} 3900 3901/* 3902 * Reset offloaded connections sitting on a server's syn queue. As above 3903 * we send ABORT_REQ and finish off when we get ABORT_RPL. 3904 */ 3905 3906void 3907t3_reset_synq(struct listen_ctx *lctx) 3908{ 3909 struct toepcb *toep; 3910 3911 SOCK_LOCK(lctx->lso); 3912 while (!LIST_EMPTY(&lctx->synq_head)) { 3913 toep = LIST_FIRST(&lctx->synq_head); 3914 LIST_REMOVE(toep, synq_entry); 3915 toep->tp_tp = NULL; 3916 t3_send_reset(toep); 3917 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); 3918 toepcb_release(toep); 3919 } 3920 SOCK_UNLOCK(lctx->lso); 3921} 3922 3923 3924int 3925t3_setup_ppods(struct socket *so, const struct ddp_gather_list *gl, 3926 unsigned int nppods, unsigned int tag, unsigned int maxoff, 3927 unsigned int pg_off, unsigned int color) 3928{ 3929 unsigned int i, j, pidx; 3930 struct pagepod *p; 3931 struct mbuf *m; 3932 struct ulp_mem_io *req; 3933 struct tcpcb *tp = sototcpcb(so); 3934 struct toepcb *toep = tp->t_toe; 3935 unsigned int tid = toep->tp_tid; 3936 const struct tom_data *td = TOM_DATA(TOE_DEV(so)); 3937 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; 3938 3939 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", 3940 gl, nppods, tag, maxoff, pg_off, color); 3941 3942 for (i = 0; i < nppods; ++i) { 3943 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); 3944 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 3945 req = mtod(m, struct ulp_mem_io *); 3946 m->m_pkthdr.len = m->m_len = sizeof(*req) + 
PPOD_SIZE; 3947 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 3948 req->wr.wr_lo = 0; 3949 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) | 3950 V_ULPTX_CMD(ULP_MEM_WRITE)); 3951 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) | 3952 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1)); 3953 3954 p = (struct pagepod *)(req + 1); 3955 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) { 3956 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); 3957 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) | 3958 V_PPOD_COLOR(color)); 3959 p->pp_max_offset = htonl(maxoff); 3960 p->pp_page_offset = htonl(pg_off); 3961 p->pp_rsvd = 0; 3962 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) 3963 p->pp_addr[j] = pidx < gl->dgl_nelem ? 3964 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0; 3965 } else 3966 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */ 3967 send_or_defer(toep, m, 0); 3968 ppod_addr += PPOD_SIZE; 3969 } 3970 return (0); 3971} 3972 3973/* 3974 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command. 3975 */ 3976static inline void 3977mk_cpl_barrier_ulp(struct cpl_barrier *b) 3978{ 3979 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b; 3980 3981 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3982 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8)); 3983 b->opcode = CPL_BARRIER; 3984} 3985 3986/* 3987 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command. 3988 */ 3989static inline void 3990mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno) 3991{ 3992 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 3993 3994 txpkt = (struct ulp_txpkt *)req; 3995 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 3996 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 3997 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid)); 3998 req->cpuno = htons(cpuno); 3999} 4000 4001/* 4002 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command. 
 */
static inline void
mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
		     unsigned int word, uint64_t mask, uint64_t val)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

	CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
	    tid, word, mask, val);

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
	req->reply = V_NO_REPLY(1);	/* no SET_TCB_FIELD completion wanted */
	req->cpu_idx = 0;
	req->word = htons(word);
	req->mask = htobe64(mask);
	req->val = htobe64(val);
}

/*
 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
 */
static void
mk_rx_data_ack_ulp(struct socket *so, struct cpl_rx_data_ack *ack,
		   unsigned int tid, unsigned int credits)
{
	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;

	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
	ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
	    V_RX_DACK_MODE(TOM_TUNABLE(TOE_DEV(so), delack)) |
	    V_RX_CREDITS(credits));
}

/*
 * Cancel (invalidate) hardware DDP buffer 'bufidx' for this connection by
 * sending a compound BYPASS work request: barrier, SET_TCB_FIELD flipping
 * the DDP flags, GET_TCB to learn how much data landed, barrier.  The
 * receive sockbuf lock must be held.
 */
void
t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_barrier *lock;
	struct cpl_set_tcb_field *req;
	struct cpl_get_tcb *getreq;
	struct ddp_state *p = &toep->tp_ddp_state;

	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
	    sizeof(*getreq);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	/* Sub-commands are laid out back to back after the WR header. */
	lock = (struct cpl_barrier *)(wr + 1);
	mk_cpl_barrier_ulp(lock);

	req = (struct cpl_set_tcb_field *)(lock + 1);

	CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);

	/* Hmmm, not sure if this actually a good thing: reactivating
	 * the other buffer might be an issue if it has been completed
	 * already.  However, that is unlikely, since the fact that the UBUF
	 * is not completed indicates that there is no oustanding data.
	 */
	if (bufidx == 0)
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF0_VALID(1),
		    V_TF_DDP_ACTIVE_BUF(1));
	else
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
		    V_TF_DDP_ACTIVE_BUF(1) |
		    V_TF_DDP_BUF1_VALID(1), 0);

	getreq = (struct cpl_get_tcb *)(req + 1);
	mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);

	mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));

	/* Keep track of the number of oustanding CPL_GET_TCB requests
	 */
	p->get_tcb_count++;

#ifdef T3_TRACE
	T3_TRACE1(TIDTB(so),
	    "t3_cancel_ddpbuf: bufidx %u", bufidx);
#endif
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/**
 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
 * @sk: the socket associated with the buffers
 * @bufidx: index of HW DDP buffer (0 or 1)
 * @tag0: new tag for HW buffer 0
 * @tag1: new tag for HW buffer 1
 * @len: new length for HW buf @bufidx
 *
 * Sends a compound WR to overlay a new DDP buffer on top of an existing
 * buffer by changing the buffer tag and length and setting the valid and
 * active flag accordingly.  The caller must ensure the new buffer is at
 * least as big as the existing one.  Since we typically reprogram both HW
 * buffers this function sets both tags for convenience.
Read the TCB to 4113 * determine how made data was written into the buffer before the overlay 4114 * took place. 4115 */ 4116void 4117t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, 4118 unsigned int tag1, unsigned int len) 4119{ 4120 unsigned int wrlen; 4121 struct mbuf *m; 4122 struct work_request_hdr *wr; 4123 struct cpl_get_tcb *getreq; 4124 struct cpl_set_tcb_field *req; 4125 struct ddp_state *p = &toep->tp_ddp_state; 4126 4127 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", 4128 bufidx, tag0, tag1, len); 4129 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4130 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); 4131 m = m_gethdr_nofail(wrlen); 4132 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4133 wr = mtod(m, struct work_request_hdr *); 4134 m->m_pkthdr.len = m->m_len = wrlen; 4135 bzero(wr, wrlen); 4136 4137 4138 /* Set the ATOMIC flag to make sure that TP processes the following 4139 * CPLs in an atomic manner and no wire segments can be interleaved. 
4140 */ 4141 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); 4142 req = (struct cpl_set_tcb_field *)(wr + 1); 4143 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG, 4144 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) | 4145 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32, 4146 V_TCB_RX_DDP_BUF0_TAG(tag0) | 4147 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32); 4148 req++; 4149 if (bufidx == 0) { 4150 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN, 4151 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4152 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 4153 req++; 4154 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4155 V_TF_DDP_PUSH_DISABLE_0(1) | 4156 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4157 V_TF_DDP_PUSH_DISABLE_0(0) | 4158 V_TF_DDP_BUF0_VALID(1)); 4159 } else { 4160 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN, 4161 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN), 4162 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len)); 4163 req++; 4164 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4165 V_TF_DDP_PUSH_DISABLE_1(1) | 4166 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4167 V_TF_DDP_PUSH_DISABLE_1(0) | 4168 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1)); 4169 } 4170 4171 getreq = (struct cpl_get_tcb *)(req + 1); 4172 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4173 4174 /* Keep track of the number of oustanding CPL_GET_TCB requests 4175 */ 4176 p->get_tcb_count++; 4177 4178#ifdef T3_TRACE 4179 T3_TRACE4(TIDTB(sk), 4180 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u " 4181 "len %d", 4182 bufidx, tag0, tag1, len); 4183#endif 4184 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4185} 4186 4187/* 4188 * Sends a compound WR containing all the CPL messages needed to program the 4189 * two HW DDP buffers, namely optionally setting up the length and offset of 4190 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK. 
 */
void
t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
		 unsigned int len1, unsigned int offset1,
		 uint64_t ddp_flags, uint64_t flag_mask, int modulate)
{
	unsigned int wrlen;
	struct mbuf *m;
	struct work_request_hdr *wr;
	struct cpl_set_tcb_field *req;

	CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
	    len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);

	SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
	/* WR length depends on which optional sub-commands are included. */
	wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
	    (len1 ? sizeof(*req) : 0) +
	    (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
	m = m_gethdr_nofail(wrlen);
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	wr = mtod(m, struct work_request_hdr *);
	bzero(wr, wrlen);

	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
	m->m_pkthdr.len = m->m_len = wrlen;

	/* Sub-commands are packed back to back; req advances past each one. */
	req = (struct cpl_set_tcb_field *)(wr + 1);
	if (len0) {                  /* program buffer 0 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
		    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
		    V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
		req++;
	}
	if (len1) {                  /* program buffer 1 offset and length */
		mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
		    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
		    V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
		    V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
		    V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
		req++;
	}

	/* Always update the DDP flags under 'flag_mask'. */
	mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
	    ddp_flags);

	if (modulate) {
		/* Also return accumulated RX credits to the peer's window. */
		mk_rx_data_ack_ulp(toeptoso(toep),
		    (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
		    toep->tp_copied_seq - toep->tp_rcv_wup);
		toep->tp_rcv_wup = toep->tp_copied_seq;
	}

#ifdef T3_TRACE
	T3_TRACE5(TIDTB(sk),
	    "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
	    "modulate %d",
	    len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
	    modulate);
#endif

	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
}

/*
 * Initialize the mbuf-to-work-request mapping table: mbuf_wrs[i] is the
 * number of WRs needed for an mbuf chain with i scatter/gather entries,
 * given the adapter's WR size 'wr_len' (in flits).  Idempotent.
 */
void
t3_init_wr_tab(unsigned int wr_len)
{
	int i;

	if (mbuf_wrs[1])	/* already initialized */
		return;

	for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
		int sgl_len = (3 * i) / 2 + (i & 1);

		sgl_len += 3;
		mbuf_wrs[i] = sgl_len <= wr_len ?
		    1 : 1 + (sgl_len - 2) / (wr_len - 1);
	}

	wrlen = wr_len * 8;	/* file-scope wrlen is kept in bytes */
}

/*
 * One-time CPL I/O setup: register this module's handler for every CPL
 * opcode the TOE can deliver.  Always returns 0.
 */
int
t3_init_cpl_io(void)
{
#ifdef notyet
	tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
	if (!tcphdr_skb) {
		log(LOG_ERR,
		    "Chelsio TCP offload: can't allocate sk_buff\n");
		return -1;
	}
	skb_put(tcphdr_skb, sizeof(struct tcphdr));
	tcphdr_skb->h.raw = tcphdr_skb->data;
	memset(tcphdr_skb->data, 0, tcphdr_skb->len);
#endif

	t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
	t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
	t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
	t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
	t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
	t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
	t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
	t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
	t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
	return (0);
}