111798Smichaelm/*- 211798Smichaelm * Copyright (c) 2012, 2015 Chelsio Communications, Inc. 311798Smichaelm * All rights reserved. 411798Smichaelm * Written by: Navdeep Parhar <np@FreeBSD.org> 511798Smichaelm * 611798Smichaelm * Redistribution and use in source and binary forms, with or without 711798Smichaelm * modification, are permitted provided that the following conditions 811798Smichaelm * are met: 911798Smichaelm * 1. Redistributions of source code must retain the above copyright 1011798Smichaelm * notice, this list of conditions and the following disclaimer. 1111798Smichaelm * 2. Redistributions in binary form must reproduce the above copyright 1211798Smichaelm * notice, this list of conditions and the following disclaimer in the 1311798Smichaelm * documentation and/or other materials provided with the distribution. 1411798Smichaelm * 1511798Smichaelm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1611798Smichaelm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1711798Smichaelm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1811798Smichaelm * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1911798Smichaelm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2011798Smichaelm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2111798Smichaelm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2211798Smichaelm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2311798Smichaelm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2411797SN/A * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2511797SN/A * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/tom/t4_cpl_io.c 342583 2018-12-29 00:30:17Z jhb $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sglist.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/toecore.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

/* Buffer autosizing knobs borrowed from the base TCP stack. */
VNET_DECLARE(int, tcp_do_autosndbuf);
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
VNET_DECLARE(int, tcp_autosndbuf_inc);
#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
VNET_DECLARE(int, tcp_autosndbuf_max);
#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
VNET_DECLARE(int, tcp_do_autorcvbuf);
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
VNET_DECLARE(int, tcp_autorcvbuf_inc);
#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
VNET_DECLARE(int, tcp_autorcvbuf_max);
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)

/*
 * Remove and return the packet at the head of the queue, or NULL if the
 * queue is empty.  Caller provides any synchronization needed.
 */
static inline struct mbuf *
mbufq_dequeue(struct mbufq *q)
{
	struct mbuf *m;

	m = q->head;
	if (m) {
		if (q->tail == m)
			q->tail = NULL;		/* queue is now empty */
		q->head = m->m_nextpkt;
		m->m_nextpkt = NULL;		/* detach from the chain */
	}
	return (m);
}

/* Append a packet to the tail of the queue. */
static inline void
mbufq_enqueue(struct mbufq *q, struct mbuf *m)
{

	m->m_nextpkt = NULL;
	if (q->tail)
		q->tail->m_nextpkt = m;
	else
		q->head = m;
	q->tail = m;
}

/* Peek at the packet at the head of the queue without removing it. */
static inline struct mbuf *
mbufq_first(const struct mbufq *q)
{

	return (q->head);
}

/*
 * Send a FLOWC work request to the firmware to describe this flow's tx
 * parameters.  Asserted elsewhere to have been sent before any data or
 * abort/close WRs on the tid.  ftxp carries the TCP parameters of an
 * established connection; with ftxp == NULL only a reduced parameter set
 * (6 instead of 8 mnemonics) is sent.  Consumes one tx descriptor and the
 * corresponding tx credits.
 */
void
send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
{
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	unsigned int nparams = ftxp ? 8 : 6, flowclen;
	struct vi_info *vi = toep->vi;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
	    ("%s: flowc for tid %u sent already", __func__, toep->tid));

	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);

	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(toep->tid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id);
	if (ftxp) {
		/* sndbuf is capped by the tunable sc->tt.sndbuf. */
		uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf);

		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
		flowc->mnemval[4].val = htobe32(ftxp->snd_nxt);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
		flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt);
		flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[6].val = htobe32(sndbuf);
		flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[7].val = htobe32(ftxp->mss);

		CTR6(KTR_CXGBE,
		    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
		    __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt,
		    ftxp->rcv_nxt);
	} else {
		/* Not established yet: placeholder sndbuf/mss values. */
		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[4].val = htobe32(512);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[5].val = htobe32(512);

		CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
	}

	/* Charge this WR's credits to a tx descriptor. */
	txsd->tx_credits = howmany(flowclen, 16);
	txsd->plen = 0;
	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
	toep->tx_credits -= txsd->tx_credits;
	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
		toep->txsd_pidx = 0;
	toep->txsd_avail--;

	toep->flags |= TPF_FLOWC_WR_SENT;
	t4_wrq_tx(sc, wr);
}

/*
 * Send a CPL_ABORT_REQ so the hardware sends an RST and tears down the tid.
 * snd_nxt is used as the RST's sequence number only when the inp is already
 * INP_DROPPED (the tcpcb must not be dereferenced in that case).  No-op if
 * an abort is already in progress.
 */
void
send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
{
	struct wrqe *wr;
	struct cpl_abort_req *req;
	int tid = toep->tid;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);	/* don't use if INP_DROPPED */

	INP_WLOCK_ASSERT(inp);

	CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
	    __func__, toep->tid,
	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
	    tcpstates[tp->t_state],
	    toep->flags, inp->inp_flags,
	    toep->flags & TPF_ABORT_SHUTDOWN ?
	    " (abort already in progress)" : "");

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		return;			/* abort already in progress */

	toep->flags |= TPF_ABORT_SHUTDOWN;

	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));

	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
	if (inp->inp_flags & INP_DROPPED)
		req->rsvd0 = htobe32(snd_nxt);
	else
		req->rsvd0 = htobe32(tp->snd_nxt);
	/* rsvd1 != 0 tells the hw no data was ever sent on this tid. */
	req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
	req->cmd = CPL_ABORT_SEND_RST;

	/*
	 * XXX: What's the correct way to tell that the inp hasn't been detached
	 * from its socket?  Should I even be flushing the snd buffer here?
	 */
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		struct socket *so = inp->inp_socket;

		if (so != NULL)	/* because I'm not sure.  See comment above */
			sbflush(&so->so_snd);
	}

	t4_l2t_send(sc, wr, toep->l2te);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 */
static void
assign_rxopt(struct tcpcb *tp, unsigned int opt)
{
	struct toepcb *toep = tp->t_toe;
	struct inpcb *inp = tp->t_inpcb;
	struct adapter *sc = td_adapter(toep->td);
	int n;

	INP_LOCK_ASSERT(inp);

	/*
	 * The hw reports an MTU table index; derive the MSS by subtracting
	 * the fixed IP (v4 or v6) and TCP header sizes.
	 */
	if (inp->inp_inc.inc_flags & INC_ISIPV6)
		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		n = sizeof(struct ip) + sizeof(struct tcphdr);
	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;

	CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
	    G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);

	if (G_TCPOPT_TSTAMP(opt)) {
		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
		tp->ts_recent = 0;		/* hmmm */
		tp->ts_recent_age = tcp_ts_getticks();
		/* The timestamp option consumes payload space. */
		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
	}

	if (G_TCPOPT_SACK(opt))
		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
	else
		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */

	if (G_TCPOPT_WSCALE_OK(opt))
		tp->t_flags |= TF_RCVD_SCALE;

	/* Doing window scaling? */
	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
		tp->rcv_scale = tp->request_r_scale;
		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
	}
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCPS_ESTABLISHED.
 *
 * The ISNs are from after the exchange of SYNs.  i.e., the true ISN + 1.
 */
void
make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
    uint16_t opt)
{
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	long bufsize;
	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS */
	uint32_t irs = be32toh(rcv_isn) - 1;	/* true IRS */
	uint16_t tcpopt = be16toh(opt);
	struct flowc_tx_params ftxp;

	INP_WLOCK_ASSERT(inp);
	KASSERT(tp->t_state == TCPS_SYN_SENT ||
	    tp->t_state == TCPS_SYN_RECEIVED,
	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));

	CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p",
	    __func__, toep->tid, toep, inp);

	tcp_state_change(tp, TCPS_ESTABLISHED);
	tp->t_starttime = ticks;
	TCPSTAT_INC(tcps_connects);

	/* Initialize the receive side from the true IRS. */
	tp->irs = irs;
	tcp_rcvseqinit(tp);
	tp->rcv_wnd = toep->rx_credits << 10;
	tp->rcv_adv += tp->rcv_wnd;
	tp->last_ack_sent = tp->rcv_nxt;

	/*
	 * If we were unable to send all rx credits via opt0, save the remainder
	 * in rx_credits so that they can be handed over with the next credit
	 * update.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	bufsize = select_rcv_wnd(so);
	SOCKBUF_UNLOCK(&so->so_rcv);
	toep->rx_credits = bufsize - tp->rcv_wnd;

	/* Initialize the send side from the true ISS. */
	tp->iss = iss;
	tcp_sendseqinit(tp);
	tp->snd_una = iss + 1;
	tp->snd_nxt = iss + 1;
	tp->snd_max = iss + 1;

	assign_rxopt(tp, tcpopt);

	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf)
		bufsize = V_tcp_autosndbuf_max;
	else
		bufsize = sbspace(&so->so_snd);
	SOCKBUF_UNLOCK(&so->so_snd);

	/* Tell the hw about this flow before any data can move. */
	ftxp.snd_nxt = tp->snd_nxt;
	ftxp.rcv_nxt = tp->rcv_nxt;
	ftxp.snd_space = bufsize;
	ftxp.mss = tp->t_maxseg;
	send_flowc_wr(toep, &ftxp);

	soisconnected(so);
}

/*
 * Return rx credits to the hw with a CPL_RX_DATA_ACK.  Returns the number
 * of credits handed back, or 0 if the work request could not be allocated.
 */
static int
send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
{
	struct wrqe *wr;
	struct cpl_rx_data_ack *req;
	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);

	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));

	wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
	if (wr == NULL)
		return (0);
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
	req->credit_dack = htobe32(dack | V_RX_CREDITS(credits));

	t4_wrq_tx(sc, wr);
	return (credits);
}

/*
 * Data has been consumed from the socket's receive buffer.  Accumulate the
 * freed-up space as rx credits and return them to the hw once enough have
 * built up, or sooner if the receive window is getting small.
 */
void
t4_rcvd(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_rcv;
	struct toepcb *toep = tp->t_toe;
	int credits;

	INP_WLOCK_ASSERT(inp);

	SOCKBUF_LOCK(sb);
	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	/* The delta since the last call is what was read by the app. */
	toep->rx_credits += toep->sb_cc - sb->sb_cc;
	toep->sb_cc = sb->sb_cc;

	/*
	 * Thresholds for returning credits: always when the window is small,
	 * in large batches otherwise, and whenever the window has shrunk
	 * below the sockbuf low-water mark.
	 */
	if (toep->rx_credits > 0 &&
	    (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 ||
	    (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
	    toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) {

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}
	SOCKBUF_UNLOCK(sb);
}

/*
 * Close a connection by sending a CPL_CLOSE_CON_REQ message.
 */
static int
close_conn(struct adapter *sc, struct toepcb *toep)
{
	struct wrqe *wr;
	struct cpl_close_con_req *req;
	unsigned int tid = toep->tid;

	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");

	if (toep->flags & TPF_FIN_SENT)
		return (0);	/* already closed */

	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));

	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |
	    V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
	req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
	    V_FW_WR_FLOWID(tid));
	req->wr.wr_lo = cpu_to_be64(0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
	req->rsvd = 0;

	toep->flags |= TPF_FIN_SENT;
	toep->flags &= ~TPF_SEND_FIN;
	t4_l2t_send(sc, wr, toep->l2te);

	return (0);
}

#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))

/* Maximum amount of immediate data we could stuff in a WR */
static inline int
max_imm_payload(int tx_credits)
{
	const int n = 2;	/* Use only up to 2 desc for imm. data WR */

	KASSERT(tx_credits >= 0 &&
	    tx_credits <= MAX_OFLD_TX_CREDITS,
	    ("%s: %d credits", __func__, tx_credits));

	if (tx_credits < MIN_OFLD_TX_CREDITS)
		return (0);

	if (tx_credits >= (n * EQ_ESIZE) / 16)
		return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr));
	else
		return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr));
}

/* Maximum number of SGL entries we could stuff in a WR */
static inline int
max_dsgl_nsegs(int tx_credits)
{
	int nseg = 1;	/* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */
	int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS;

	KASSERT(tx_credits >= 0 &&
	    tx_credits <= MAX_OFLD_TX_CREDITS,
	    ("%s: %d credits", __func__, tx_credits));

	if (tx_credits < MIN_OFLD_TX_CREDITS)
		return (0);

	/* Each 24B sge pair holds 2 segments; a leftover 16B holds 1. */
	nseg += 2 * (sge_pair_credits * 16 / 24);
	if ((sge_pair_credits * 16) % 24 == 16)
		nseg++;

	return (nseg);
}

/*
 * Fill in the fixed-format header of an FW_OFLD_TX_DATA work request.
 * immdlen is the number of bytes of immediate data (0 for DSGL tx); plen is
 * the payload length as seen by TCP; credits is the WR's size in 16B units.
 * With txalign > 0, payload alignment/LSO hints are set based on plen vs.
 * 2*MSS and the port speed.
 */
static inline void
write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
    unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign)
{
	struct fw_ofld_tx_data_wr *txwr = dst;

	txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) |
	    V_FW_WR_IMMDLEN(immdlen));
	txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
	    V_FW_WR_LEN16(credits));
	txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) |
	    V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove));
	txwr->plen = htobe32(plen);

	if (txalign > 0) {
		struct tcpcb *tp = intotcpcb(toep->inp);

		if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi))
			txwr->lsodisable_to_flags |=
			    htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE);
		else
			txwr->lsodisable_to_flags |=
			    htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD |
				(tp->t_flags & TF_NODELAY ? 0 :
				F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE));
	}
}

/*
 * Generate a DSGL from a starting mbuf.
The total number of segments and the
 * maximum segments in any one mbuf are provided.
 */
static void
write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
{
	struct mbuf *m;
	struct ulptx_sgl *usgl = dst;
	int i, j, rc;
	struct sglist sg;
	struct sglist_seg segs[n];	/* n = max segments in any one mbuf */

	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));

	sglist_init(&sg, n, segs);
	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(nsegs));

	/* i == -1 means the next segment fills the SGL header itself. */
	i = -1;
	for (m = start; m != stop; m = m->m_next) {
		rc = sglist_append(&sg, mtod(m, void *), m->m_len);
		if (__predict_false(rc != 0))
			panic("%s: sglist_append %d", __func__, rc);

		for (j = 0; j < sg.sg_nseg; i++, j++) {
			if (i < 0) {
				/* First segment goes in the header. */
				usgl->len0 = htobe32(segs[j].ss_len);
				usgl->addr0 = htobe64(segs[j].ss_paddr);
			} else {
				/* Remaining segments are packed in pairs. */
				usgl->sge[i / 2].len[i & 1] =
				    htobe32(segs[j].ss_len);
				usgl->sge[i / 2].addr[i & 1] =
				    htobe64(segs[j].ss_paddr);
			}
#ifdef INVARIANTS
			nsegs--;
#endif
		}
		sglist_reset(&sg);
	}
	if (i & 1)
		usgl->sge[i / 2].len[1] = htobe32(0);	/* zero the unused half */
	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
	    __func__, nsegs, start, stop));
}

/*
 * Max number of SGL entries an offload tx work request can have.  This is 41
 * (1 + 40) for a full 512B work request.
 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40)
 */
#define OFLD_SGL_LEN (41)

/*
 * Send data and/or a FIN to the peer.
 *
 * The socket's so_snd buffer consists of a stream of data starting with sb_mb
 * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
 * was transmitted.
 *
 * drop indicates the number of bytes that should be dropped from the head of
 * the send buffer.
It is an optimization that lets do_fw4_ack avoid creating
 * contention on the send buffer lock (before this change it used to do
 * sowwakeup and then t4_push_frames right after that when recovering from tx
 * stalls).  When drop is set this function MUST drop the bytes and wake up any
 * writers.
 */
void
t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
{
	struct mbuf *sndptr, *m, *sb_sndptr;
	struct fw_ofld_tx_data_wr *txwr;
	struct wrqe *wr;
	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_snd;
	int tx_credits, shove, compl, sowwakeup;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));

	KASSERT(toep->ulp_mode == ULP_MODE_NONE ||
	    toep->ulp_mode == ULP_MODE_TCPDDP ||
	    toep->ulp_mode == ULP_MODE_RDMA,
	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
		return;

	/*
	 * This function doesn't resume by itself.  Someone else must clear the
	 * flag and call this function.
	 */
	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
		KASSERT(drop == 0,
		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
		return;
	}

	/* One iteration per work request sent. */
	do {
		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
		max_imm = max_imm_payload(tx_credits);
		max_nsegs = max_dsgl_nsegs(tx_credits);

		SOCKBUF_LOCK(sb);
		sowwakeup = drop;
		if (drop) {
			sbdrop_locked(sb, drop);
			drop = 0;
		}
		sb_sndptr = sb->sb_sndptr;
		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
		plen = 0;
		nsegs = 0;
		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
		for (m = sndptr; m != NULL; m = m->m_next) {
			int n = sglist_count(mtod(m, void *), m->m_len);

			nsegs += n;
			plen += m->m_len;

			/* This mbuf sent us _over_ the nsegs limit, back out */
			if (plen > max_imm && nsegs > max_nsegs) {
				nsegs -= n;
				plen -= m->m_len;
				if (plen == 0) {
					/* Too few credits */
					toep->flags |= TPF_TX_SUSPENDED;
					if (sowwakeup)
						sowwakeup_locked(so);
					else
						SOCKBUF_UNLOCK(sb);
					SOCKBUF_UNLOCK_ASSERT(sb);
					return;
				}
				break;
			}

			if (max_nsegs_1mbuf < n)
				max_nsegs_1mbuf = n;
			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */

			/* This mbuf put us right at the max_nsegs limit */
			if (plen > max_imm && nsegs == max_nsegs) {
				m = m->m_next;
				break;
			}
		}

		/* Request a tx completion if enough unacked WR data piled up. */
		if (sb->sb_cc > sb->sb_hiwat * 5 / 8 &&
		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
			compl = 1;
		else
			compl = 0;

		/* Grow the send buffer if autosizing allows and it's nearly full. */
		if (sb->sb_flags & SB_AUTOSIZE &&
		    V_tcp_do_autosndbuf &&
		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
		    sb->sb_cc >= sb->sb_hiwat * 7 / 8) {
			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
			    V_tcp_autosndbuf_max);

			if (!sbreserve_locked(sb, newsize, so, NULL))
				sb->sb_flags &= ~SB_AUTOSIZE;
			else
				sowwakeup = 1;	/* room available */
		}
		if (sowwakeup)
			sowwakeup_locked(so);	/* also unlocks the sockbuf */
		else
			SOCKBUF_UNLOCK(sb);
		SOCKBUF_UNLOCK_ASSERT(sb);

		/* nothing to send */
		if (plen == 0) {
			KASSERT(m == NULL,
			    ("%s: nothing to send, but m != NULL", __func__));
			break;
		}

		if (__predict_false(toep->flags & TPF_FIN_SENT))
			panic("%s: excess tx.", __func__);

		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
		if (plen <= max_imm) {

			/* Immediate data tx */

			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
					toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr->wr_len, 16);
			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0,
			    sc->tt.tx_align);
			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
			nsegs = 0;
		} else {
			int wr_len;

			/* DSGL tx */

			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr_len, 16);
			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0,
			    sc->tt.tx_align);
			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
			    max_nsegs_1mbuf);
			if (wr_len & 0xf) {
				/* Zero the 8B pad at the end of the WR. */
				uint64_t *pad = (uint64_t *)
				    ((uintptr_t)txwr + wr_len);
				*pad = 0;
			}
		}

		KASSERT(toep->tx_credits >= credits,
		    ("%s: not enough credits", __func__));

		toep->tx_credits -= credits;
		toep->tx_nocompl += credits;
		toep->plen_nocompl += plen;
		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
		    toep->tx_nocompl >= toep->tx_total / 4)
			compl = 1;

		if (compl || toep->ulp_mode == ULP_MODE_RDMA) {
			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
			toep->tx_nocompl = 0;
			toep->plen_nocompl = 0;
		}

		tp->snd_nxt += plen;
		tp->snd_max += plen;

		SOCKBUF_LOCK(sb);
		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
		sb->sb_sndptr = sb_sndptr;
		SOCKBUF_UNLOCK(sb);

		toep->flags |= TPF_TX_DATA_SENT;
		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
			toep->flags |= TPF_TX_SUSPENDED;

		/* Record the WR in a tx descriptor for credit reclamation. */
		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
		txsd->plen = plen;
		txsd->tx_credits = credits;
		txsd++;
		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
			toep->txsd_pidx = 0;
			txsd = &toep->txsd[0];
		}
		toep->txsd_avail--;

		t4_l2t_send(sc, wr, toep->l2te);
	} while (m != NULL);

	/* Send a FIN if requested, but only if there's no more data to send */
	if (m == NULL && toep->flags & TPF_SEND_FIN)
		close_conn(sc, toep);
}

/*
 * Drop plen bytes worth of whole PDUs from the head of the queue.  plen must
 * land exactly on a PDU boundary (asserted below).
 */
static inline void
rqdrop_locked(struct mbufq *q, int plen)
{
	struct mbuf *m;

	while (plen > 0) {
		m = mbufq_dequeue(q);

		/* Too many credits. */
		MPASS(m != NULL);
		M_ASSERTPKTHDR(m);

		/* Partial credits. */
		MPASS(plen >= m->m_pkthdr.len);

		plen -= m->m_pkthdr.len;
		m_freem(m);
	}
}

/*
 * iSCSI analogue of t4_push_frames: transmit queued PDUs from ulp_pduq,
 * moving each sent PDU onto ulp_pdu_reclaimq until its credits are returned.
 * drop is the number of bytes (whole PDUs) already acked by the hw.
 */
void
t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
{
	struct mbuf *sndptr, *m;
	struct fw_ofld_tx_data_wr *txwr;
	struct wrqe *wr;
	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
	u_int adjusted_plen, ulp_submode;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);
	int tx_credits, shove;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
	struct mbufq *pduq = &toep->ulp_pduq;
	static const u_int ulp_extra_len[] = {0, 4, 4, 8};

	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
	KASSERT(toep->ulp_mode == ULP_MODE_ISCSI,
	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
		return;

	/*
	 * This function doesn't resume by itself.  Someone else must clear the
	 * flag and call this function.
847 */ 848 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 849 KASSERT(drop == 0, 850 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 851 return; 852 } 853 854 if (drop) 855 rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); 856 857 while ((sndptr = mbufq_first(pduq)) != NULL) { 858 M_ASSERTPKTHDR(sndptr); 859 860 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 861 max_imm = max_imm_payload(tx_credits); 862 max_nsegs = max_dsgl_nsegs(tx_credits); 863 864 plen = 0; 865 nsegs = 0; 866 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 867 for (m = sndptr; m != NULL; m = m->m_next) { 868 int n = sglist_count(mtod(m, void *), m->m_len); 869 870 nsegs += n; 871 plen += m->m_len; 872 873 /* 874 * This mbuf would send us _over_ the nsegs limit. 875 * Suspend tx because the PDU can't be sent out. 876 */ 877 if (plen > max_imm && nsegs > max_nsegs) { 878 toep->flags |= TPF_TX_SUSPENDED; 879 return; 880 } 881 882 if (max_nsegs_1mbuf < n) 883 max_nsegs_1mbuf = n; 884 } 885 886 if (__predict_false(toep->flags & TPF_FIN_SENT)) 887 panic("%s: excess tx.", __func__); 888 889 /* 890 * We have a PDU to send. All of it goes out in one WR so 'm' 891 * is NULL. A PDU's length is always a multiple of 4. 892 */ 893 MPASS(m == NULL); 894 MPASS((plen & 3) == 0); 895 MPASS(sndptr->m_pkthdr.len == plen); 896 897 shove = !(tp->t_flags & TF_MORETOCOME); 898 ulp_submode = mbuf_ulp_submode(sndptr); 899 MPASS(ulp_submode < nitems(ulp_extra_len)); 900 901 /* 902 * plen doesn't include header and data digests, which are 903 * generated and inserted in the right places by the TOE, but 904 * they do occupy TCP sequence space and need to be accounted 905 * for. 906 */ 907 adjusted_plen = plen + ulp_extra_len[ulp_submode]; 908 if (plen <= max_imm) { 909 910 /* Immediate data tx */ 911 912 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 913 toep->ofld_txq); 914 if (wr == NULL) { 915 /* XXX: how will we recover from this? 
*/ 916 toep->flags |= TPF_TX_SUSPENDED; 917 return; 918 } 919 txwr = wrtod(wr); 920 credits = howmany(wr->wr_len, 16); 921 write_tx_wr(txwr, toep, plen, adjusted_plen, credits, 922 shove, ulp_submode, sc->tt.tx_align); 923 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 924 nsegs = 0; 925 } else { 926 int wr_len; 927 928 /* DSGL tx */ 929 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 930 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 931 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 932 if (wr == NULL) { 933 /* XXX: how will we recover from this? */ 934 toep->flags |= TPF_TX_SUSPENDED; 935 return; 936 } 937 txwr = wrtod(wr); 938 credits = howmany(wr_len, 16); 939 write_tx_wr(txwr, toep, 0, adjusted_plen, credits, 940 shove, ulp_submode, sc->tt.tx_align); 941 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 942 max_nsegs_1mbuf); 943 if (wr_len & 0xf) { 944 uint64_t *pad = (uint64_t *) 945 ((uintptr_t)txwr + wr_len); 946 *pad = 0; 947 } 948 } 949 950 KASSERT(toep->tx_credits >= credits, 951 ("%s: not enough credits", __func__)); 952 953 m = mbufq_dequeue(pduq); 954 MPASS(m == sndptr); 955 mbufq_enqueue(&toep->ulp_pdu_reclaimq, m); 956 957 toep->tx_credits -= credits; 958 toep->tx_nocompl += credits; 959 toep->plen_nocompl += plen; 960 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 961 toep->tx_nocompl >= toep->tx_total / 4) { 962 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 963 toep->tx_nocompl = 0; 964 toep->plen_nocompl = 0; 965 } 966 967 tp->snd_nxt += adjusted_plen; 968 tp->snd_max += adjusted_plen; 969 970 toep->flags |= TPF_TX_DATA_SENT; 971 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 972 toep->flags |= TPF_TX_SUSPENDED; 973 974 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 975 txsd->plen = plen; 976 txsd->tx_credits = credits; 977 txsd++; 978 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 979 toep->txsd_pidx = 0; 980 txsd = &toep->txsd[0]; 981 } 982 toep->txsd_avail--; 983 984 t4_l2t_send(sc, wr, toep->l2te); 985 } 986 
987 /* Send a FIN if requested, but only if there are no more PDUs to send */ 988 if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN) 989 close_conn(sc, toep); 990} 991 992int 993t4_tod_output(struct toedev *tod, struct tcpcb *tp) 994{ 995 struct adapter *sc = tod->tod_softc; 996#ifdef INVARIANTS 997 struct inpcb *inp = tp->t_inpcb; 998#endif 999 struct toepcb *toep = tp->t_toe; 1000 1001 INP_WLOCK_ASSERT(inp); 1002 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1003 ("%s: inp %p dropped.", __func__, inp)); 1004 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1005 1006 if (toep->ulp_mode == ULP_MODE_ISCSI) 1007 t4_push_pdus(sc, toep, 0); 1008 else 1009 t4_push_frames(sc, toep, 0); 1010 1011 return (0); 1012} 1013 1014int 1015t4_send_fin(struct toedev *tod, struct tcpcb *tp) 1016{ 1017 struct adapter *sc = tod->tod_softc; 1018#ifdef INVARIANTS 1019 struct inpcb *inp = tp->t_inpcb; 1020#endif 1021 struct toepcb *toep = tp->t_toe; 1022 1023 INP_WLOCK_ASSERT(inp); 1024 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1025 ("%s: inp %p dropped.", __func__, inp)); 1026 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1027 1028 toep->flags |= TPF_SEND_FIN; 1029 if (tp->t_state >= TCPS_ESTABLISHED) { 1030 if (toep->ulp_mode == ULP_MODE_ISCSI) 1031 t4_push_pdus(sc, toep, 0); 1032 else 1033 t4_push_frames(sc, toep, 0); 1034 } 1035 1036 return (0); 1037} 1038 1039int 1040t4_send_rst(struct toedev *tod, struct tcpcb *tp) 1041{ 1042 struct adapter *sc = tod->tod_softc; 1043#if defined(INVARIANTS) 1044 struct inpcb *inp = tp->t_inpcb; 1045#endif 1046 struct toepcb *toep = tp->t_toe; 1047 1048 INP_WLOCK_ASSERT(inp); 1049 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1050 ("%s: inp %p dropped.", __func__, inp)); 1051 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1052 1053 /* hmmmm */ 1054 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 1055 ("%s: flowc for tid %u [%s] not sent already", 1056 __func__, toep->tid, tcpstates[tp->t_state])); 1057 1058 send_reset(sc, 
	    toep, 0);
	return (0);
}

/*
 * Peer has sent us a FIN.
 */
static int
do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = NULL;
	struct socket *so;
	struct sockbuf *sb;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PEER_CLOSE,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	/*
	 * The tid may still belong to an embryonic (synq) connection, in
	 * which case there is no full toepcb/inp to operate on yet.
	 */
	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* Lock order: pcbinfo (read) before the inp write lock. */
	CURVNET_SET(toep->vnet);
	INP_INFO_RLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
	    tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		goto done;

	tp->rcv_nxt++;	/* FIN */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	/* Settle any receive data the hardware placed directly via DDP. */
	if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) {
		handle_ddp_close(toep, tp, sb, cpl->rcv_nxt);
	}
	socantrcvmore_locked(so);	/* unlocks the sockbuf */

	if (toep->ulp_mode != ULP_MODE_RDMA) {
		KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
		    ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
		    be32toh(cpl->rcv_nxt)));
	}

	/* Advance the TCP state machine for a received FIN. */
	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */

	case TCPS_ESTABLISHED:
		tcp_state_change(tp, TCPS_CLOSE_WAIT);
		break;

	case TCPS_FIN_WAIT_1:
		tcp_state_change(tp, TCPS_CLOSING);
		break;

	case TCPS_FIN_WAIT_2:
		/*
		 * tcp_twstart() moves the connection to TIME_WAIT and drops
		 * the inp lock (asserted below); re-lock for the final CPL
		 * bookkeeping.
		 */
		tcp_twstart(tp);
		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the inp */
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();

		INP_WLOCK(inp);
		final_cpl_received(toep);
		return (0);

	default:
		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
		    __func__, tid, tp->t_state);
	}
done:
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	return (0);
}

/*
 * Peer has ACK'd our FIN.
 */
static int
do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = NULL;
	struct socket *so = NULL;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_CLOSE_CON_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	CURVNET_SET(toep->vnet);
	INP_INFO_RLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		goto done;

	so = inp->inp_socket;
	tp->snd_una = be32toh(cpl->snd_nxt) - 1;	/* exclude FIN */

	switch (tp->t_state) {
	case TCPS_CLOSING:	/* see TCPS_FIN_WAIT_2 in do_peer_close too */
		tcp_twstart(tp);
release:
		/*
		 * tcp_twstart()/tcp_close() dropped the inp lock; re-acquire
		 * it for the final CPL bookkeeping.
		 */
		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the inp */
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();

		INP_WLOCK(inp);
		final_cpl_received(toep);	/* no more CPLs expected */

		return (0);
	case TCPS_LAST_ACK:
		if (tcp_close(tp))
			INP_WUNLOCK(inp);
		goto release;

	case TCPS_FIN_WAIT_1:
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			soisdisconnected(so);
		tcp_state_change(tp, TCPS_FIN_WAIT_2);
		break;

	default:
		log(LOG_ERR,
		    "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
		    __func__, tid, tcpstates[tp->t_state]);
	}
done:
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	return (0);
}

/*
 * Build and send a CPL_ABORT_RPL acknowledging an abort request from the
 * hardware.  Panics if the work request cannot be allocated (XXX).
 */
void
send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid,
    int rst_status)
{
	struct wrqe *wr;
	struct cpl_abort_rpl *cpl;

	wr = alloc_wrqe(sizeof(*cpl), ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	cpl = wrtod(wr);

	INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid);
	cpl->cmd = rst_status;

	t4_wrq_tx(sc, wr);
}

/*
 * Map a hardware abort reason to the errno reported on the socket.
 */
static int
abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
{
	switch (abort_reason) {
	case CPL_ERR_BAD_SYN:
	case CPL_ERR_CONN_RESET:
		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
	case CPL_ERR_XMIT_TIMEDOUT:
	case CPL_ERR_PERSIST_TIMEDOUT:
	case CPL_ERR_FINWAIT2_TIMEDOUT:
	case CPL_ERR_KEEPALIVE_TIMEDOUT:
		return (ETIMEDOUT);
	default:
		return (EIO);
	}
}

/*
 * TCP RST from the peer, timeout, or some other such critical error.
 */
static int
do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct sge_wrq *ofld_txq = toep->ofld_txq;
	struct inpcb *inp;
	struct tcpcb *tp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_REQ_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	/* Embryonic (synq) connections are handled by their own variant. */
	if (toep->flags & TPF_SYNQE)
		return (do_abort_req_synqe(iq, rss, m));

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	if (negative_advice(cpl->status)) {
		CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
		    __func__, cpl->status, tid, toep->flags);
		return (0);	/* Ignore negative advice */
	}

	inp = toep->inp;
	CURVNET_SET(toep->vnet);
	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
	INP_WLOCK(inp);

	tp = intotcpcb(inp);

	CTR6(KTR_CXGBE,
	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
	    inp->inp_flags, cpl->status);

	/*
	 * If we'd initiated an abort earlier the reply to it is responsible for
	 * cleaning up resources.  Otherwise we tear everything down right here
	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
	 */
	if (toep->flags & TPF_ABORT_SHUTDOWN) {
		INP_WUNLOCK(inp);
		goto done;
	}
	toep->flags |= TPF_ABORT_SHUTDOWN;

	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		struct socket *so = inp->inp_socket;

		/* Surface the abort reason to the application as an errno. */
		if (so != NULL)
			so_error_set(so, abort_status_to_errno(tp,
			    cpl->status));
		tp = tcp_close(tp);
		if (tp == NULL)
			INP_WLOCK(inp);	/* re-acquire */
	}

	final_cpl_received(toep);
done:
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
	return (0);
}

/*
 * Reply to the CPL_ABORT_REQ (send_reset)
 */
static int
do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_RPL_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (toep->flags & TPF_SYNQE)
		return (do_abort_rpl_synqe(iq, rss, m));

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
	    __func__, tid, toep, inp, cpl->status);

	KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
	    ("%s: wasn't expecting abort reply", __func__));

	INP_WLOCK(inp);
	final_cpl_received(toep);

	return (0);
}

/*
 * Received payload for a tid (CPL_RX_DATA).  The payload follows the CPL
 * header in the mbuf and is appended to the socket's receive buffer.
 */
static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	/*
	 * A CPL sequence number ahead of rcv_nxt means data was already
	 * placed directly into receive buffers by DDP; remember how much.
	 */
	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	if (tp->rcv_wnd < len) {
		KASSERT(toep->ulp_mode == ULP_MODE_RDMA,
		    ("%s: negative window size", __func__));
	}

	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		/* Data arrived after receive shutdown: drop the connection. */
		CURVNET_SET(toep->vnet);
		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();

		return (0);
	}

	/* receive buffer autosize */
	MPASS(toep->vnet == so->so_vnet);
	CURVNET_SET(toep->vnet);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		/* Did the chip's idea of DDP on/off change with this CPL? */
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
				    __func__));

				/* Fell out of DDP mode */
				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
				    DDP_BUF1_ACTIVE);

				if (ddp_placed)
					insert_ddp_data(toep, ddp_placed);
			}
		}

		/* Allow DDP again after the retry cooling-off period. */
		if ((toep->ddp_flags & DDP_OK) == 0 &&
		    time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
			toep->ddp_score = DDP_LOW_SCORE;
			toep->ddp_flags |= DDP_OK;
			CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
			    __func__, tid, time_uptime);
		}

		if (toep->ddp_flags & DDP_ON) {

			/*
			 * CPL_RX_DATA with DDP on can only be an indicate.  Ask
			 * soreceive to post a buffer or disable DDP.  The
			 * payload that arrived in this indicate is appended to
			 * the socket buffer as usual.
			 */

#if 0
			CTR5(KTR_CXGBE,
			    "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
			    __func__, tid, toep->flags, be32toh(cpl->seq), len);
#endif
			sb->sb_flags |= SB_DDP_INDICATE;
		} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
		    tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {

			/*
			 * DDP allowed but isn't on (and a request to switch it
			 * on isn't pending either), and conditions are ripe for
			 * it to work.  Switch it on.
			 */

			enable_ddp(sc, toep);
		}
	}

	/* Append the payload; return freed-up window space as rx credits. */
	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	toep->rx_credits += toep->sb_cc - sb->sb_cc;
	sbappendstream_locked(sb, m);
	toep->sb_cc = sb->sb_cc;
	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
		int credits;

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);

	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
	return (0);
}

/* Field accessors for the header of a CPL_FW4_ACK message. */
#define S_CPL_FW4_ACK_OPCODE    24
#define M_CPL_FW4_ACK_OPCODE    0xff
#define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE)
#define G_CPL_FW4_ACK_OPCODE(x) \
    (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE)

#define S_CPL_FW4_ACK_FLOWID    0
#define M_CPL_FW4_ACK_FLOWID    0xffffff
#define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID)
#define G_CPL_FW4_ACK_FLOWID(x) \
    (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID)

#define S_CPL_FW4_ACK_CR        24
#define M_CPL_FW4_ACK_CR        0xff
#define V_CPL_FW4_ACK_CR(x)     ((x) << S_CPL_FW4_ACK_CR)
#define G_CPL_FW4_ACK_CR(x)     (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR)

#define S_CPL_FW4_ACK_SEQVAL    0
#define M_CPL_FW4_ACK_SEQVAL    0x1
#define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL)
#define G_CPL_FW4_ACK_SEQVAL(x) \
    (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL)
#define F_CPL_FW4_ACK_SEQVAL    V_CPL_FW4_ACK_SEQVAL(1U)

/*
 * CPL_FW4_ACK: the firmware is returning tx credits for work requests it
 * has completed.  Reclaims tx descriptors, updates snd_una if the CPL
 * carries a valid sequence number, resumes a suspended tx path when enough
 * credits are back, and drops ACKed data from so_snd.
 */
static int
do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	uint8_t credits = cpl->credits;
	struct ofld_tx_sdesc *txsd;
	int plen;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	/*
	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
	 * now this comes back carrying the credits for the flowc.
	 */
	if (__predict_false(toep->flags & TPF_SYNQE)) {
		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
		    ("%s: credits for a synq entry %p", __func__, toep));
		return (0);
	}

	inp = toep->inp;

	KASSERT(opcode == CPL_FW4_ACK,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_WLOCK(inp);

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
		INP_WUNLOCK(inp);
		return (0);
	}

	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));

	tp = intotcpcb(inp);

	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
		tcp_seq snd_una = be32toh(cpl->snd_una);

#ifdef INVARIANTS
		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
			log(LOG_ERR,
			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
			    __func__, snd_una, toep->tid, tp->snd_una);
		}
#endif

		if (tp->snd_una != snd_una) {
			tp->snd_una = snd_una;
			tp->ts_recent_age = tcp_ts_getticks();
		}
	}

	/* Walk the tx descriptor ring, reclaiming completed work requests. */
	so = inp->inp_socket;
	txsd = &toep->txsd[toep->txsd_cidx];
	plen = 0;
	while (credits) {
		KASSERT(credits >= txsd->tx_credits,
		    ("%s: too many (or partial) credits", __func__));
		credits -= txsd->tx_credits;
		toep->tx_credits += txsd->tx_credits;
		plen += txsd->plen;
		txsd++;
		toep->txsd_avail++;
		KASSERT(toep->txsd_avail <= toep->txsd_total,
		    ("%s: txsd avail > total", __func__));
		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
			txsd = &toep->txsd[0];
			toep->txsd_cidx = 0;
		}
	}

	if (toep->tx_credits == toep->tx_total) {
		toep->tx_nocompl = 0;
		toep->plen_nocompl = 0;
	}

	/* Resume a suspended tx path once a quarter of the credits are back. */
	if (toep->flags & TPF_TX_SUSPENDED &&
	    toep->tx_credits >= toep->tx_total / 4) {
		toep->flags &= ~TPF_TX_SUSPENDED;
		CURVNET_SET(toep->vnet);
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_push_pdus(sc, toep, plen);
		else
			t4_push_frames(sc, toep, plen);
		CURVNET_RESTORE();
	} else if (plen > 0) {
		struct sockbuf *sb = &so->so_snd;
		int sbu;

		SOCKBUF_LOCK(sb);
		sbu = sb->sb_cc;
		if (toep->ulp_mode == ULP_MODE_ISCSI) {

			if (__predict_false(sbu > 0)) {
				/*
				 * The data transmitted before the tid's ULP
				 * mode changed to ISCSI is still in so_snd.
				 * Incoming credits should account for so_snd
				 * first.
				 */
				sbdrop_locked(sb, min(sbu, plen));
				plen -= min(sbu, plen);
			}
			sowwakeup_locked(so);	/* unlocks so_snd */
			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
		} else {
			sbdrop_locked(sb, plen);
			sowwakeup_locked(so);	/* unlocks so_snd */
		}
		SOCKBUF_UNLOCK_ASSERT(sb);
	}

	INP_WUNLOCK(inp);

	return (0);
}

/*
 * CPL_SET_TCB_RPL is never expected here; TOM does not request replies for
 * its TCB writes (see the comment in the body).
 */
int
do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
#ifdef INVARIANTS
	struct adapter *sc = iq->adapter;
#endif
	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_SET_TCB_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	MPASS(iq != &sc->sge.fwq);

	/*
	 * TOM and/or other ULPs don't request replies for CPL_SET_TCB or
	 * CPL_SET_TCB_FIELD requests.  This can easily change and when it does
	 * the dispatch code will go here.
	 */
#ifdef INVARIANTS
	panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__,
	    tid, iq);
#else
	log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n",
	    __func__, tid, iq);
#endif

	return (0);
}

/*
 * Queue a CPL_SET_TCB_FIELD work request that updates (mask/val) the given
 * TCB word for a tid.  A reply, if requested, is steered to ingress queue
 * "iqid" and tagged with "cookie".  Panics if the work request cannot be
 * allocated (XXX).
 */
void
t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, int tid,
    uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie, int iqid)
{
	struct wrqe *wr;
	struct cpl_set_tcb_field *req;

	MPASS((cookie & ~M_COOKIE) == 0);
	MPASS((iqid & ~M_QUEUENO) == 0);

	wr = alloc_wrqe(sizeof(*req), wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, tid);
	req->reply_ctrl = htobe16(V_QUEUENO(iqid));
	if (reply == 0)
		req->reply_ctrl |= htobe16(F_NO_REPLY);
	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
	req->mask = htobe64(mask);
	req->val = htobe64(val);

	t4_wrq_tx(sc, wr);
}

/* Register this file's CPL handlers with the dispatch table. */
void
t4_init_cpl_io_handlers(void)
{

	t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack);
}

/* Unregister the handlers installed by t4_init_cpl_io_handlers(). */
void
t4_uninit_cpl_io_handlers(void)
{

	t4_register_cpl_handler(CPL_PEER_CLOSE, NULL);
	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL);
	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL);
	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, NULL);
	t4_register_cpl_handler(CPL_RX_DATA, NULL);
	t4_register_cpl_handler(CPL_FW4_ACK, NULL);
}
#endif