t4_cpl_io.c revision 346852
1/*- 2 * Copyright (c) 2012, 2015 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c 346852 2019-04-28 18:50:25Z np $"); 30 31#include "opt_inet.h" 32#include "opt_inet6.h" 33 34#ifdef TCP_OFFLOAD 35#include <sys/param.h> 36#include <sys/aio.h> 37#include <sys/file.h> 38#include <sys/kernel.h> 39#include <sys/ktr.h> 40#include <sys/module.h> 41#include <sys/proc.h> 42#include <sys/protosw.h> 43#include <sys/domain.h> 44#include <sys/socket.h> 45#include <sys/socketvar.h> 46#include <sys/sglist.h> 47#include <sys/taskqueue.h> 48#include <netinet/in.h> 49#include <netinet/in_pcb.h> 50#include <netinet/ip.h> 51#include <netinet/ip6.h> 52#define TCPSTATES 53#include <netinet/tcp_fsm.h> 54#include <netinet/tcp_seq.h> 55#include <netinet/tcp_var.h> 56#include <netinet/toecore.h> 57 58#include <security/mac/mac_framework.h> 59 60#include <vm/vm.h> 61#include <vm/vm_extern.h> 62#include <vm/pmap.h> 63#include <vm/vm_map.h> 64#include <vm/vm_page.h> 65 66#include "common/common.h" 67#include "common/t4_msg.h" 68#include "common/t4_regs.h" 69#include "common/t4_tcb.h" 70#include "tom/t4_tom_l2t.h" 71#include "tom/t4_tom.h" 72 73VNET_DECLARE(int, tcp_do_autosndbuf); 74#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) 75VNET_DECLARE(int, tcp_autosndbuf_inc); 76#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) 77VNET_DECLARE(int, tcp_autosndbuf_max); 78#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) 79VNET_DECLARE(int, tcp_do_autorcvbuf); 80#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) 81VNET_DECLARE(int, tcp_autorcvbuf_inc); 82#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) 83VNET_DECLARE(int, tcp_autorcvbuf_max); 84#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) 85 86static void t4_aiotx_cancel(struct kaiocb *job); 87static void t4_aiotx_queue_toep(struct toepcb *toep); 88 89static size_t 90aiotx_mbuf_pgoff(struct mbuf *m) 91{ 92 struct aiotx_buffer *ab; 93 94 MPASS(IS_AIOTX_MBUF(m)); 95 ab = m->m_ext.ext_arg1; 96 return ((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE); 97} 98 99static vm_page_t * 100aiotx_mbuf_pages(struct mbuf *m) 101{ 102 struct aiotx_buffer *ab; 103 int npages; 104 105 MPASS(IS_AIOTX_MBUF(m)); 106 ab = m->m_ext.ext_arg1; 107 npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE; 108 return (ab->ps.pages + npages); 109} 110 111void 112send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) 113{ 114 struct wrqe *wr; 115 struct fw_flowc_wr *flowc; 116 unsigned int nparams, flowclen, paramidx; 117 struct vi_info *vi = toep->vi; 118 struct port_info *pi = vi->pi; 119 struct adapter *sc = pi->adapter; 120 unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; 121 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 122 123 KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), 124 ("%s: flowc for tid %u sent already", __func__, toep->tid)); 125 126 if (ftxp != NULL) 127 nparams = 8; 128 else 129 nparams = 6; 130 if (toep->ulp_mode == ULP_MODE_TLS) 131 nparams++; 132 if (toep->tls.fcplenmax != 0) 133 nparams++; 134 if (toep->tc_idx != -1) { 135 MPASS(toep->tc_idx >= 0 && 136 toep->tc_idx < sc->chip_params->nsched_cls); 137 nparams++; 138 } 139 140 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 141 142 wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); 143 if (wr == NULL) { 144 /* XXX */ 145 panic("%s: allocation failure.", __func__); 146 } 147 flowc = wrtod(wr); 148 memset(flowc, 0, wr->wr_len); 149 150 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 151 V_FW_FLOWC_WR_NPARAMS(nparams)); 152 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 153 V_FW_WR_FLOWID(toep->tid)); 154 155#define FLOWC_PARAM(__m, __v) \ 156 do { \ 157 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ 158 flowc->mnemval[paramidx].val = htobe32(__v); \ 159 paramidx++; \ 160 } while (0) 161 162 paramidx = 0; 163 164 FLOWC_PARAM(PFNVFN, pfvf); 165 FLOWC_PARAM(CH, pi->tx_chan); 166 FLOWC_PARAM(PORT, pi->tx_chan); 167 FLOWC_PARAM(IQID, toep->ofld_rxq->iq.abs_id); 168 if (ftxp) { 169 uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); 170 171 FLOWC_PARAM(SNDNXT, ftxp->snd_nxt); 172 FLOWC_PARAM(RCVNXT, ftxp->rcv_nxt); 173 FLOWC_PARAM(SNDBUF, sndbuf); 174 FLOWC_PARAM(MSS, ftxp->mss); 175 176 CTR6(KTR_CXGBE, 177 "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", 178 __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, 179 ftxp->rcv_nxt); 180 } else { 181 FLOWC_PARAM(SNDBUF, 512); 182 FLOWC_PARAM(MSS, 512); 183 184 CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); 185 } 186 if (toep->ulp_mode == ULP_MODE_TLS) 187 FLOWC_PARAM(ULP_MODE, toep->ulp_mode); 188 if (toep->tls.fcplenmax != 0) 189 FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax); 190 if (toep->tc_idx != -1) 191 FLOWC_PARAM(SCHEDCLASS, toep->tc_idx); 192#undef FLOWC_PARAM 193 194 KASSERT(paramidx == nparams, ("nparams mismatch")); 195 196 txsd->tx_credits = howmany(flowclen, 16); 197 txsd->plen = 0; 198 KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, 199 ("%s: not enough credits (%d)", __func__, toep->tx_credits)); 200 toep->tx_credits -= txsd->tx_credits; 201 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 202 toep->txsd_pidx = 0; 203 toep->txsd_avail--; 204 205 toep->flags |= TPF_FLOWC_WR_SENT; 206 t4_wrq_tx(sc, wr); 207} 208 209#ifdef RATELIMIT 210/* 211 * Input is Bytes/second (so_max_pacing-rate), chip counts in Kilobits/second. 212 */ 213static int 214update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps) 215{ 216 int tc_idx, rc; 217 const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000; 218 const int port_id = toep->vi->pi->port_id; 219 220 CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps); 221 222 if (kbps == 0) { 223 /* unbind */ 224 tc_idx = -1; 225 } else { 226 rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx); 227 if (rc != 0) 228 return (rc); 229 MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls); 230 } 231 232 if (toep->tc_idx != tc_idx) { 233 struct wrqe *wr; 234 struct fw_flowc_wr *flowc; 235 int nparams = 1, flowclen, flowclen16; 236 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 237 238 flowclen = sizeof(*flowc) + nparams * sizeof(struct 239 fw_flowc_mnemval); 240 flowclen16 = howmany(flowclen, 16); 241 if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0 || 242 (wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq)) == NULL) { 243 if (tc_idx >= 0) 244 t4_release_cl_rl_kbps(sc, port_id, tc_idx); 245 return (ENOMEM); 246 } 247 248 flowc = wrtod(wr); 249 memset(flowc, 0, wr->wr_len); 250 251 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 252 V_FW_FLOWC_WR_NPARAMS(nparams)); 253 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | 254 V_FW_WR_FLOWID(toep->tid)); 255 256 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 257 if (tc_idx == -1) 258 flowc->mnemval[0].val = htobe32(0xff); 259 else 260 flowc->mnemval[0].val = htobe32(tc_idx); 261 262 txsd->tx_credits = flowclen16; 263 txsd->plen = 0; 264 toep->tx_credits -= txsd->tx_credits; 265 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 266 toep->txsd_pidx = 0; 267 toep->txsd_avail--; 268 t4_wrq_tx(sc, wr); 269 } 270 271 if (toep->tc_idx >= 0) 272 t4_release_cl_rl_kbps(sc, port_id, toep->tc_idx); 273 toep->tc_idx = tc_idx; 274 275 return (0); 276} 277#endif 278 279void 280send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) 281{ 282 struct wrqe *wr; 283 struct cpl_abort_req *req; 284 int tid = toep->tid; 285 struct inpcb *inp = toep->inp; 286 struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ 287 288 INP_WLOCK_ASSERT(inp); 289 290 CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", 291 __func__, toep->tid, 292 inp->inp_flags & INP_DROPPED ? "inp dropped" : 293 tcpstates[tp->t_state], 294 toep->flags, inp->inp_flags, 295 toep->flags & TPF_ABORT_SHUTDOWN ? 296 " (abort already in progress)" : ""); 297 298 if (toep->flags & TPF_ABORT_SHUTDOWN) 299 return; /* abort already in progress */ 300 301 toep->flags |= TPF_ABORT_SHUTDOWN; 302 303 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 304 ("%s: flowc_wr not sent for tid %d.", __func__, tid)); 305 306 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 307 if (wr == NULL) { 308 /* XXX */ 309 panic("%s: allocation failure.", __func__); 310 } 311 req = wrtod(wr); 312 313 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); 314 if (inp->inp_flags & INP_DROPPED) 315 req->rsvd0 = htobe32(snd_nxt); 316 else 317 req->rsvd0 = htobe32(tp->snd_nxt); 318 req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); 319 req->cmd = CPL_ABORT_SEND_RST; 320 321 /* 322 * XXX: What's the correct way to tell that the inp hasn't been detached 323 * from its socket? Should I even be flushing the snd buffer here? 324 */ 325 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 326 struct socket *so = inp->inp_socket; 327 328 if (so != NULL) /* because I'm not sure. See comment above */ 329 sbflush(&so->so_snd); 330 } 331 332 t4_l2t_send(sc, wr, toep->l2te); 333} 334 335/* 336 * Called when a connection is established to translate the TCP options 337 * reported by HW to FreeBSD's native format. 338 */ 339static void 340assign_rxopt(struct tcpcb *tp, unsigned int opt) 341{ 342 struct toepcb *toep = tp->t_toe; 343 struct inpcb *inp = tp->t_inpcb; 344 struct adapter *sc = td_adapter(toep->td); 345 int n; 346 347 INP_LOCK_ASSERT(inp); 348 349 if (inp->inp_inc.inc_flags & INC_ISIPV6) 350 n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 351 else 352 n = sizeof(struct ip) + sizeof(struct tcphdr); 353 tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; 354 355 if (G_TCPOPT_TSTAMP(opt)) { 356 tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ 357 tp->ts_recent = 0; /* hmmm */ 358 tp->ts_recent_age = tcp_ts_getticks(); 359 tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; 360 } 361 362 CTR5(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u), mss %u", __func__, 363 toep->tid, G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)], 364 tp->t_maxseg); 365 366 if (G_TCPOPT_SACK(opt)) 367 tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ 368 else 369 tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ 370 371 if (G_TCPOPT_WSCALE_OK(opt)) 372 tp->t_flags |= TF_RCVD_SCALE; 373 374 /* Doing window scaling? */ 375 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == 376 (TF_RCVD_SCALE | TF_REQ_SCALE)) { 377 tp->rcv_scale = tp->request_r_scale; 378 tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); 379 } 380} 381 382/* 383 * Completes some final bits of initialization for just established connections 384 * and changes their state to TCPS_ESTABLISHED. 385 * 386 * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. 387 */ 388void 389make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, 390 uint16_t opt) 391{ 392 struct inpcb *inp = toep->inp; 393 struct socket *so = inp->inp_socket; 394 struct tcpcb *tp = intotcpcb(inp); 395 long bufsize; 396 uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ 397 uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ 398 uint16_t tcpopt = be16toh(opt); 399 struct flowc_tx_params ftxp; 400 401 INP_WLOCK_ASSERT(inp); 402 KASSERT(tp->t_state == TCPS_SYN_SENT || 403 tp->t_state == TCPS_SYN_RECEIVED, 404 ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); 405 406 CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p", 407 __func__, toep->tid, so, inp, tp, toep); 408 409 tcp_state_change(tp, TCPS_ESTABLISHED); 410 tp->t_starttime = ticks; 411 TCPSTAT_INC(tcps_connects); 412 413 tp->irs = irs; 414 tcp_rcvseqinit(tp); 415 tp->rcv_wnd = toep->rx_credits << 10; 416 tp->rcv_adv += tp->rcv_wnd; 417 tp->last_ack_sent = tp->rcv_nxt; 418 419 /* 420 * If we were unable to send all rx credits via opt0, save the remainder 421 * in rx_credits so that they can be handed over with the next credit 422 * update. 423 */ 424 SOCKBUF_LOCK(&so->so_rcv); 425 bufsize = select_rcv_wnd(so); 426 SOCKBUF_UNLOCK(&so->so_rcv); 427 toep->rx_credits = bufsize - tp->rcv_wnd; 428 429 tp->iss = iss; 430 tcp_sendseqinit(tp); 431 tp->snd_una = iss + 1; 432 tp->snd_nxt = iss + 1; 433 tp->snd_max = iss + 1; 434 435 assign_rxopt(tp, tcpopt); 436 437 SOCKBUF_LOCK(&so->so_snd); 438 if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) 439 bufsize = V_tcp_autosndbuf_max; 440 else 441 bufsize = sbspace(&so->so_snd); 442 SOCKBUF_UNLOCK(&so->so_snd); 443 444 ftxp.snd_nxt = tp->snd_nxt; 445 ftxp.rcv_nxt = tp->rcv_nxt; 446 ftxp.snd_space = bufsize; 447 ftxp.mss = tp->t_maxseg; 448 send_flowc_wr(toep, &ftxp); 449 450 soisconnected(so); 451} 452 453int 454send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) 455{ 456 struct wrqe *wr; 457 struct cpl_rx_data_ack *req; 458 uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 459 460 KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); 461 462 wr = alloc_wrqe(sizeof(*req), toep->ctrlq); 463 if (wr == NULL) 464 return (0); 465 req = wrtod(wr); 466 467 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); 468 req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); 469 470 t4_wrq_tx(sc, wr); 471 return (credits); 472} 473 474void 475send_rx_modulate(struct adapter *sc, struct toepcb *toep) 476{ 477 struct wrqe *wr; 478 struct cpl_rx_data_ack *req; 479 480 wr = alloc_wrqe(sizeof(*req), toep->ctrlq); 481 if (wr == NULL) 482 return; 483 req = wrtod(wr); 484 485 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); 486 req->credit_dack = htobe32(F_RX_MODULATE_RX); 487 488 t4_wrq_tx(sc, wr); 489} 490 491void 492t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp) 493{ 494 struct adapter *sc = tod->tod_softc; 495 struct inpcb *inp = tp->t_inpcb; 496 struct socket *so = inp->inp_socket; 497 struct sockbuf *sb = &so->so_rcv; 498 struct toepcb *toep = tp->t_toe; 499 int credits; 500 501 INP_WLOCK_ASSERT(inp); 502 503 SOCKBUF_LOCK_ASSERT(sb); 504 KASSERT(toep->sb_cc >= sbused(sb), 505 ("%s: sb %p has more data (%d) than last time (%d).", 506 __func__, sb, sbused(sb), toep->sb_cc)); 507 508 credits = toep->sb_cc - sbused(sb); 509 toep->sb_cc = sbused(sb); 510 if (toep->ulp_mode == ULP_MODE_TLS) { 511 if (toep->tls.rcv_over >= credits) { 512 toep->tls.rcv_over -= credits; 513 credits = 0; 514 } else { 515 credits -= toep->tls.rcv_over; 516 toep->tls.rcv_over = 0; 517 } 518 } 519 toep->rx_credits += credits; 520 521 if (toep->rx_credits > 0 && 522 (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 || 523 (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || 524 toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) { 525 526 credits = send_rx_credits(sc, toep, toep->rx_credits); 527 toep->rx_credits -= credits; 528 tp->rcv_wnd += credits; 529 tp->rcv_adv += credits; 530 } else if (toep->flags & TPF_FORCE_CREDITS) 531 send_rx_modulate(sc, toep); 532} 533 534void 535t4_rcvd(struct toedev *tod, struct tcpcb *tp) 536{ 537 struct inpcb *inp = tp->t_inpcb; 538 struct socket *so = inp->inp_socket; 539 struct sockbuf *sb = &so->so_rcv; 540 541 SOCKBUF_LOCK(sb); 542 t4_rcvd_locked(tod, tp); 543 SOCKBUF_UNLOCK(sb); 544} 545 546/* 547 * Close a connection by sending a CPL_CLOSE_CON_REQ message. 548 */ 549int 550t4_close_conn(struct adapter *sc, struct toepcb *toep) 551{ 552 struct wrqe *wr; 553 struct cpl_close_con_req *req; 554 unsigned int tid = toep->tid; 555 556 CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, 557 toep->flags & TPF_FIN_SENT ? ", IGNORED" : ""); 558 559 if (toep->flags & TPF_FIN_SENT) 560 return (0); 561 562 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 563 ("%s: flowc_wr not sent for tid %u.", __func__, tid)); 564 565 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 566 if (wr == NULL) { 567 /* XXX */ 568 panic("%s: allocation failure.", __func__); 569 } 570 req = wrtod(wr); 571 572 req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | 573 V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); 574 req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | 575 V_FW_WR_FLOWID(tid)); 576 req->wr.wr_lo = cpu_to_be64(0); 577 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 578 req->rsvd = 0; 579 580 toep->flags |= TPF_FIN_SENT; 581 toep->flags &= ~TPF_SEND_FIN; 582 t4_l2t_send(sc, wr, toep->l2te); 583 584 return (0); 585} 586 587#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) 588#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) 589 590/* Maximum amount of immediate data we could stuff in a WR */ 591static inline int 592max_imm_payload(int tx_credits) 593{ 594 const int n = 2; /* Use only up to 2 desc for imm. data WR */ 595 596 KASSERT(tx_credits >= 0 && 597 tx_credits <= MAX_OFLD_TX_CREDITS, 598 ("%s: %d credits", __func__, tx_credits)); 599 600 if (tx_credits < MIN_OFLD_TX_CREDITS) 601 return (0); 602 603 if (tx_credits >= (n * EQ_ESIZE) / 16) 604 return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); 605 else 606 return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); 607} 608 609/* Maximum number of SGL entries we could stuff in a WR */ 610static inline int 611max_dsgl_nsegs(int tx_credits) 612{ 613 int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ 614 int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; 615 616 KASSERT(tx_credits >= 0 && 617 tx_credits <= MAX_OFLD_TX_CREDITS, 618 ("%s: %d credits", __func__, tx_credits)); 619 620 if (tx_credits < MIN_OFLD_TX_CREDITS) 621 return (0); 622 623 nseg += 2 * (sge_pair_credits * 16 / 24); 624 if ((sge_pair_credits * 16) % 24 == 16) 625 nseg++; 626 627 return (nseg); 628} 629 630static inline void 631write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, 632 unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign) 633{ 634 struct fw_ofld_tx_data_wr *txwr = dst; 635 636 txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | 637 V_FW_WR_IMMDLEN(immdlen)); 638 txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | 639 V_FW_WR_LEN16(credits)); 640 txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) | 641 V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove)); 642 txwr->plen = htobe32(plen); 643 644 if (txalign > 0) { 645 struct tcpcb *tp = intotcpcb(toep->inp); 646 647 if (plen < 2 * tp->t_maxseg) 648 txwr->lsodisable_to_flags |= 649 htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); 650 else 651 txwr->lsodisable_to_flags |= 652 htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | 653 (tp->t_flags & TF_NODELAY ? 0 : 654 F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); 655 } 656} 657 658/* 659 * Generate a DSGL from a starting mbuf. The total number of segments and the 660 * maximum segments in any one mbuf are provided. 661 */ 662static void 663write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) 664{ 665 struct mbuf *m; 666 struct ulptx_sgl *usgl = dst; 667 int i, j, rc; 668 struct sglist sg; 669 struct sglist_seg segs[n]; 670 671 KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); 672 673 sglist_init(&sg, n, segs); 674 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 675 V_ULPTX_NSGE(nsegs)); 676 677 i = -1; 678 for (m = start; m != stop; m = m->m_next) { 679 if (IS_AIOTX_MBUF(m)) 680 rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m), 681 aiotx_mbuf_pgoff(m), m->m_len); 682 else 683 rc = sglist_append(&sg, mtod(m, void *), m->m_len); 684 if (__predict_false(rc != 0)) 685 panic("%s: sglist_append %d", __func__, rc); 686 687 for (j = 0; j < sg.sg_nseg; i++, j++) { 688 if (i < 0) { 689 usgl->len0 = htobe32(segs[j].ss_len); 690 usgl->addr0 = htobe64(segs[j].ss_paddr); 691 } else { 692 usgl->sge[i / 2].len[i & 1] = 693 htobe32(segs[j].ss_len); 694 usgl->sge[i / 2].addr[i & 1] = 695 htobe64(segs[j].ss_paddr); 696 } 697#ifdef INVARIANTS 698 nsegs--; 699#endif 700 } 701 sglist_reset(&sg); 702 } 703 if (i & 1) 704 usgl->sge[i / 2].len[1] = htobe32(0); 705 KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", 706 __func__, nsegs, start, stop)); 707} 708 709/* 710 * Max number of SGL entries an offload tx work request can have. This is 41 711 * (1 + 40) for a full 512B work request. 712 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) 713 */ 714#define OFLD_SGL_LEN (41) 715 716/* 717 * Send data and/or a FIN to the peer. 718 * 719 * The socket's so_snd buffer consists of a stream of data starting with sb_mb 720 * and linked together with m_next. sb_sndptr, if set, is the last mbuf that 721 * was transmitted. 722 * 723 * drop indicates the number of bytes that should be dropped from the head of 724 * the send buffer. It is an optimization that lets do_fw4_ack avoid creating 725 * contention on the send buffer lock (before this change it used to do 726 * sowwakeup and then t4_push_frames right after that when recovering from tx 727 * stalls). When drop is set this function MUST drop the bytes and wake up any 728 * writers. 729 */ 730void 731t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) 732{ 733 struct mbuf *sndptr, *m, *sb_sndptr; 734 struct fw_ofld_tx_data_wr *txwr; 735 struct wrqe *wr; 736 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 737 struct inpcb *inp = toep->inp; 738 struct tcpcb *tp = intotcpcb(inp); 739 struct socket *so = inp->inp_socket; 740 struct sockbuf *sb = &so->so_snd; 741 int tx_credits, shove, compl, sowwakeup; 742 struct ofld_tx_sdesc *txsd; 743 bool aiotx_mbuf_seen; 744 745 INP_WLOCK_ASSERT(inp); 746 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 747 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 748 749 KASSERT(toep->ulp_mode == ULP_MODE_NONE || 750 toep->ulp_mode == ULP_MODE_TCPDDP || 751 toep->ulp_mode == ULP_MODE_TLS || 752 toep->ulp_mode == ULP_MODE_RDMA, 753 ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 754 755#ifdef VERBOSE_TRACES 756 CTR4(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", 757 __func__, toep->tid, toep->flags, tp->t_flags); 758#endif 759 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) 760 return; 761 762#ifdef RATELIMIT 763 if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && 764 (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { 765 inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; 766 } 767#endif 768 769 /* 770 * This function doesn't resume by itself. Someone else must clear the 771 * flag and call this function. 772 */ 773 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 774 KASSERT(drop == 0, 775 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 776 return; 777 } 778 779 txsd = &toep->txsd[toep->txsd_pidx]; 780 do { 781 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 782 max_imm = max_imm_payload(tx_credits); 783 max_nsegs = max_dsgl_nsegs(tx_credits); 784 785 SOCKBUF_LOCK(sb); 786 sowwakeup = drop; 787 if (drop) { 788 sbdrop_locked(sb, drop); 789 drop = 0; 790 } 791 sb_sndptr = sb->sb_sndptr; 792 sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb; 793 plen = 0; 794 nsegs = 0; 795 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 796 aiotx_mbuf_seen = false; 797 for (m = sndptr; m != NULL; m = m->m_next) { 798 int n; 799 800 if (IS_AIOTX_MBUF(m)) 801 n = sglist_count_vmpages(aiotx_mbuf_pages(m), 802 aiotx_mbuf_pgoff(m), m->m_len); 803 else 804 n = sglist_count(mtod(m, void *), m->m_len); 805 806 nsegs += n; 807 plen += m->m_len; 808 809 /* This mbuf sent us _over_ the nsegs limit, back out */ 810 if (plen > max_imm && nsegs > max_nsegs) { 811 nsegs -= n; 812 plen -= m->m_len; 813 if (plen == 0) { 814 /* Too few credits */ 815 toep->flags |= TPF_TX_SUSPENDED; 816 if (sowwakeup) { 817 if (!TAILQ_EMPTY( 818 &toep->aiotx_jobq)) 819 t4_aiotx_queue_toep( 820 toep); 821 sowwakeup_locked(so); 822 } else 823 SOCKBUF_UNLOCK(sb); 824 SOCKBUF_UNLOCK_ASSERT(sb); 825 return; 826 } 827 break; 828 } 829 830 if (IS_AIOTX_MBUF(m)) 831 aiotx_mbuf_seen = true; 832 if (max_nsegs_1mbuf < n) 833 max_nsegs_1mbuf = n; 834 sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ 835 836 /* This mbuf put us right at the max_nsegs limit */ 837 if (plen > max_imm && nsegs == max_nsegs) { 838 m = m->m_next; 839 break; 840 } 841 } 842 843 if (sbused(sb) > sb->sb_hiwat * 5 / 8 && 844 toep->plen_nocompl + plen >= sb->sb_hiwat / 4) 845 compl = 1; 846 else 847 compl = 0; 848 849 if (sb->sb_flags & SB_AUTOSIZE && 850 V_tcp_do_autosndbuf && 851 sb->sb_hiwat < V_tcp_autosndbuf_max && 852 sbused(sb) >= sb->sb_hiwat * 7 / 8) { 853 int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, 854 V_tcp_autosndbuf_max); 855 856 if (!sbreserve_locked(sb, newsize, so, NULL)) 857 sb->sb_flags &= ~SB_AUTOSIZE; 858 else 859 sowwakeup = 1; /* room available */ 860 } 861 if (sowwakeup) { 862 if (!TAILQ_EMPTY(&toep->aiotx_jobq)) 863 t4_aiotx_queue_toep(toep); 864 sowwakeup_locked(so); 865 } else 866 SOCKBUF_UNLOCK(sb); 867 SOCKBUF_UNLOCK_ASSERT(sb); 868 869 /* nothing to send */ 870 if (plen == 0) { 871 KASSERT(m == NULL, 872 ("%s: nothing to send, but m != NULL", __func__)); 873 break; 874 } 875 876 if (__predict_false(toep->flags & TPF_FIN_SENT)) 877 panic("%s: excess tx.", __func__); 878 879 shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); 880 if (plen <= max_imm && !aiotx_mbuf_seen) { 881 882 /* Immediate data tx */ 883 884 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 885 toep->ofld_txq); 886 if (wr == NULL) { 887 /* XXX: how will we recover from this? */ 888 toep->flags |= TPF_TX_SUSPENDED; 889 return; 890 } 891 txwr = wrtod(wr); 892 credits = howmany(wr->wr_len, 16); 893 write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, 894 sc->tt.tx_align); 895 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 896 nsegs = 0; 897 } else { 898 int wr_len; 899 900 /* DSGL tx */ 901 902 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 903 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 904 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 905 if (wr == NULL) { 906 /* XXX: how will we recover from this? */ 907 toep->flags |= TPF_TX_SUSPENDED; 908 return; 909 } 910 txwr = wrtod(wr); 911 credits = howmany(wr_len, 16); 912 write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, 913 sc->tt.tx_align); 914 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 915 max_nsegs_1mbuf); 916 if (wr_len & 0xf) { 917 uint64_t *pad = (uint64_t *) 918 ((uintptr_t)txwr + wr_len); 919 *pad = 0; 920 } 921 } 922 923 KASSERT(toep->tx_credits >= credits, 924 ("%s: not enough credits", __func__)); 925 926 toep->tx_credits -= credits; 927 toep->tx_nocompl += credits; 928 toep->plen_nocompl += plen; 929 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 930 toep->tx_nocompl >= toep->tx_total / 4) 931 compl = 1; 932 933 if (compl || toep->ulp_mode == ULP_MODE_RDMA) { 934 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 935 toep->tx_nocompl = 0; 936 toep->plen_nocompl = 0; 937 } 938 939 tp->snd_nxt += plen; 940 tp->snd_max += plen; 941 942 SOCKBUF_LOCK(sb); 943 KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); 944 sb->sb_sndptr = sb_sndptr; 945 SOCKBUF_UNLOCK(sb); 946 947 toep->flags |= TPF_TX_DATA_SENT; 948 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 949 toep->flags |= TPF_TX_SUSPENDED; 950 951 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 952 txsd->plen = plen; 953 txsd->tx_credits = credits; 954 txsd++; 955 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 956 toep->txsd_pidx = 0; 957 txsd = &toep->txsd[0]; 958 } 959 toep->txsd_avail--; 960 961 t4_l2t_send(sc, wr, toep->l2te); 962 } while (m != NULL); 963 964 /* Send a FIN if requested, but only if there's no more data to send */ 965 if (m == NULL && toep->flags & TPF_SEND_FIN) 966 t4_close_conn(sc, toep); 967} 968 969static inline void 970rqdrop_locked(struct mbufq *q, int plen) 971{ 972 struct mbuf *m; 973 974 while (plen > 0) { 975 m = mbufq_dequeue(q); 976 977 /* Too many credits. */ 978 MPASS(m != NULL); 979 M_ASSERTPKTHDR(m); 980 981 /* Partial credits. */ 982 MPASS(plen >= m->m_pkthdr.len); 983 984 plen -= m->m_pkthdr.len; 985 m_freem(m); 986 } 987} 988 989void 990t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) 991{ 992 struct mbuf *sndptr, *m; 993 struct fw_ofld_tx_data_wr *txwr; 994 struct wrqe *wr; 995 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 996 u_int adjusted_plen, ulp_submode; 997 struct inpcb *inp = toep->inp; 998 struct tcpcb *tp = intotcpcb(inp); 999 int tx_credits, shove; 1000 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 1001 struct mbufq *pduq = &toep->ulp_pduq; 1002 static const u_int ulp_extra_len[] = {0, 4, 4, 8}; 1003 1004 INP_WLOCK_ASSERT(inp); 1005 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 1006 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 1007 KASSERT(toep->ulp_mode == ULP_MODE_ISCSI, 1008 ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 1009 1010 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) 1011 return; 1012 1013 /* 1014 * This function doesn't resume by itself. Someone else must clear the 1015 * flag and call this function. 1016 */ 1017 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 1018 KASSERT(drop == 0, 1019 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 1020 return; 1021 } 1022 1023 if (drop) 1024 rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); 1025 1026 while ((sndptr = mbufq_first(pduq)) != NULL) { 1027 M_ASSERTPKTHDR(sndptr); 1028 1029 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 1030 max_imm = max_imm_payload(tx_credits); 1031 max_nsegs = max_dsgl_nsegs(tx_credits); 1032 1033 plen = 0; 1034 nsegs = 0; 1035 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 1036 for (m = sndptr; m != NULL; m = m->m_next) { 1037 int n = sglist_count(mtod(m, void *), m->m_len); 1038 1039 nsegs += n; 1040 plen += m->m_len; 1041 1042 /* 1043 * This mbuf would send us _over_ the nsegs limit. 1044 * Suspend tx because the PDU can't be sent out. 1045 */ 1046 if (plen > max_imm && nsegs > max_nsegs) { 1047 toep->flags |= TPF_TX_SUSPENDED; 1048 return; 1049 } 1050 1051 if (max_nsegs_1mbuf < n) 1052 max_nsegs_1mbuf = n; 1053 } 1054 1055 if (__predict_false(toep->flags & TPF_FIN_SENT)) 1056 panic("%s: excess tx.", __func__); 1057 1058 /* 1059 * We have a PDU to send. All of it goes out in one WR so 'm' 1060 * is NULL. A PDU's length is always a multiple of 4. 1061 */ 1062 MPASS(m == NULL); 1063 MPASS((plen & 3) == 0); 1064 MPASS(sndptr->m_pkthdr.len == plen); 1065 1066 shove = !(tp->t_flags & TF_MORETOCOME); 1067 ulp_submode = mbuf_ulp_submode(sndptr); 1068 MPASS(ulp_submode < nitems(ulp_extra_len)); 1069 1070 /* 1071 * plen doesn't include header and data digests, which are 1072 * generated and inserted in the right places by the TOE, but 1073 * they do occupy TCP sequence space and need to be accounted 1074 * for. 1075 */ 1076 adjusted_plen = plen + ulp_extra_len[ulp_submode]; 1077 if (plen <= max_imm) { 1078 1079 /* Immediate data tx */ 1080 1081 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 1082 toep->ofld_txq); 1083 if (wr == NULL) { 1084 /* XXX: how will we recover from this? */ 1085 toep->flags |= TPF_TX_SUSPENDED; 1086 return; 1087 } 1088 txwr = wrtod(wr); 1089 credits = howmany(wr->wr_len, 16); 1090 write_tx_wr(txwr, toep, plen, adjusted_plen, credits, 1091 shove, ulp_submode, sc->tt.tx_align); 1092 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 1093 nsegs = 0; 1094 } else { 1095 int wr_len; 1096 1097 /* DSGL tx */ 1098 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 1099 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 1100 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 1101 if (wr == NULL) { 1102 /* XXX: how will we recover from this? */ 1103 toep->flags |= TPF_TX_SUSPENDED; 1104 return; 1105 } 1106 txwr = wrtod(wr); 1107 credits = howmany(wr_len, 16); 1108 write_tx_wr(txwr, toep, 0, adjusted_plen, credits, 1109 shove, ulp_submode, sc->tt.tx_align); 1110 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 1111 max_nsegs_1mbuf); 1112 if (wr_len & 0xf) { 1113 uint64_t *pad = (uint64_t *) 1114 ((uintptr_t)txwr + wr_len); 1115 *pad = 0; 1116 } 1117 } 1118 1119 KASSERT(toep->tx_credits >= credits, 1120 ("%s: not enough credits", __func__)); 1121 1122 m = mbufq_dequeue(pduq); 1123 MPASS(m == sndptr); 1124 mbufq_enqueue(&toep->ulp_pdu_reclaimq, m); 1125 1126 toep->tx_credits -= credits; 1127 toep->tx_nocompl += credits; 1128 toep->plen_nocompl += plen; 1129 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 1130 toep->tx_nocompl >= toep->tx_total / 4) { 1131 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 1132 toep->tx_nocompl = 0; 1133 toep->plen_nocompl = 0; 1134 } 1135 1136 tp->snd_nxt += adjusted_plen; 1137 tp->snd_max += adjusted_plen; 1138 1139 toep->flags |= TPF_TX_DATA_SENT; 1140 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 1141 toep->flags |= TPF_TX_SUSPENDED; 1142 1143 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 1144 txsd->plen = plen; 1145 txsd->tx_credits = credits; 1146 txsd++; 1147 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 1148 toep->txsd_pidx = 0; 1149 txsd = &toep->txsd[0]; 1150 } 1151 toep->txsd_avail--; 1152 1153 t4_l2t_send(sc, wr, toep->l2te); 1154 } 1155 1156 /* Send a FIN if requested, but only if there are no more PDUs to send */ 1157 if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN) 1158 t4_close_conn(sc, toep); 1159} 1160 1161int 1162t4_tod_output(struct toedev *tod, struct tcpcb *tp) 1163{ 1164 struct adapter *sc = tod->tod_softc; 1165#ifdef INVARIANTS 1166 struct inpcb *inp = tp->t_inpcb; 1167#endif 1168 struct toepcb *toep = tp->t_toe; 1169 1170 INP_WLOCK_ASSERT(inp); 1171 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1172 ("%s: inp %p dropped.", __func__, inp)); 1173 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1174 1175 if (toep->ulp_mode == ULP_MODE_ISCSI) 1176 t4_push_pdus(sc, toep, 0); 1177 else if (tls_tx_key(toep)) 1178 t4_push_tls_records(sc, toep, 0); 1179 else 1180 t4_push_frames(sc, toep, 0); 1181 1182 return (0); 1183} 1184 1185int 1186t4_send_fin(struct toedev *tod, struct tcpcb *tp) 1187{ 1188 struct adapter *sc = tod->tod_softc; 1189#ifdef INVARIANTS 1190 struct inpcb *inp = tp->t_inpcb; 1191#endif 1192 struct toepcb *toep = tp->t_toe; 1193 1194 INP_WLOCK_ASSERT(inp); 1195 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1196 ("%s: inp %p dropped.", __func__, inp)); 1197 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1198 1199 toep->flags |= TPF_SEND_FIN; 1200 if (tp->t_state >= TCPS_ESTABLISHED) { 1201 if (toep->ulp_mode == ULP_MODE_ISCSI) 1202 t4_push_pdus(sc, toep, 0); 1203 else if (tls_tx_key(toep)) 1204 t4_push_tls_records(sc, toep, 0); 1205 else 1206 t4_push_frames(sc, toep, 0); 1207 } 1208 1209 return (0); 1210} 1211 1212int 1213t4_send_rst(struct toedev *tod, struct tcpcb *tp) 1214{ 1215 struct adapter *sc = tod->tod_softc; 1216#if defined(INVARIANTS) 1217 struct inpcb *inp = tp->t_inpcb; 1218#endif 1219 struct toepcb *toep = tp->t_toe; 1220 1221 INP_WLOCK_ASSERT(inp); 1222 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1223 ("%s: inp %p dropped.", __func__, inp)); 1224 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1225 1226 /* hmmmm */ 1227 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 1228 ("%s: flowc for tid %u [%s] not sent already", 1229 __func__, toep->tid, tcpstates[tp->t_state])); 1230 1231 send_reset(sc, toep, 0); 1232 return (0); 1233} 1234 1235/* 1236 * Peer has sent us a FIN. 1237 */ 1238static int 1239do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1240{ 1241 struct adapter *sc = iq->adapter; 1242 const struct cpl_peer_close *cpl = (const void *)(rss + 1); 1243 unsigned int tid = GET_TID(cpl); 1244 struct toepcb *toep = lookup_tid(sc, tid); 1245 struct inpcb *inp = toep->inp; 1246 struct tcpcb *tp = NULL; 1247 struct socket *so; 1248#ifdef INVARIANTS 1249 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1250#endif 1251 1252 KASSERT(opcode == CPL_PEER_CLOSE, 1253 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1254 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1255 1256 if (__predict_false(toep->flags & TPF_SYNQE)) { 1257#ifdef INVARIANTS 1258 struct synq_entry *synqe = (void *)toep; 1259 1260 INP_WLOCK(synqe->lctx->inp); 1261 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1262 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1263 ("%s: listen socket closed but tid %u not aborted.", 1264 __func__, tid)); 1265 } else { 1266 /* 1267 * do_pass_accept_req is still running and will 1268 * eventually take care of this tid. 1269 */ 1270 } 1271 INP_WUNLOCK(synqe->lctx->inp); 1272#endif 1273 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1274 toep, toep->flags); 1275 return (0); 1276 } 1277 1278 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1279 1280 CURVNET_SET(toep->vnet); 1281 INP_INFO_RLOCK(&V_tcbinfo); 1282 INP_WLOCK(inp); 1283 tp = intotcpcb(inp); 1284 1285 CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, 1286 tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); 1287 1288 if (toep->flags & TPF_ABORT_SHUTDOWN) 1289 goto done; 1290 1291 tp->rcv_nxt++; /* FIN */ 1292 1293 so = inp->inp_socket; 1294 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1295 DDP_LOCK(toep); 1296 if (__predict_false(toep->ddp.flags & 1297 (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) 1298 handle_ddp_close(toep, tp, cpl->rcv_nxt); 1299 DDP_UNLOCK(toep); 1300 } 1301 socantrcvmore(so); 1302 1303 if (toep->ulp_mode != ULP_MODE_RDMA) { 1304 KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), 1305 ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, 1306 be32toh(cpl->rcv_nxt))); 1307 } 1308 1309 switch (tp->t_state) { 1310 case TCPS_SYN_RECEIVED: 1311 tp->t_starttime = ticks; 1312 /* FALLTHROUGH */ 1313 1314 case TCPS_ESTABLISHED: 1315 tcp_state_change(tp, TCPS_CLOSE_WAIT); 1316 break; 1317 1318 case TCPS_FIN_WAIT_1: 1319 tcp_state_change(tp, TCPS_CLOSING); 1320 break; 1321 1322 case TCPS_FIN_WAIT_2: 1323 tcp_twstart(tp); 1324 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1325 INP_INFO_RUNLOCK(&V_tcbinfo); 1326 CURVNET_RESTORE(); 1327 1328 INP_WLOCK(inp); 1329 final_cpl_received(toep); 1330 return (0); 1331 1332 default: 1333 log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", 1334 __func__, tid, tp->t_state); 1335 } 1336done: 1337 INP_WUNLOCK(inp); 1338 INP_INFO_RUNLOCK(&V_tcbinfo); 1339 CURVNET_RESTORE(); 1340 return (0); 1341} 1342 1343/* 1344 * Peer has ACK'd our FIN. 1345 */ 1346static int 1347do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, 1348 struct mbuf *m) 1349{ 1350 struct adapter *sc = iq->adapter; 1351 const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); 1352 unsigned int tid = GET_TID(cpl); 1353 struct toepcb *toep = lookup_tid(sc, tid); 1354 struct inpcb *inp = toep->inp; 1355 struct tcpcb *tp = NULL; 1356 struct socket *so = NULL; 1357#ifdef INVARIANTS 1358 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1359#endif 1360 1361 KASSERT(opcode == CPL_CLOSE_CON_RPL, 1362 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1363 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1364 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1365 1366 CURVNET_SET(toep->vnet); 1367 INP_INFO_RLOCK(&V_tcbinfo); 1368 INP_WLOCK(inp); 1369 tp = intotcpcb(inp); 1370 1371 CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", 1372 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); 1373 1374 if (toep->flags & TPF_ABORT_SHUTDOWN) 1375 goto done; 1376 1377 so = inp->inp_socket; 1378 tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ 1379 1380 switch (tp->t_state) { 1381 case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ 1382 tcp_twstart(tp); 1383release: 1384 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1385 INP_INFO_RUNLOCK(&V_tcbinfo); 1386 CURVNET_RESTORE(); 1387 1388 INP_WLOCK(inp); 1389 final_cpl_received(toep); /* no more CPLs expected */ 1390 1391 return (0); 1392 case TCPS_LAST_ACK: 1393 if (tcp_close(tp)) 1394 INP_WUNLOCK(inp); 1395 goto release; 1396 1397 case TCPS_FIN_WAIT_1: 1398 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 1399 soisdisconnected(so); 1400 tcp_state_change(tp, TCPS_FIN_WAIT_2); 1401 break; 1402 1403 default: 1404 log(LOG_ERR, 1405 "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", 1406 __func__, tid, tcpstates[tp->t_state]); 1407 } 1408done: 1409 INP_WUNLOCK(inp); 1410 INP_INFO_RUNLOCK(&V_tcbinfo); 1411 CURVNET_RESTORE(); 1412 return (0); 1413} 1414 1415void 1416send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, 1417 int rst_status) 1418{ 1419 struct wrqe *wr; 1420 struct cpl_abort_rpl *cpl; 1421 1422 wr = alloc_wrqe(sizeof(*cpl), ofld_txq); 1423 if (wr == NULL) { 1424 /* XXX */ 1425 panic("%s: allocation failure.", __func__); 1426 } 1427 cpl = wrtod(wr); 1428 1429 INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); 1430 cpl->cmd = rst_status; 1431 1432 t4_wrq_tx(sc, wr); 1433} 1434 1435static int 1436abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) 1437{ 1438 switch (abort_reason) { 1439 case CPL_ERR_BAD_SYN: 1440 case CPL_ERR_CONN_RESET: 1441 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); 1442 case CPL_ERR_XMIT_TIMEDOUT: 1443 case CPL_ERR_PERSIST_TIMEDOUT: 1444 case CPL_ERR_FINWAIT2_TIMEDOUT: 1445 case CPL_ERR_KEEPALIVE_TIMEDOUT: 1446 return (ETIMEDOUT); 1447 default: 1448 return (EIO); 1449 } 1450} 1451 1452/* 1453 * TCP RST from the peer, timeout, or some other such critical error. 1454 */ 1455static int 1456do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1457{ 1458 struct adapter *sc = iq->adapter; 1459 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 1460 unsigned int tid = GET_TID(cpl); 1461 struct toepcb *toep = lookup_tid(sc, tid); 1462 struct sge_wrq *ofld_txq = toep->ofld_txq; 1463 struct inpcb *inp; 1464 struct tcpcb *tp; 1465#ifdef INVARIANTS 1466 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1467#endif 1468 1469 KASSERT(opcode == CPL_ABORT_REQ_RSS, 1470 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1471 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1472 1473 if (toep->flags & TPF_SYNQE) 1474 return (do_abort_req_synqe(iq, rss, m)); 1475 1476 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1477 1478 if (negative_advice(cpl->status)) { 1479 CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", 1480 __func__, cpl->status, tid, toep->flags); 1481 return (0); /* Ignore negative advice */ 1482 } 1483 1484 inp = toep->inp; 1485 CURVNET_SET(toep->vnet); 1486 INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ 1487 INP_WLOCK(inp); 1488 1489 tp = intotcpcb(inp); 1490 1491 CTR6(KTR_CXGBE, 1492 "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", 1493 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, 1494 inp->inp_flags, cpl->status); 1495 1496 /* 1497 * If we'd initiated an abort earlier the reply to it is responsible for 1498 * cleaning up resources. Otherwise we tear everything down right here 1499 * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 1500 */ 1501 if (toep->flags & TPF_ABORT_SHUTDOWN) { 1502 INP_WUNLOCK(inp); 1503 goto done; 1504 } 1505 toep->flags |= TPF_ABORT_SHUTDOWN; 1506 1507 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 1508 struct socket *so = inp->inp_socket; 1509 1510 if (so != NULL) 1511 so_error_set(so, abort_status_to_errno(tp, 1512 cpl->status)); 1513 tp = tcp_close(tp); 1514 if (tp == NULL) 1515 INP_WLOCK(inp); /* re-acquire */ 1516 } 1517 1518 final_cpl_received(toep); 1519done: 1520 INP_INFO_RUNLOCK(&V_tcbinfo); 1521 CURVNET_RESTORE(); 1522 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 1523 return (0); 1524} 1525 1526/* 1527 * Reply to the CPL_ABORT_REQ (send_reset) 1528 */ 1529static int 1530do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1531{ 1532 struct adapter *sc = iq->adapter; 1533 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 1534 unsigned int tid = GET_TID(cpl); 1535 struct toepcb *toep = lookup_tid(sc, tid); 1536 struct inpcb *inp = toep->inp; 1537#ifdef INVARIANTS 1538 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1539#endif 1540 1541 KASSERT(opcode == CPL_ABORT_RPL_RSS, 1542 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1543 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1544 1545 if (toep->flags & TPF_SYNQE) 1546 return (do_abort_rpl_synqe(iq, rss, m)); 1547 1548 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1549 1550 CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", 1551 __func__, tid, toep, inp, cpl->status); 1552 1553 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1554 ("%s: wasn't expecting abort reply", __func__)); 1555 1556 INP_WLOCK(inp); 1557 final_cpl_received(toep); 1558 1559 return (0); 1560} 1561 1562static int 1563do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1564{ 1565 struct adapter *sc = iq->adapter; 1566 const struct cpl_rx_data *cpl = mtod(m, const void *); 1567 unsigned int tid = GET_TID(cpl); 1568 struct toepcb *toep = lookup_tid(sc, tid); 1569 struct inpcb *inp = toep->inp; 1570 struct tcpcb *tp; 1571 struct socket *so; 1572 struct sockbuf *sb; 1573 int len; 1574 uint32_t ddp_placed = 0; 1575 1576 if (__predict_false(toep->flags & TPF_SYNQE)) { 1577#ifdef INVARIANTS 1578 struct synq_entry *synqe = (void *)toep; 1579 1580 INP_WLOCK(synqe->lctx->inp); 1581 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1582 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1583 ("%s: listen socket closed but tid %u not aborted.", 1584 __func__, tid)); 1585 } else { 1586 /* 1587 * do_pass_accept_req is still running and will 1588 * eventually take care of this tid. 1589 */ 1590 } 1591 INP_WUNLOCK(synqe->lctx->inp); 1592#endif 1593 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1594 toep, toep->flags); 1595 m_freem(m); 1596 return (0); 1597 } 1598 1599 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1600 1601 /* strip off CPL header */ 1602 m_adj(m, sizeof(*cpl)); 1603 len = m->m_pkthdr.len; 1604 1605 INP_WLOCK(inp); 1606 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { 1607 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", 1608 __func__, tid, len, inp->inp_flags); 1609 INP_WUNLOCK(inp); 1610 m_freem(m); 1611 return (0); 1612 } 1613 1614 tp = intotcpcb(inp); 1615 1616 if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) 1617 ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; 1618 1619 tp->rcv_nxt += len; 1620 if (tp->rcv_wnd < len) { 1621 KASSERT(toep->ulp_mode == ULP_MODE_RDMA, 1622 ("%s: negative window size", __func__)); 1623 } 1624 1625 tp->rcv_wnd -= len; 1626 tp->t_rcvtime = ticks; 1627 1628 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1629 DDP_LOCK(toep); 1630 so = inp_inpcbtosocket(inp); 1631 sb = &so->so_rcv; 1632 SOCKBUF_LOCK(sb); 1633 1634 if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { 1635 CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", 1636 __func__, tid, len); 1637 m_freem(m); 1638 SOCKBUF_UNLOCK(sb); 1639 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1640 DDP_UNLOCK(toep); 1641 INP_WUNLOCK(inp); 1642 1643 CURVNET_SET(toep->vnet); 1644 INP_INFO_RLOCK(&V_tcbinfo); 1645 INP_WLOCK(inp); 1646 tp = tcp_drop(tp, ECONNRESET); 1647 if (tp) 1648 INP_WUNLOCK(inp); 1649 INP_INFO_RUNLOCK(&V_tcbinfo); 1650 CURVNET_RESTORE(); 1651 1652 return (0); 1653 } 1654 1655 /* receive buffer autosize */ 1656 MPASS(toep->vnet == so->so_vnet); 1657 CURVNET_SET(toep->vnet); 1658 if (sb->sb_flags & SB_AUTOSIZE && 1659 V_tcp_do_autorcvbuf && 1660 sb->sb_hiwat < V_tcp_autorcvbuf_max && 1661 len > (sbspace(sb) / 8 * 7)) { 1662 unsigned int hiwat = sb->sb_hiwat; 1663 unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, 1664 V_tcp_autorcvbuf_max); 1665 1666 if (!sbreserve_locked(sb, newsize, so, NULL)) 1667 sb->sb_flags &= ~SB_AUTOSIZE; 1668 else 1669 toep->rx_credits += newsize - hiwat; 1670 } 1671 1672 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1673 int changed = !(toep->ddp.flags & DDP_ON) ^ cpl->ddp_off; 1674 1675 if (toep->ddp.waiting_count != 0 || toep->ddp.active_count != 0) 1676 CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", 1677 __func__, tid, len); 1678 1679 if (changed) { 1680 if (toep->ddp.flags & DDP_SC_REQ) 1681 toep->ddp.flags ^= DDP_ON | DDP_SC_REQ; 1682 else { 1683 KASSERT(cpl->ddp_off == 1, 1684 ("%s: DDP switched on by itself.", 1685 __func__)); 1686 1687 /* Fell out of DDP mode */ 1688 toep->ddp.flags &= ~DDP_ON; 1689 CTR1(KTR_CXGBE, "%s: fell out of DDP mode", 1690 __func__); 1691 1692 insert_ddp_data(toep, ddp_placed); 1693 } 1694 } 1695 1696 if (toep->ddp.flags & DDP_ON) { 1697 /* 1698 * CPL_RX_DATA with DDP on can only be an indicate. 1699 * Start posting queued AIO requests via DDP. The 1700 * payload that arrived in this indicate is appended 1701 * to the socket buffer as usual. 1702 */ 1703 handle_ddp_indicate(toep); 1704 } 1705 } 1706 1707 KASSERT(toep->sb_cc >= sbused(sb), 1708 ("%s: sb %p has more data (%d) than last time (%d).", 1709 __func__, sb, sbused(sb), toep->sb_cc)); 1710 toep->rx_credits += toep->sb_cc - sbused(sb); 1711 sbappendstream_locked(sb, m, 0); 1712 toep->sb_cc = sbused(sb); 1713 if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) { 1714 int credits; 1715 1716 credits = send_rx_credits(sc, toep, toep->rx_credits); 1717 toep->rx_credits -= credits; 1718 tp->rcv_wnd += credits; 1719 tp->rcv_adv += credits; 1720 } 1721 1722 if (toep->ulp_mode == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 && 1723 sbavail(sb) != 0) { 1724 CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__, 1725 tid); 1726 ddp_queue_toep(toep); 1727 } 1728 sorwakeup_locked(so); 1729 SOCKBUF_UNLOCK_ASSERT(sb); 1730 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1731 DDP_UNLOCK(toep); 1732 1733 INP_WUNLOCK(inp); 1734 CURVNET_RESTORE(); 1735 return (0); 1736} 1737 1738#define S_CPL_FW4_ACK_OPCODE 24 1739#define M_CPL_FW4_ACK_OPCODE 0xff 1740#define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) 1741#define G_CPL_FW4_ACK_OPCODE(x) \ 1742 (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) 1743 1744#define S_CPL_FW4_ACK_FLOWID 0 1745#define M_CPL_FW4_ACK_FLOWID 0xffffff 1746#define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) 1747#define G_CPL_FW4_ACK_FLOWID(x) \ 1748 (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) 1749 1750#define S_CPL_FW4_ACK_CR 24 1751#define M_CPL_FW4_ACK_CR 0xff 1752#define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) 1753#define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) 1754 1755#define S_CPL_FW4_ACK_SEQVAL 0 1756#define M_CPL_FW4_ACK_SEQVAL 0x1 1757#define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) 1758#define G_CPL_FW4_ACK_SEQVAL(x) \ 1759 (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) 1760#define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) 1761 1762static int 1763do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1764{ 1765 struct adapter *sc = iq->adapter; 1766 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 1767 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 1768 struct toepcb *toep = lookup_tid(sc, tid); 1769 struct inpcb *inp; 1770 struct tcpcb *tp; 1771 struct socket *so; 1772 uint8_t credits = cpl->credits; 1773 struct ofld_tx_sdesc *txsd; 1774 int plen; 1775#ifdef INVARIANTS 1776 unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); 1777#endif 1778 1779 /* 1780 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and 1781 * now this comes back carrying the credits for the flowc. 1782 */ 1783 if (__predict_false(toep->flags & TPF_SYNQE)) { 1784 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1785 ("%s: credits for a synq entry %p", __func__, toep)); 1786 return (0); 1787 } 1788 1789 inp = toep->inp; 1790 1791 KASSERT(opcode == CPL_FW4_ACK, 1792 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1793 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1794 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1795 1796 INP_WLOCK(inp); 1797 1798 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { 1799 INP_WUNLOCK(inp); 1800 return (0); 1801 } 1802 1803 KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, 1804 ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); 1805 1806 tp = intotcpcb(inp); 1807 1808 if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { 1809 tcp_seq snd_una = be32toh(cpl->snd_una); 1810 1811#ifdef INVARIANTS 1812 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 1813 log(LOG_ERR, 1814 "%s: unexpected seq# %x for TID %u, snd_una %x\n", 1815 __func__, snd_una, toep->tid, tp->snd_una); 1816 } 1817#endif 1818 1819 if (tp->snd_una != snd_una) { 1820 tp->snd_una = snd_una; 1821 tp->ts_recent_age = tcp_ts_getticks(); 1822 } 1823 } 1824 1825#ifdef VERBOSE_TRACES 1826 CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits); 1827#endif 1828 so = inp->inp_socket; 1829 txsd = &toep->txsd[toep->txsd_cidx]; 1830 plen = 0; 1831 while (credits) { 1832 KASSERT(credits >= txsd->tx_credits, 1833 ("%s: too many (or partial) credits", __func__)); 1834 credits -= txsd->tx_credits; 1835 toep->tx_credits += txsd->tx_credits; 1836 plen += txsd->plen; 1837 if (txsd->iv_buffer) { 1838 free(txsd->iv_buffer, M_CXGBE); 1839 txsd->iv_buffer = NULL; 1840 } 1841 txsd++; 1842 toep->txsd_avail++; 1843 KASSERT(toep->txsd_avail <= toep->txsd_total, 1844 ("%s: txsd avail > total", __func__)); 1845 if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { 1846 txsd = &toep->txsd[0]; 1847 toep->txsd_cidx = 0; 1848 } 1849 } 1850 1851 if (toep->tx_credits == toep->tx_total) { 1852 toep->tx_nocompl = 0; 1853 toep->plen_nocompl = 0; 1854 } 1855 1856 if (toep->flags & TPF_TX_SUSPENDED && 1857 toep->tx_credits >= toep->tx_total / 4) { 1858#ifdef VERBOSE_TRACES 1859 CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__, 1860 tid); 1861#endif 1862 toep->flags &= ~TPF_TX_SUSPENDED; 1863 CURVNET_SET(toep->vnet); 1864 if (toep->ulp_mode == ULP_MODE_ISCSI) 1865 t4_push_pdus(sc, toep, plen); 1866 else if (tls_tx_key(toep)) 1867 t4_push_tls_records(sc, toep, plen); 1868 else 1869 t4_push_frames(sc, toep, plen); 1870 CURVNET_RESTORE(); 1871 } else if (plen > 0) { 1872 struct sockbuf *sb = &so->so_snd; 1873 int sbu; 1874 1875 SOCKBUF_LOCK(sb); 1876 sbu = sbused(sb); 1877 if (toep->ulp_mode == ULP_MODE_ISCSI) { 1878 1879 if (__predict_false(sbu > 0)) { 1880 /* 1881 * The data trasmitted before the tid's ULP mode 1882 * changed to ISCSI is still in so_snd. 1883 * Incoming credits should account for so_snd 1884 * first. 1885 */ 1886 sbdrop_locked(sb, min(sbu, plen)); 1887 plen -= min(sbu, plen); 1888 } 1889 sowwakeup_locked(so); /* unlocks so_snd */ 1890 rqdrop_locked(&toep->ulp_pdu_reclaimq, plen); 1891 } else { 1892#ifdef VERBOSE_TRACES 1893 CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__, 1894 tid, plen); 1895#endif 1896 sbdrop_locked(sb, plen); 1897 if (tls_tx_key(toep)) { 1898 struct tls_ofld_info *tls_ofld = &toep->tls; 1899 1900 MPASS(tls_ofld->sb_off >= plen); 1901 tls_ofld->sb_off -= plen; 1902 } 1903 if (!TAILQ_EMPTY(&toep->aiotx_jobq)) 1904 t4_aiotx_queue_toep(toep); 1905 sowwakeup_locked(so); /* unlocks so_snd */ 1906 } 1907 SOCKBUF_UNLOCK_ASSERT(sb); 1908 } 1909 1910 INP_WUNLOCK(inp); 1911 1912 return (0); 1913} 1914 1915void 1916t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep, 1917 uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie) 1918{ 1919 struct wrqe *wr; 1920 struct cpl_set_tcb_field *req; 1921 struct ofld_tx_sdesc *txsd; 1922 1923 MPASS((cookie & ~M_COOKIE) == 0); 1924 if (reply) { 1925 MPASS(cookie != CPL_COOKIE_RESERVED); 1926 } 1927 1928 wr = alloc_wrqe(sizeof(*req), wrq); 1929 if (wr == NULL) { 1930 /* XXX */ 1931 panic("%s: allocation failure.", __func__); 1932 } 1933 req = wrtod(wr); 1934 1935 INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); 1936 req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id)); 1937 if (reply == 0) 1938 req->reply_ctrl |= htobe16(F_NO_REPLY); 1939 req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie)); 1940 req->mask = htobe64(mask); 1941 req->val = htobe64(val); 1942 if ((wrq->eq.flags & EQ_TYPEMASK) == EQ_OFLD) { 1943 txsd = &toep->txsd[toep->txsd_pidx]; 1944 txsd->tx_credits = howmany(sizeof(*req), 16); 1945 txsd->plen = 0; 1946 KASSERT(toep->tx_credits >= txsd->tx_credits && 1947 toep->txsd_avail > 0, 1948 ("%s: not enough credits (%d)", __func__, 1949 toep->tx_credits)); 1950 toep->tx_credits -= txsd->tx_credits; 1951 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 1952 toep->txsd_pidx = 0; 1953 toep->txsd_avail--; 1954 } 1955 1956 t4_wrq_tx(sc, wr); 1957} 1958 1959void 1960t4_init_cpl_io_handlers(void) 1961{ 1962 1963 t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 1964 t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 1965 t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 1966 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); 1967 t4_register_cpl_handler(CPL_RX_DATA, do_rx_data); 1968 t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack); 1969} 1970 1971void 1972t4_uninit_cpl_io_handlers(void) 1973{ 1974 1975 t4_register_cpl_handler(CPL_PEER_CLOSE, NULL); 1976 t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL); 1977 t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL); 1978 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, NULL); 1979 t4_register_cpl_handler(CPL_RX_DATA, NULL); 1980 t4_register_cpl_handler(CPL_FW4_ACK, NULL); 1981} 1982 1983/* 1984 * Use the 'backend3' field in AIO jobs to store the amount of data 1985 * sent by the AIO job so far and the 'backend4' field to hold an 1986 * error that should be reported when the job is completed. 1987 */ 1988#define aio_sent backend3 1989#define aio_error backend4 1990 1991#define jobtotid(job) \ 1992 (((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid) 1993 1994static void 1995free_aiotx_buffer(struct aiotx_buffer *ab) 1996{ 1997 struct kaiocb *job; 1998 long status; 1999 int error; 2000 2001 if (refcount_release(&ab->refcount) == 0) 2002 return; 2003 2004 job = ab->job; 2005 error = job->aio_error; 2006 status = job->aio_sent; 2007 vm_page_unhold_pages(ab->ps.pages, ab->ps.npages); 2008 free(ab, M_CXGBE); 2009#ifdef VERBOSE_TRACES 2010 CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__, 2011 jobtotid(job), job, status, error); 2012#endif 2013 if (error == ECANCELED && status != 0) 2014 error = 0; 2015 if (error == ECANCELED) 2016 aio_cancel(job); 2017 else if (error) 2018 aio_complete(job, -1, error); 2019 else 2020 aio_complete(job, status, 0); 2021} 2022 2023static void 2024t4_aiotx_mbuf_free(struct mbuf *m, void *buffer, void *arg) 2025{ 2026 struct aiotx_buffer *ab = buffer; 2027 2028#ifdef VERBOSE_TRACES 2029 CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__, 2030 m->m_len, jobtotid(ab->job)); 2031#endif 2032 free_aiotx_buffer(ab); 2033} 2034 2035/* 2036 * Hold the buffer backing an AIO request and return an AIO transmit 2037 * buffer. 2038 */ 2039static int 2040hold_aio(struct kaiocb *job) 2041{ 2042 struct aiotx_buffer *ab; 2043 struct vmspace *vm; 2044 vm_map_t map; 2045 vm_offset_t start, end, pgoff; 2046 int n; 2047 2048 MPASS(job->backend1 == NULL); 2049 2050 /* 2051 * The AIO subsystem will cancel and drain all requests before 2052 * permitting a process to exit or exec, so p_vmspace should 2053 * be stable here. 2054 */ 2055 vm = job->userproc->p_vmspace; 2056 map = &vm->vm_map; 2057 start = (uintptr_t)job->uaiocb.aio_buf; 2058 pgoff = start & PAGE_MASK; 2059 end = round_page(start + job->uaiocb.aio_nbytes); 2060 start = trunc_page(start); 2061 n = atop(end - start); 2062 2063 ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK | 2064 M_ZERO); 2065 refcount_init(&ab->refcount, 1); 2066 ab->ps.pages = (vm_page_t *)(ab + 1); 2067 ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start, 2068 VM_PROT_WRITE, ab->ps.pages, n); 2069 if (ab->ps.npages < 0) { 2070 free(ab, M_CXGBE); 2071 return (EFAULT); 2072 } 2073 2074 KASSERT(ab->ps.npages == n, 2075 ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n)); 2076 2077 ab->ps.offset = pgoff; 2078 ab->ps.len = job->uaiocb.aio_nbytes; 2079 ab->job = job; 2080 job->backend1 = ab; 2081#ifdef VERBOSE_TRACES 2082 CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d", 2083 __func__, jobtotid(job), &ab->ps, job, ab->ps.npages); 2084#endif 2085 return (0); 2086} 2087 2088static void 2089t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job) 2090{ 2091 struct adapter *sc; 2092 struct sockbuf *sb; 2093 struct file *fp; 2094 struct aiotx_buffer *ab; 2095 struct inpcb *inp; 2096 struct tcpcb *tp; 2097 struct mbuf *m; 2098 int error; 2099 bool moretocome, sendmore; 2100 2101 sc = td_adapter(toep->td); 2102 sb = &so->so_snd; 2103 SOCKBUF_UNLOCK(sb); 2104 fp = job->fd_file; 2105 ab = job->backend1; 2106 m = NULL; 2107 2108#ifdef MAC 2109 error = mac_socket_check_send(fp->f_cred, so); 2110 if (error != 0) 2111 goto out; 2112#endif 2113 2114 if (ab == NULL) { 2115 error = hold_aio(job); 2116 if (error != 0) 2117 goto out; 2118 ab = job->backend1; 2119 } 2120 2121 /* Inline sosend_generic(). */ 2122 2123 job->msgsnd = 1; 2124 2125 error = sblock(sb, SBL_WAIT); 2126 MPASS(error == 0); 2127 2128sendanother: 2129 m = m_get(M_WAITOK, MT_DATA); 2130 2131 SOCKBUF_LOCK(sb); 2132 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2133 SOCKBUF_UNLOCK(sb); 2134 sbunlock(sb); 2135 if ((so->so_options & SO_NOSIGPIPE) == 0) { 2136 PROC_LOCK(job->userproc); 2137 kern_psignal(job->userproc, SIGPIPE); 2138 PROC_UNLOCK(job->userproc); 2139 } 2140 error = EPIPE; 2141 goto out; 2142 } 2143 if (so->so_error) { 2144 error = so->so_error; 2145 so->so_error = 0; 2146 SOCKBUF_UNLOCK(sb); 2147 sbunlock(sb); 2148 goto out; 2149 } 2150 if ((so->so_state & SS_ISCONNECTED) == 0) { 2151 SOCKBUF_UNLOCK(sb); 2152 sbunlock(sb); 2153 error = ENOTCONN; 2154 goto out; 2155 } 2156 if (sbspace(sb) < sb->sb_lowat) { 2157 MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO)); 2158 2159 /* 2160 * Don't block if there is too little room in the socket 2161 * buffer. Instead, requeue the request. 2162 */ 2163 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { 2164 SOCKBUF_UNLOCK(sb); 2165 sbunlock(sb); 2166 error = ECANCELED; 2167 goto out; 2168 } 2169 TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); 2170 SOCKBUF_UNLOCK(sb); 2171 sbunlock(sb); 2172 goto out; 2173 } 2174 2175 /* 2176 * Write as much data as the socket permits, but no more than a 2177 * a single sndbuf at a time. 2178 */ 2179 m->m_len = sbspace(sb); 2180 if (m->m_len > ab->ps.len - job->aio_sent) { 2181 m->m_len = ab->ps.len - job->aio_sent; 2182 moretocome = false; 2183 } else 2184 moretocome = true; 2185 if (m->m_len > sc->tt.sndbuf) { 2186 m->m_len = sc->tt.sndbuf; 2187 sendmore = true; 2188 } else 2189 sendmore = false; 2190 2191 if (!TAILQ_EMPTY(&toep->aiotx_jobq)) 2192 moretocome = true; 2193 SOCKBUF_UNLOCK(sb); 2194 MPASS(m->m_len != 0); 2195 2196 /* Inlined tcp_usr_send(). */ 2197 2198 inp = toep->inp; 2199 INP_WLOCK(inp); 2200 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 2201 INP_WUNLOCK(inp); 2202 sbunlock(sb); 2203 error = ECONNRESET; 2204 goto out; 2205 } 2206 2207 refcount_acquire(&ab->refcount); 2208 m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab, 2209 (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV); 2210 m->m_ext.ext_flags |= EXT_FLAG_AIOTX; 2211 job->aio_sent += m->m_len; 2212 2213 sbappendstream(sb, m, 0); 2214 m = NULL; 2215 2216 if (!(inp->inp_flags & INP_DROPPED)) { 2217 tp = intotcpcb(inp); 2218 if (moretocome) 2219 tp->t_flags |= TF_MORETOCOME; 2220 error = tp->t_fb->tfb_tcp_output(tp); 2221 if (moretocome) 2222 tp->t_flags &= ~TF_MORETOCOME; 2223 } 2224 2225 INP_WUNLOCK(inp); 2226 if (sendmore) 2227 goto sendanother; 2228 sbunlock(sb); 2229 2230 if (error) 2231 goto out; 2232 2233 /* 2234 * If this is a non-blocking socket and the request has not 2235 * been fully completed, requeue it until the socket is ready 2236 * again. 2237 */ 2238 if (job->aio_sent < job->uaiocb.aio_nbytes && 2239 !(so->so_state & SS_NBIO)) { 2240 SOCKBUF_LOCK(sb); 2241 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { 2242 SOCKBUF_UNLOCK(sb); 2243 error = ECANCELED; 2244 goto out; 2245 } 2246 TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); 2247 return; 2248 } 2249 2250 /* 2251 * If the request will not be requeued, drop a reference on 2252 * the the aiotx buffer. Any mbufs in flight should still 2253 * contain a reference, but this drops the reference that the 2254 * job owns while it is waiting to queue mbufs to the socket. 2255 */ 2256 free_aiotx_buffer(ab); 2257 2258out: 2259 if (error) { 2260 if (ab != NULL) { 2261 job->aio_error = error; 2262 free_aiotx_buffer(ab); 2263 } else { 2264 MPASS(job->aio_sent == 0); 2265 aio_complete(job, -1, error); 2266 } 2267 } 2268 if (m != NULL) 2269 m_free(m); 2270 SOCKBUF_LOCK(sb); 2271} 2272 2273static void 2274t4_aiotx_task(void *context, int pending) 2275{ 2276 struct toepcb *toep = context; 2277 struct inpcb *inp = toep->inp; 2278 struct socket *so = inp->inp_socket; 2279 struct kaiocb *job; 2280 2281 CURVNET_SET(toep->vnet); 2282 SOCKBUF_LOCK(&so->so_snd); 2283 while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) { 2284 job = TAILQ_FIRST(&toep->aiotx_jobq); 2285 TAILQ_REMOVE(&toep->aiotx_jobq, job, list); 2286 if (!aio_clear_cancel_function(job)) 2287 continue; 2288 2289 t4_aiotx_process_job(toep, so, job); 2290 } 2291 toep->aiotx_task_active = false; 2292 SOCKBUF_UNLOCK(&so->so_snd); 2293 CURVNET_RESTORE(); 2294 2295 free_toepcb(toep); 2296} 2297 2298static void 2299t4_aiotx_queue_toep(struct toepcb *toep) 2300{ 2301 2302 SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd); 2303#ifdef VERBOSE_TRACES 2304 CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s", 2305 __func__, toep->tid, toep->aiotx_task_active ? "true" : "false"); 2306#endif 2307 if (toep->aiotx_task_active) 2308 return; 2309 toep->aiotx_task_active = true; 2310 hold_toepcb(toep); 2311 soaio_enqueue(&toep->aiotx_task); 2312} 2313 2314static void 2315t4_aiotx_cancel(struct kaiocb *job) 2316{ 2317 struct aiotx_buffer *ab; 2318 struct socket *so; 2319 struct sockbuf *sb; 2320 struct tcpcb *tp; 2321 struct toepcb *toep; 2322 2323 so = job->fd_file->f_data; 2324 tp = so_sototcpcb(so); 2325 toep = tp->t_toe; 2326 MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE); 2327 sb = &so->so_snd; 2328 2329 SOCKBUF_LOCK(sb); 2330 if (!aio_cancel_cleared(job)) 2331 TAILQ_REMOVE(&toep->aiotx_jobq, job, list); 2332 SOCKBUF_UNLOCK(sb); 2333 2334 ab = job->backend1; 2335 if (ab != NULL) 2336 free_aiotx_buffer(ab); 2337 else 2338 aio_cancel(job); 2339} 2340 2341int 2342t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job) 2343{ 2344 struct tcpcb *tp = so_sototcpcb(so); 2345 struct toepcb *toep = tp->t_toe; 2346 struct adapter *sc = td_adapter(toep->td); 2347 2348 /* This only handles writes. */ 2349 if (job->uaiocb.aio_lio_opcode != LIO_WRITE) 2350 return (EOPNOTSUPP); 2351 2352 if (!sc->tt.tx_zcopy) 2353 return (EOPNOTSUPP); 2354 2355 if (tls_tx_key(toep)) 2356 return (EOPNOTSUPP); 2357 2358 SOCKBUF_LOCK(&so->so_snd); 2359#ifdef VERBOSE_TRACES 2360 CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job); 2361#endif 2362 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) 2363 panic("new job was cancelled"); 2364 TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list); 2365 if (sowriteable(so)) 2366 t4_aiotx_queue_toep(toep); 2367 SOCKBUF_UNLOCK(&so->so_snd); 2368 return (0); 2369} 2370 2371void 2372aiotx_init_toep(struct toepcb *toep) 2373{ 2374 2375 TAILQ_INIT(&toep->aiotx_jobq); 2376 TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep); 2377} 2378#endif 2379