/*-
 * Copyright (c) 2012, 2015 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c 313178 2017-02-03 23:33:06Z jhb $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/aio.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sglist.h>
#include <sys/taskqueue.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

VNET_DECLARE(int, tcp_do_autosndbuf);
#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
VNET_DECLARE(int, tcp_autosndbuf_inc);
#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
VNET_DECLARE(int, tcp_autosndbuf_max);
#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
VNET_DECLARE(int, tcp_do_autorcvbuf);
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
VNET_DECLARE(int, tcp_autorcvbuf_inc);
#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
VNET_DECLARE(int, tcp_autorcvbuf_max);
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)

#define IS_AIOTX_MBUF(m)						\
	((m)->m_flags & M_EXT && (m)->m_ext.ext_flags & EXT_FLAG_AIOTX)

static void t4_aiotx_cancel(struct kaiocb *job);
static void t4_aiotx_queue_toep(struct toepcb *toep);

static size_t
aiotx_mbuf_pgoff(struct mbuf *m)
{
	struct aiotx_buffer *ab;

	MPASS(IS_AIOTX_MBUF(m));
	ab = m->m_ext.ext_arg1;
	return ((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE);
}

static vm_page_t *
aiotx_mbuf_pages(struct mbuf *m)
{
	struct aiotx_buffer *ab;
	int npages;

	MPASS(IS_AIOTX_MBUF(m));
	ab = m->m_ext.ext_arg1;
	npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE;
	return (ab->ps.pages + npages);
}

void
send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
{
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	unsigned int nparams = ftxp ? 8 : 6, flowclen;
	struct vi_info *vi = toep->vi;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
	    ("%s: flowc for tid %u sent already", __func__, toep->tid));

	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);

	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(toep->tid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id);
	if (ftxp) {
		uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf);

		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
		flowc->mnemval[4].val = htobe32(ftxp->snd_nxt);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
		flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt);
		flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[6].val = htobe32(sndbuf);
		flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[7].val = htobe32(ftxp->mss);

		CTR6(KTR_CXGBE,
		    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
		    __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt,
		    ftxp->rcv_nxt);
	} else {
		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
		flowc->mnemval[4].val = htobe32(512);
		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
		flowc->mnemval[5].val = htobe32(512);

		CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
	}

	txsd->tx_credits = howmany(flowclen, 16);
	txsd->plen = 0;
	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
	toep->tx_credits -= txsd->tx_credits;
	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
		toep->txsd_pidx = 0;
	toep->txsd_avail--;

	toep->flags |= TPF_FLOWC_WR_SENT;
	t4_wrq_tx(sc, wr);
}

void
send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
{
	struct wrqe *wr;
	struct cpl_abort_req *req;
	int tid = toep->tid;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);	/* don't use if INP_DROPPED */

	INP_WLOCK_ASSERT(inp);

	CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
	    __func__, toep->tid,
	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
	    tcpstates[tp->t_state],
	    toep->flags, inp->inp_flags,
	    toep->flags & TPF_ABORT_SHUTDOWN ?
	    " (abort already in progress)" : "");

	if (toep->flags & TPF_ABORT_SHUTDOWN)
		return;			/* abort already in progress */

	toep->flags |= TPF_ABORT_SHUTDOWN;

	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));

	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
	if (inp->inp_flags & INP_DROPPED)
		req->rsvd0 = htobe32(snd_nxt);
	else
		req->rsvd0 = htobe32(tp->snd_nxt);
	req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
	req->cmd = CPL_ABORT_SEND_RST;

	/*
	 * XXX: What's the correct way to tell that the inp hasn't been detached
	 * from its socket?  Should I even be flushing the snd buffer here?
	 */
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		struct socket *so = inp->inp_socket;

		if (so != NULL)	/* because I'm not sure.  See comment above */
			sbflush(&so->so_snd);
	}

	t4_l2t_send(sc, wr, toep->l2te);
}

/*
 * Called when a connection is established to translate the TCP options
 * reported by HW to FreeBSD's native format.
 */
static void
assign_rxopt(struct tcpcb *tp, unsigned int opt)
{
	struct toepcb *toep = tp->t_toe;
	struct inpcb *inp = tp->t_inpcb;
	struct adapter *sc = td_adapter(toep->td);
	int n;

	INP_LOCK_ASSERT(inp);

	if (inp->inp_inc.inc_flags & INC_ISIPV6)
		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		n = sizeof(struct ip) + sizeof(struct tcphdr);
	tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;

	CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
	    G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);

	if (G_TCPOPT_TSTAMP(opt)) {
		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
		tp->ts_recent = 0;		/* hmmm */
		tp->ts_recent_age = tcp_ts_getticks();
	}

	if (G_TCPOPT_SACK(opt))
		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
	else
		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */

	if (G_TCPOPT_WSCALE_OK(opt))
		tp->t_flags |= TF_RCVD_SCALE;

	/* Doing window scaling? */
	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
		tp->rcv_scale = tp->request_r_scale;
		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
	}
}

/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCPS_ESTABLISHED.
 *
 * The ISNs are from after the exchange of SYNs.  i.e., the true ISN + 1.
 */
void
make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
    uint16_t opt)
{
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	long bufsize;
	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS */
	uint32_t irs = be32toh(rcv_isn) - 1;	/* true IRS */
	uint16_t tcpopt = be16toh(opt);
	struct flowc_tx_params ftxp;

	INP_WLOCK_ASSERT(inp);
	KASSERT(tp->t_state == TCPS_SYN_SENT ||
	    tp->t_state == TCPS_SYN_RECEIVED,
	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));

	CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p",
	    __func__, toep->tid, toep, inp);

	tp->t_state = TCPS_ESTABLISHED;
	tp->t_starttime = ticks;
	TCPSTAT_INC(tcps_connects);

	tp->irs = irs;
	tcp_rcvseqinit(tp);
	tp->rcv_wnd = toep->rx_credits << 10;
	tp->rcv_adv += tp->rcv_wnd;
	tp->last_ack_sent = tp->rcv_nxt;

	/*
	 * If we were unable to send all rx credits via opt0, save the remainder
	 * in rx_credits so that they can be handed over with the next credit
	 * update.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	bufsize = select_rcv_wnd(so);
	SOCKBUF_UNLOCK(&so->so_rcv);
	toep->rx_credits = bufsize - tp->rcv_wnd;

	tp->iss = iss;
	tcp_sendseqinit(tp);
	tp->snd_una = iss + 1;
	tp->snd_nxt = iss + 1;
	tp->snd_max = iss + 1;

	assign_rxopt(tp, tcpopt);

	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf)
		bufsize = V_tcp_autosndbuf_max;
	else
		bufsize = sbspace(&so->so_snd);
	SOCKBUF_UNLOCK(&so->so_snd);

	ftxp.snd_nxt = tp->snd_nxt;
	ftxp.rcv_nxt = tp->rcv_nxt;
	ftxp.snd_space = bufsize;
	ftxp.mss = tp->t_maxseg;
	send_flowc_wr(toep, &ftxp);

	soisconnected(so);
}

static int
send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
{
	struct wrqe *wr;
	struct cpl_rx_data_ack *req;
	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);

	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));

	wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
	if (wr == NULL)
		return (0);
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
	req->credit_dack = htobe32(dack | V_RX_CREDITS(credits));

	t4_wrq_tx(sc, wr);
	return (credits);
}

void
t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_rcv;
	struct toepcb *toep = tp->t_toe;
	int credits;
	INP_WLOCK_ASSERT(inp);

	SOCKBUF_LOCK_ASSERT(sb);
	KASSERT(toep->sb_cc >= sbused(sb),
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sbused(sb), toep->sb_cc));

	toep->rx_credits += toep->sb_cc - sbused(sb);
	toep->sb_cc = sbused(sb);

	if (toep->rx_credits > 0 &&
	    (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 ||
	    (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
	    toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) {

		credits = send_rx_credits(sc, toep, toep->rx_credits);
		toep->rx_credits -= credits;
		tp->rcv_wnd += credits;
		tp->rcv_adv += credits;
	}
}

void
t4_rcvd(struct toedev *tod, struct tcpcb *tp)
{
	struct inpcb *inp = tp->t_inpcb;
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_rcv;

	SOCKBUF_LOCK(sb);
	t4_rcvd_locked(tod, tp);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Close a connection by sending a CPL_CLOSE_CON_REQ message.
 */
static int
close_conn(struct adapter *sc, struct toepcb *toep)
{
	struct wrqe *wr;
	struct cpl_close_con_req *req;
	unsigned int tid = toep->tid;

	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");

	if (toep->flags & TPF_FIN_SENT)
		return (0);

	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));

	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |
	    V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
	req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
	    V_FW_WR_FLOWID(tid));
	req->wr.wr_lo = cpu_to_be64(0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
	req->rsvd = 0;

	toep->flags |= TPF_FIN_SENT;
	toep->flags &= ~TPF_SEND_FIN;
	t4_l2t_send(sc, wr, toep->l2te);

	return (0);
}

#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))

/* Maximum amount of immediate data we could stuff in a WR */
static inline int
max_imm_payload(int tx_credits)
{
	const int n = 2;	/* Use only up to 2 desc for imm. data WR */
	KASSERT(tx_credits >= 0 &&
	    tx_credits <= MAX_OFLD_TX_CREDITS,
	    ("%s: %d credits", __func__, tx_credits));

	if (tx_credits < MIN_OFLD_TX_CREDITS)
		return (0);

	if (tx_credits >= (n * EQ_ESIZE) / 16)
		return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr));
	else
		return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr));
}

/* Maximum number of SGL entries we could stuff in a WR */
static inline int
max_dsgl_nsegs(int tx_credits)
{
	int nseg = 1;	/* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */
	int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS;

	KASSERT(tx_credits >= 0 &&
	    tx_credits <= MAX_OFLD_TX_CREDITS,
	    ("%s: %d credits", __func__, tx_credits));

	if (tx_credits < MIN_OFLD_TX_CREDITS)
		return (0);

	nseg += 2 * (sge_pair_credits * 16 / 24);
	if ((sge_pair_credits * 16) % 24 == 16)
		nseg++;

	return (nseg);
}

static inline void
write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
    unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign)
{
	struct fw_ofld_tx_data_wr *txwr = dst;

	txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) |
	    V_FW_WR_IMMDLEN(immdlen));
	txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
	    V_FW_WR_LEN16(credits));
	txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) |
	    V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove));
	txwr->plen = htobe32(plen);

	if (txalign > 0) {
		struct tcpcb *tp = intotcpcb(toep->inp);

		if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi))
			txwr->lsodisable_to_flags |=
			    htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE);
		else
			txwr->lsodisable_to_flags |=
			    htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD |
				(tp->t_flags & TF_NODELAY ? 0 :
				F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE));
	}
}

/*
 * Generate a DSGL from a starting mbuf.  The total number of segments and the
 * maximum segments in any one mbuf are provided.
 */
static void
write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
{
	struct mbuf *m;
	struct ulptx_sgl *usgl = dst;
	int i, j, rc;
	struct sglist sg;
	struct sglist_seg segs[n];

	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));

	sglist_init(&sg, n, segs);
	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(nsegs));

	i = -1;
	for (m = start; m != stop; m = m->m_next) {
		if (IS_AIOTX_MBUF(m))
			rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m),
			    aiotx_mbuf_pgoff(m), m->m_len);
		else
			rc = sglist_append(&sg, mtod(m, void *), m->m_len);
		if (__predict_false(rc != 0))
			panic("%s: sglist_append %d", __func__, rc);

		for (j = 0; j < sg.sg_nseg; i++, j++) {
			if (i < 0) {
				usgl->len0 = htobe32(segs[j].ss_len);
				usgl->addr0 = htobe64(segs[j].ss_paddr);
			} else {
				usgl->sge[i / 2].len[i & 1] =
				    htobe32(segs[j].ss_len);
				usgl->sge[i / 2].addr[i & 1] =
				    htobe64(segs[j].ss_paddr);
			}
#ifdef INVARIANTS
			nsegs--;
#endif
		}
		sglist_reset(&sg);
	}
	if (i & 1)
		usgl->sge[i / 2].len[1] = htobe32(0);
	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
	    __func__, nsegs, start, stop));
}

/*
 * Max number of SGL entries an offload tx work request can have.  This is 41
 * (1 + 40) for a full 512B work request.
 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40)
 */
#define OFLD_SGL_LEN (41)

/*
 * Send data and/or a FIN to the peer.
 *
 * The socket's so_snd buffer consists of a stream of data starting with sb_mb
 * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
 * was transmitted.
 *
 * drop indicates the number of bytes that should be dropped from the head of
 * the send buffer.  It is an optimization that lets do_fw4_ack avoid creating
 * contention on the send buffer lock (before this change it used to do
 * sowwakeup and then t4_push_frames right after that when recovering from tx
 * stalls).  When drop is set this function MUST drop the bytes and wake up any
 * writers.
 */
void
t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
{
	struct mbuf *sndptr, *m, *sb_sndptr;
	struct fw_ofld_tx_data_wr *txwr;
	struct wrqe *wr;
	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);
	struct socket *so = inp->inp_socket;
	struct sockbuf *sb = &so->so_snd;
	int tx_credits, shove, compl, sowwakeup;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
	bool aiotx_mbuf_seen;

	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));

	KASSERT(toep->ulp_mode == ULP_MODE_NONE ||
	    toep->ulp_mode == ULP_MODE_TCPDDP ||
	    toep->ulp_mode == ULP_MODE_RDMA,
	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));

#ifdef VERBOSE_TRACES
	CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
	    __func__, toep->tid, toep->flags, tp->t_flags, drop);
#endif
	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
		return;

	/*
	 * This function doesn't resume by itself.  Someone else must clear the
	 * flag and call this function.
	 */
	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
		KASSERT(drop == 0,
		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
		return;
	}

	do {
		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
		max_imm = max_imm_payload(tx_credits);
		max_nsegs = max_dsgl_nsegs(tx_credits);

		SOCKBUF_LOCK(sb);
		sowwakeup = drop;
		if (drop) {
			sbdrop_locked(sb, drop);
			drop = 0;
		}
		sb_sndptr = sb->sb_sndptr;
		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
		plen = 0;
		nsegs = 0;
		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
		aiotx_mbuf_seen = false;
		for (m = sndptr; m != NULL; m = m->m_next) {
			int n;

			if (IS_AIOTX_MBUF(m))
				n = sglist_count_vmpages(aiotx_mbuf_pages(m),
				    aiotx_mbuf_pgoff(m), m->m_len);
			else
				n = sglist_count(mtod(m, void *), m->m_len);

			nsegs += n;
			plen += m->m_len;

			/* This mbuf sent us _over_ the nsegs limit, back out */
			if (plen > max_imm && nsegs > max_nsegs) {
				nsegs -= n;
				plen -= m->m_len;
				if (plen == 0) {
					/* Too few credits */
					toep->flags |= TPF_TX_SUSPENDED;
					if (sowwakeup) {
						if (!TAILQ_EMPTY(
						    &toep->aiotx_jobq))
							t4_aiotx_queue_toep(
							    toep);
						sowwakeup_locked(so);
					} else
						SOCKBUF_UNLOCK(sb);
					SOCKBUF_UNLOCK_ASSERT(sb);
					return;
				}
				break;
			}

			if (IS_AIOTX_MBUF(m))
				aiotx_mbuf_seen = true;
			if (max_nsegs_1mbuf < n)
				max_nsegs_1mbuf = n;
			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */

			/* This mbuf put us right at the max_nsegs limit */
			if (plen > max_imm && nsegs == max_nsegs) {
				m = m->m_next;
				break;
			}
		}

		if (sbused(sb) > sb->sb_hiwat * 5 / 8 &&
		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
			compl = 1;
		else
			compl = 0;

		if (sb->sb_flags & SB_AUTOSIZE &&
		    V_tcp_do_autosndbuf &&
		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
		    sbused(sb) >= sb->sb_hiwat * 7 / 8) {
			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
			    V_tcp_autosndbuf_max);

			if (!sbreserve_locked(sb, newsize, so, NULL))
				sb->sb_flags &= ~SB_AUTOSIZE;
			else
				sowwakeup = 1;	/* room available */
		}
		if (sowwakeup) {
			if (!TAILQ_EMPTY(&toep->aiotx_jobq))
				t4_aiotx_queue_toep(toep);
			sowwakeup_locked(so);
		} else
			SOCKBUF_UNLOCK(sb);
		SOCKBUF_UNLOCK_ASSERT(sb);

		/* nothing to send */
		if (plen == 0) {
			KASSERT(m == NULL,
			    ("%s: nothing to send, but m != NULL", __func__));
			break;
		}

		if (__predict_false(toep->flags & TPF_FIN_SENT))
			panic("%s: excess tx.", __func__);

		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
		if (plen <= max_imm && !aiotx_mbuf_seen) {

			/* Immediate data tx */

			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
			    toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr->wr_len, 16);
			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0,
			    sc->tt.tx_align);
			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
			nsegs = 0;
		} else {
			int wr_len;

			/* DSGL tx */

			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr_len, 16);
			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0,
			    sc->tt.tx_align);
			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
			    max_nsegs_1mbuf);
			if (wr_len & 0xf) {
				uint64_t *pad = (uint64_t *)
				    ((uintptr_t)txwr + wr_len);
				*pad = 0;
			}
		}

		KASSERT(toep->tx_credits >= credits,
		    ("%s: not enough credits", __func__));

		toep->tx_credits -= credits;
		toep->tx_nocompl += credits;
		toep->plen_nocompl += plen;
		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
		    toep->tx_nocompl >= toep->tx_total / 4)
			compl = 1;

		if (compl || toep->ulp_mode == ULP_MODE_RDMA) {
			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
			toep->tx_nocompl = 0;
			toep->plen_nocompl = 0;
		}

		tp->snd_nxt += plen;
		tp->snd_max += plen;

		SOCKBUF_LOCK(sb);
		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
		sb->sb_sndptr = sb_sndptr;
		SOCKBUF_UNLOCK(sb);

		toep->flags |= TPF_TX_DATA_SENT;
		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
			toep->flags |= TPF_TX_SUSPENDED;

		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
		txsd->plen = plen;
		txsd->tx_credits = credits;
		txsd++;
		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
			toep->txsd_pidx = 0;
			txsd = &toep->txsd[0];
		}
		toep->txsd_avail--;

		t4_l2t_send(sc, wr, toep->l2te);
	} while (m != NULL);

	/* Send a FIN if requested, but only if there's no more data to send */
	if (m == NULL && toep->flags & TPF_SEND_FIN)
		close_conn(sc, toep);
}

static inline void
rqdrop_locked(struct mbufq *q, int plen)
{
	struct mbuf *m;

	while (plen > 0) {
		m = mbufq_dequeue(q);

		/* Too many credits. */
		MPASS(m != NULL);
		M_ASSERTPKTHDR(m);

		/* Partial credits. */
		MPASS(plen >= m->m_pkthdr.len);

		plen -= m->m_pkthdr.len;
		m_freem(m);
	}
}

void
t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
{
	struct mbuf *sndptr, *m;
	struct fw_ofld_tx_data_wr *txwr;
	struct wrqe *wr;
	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
	u_int adjusted_plen, ulp_submode;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);
	int tx_credits, shove;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
	struct mbufq *pduq = &toep->ulp_pduq;
	static const u_int ulp_extra_len[] = {0, 4, 4, 8};

	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
	KASSERT(toep->ulp_mode == ULP_MODE_ISCSI,
	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
		return;

	/*
	 * This function doesn't resume by itself.  Someone else must clear the
	 * flag and call this function.
	 */
	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
		KASSERT(drop == 0,
		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
		return;
	}

	if (drop)
		rqdrop_locked(&toep->ulp_pdu_reclaimq, drop);

	while ((sndptr = mbufq_first(pduq)) != NULL) {
		M_ASSERTPKTHDR(sndptr);

		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
		max_imm = max_imm_payload(tx_credits);
		max_nsegs = max_dsgl_nsegs(tx_credits);

		plen = 0;
		nsegs = 0;
		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
		for (m = sndptr; m != NULL; m = m->m_next) {
			int n = sglist_count(mtod(m, void *), m->m_len);

			nsegs += n;
			plen += m->m_len;

			/*
			 * This mbuf would send us _over_ the nsegs limit.
			 * Suspend tx because the PDU can't be sent out.
			 */
			if (plen > max_imm && nsegs > max_nsegs) {
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}

			if (max_nsegs_1mbuf < n)
				max_nsegs_1mbuf = n;
		}

		if (__predict_false(toep->flags & TPF_FIN_SENT))
			panic("%s: excess tx.", __func__);

		/*
		 * We have a PDU to send.  All of it goes out in one WR so 'm'
		 * is NULL.  A PDU's length is always a multiple of 4.
		 */
		MPASS(m == NULL);
		MPASS((plen & 3) == 0);
		MPASS(sndptr->m_pkthdr.len == plen);

		shove = !(tp->t_flags & TF_MORETOCOME);
		ulp_submode = mbuf_ulp_submode(sndptr);
		MPASS(ulp_submode < nitems(ulp_extra_len));

		/*
		 * plen doesn't include header and data digests, which are
		 * generated and inserted in the right places by the TOE, but
		 * they do occupy TCP sequence space and need to be accounted
		 * for.
		 */
		adjusted_plen = plen + ulp_extra_len[ulp_submode];
		if (plen <= max_imm) {

			/* Immediate data tx */

			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
			    toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr->wr_len, 16);
			write_tx_wr(txwr, toep, plen, adjusted_plen, credits,
			    shove, ulp_submode, sc->tt.tx_align);
			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
			nsegs = 0;
		} else {
			int wr_len;

			/* DSGL tx */
			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
			if (wr == NULL) {
				/* XXX: how will we recover from this? */
				toep->flags |= TPF_TX_SUSPENDED;
				return;
			}
			txwr = wrtod(wr);
			credits = howmany(wr_len, 16);
			write_tx_wr(txwr, toep, 0, adjusted_plen, credits,
			    shove, ulp_submode, sc->tt.tx_align);
			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
			    max_nsegs_1mbuf);
			if (wr_len & 0xf) {
				uint64_t *pad = (uint64_t *)
				    ((uintptr_t)txwr + wr_len);
				*pad = 0;
			}
		}

		KASSERT(toep->tx_credits >= credits,
		    ("%s: not enough credits", __func__));

		m = mbufq_dequeue(pduq);
		MPASS(m == sndptr);
		mbufq_enqueue(&toep->ulp_pdu_reclaimq, m);

		toep->tx_credits -= credits;
		toep->tx_nocompl += credits;
		toep->plen_nocompl += plen;
		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
		    toep->tx_nocompl >= toep->tx_total / 4) {
			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
			toep->tx_nocompl = 0;
			toep->plen_nocompl = 0;
		}

		tp->snd_nxt += adjusted_plen;
		tp->snd_max += adjusted_plen;

		toep->flags |= TPF_TX_DATA_SENT;
		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
			toep->flags |= TPF_TX_SUSPENDED;

		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
		txsd->plen = plen;
		txsd->tx_credits = credits;
		txsd++;
		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
			toep->txsd_pidx = 0;
			txsd = &toep->txsd[0];
		}
		toep->txsd_avail--;

		t4_l2t_send(sc, wr, toep->l2te);
	}

	/* Send a FIN if requested, but only if there are no more PDUs to send */
	if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN)
		close_conn(sc, toep);
}

int
t4_tod_output(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
#ifdef INVARIANTS
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("%s: inp %p dropped.", __func__, inp));
	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));

	if (toep->ulp_mode == ULP_MODE_ISCSI)
		t4_push_pdus(sc, toep, 0);
	else
		t4_push_frames(sc, toep, 0);

	return (0);
}

int
t4_send_fin(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
#ifdef INVARIANTS
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("%s: inp %p dropped.", __func__, inp));
	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));

	toep->flags |= TPF_SEND_FIN;
	if (tp->t_state >= TCPS_ESTABLISHED) {
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_push_pdus(sc, toep, 0);
		else
			t4_push_frames(sc, toep, 0);
	}

	return (0);
}

int
t4_send_rst(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
#if defined(INVARIANTS)
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("%s: inp %p dropped.", __func__, inp));
	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));

	/* hmmmm */
	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
	    ("%s: flowc for tid %u [%s] not sent already",
	    __func__, toep->tid, tcpstates[tp->t_state]));

	send_reset(sc, toep, 0);
	return (0);
}

/*
 * Peer has sent us a FIN.
 */
static int
do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = NULL;
	struct socket *so;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PEER_CLOSE,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	CURVNET_SET(toep->vnet);
	INP_INFO_RLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
	    tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);
	if (toep->flags & TPF_ABORT_SHUTDOWN)
		goto done;

	tp->rcv_nxt++;	/* FIN */

	so = inp->inp_socket;
	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		DDP_LOCK(toep);
		if (__predict_false(toep->ddp_flags &
		    (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)))
			handle_ddp_close(toep, tp, cpl->rcv_nxt);
		DDP_UNLOCK(toep);
	}
	socantrcvmore(so);

	if (toep->ulp_mode != ULP_MODE_RDMA) {
		KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
		    ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
		    be32toh(cpl->rcv_nxt)));
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */

	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;

	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;

	case TCPS_FIN_WAIT_2:
		tcp_twstart(tp);
		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();

		INP_WLOCK(inp);
		final_cpl_received(toep);
		return (0);

	default:
		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
		    __func__, tid, tp->t_state);
	}
done:
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	return (0);
}

/*
 * Peer has ACK'd our FIN.
 */
static int
do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = NULL;
	struct socket *so = NULL;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_CLOSE_CON_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	CURVNET_SET(toep->vnet);
	INP_INFO_RLOCK(&V_tcbinfo);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
	if (toep->flags & TPF_ABORT_SHUTDOWN)
		goto done;

	so = inp->inp_socket;
	tp->snd_una = be32toh(cpl->snd_nxt) - 1;	/* exclude FIN */

	switch (tp->t_state) {
	case TCPS_CLOSING:	/* see TCPS_FIN_WAIT_2 in do_peer_close too */
		tcp_twstart(tp);
release:
		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the inp */
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();

		INP_WLOCK(inp);
		final_cpl_received(toep);	/* no more CPLs expected */

		return (0);
	case TCPS_LAST_ACK:
		if (tcp_close(tp))
			INP_WUNLOCK(inp);
		goto release;

	case TCPS_FIN_WAIT_1:
		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
			soisdisconnected(so);
		tp->t_state = TCPS_FIN_WAIT_2;
		break;

	default:
		log(LOG_ERR,
		    "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
		    __func__, tid, tcpstates[tp->t_state]);
	}
done:
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	return (0);
}

void
send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid,
    int rst_status)
{
	struct wrqe *wr;
	struct cpl_abort_rpl *cpl;

	wr = alloc_wrqe(sizeof(*cpl), ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	cpl = wrtod(wr);

	INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid);
	cpl->cmd = rst_status;

	t4_wrq_tx(sc, wr);
}

static int
abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
{
	switch (abort_reason) {
	case CPL_ERR_BAD_SYN:
	case CPL_ERR_CONN_RESET:
		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
	case CPL_ERR_XMIT_TIMEDOUT:
	case CPL_ERR_PERSIST_TIMEDOUT:
	case CPL_ERR_FINWAIT2_TIMEDOUT:
	case CPL_ERR_KEEPALIVE_TIMEDOUT:
		return (ETIMEDOUT);
	default:
		return (EIO);
	}
}

/*
 * TCP RST from the peer, timeout, or some other such critical error.
 */
static int
do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct sge_wrq *ofld_txq = toep->ofld_txq;
	struct inpcb *inp;
	struct tcpcb *tp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_REQ_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (toep->flags & TPF_SYNQE)
		return (do_abort_req_synqe(iq, rss, m));

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	if (negative_advice(cpl->status)) {
		CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
		    __func__, cpl->status, tid, toep->flags);
		return (0);	/* Ignore negative advice */
	}

	inp = toep->inp;
	CURVNET_SET(toep->vnet);
	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
	INP_WLOCK(inp);

	tp = intotcpcb(inp);

	CTR6(KTR_CXGBE,
	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
	    inp->inp_flags, cpl->status);

	/*
	 * If we'd initiated an abort earlier the reply to it is responsible for
	 * cleaning up resources.  Otherwise we tear everything down right here
	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
	 */
	if (toep->flags & TPF_ABORT_SHUTDOWN) {
		INP_WUNLOCK(inp);
		goto done;
	}
	toep->flags |= TPF_ABORT_SHUTDOWN;

	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		struct socket *so = inp->inp_socket;

		if (so != NULL)
			so_error_set(so, abort_status_to_errno(tp,
			    cpl->status));
		tp = tcp_close(tp);
		if (tp == NULL)
			INP_WLOCK(inp);	/* re-acquire */
	}

	final_cpl_received(toep);
done:
	INP_INFO_RUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
	return (0);
}

/*
 * Reply to the CPL_ABORT_REQ (send_reset)
 */
static int
do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_RPL_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));

	if (toep->flags & TPF_SYNQE)
		return (do_abort_rpl_synqe(iq, rss, m));

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
	    __func__, tid, toep, inp, cpl->status);

	KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
	    ("%s: wasn't expecting abort reply", __func__));

	INP_WLOCK(inp);
	final_cpl_received(toep);

	return (0);
}

static int
do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data *cpl = mtod(m, const void *);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	int len;
	uint32_t ddp_placed = 0;

	if (__predict_false(toep->flags & TPF_SYNQE)) {
#ifdef INVARIANTS
		struct synq_entry *synqe = (void *)toep;

		INP_WLOCK(synqe->lctx->inp);
		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
			    ("%s: listen socket closed but tid %u not aborted.",
			    __func__, tid));
		} else {
			/*
			 * do_pass_accept_req is still running and will
			 * eventually take care of this tid.
			 */
		}
		INP_WUNLOCK(synqe->lctx->inp);
#endif
		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
		    toep, toep->flags);
		m_freem(m);
		return (0);
	}

	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	/* strip off CPL header */
	m_adj(m, sizeof(*cpl));
	len = m->m_pkthdr.len;

	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, len, inp->inp_flags);
		INP_WUNLOCK(inp);
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;

	tp->rcv_nxt += len;
	if (tp->rcv_wnd < len) {
		KASSERT(toep->ulp_mode == ULP_MODE_RDMA,
		    ("%s: negative window size", __func__));
	}

	tp->rcv_wnd -= len;
	tp->t_rcvtime = ticks;

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		DDP_LOCK(toep);
	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
		    __func__, tid, len);
		m_freem(m);
		SOCKBUF_UNLOCK(sb);
		if (toep->ulp_mode == ULP_MODE_TCPDDP)
			DDP_UNLOCK(toep);
		INP_WUNLOCK(inp);

		CURVNET_SET(toep->vnet);
		INP_INFO_RLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp)
			INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();

		return (0);
	}

	/* receive buffer autosize */
	MPASS(toep->vnet == so->so_vnet);
	CURVNET_SET(toep->vnet);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}

	if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0)
		CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__,
		    tid, len);

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;

		if (changed) {
			if (toep->ddp_flags & DDP_SC_REQ)
				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
			else {
				KASSERT(cpl->ddp_off == 1,
				    ("%s: DDP switched on by itself.",
__func__)); 1553243681Snp 1554243681Snp /* Fell out of DDP mode */ 1555299210Sjhb toep->ddp_flags &= ~DDP_ON; 1556299210Sjhb CTR1(KTR_CXGBE, "%s: fell out of DDP mode", 1557299210Sjhb __func__); 1558243681Snp 1559299210Sjhb insert_ddp_data(toep, ddp_placed); 1560239344Snp } 1561239344Snp } 1562239344Snp 1563239344Snp if (toep->ddp_flags & DDP_ON) { 1564239344Snp /* 1565299210Sjhb * CPL_RX_DATA with DDP on can only be an indicate. 1566299210Sjhb * Start posting queued AIO requests via DDP. The 1567299210Sjhb * payload that arrived in this indicate is appended 1568299210Sjhb * to the socket buffer as usual. 1569239344Snp */ 1570299210Sjhb handle_ddp_indicate(toep); 1571239344Snp } 1572239344Snp } 1573239344Snp 1574274421Sglebius KASSERT(toep->sb_cc >= sbused(sb), 1575239344Snp ("%s: sb %p has more data (%d) than last time (%d).", 1576274421Sglebius __func__, sb, sbused(sb), toep->sb_cc)); 1577274421Sglebius toep->rx_credits += toep->sb_cc - sbused(sb); 1578275329Sglebius sbappendstream_locked(sb, m, 0); 1579274421Sglebius toep->sb_cc = sbused(sb); 1580280878Snp if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) { 1581280878Snp int credits; 1582280878Snp 1583280878Snp credits = send_rx_credits(sc, toep, toep->rx_credits); 1584280878Snp toep->rx_credits -= credits; 1585280878Snp tp->rcv_wnd += credits; 1586280878Snp tp->rcv_adv += credits; 1587280878Snp } 1588299210Sjhb 1589299210Sjhb if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) { 1590299210Sjhb CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__, 1591299210Sjhb tid); 1592299210Sjhb ddp_queue_toep(toep); 1593299210Sjhb } 1594237263Snp sorwakeup_locked(so); 1595239344Snp SOCKBUF_UNLOCK_ASSERT(sb); 1596299210Sjhb if (toep->ulp_mode == ULP_MODE_TCPDDP) 1597299210Sjhb DDP_UNLOCK(toep); 1598237263Snp 1599237263Snp INP_WUNLOCK(inp); 1600299206Sjhb CURVNET_RESTORE(); 1601237263Snp return (0); 1602237263Snp} 1603237263Snp 1604237263Snp#define S_CPL_FW4_ACK_OPCODE 24 1605237263Snp#define M_CPL_FW4_ACK_OPCODE 0xff 1606237263Snp#define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) 1607237263Snp#define G_CPL_FW4_ACK_OPCODE(x) \ 1608237263Snp (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) 1609259382Snp 1610237263Snp#define S_CPL_FW4_ACK_FLOWID 0 1611237263Snp#define M_CPL_FW4_ACK_FLOWID 0xffffff 1612237263Snp#define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) 1613237263Snp#define G_CPL_FW4_ACK_FLOWID(x) \ 1614237263Snp (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) 1615259382Snp 1616237263Snp#define S_CPL_FW4_ACK_CR 24 1617237263Snp#define M_CPL_FW4_ACK_CR 0xff 1618237263Snp#define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) 1619237263Snp#define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) 1620259382Snp 1621237263Snp#define S_CPL_FW4_ACK_SEQVAL 0 1622237263Snp#define M_CPL_FW4_ACK_SEQVAL 0x1 1623237263Snp#define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) 1624237263Snp#define G_CPL_FW4_ACK_SEQVAL(x) \ 1625237263Snp (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) 1626237263Snp#define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) 1627237263Snp 1628237263Snpstatic int 1629237263Snpdo_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1630237263Snp{ 1631237263Snp struct adapter *sc = iq->adapter; 1632237263Snp const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 1633237263Snp unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 1634237263Snp struct toepcb *toep = lookup_tid(sc, tid); 1635237263Snp struct inpcb *inp; 
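/*
 * Handler for CPL_FW4_ACK: the firmware returns tx credits for work
 * requests it has finished with.  Reclaim the credits from the tid's
 * tx descriptor ring, update snd_una when the message carries a valid
 * sequence number, and then either resume a suspended transmit or drop
 * the acknowledged bytes from the send buffer.
 */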
static int
do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	uint8_t credits = cpl->credits;
	struct ofld_tx_sdesc *txsd;
	int plen;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	/*
	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry
	 * and now this comes back carrying the credits for the flowc.
	 */
	if (__predict_false(toep->flags & TPF_SYNQE)) {
		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
		    ("%s: credits for a synq entry %p", __func__, toep));
		return (0);
	}

	inp = toep->inp;

	KASSERT(opcode == CPL_FW4_ACK,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));

	INP_WLOCK(inp);

	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
		INP_WUNLOCK(inp);
		return (0);
	}

	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));

	tp = intotcpcb(inp);

	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
		tcp_seq snd_una = be32toh(cpl->snd_una);

#ifdef INVARIANTS
		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
			log(LOG_ERR,
			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
			    __func__, snd_una, toep->tid, tp->snd_una);
		}
#endif

		if (tp->snd_una != snd_una) {
			tp->snd_una = snd_una;
			tp->ts_recent_age = tcp_ts_getticks();
		}
	}

#ifdef VERBOSE_TRACES
	CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits);
#endif
	so = inp->inp_socket;
	txsd = &toep->txsd[toep->txsd_cidx];
	plen = 0;
	while (credits) {
		KASSERT(credits >= txsd->tx_credits,
		    ("%s: too many (or partial) credits", __func__));
		credits -= txsd->tx_credits;
		toep->tx_credits += txsd->tx_credits;
		plen += txsd->plen;
		txsd++;
		toep->txsd_avail++;
		KASSERT(toep->txsd_avail <= toep->txsd_total,
		    ("%s: txsd avail > total", __func__));
		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
			txsd = &toep->txsd[0];
			toep->txsd_cidx = 0;
		}
	}

	if (toep->tx_credits == toep->tx_total) {
		toep->tx_nocompl = 0;
		toep->plen_nocompl = 0;
	}

	if (toep->flags & TPF_TX_SUSPENDED &&
	    toep->tx_credits >= toep->tx_total / 4) {
#ifdef VERBOSE_TRACES
		CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__,
		    tid);
#endif
		toep->flags &= ~TPF_TX_SUSPENDED;
		CURVNET_SET(toep->vnet);
		if (toep->ulp_mode == ULP_MODE_ISCSI)
			t4_push_pdus(sc, toep, plen);
		else
			t4_push_frames(sc, toep, plen);
		CURVNET_RESTORE();
	} else if (plen > 0) {
		struct sockbuf *sb = &so->so_snd;
		int sbu;

		SOCKBUF_LOCK(sb);
		sbu = sbused(sb);
		if (toep->ulp_mode == ULP_MODE_ISCSI) {

			if (__predict_false(sbu > 0)) {
				/*
				 * The data transmitted before the tid's ULP
				 * mode changed to ISCSI is still in so_snd.
				 * Incoming credits should account for so_snd
				 * first.
				 */
				sbdrop_locked(sb, min(sbu, plen));
				plen -= min(sbu, plen);
			}
			sowwakeup_locked(so);	/* unlocks so_snd */
			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
		} else {
#ifdef VERBOSE_TRACES
			CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__,
			    tid, plen);
#endif
			sbdrop_locked(sb, plen);
			if (!TAILQ_EMPTY(&toep->aiotx_jobq))
				t4_aiotx_queue_toep(toep);
			sowwakeup_locked(so);	/* unlocks so_snd */
		}
		SOCKBUF_UNLOCK_ASSERT(sb);
	}

	INP_WUNLOCK(inp);

	return (0);
}

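/*
 * Handler for CPL_SET_TCB_RPL.  The only replies currently requested
 * are those for DDP buffer updates (handled by handle_ddp_tcb_rpl);
 * any other reply is unexpected and is flagged below.
 */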
int
do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_SET_TCB_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	MPASS(iq != &sc->sge.fwq);

	toep = lookup_tid(sc, tid);
	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		handle_ddp_tcb_rpl(toep, cpl);
		return (0);
	}

	/*
	 * TOM and/or other ULPs don't request replies for CPL_SET_TCB or
	 * CPL_SET_TCB_FIELD requests.  This can easily change and when it
	 * does the dispatch code will go here.
	 */
#ifdef INVARIANTS
	panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__,
	    tid, iq);
#else
	log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n",
	    __func__, tid, iq);
#endif

	return (0);
}

void
t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, int tid,
    uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie, int iqid)
{
	struct wrqe *wr;
	struct cpl_set_tcb_field *req;

	MPASS((cookie & ~M_COOKIE) == 0);
	MPASS((iqid & ~M_QUEUENO) == 0);

	wr = alloc_wrqe(sizeof(*req), wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, tid);
	req->reply_ctrl = htobe16(V_QUEUENO(iqid));
	if (reply == 0)
		req->reply_ctrl |= htobe16(F_NO_REPLY);
	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
	req->mask = htobe64(mask);
	req->val = htobe64(val);

	t4_wrq_tx(sc, wr);
}

void
t4_init_cpl_io_handlers(void)
{

	t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack);
}

void
t4_uninit_cpl_io_handlers(void)
{

	t4_register_cpl_handler(CPL_PEER_CLOSE, NULL);
	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL);
	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL);
	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, NULL);
	t4_register_cpl_handler(CPL_RX_DATA, NULL);
	t4_register_cpl_handler(CPL_FW4_ACK, NULL);
}

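/*
 * Zero-copy AIO transmit ("aiotx") support.  When the tt.tx_zcopy knob
 * is enabled, aio_write(2) requests on a TOE socket are serviced by
 * wiring the user buffer and queueing it to the send buffer as
 * external-storage mbufs instead of copying the data into the kernel.
 * The code below provides the buffer wiring, the per-tid job queue and
 * its cancellation hook, and the task that drains the queue.
 */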
/*
 * Use the 'backend3' field in AIO jobs to store the amount of data
 * sent by the AIO job so far and the 'backend4' field to hold an
 * error that should be reported when the job is completed.
 */
#define aio_sent	backend3
#define aio_error	backend4

#define jobtotid(job)							\
	(((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)

static void
free_aiotx_buffer(struct aiotx_buffer *ab)
{
	struct kaiocb *job;
	long status;
	int error;

	if (refcount_release(&ab->refcount) == 0)
		return;

	job = ab->job;
	error = job->aio_error;
	status = job->aio_sent;
	vm_page_unhold_pages(ab->ps.pages, ab->ps.npages);
	free(ab, M_CXGBE);
#ifdef VERBOSE_TRACES
	CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
	    jobtotid(job), job, status, error);
#endif
	if (error == ECANCELED && status != 0)
		error = 0;
	if (error == ECANCELED)
		aio_cancel(job);
	else if (error)
		aio_complete(job, -1, error);
	else
		aio_complete(job, status, 0);
}

static void
t4_aiotx_mbuf_free(struct mbuf *m, void *buffer, void *arg)
{
	struct aiotx_buffer *ab = buffer;

#ifdef VERBOSE_TRACES
	CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
	    m->m_len, jobtotid(ab->job));
#endif
	free_aiotx_buffer(ab);
}

/*
 * Hold the buffer backing an AIO request and return an AIO transmit
 * buffer.
 */
static int
hold_aio(struct kaiocb *job)
{
	struct aiotx_buffer *ab;
	struct vmspace *vm;
	vm_map_t map;
	vm_offset_t start, end, pgoff;
	int n;

	MPASS(job->backend1 == NULL);

	/*
	 * The AIO subsystem will cancel and drain all requests before
	 * permitting a process to exit or exec, so p_vmspace should
	 * be stable here.
	 */
	vm = job->userproc->p_vmspace;
	map = &vm->vm_map;
	start = (uintptr_t)job->uaiocb.aio_buf;
	pgoff = start & PAGE_MASK;
	end = round_page(start + job->uaiocb.aio_nbytes);
	start = trunc_page(start);
	n = atop(end - start);

	ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK |
	    M_ZERO);
	refcount_init(&ab->refcount, 1);
	ab->ps.pages = (vm_page_t *)(ab + 1);
	ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start,
	    VM_PROT_WRITE, ab->ps.pages, n);
	if (ab->ps.npages < 0) {
		free(ab, M_CXGBE);
		return (EFAULT);
	}

	KASSERT(ab->ps.npages == n,
	    ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n));

	ab->ps.offset = pgoff;
	ab->ps.len = job->uaiocb.aio_nbytes;
	ab->job = job;
	job->backend1 = ab;
#ifdef VERBOSE_TRACES
	CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d",
	    __func__, jobtotid(job), &ab->ps, job, ab->ps.npages);
#endif
	return (0);
}

static void
t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
{
	struct adapter *sc;
	struct sockbuf *sb;
	struct file *fp;
	struct aiotx_buffer *ab;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct mbuf *m;
	int error;
	bool moretocome, sendmore;

	sc = td_adapter(toep->td);
	sb = &so->so_snd;
	SOCKBUF_UNLOCK(sb);
	fp = job->fd_file;
	ab = job->backend1;
	m = NULL;

#ifdef MAC
	error = mac_socket_check_send(fp->f_cred, so);
	if (error != 0)
		goto out;
#endif

	if (ab == NULL) {
		error = hold_aio(job);
		if (error != 0)
			goto out;
		ab = job->backend1;
	}

	/* Inline sosend_generic(). */
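
	/*
	 * Rather than copying the user's data into the socket buffer the
	 * way sosend_generic() would, the loop below locks so_snd and
	 * carves the wired pages into external-storage mbufs, at most one
	 * sndbuf's worth per pass, requeueing the job if the socket buffer
	 * runs out of room.
	 */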

	job->msgsnd = 1;

	error = sblock(sb, SBL_WAIT);
	MPASS(error == 0);

sendanother:
	m = m_get(M_WAITOK, MT_DATA);

	SOCKBUF_LOCK(sb);
	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		if ((so->so_options & SO_NOSIGPIPE) == 0) {
			PROC_LOCK(job->userproc);
			kern_psignal(job->userproc, SIGPIPE);
			PROC_UNLOCK(job->userproc);
		}
		error = EPIPE;
		goto out;
	}
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		goto out;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		error = ENOTCONN;
		goto out;
	}
	if (sbspace(sb) < sb->sb_lowat) {
		MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));

		/*
		 * Don't block if there is too little room in the socket
		 * buffer.  Instead, requeue the request.
		 */
		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
			SOCKBUF_UNLOCK(sb);
			sbunlock(sb);
			error = ECANCELED;
			goto out;
		}
		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		goto out;
	}

	/*
	 * Write as much data as the socket permits, but no more than
	 * a single sndbuf at a time.
	 */
	m->m_len = sbspace(sb);
	if (m->m_len > ab->ps.len - job->aio_sent) {
		m->m_len = ab->ps.len - job->aio_sent;
		moretocome = false;
	} else
		moretocome = true;
	if (m->m_len > sc->tt.sndbuf) {
		m->m_len = sc->tt.sndbuf;
		sendmore = true;
	} else
		sendmore = false;

	if (!TAILQ_EMPTY(&toep->aiotx_jobq))
		moretocome = true;
	SOCKBUF_UNLOCK(sb);
	MPASS(m->m_len != 0);

	/* Inlined tcp_usr_send(). */
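
	/*
	 * Append the mbuf built above to so_snd and invoke the TCP output
	 * routine directly under the inpcb lock, the way tcp_usr_send()
	 * would.  TF_MORETOCOME is set around the call when more data is
	 * still pending, so the stack can hold off pushing a partial
	 * buffer.
	 */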

	inp = toep->inp;
	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		INP_WUNLOCK(inp);
		sbunlock(sb);
		error = ECONNRESET;
		goto out;
	}

	refcount_acquire(&ab->refcount);
	m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab,
	    (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV);
	m->m_ext.ext_flags |= EXT_FLAG_AIOTX;
	job->aio_sent += m->m_len;

	sbappendstream(sb, m, 0);
	m = NULL;

	if (!(inp->inp_flags & INP_DROPPED)) {
		tp = intotcpcb(inp);
		if (moretocome)
			tp->t_flags |= TF_MORETOCOME;
		error = tp->t_fb->tfb_tcp_output(tp);
		if (moretocome)
			tp->t_flags &= ~TF_MORETOCOME;
	}

	INP_WUNLOCK(inp);
	if (sendmore)
		goto sendanother;
	sbunlock(sb);

	if (error)
		goto out;

	/*
	 * If this is a blocking socket and the request has not been
	 * fully completed, requeue it until the socket is ready
	 * again.
	 */
	if (job->aio_sent < job->uaiocb.aio_nbytes &&
	    !(so->so_state & SS_NBIO)) {
		SOCKBUF_LOCK(sb);
		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
			SOCKBUF_UNLOCK(sb);
			error = ECANCELED;
			goto out;
		}
		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
		return;
	}

	/*
	 * If the request will not be requeued, drop a reference on
	 * the aiotx buffer.  Any mbufs in flight should still
	 * contain a reference, but this drops the reference that the
	 * job owns while it is waiting to queue mbufs to the socket.
	 */
	free_aiotx_buffer(ab);

out:
	if (error) {
		if (ab != NULL) {
			job->aio_error = error;
			free_aiotx_buffer(ab);
		} else {
			MPASS(job->aio_sent == 0);
			aio_complete(job, -1, error);
		}
	}
	if (m != NULL)
		m_free(m);
	SOCKBUF_LOCK(sb);
}

static void
t4_aiotx_task(void *context, int pending)
{
	struct toepcb *toep = context;
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct kaiocb *job;

	CURVNET_SET(toep->vnet);
	SOCKBUF_LOCK(&so->so_snd);
	while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
		job = TAILQ_FIRST(&toep->aiotx_jobq);
		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
		if (!aio_clear_cancel_function(job))
			continue;

		t4_aiotx_process_job(toep, so, job);
	}
	toep->aiotx_task_active = false;
	SOCKBUF_UNLOCK(&so->so_snd);
	CURVNET_RESTORE();

	free_toepcb(toep);
}

static void
t4_aiotx_queue_toep(struct toepcb *toep)
{

	SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd);
#ifdef VERBOSE_TRACES
	CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s",
	    __func__, toep->tid, toep->aiotx_task_active ? "true" : "false");
#endif
	if (toep->aiotx_task_active)
		return;
	toep->aiotx_task_active = true;
	hold_toepcb(toep);
	soaio_enqueue(&toep->aiotx_task);
}

static void
t4_aiotx_cancel(struct kaiocb *job)
{
	struct aiotx_buffer *ab;
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct toepcb *toep;

	so = job->fd_file->f_data;
	tp = so_sototcpcb(so);
	toep = tp->t_toe;
	MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE);
	sb = &so->so_snd;

	SOCKBUF_LOCK(sb);
	if (!aio_cancel_cleared(job))
		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
	SOCKBUF_UNLOCK(sb);

	ab = job->backend1;
	if (ab != NULL)
		free_aiotx_buffer(ab);
	else
		aio_cancel(job);
}

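/*
 * Entry point for queueing an aio_write(2) job on a TOE socket: the job
 * goes on the tid's aiotx queue and the transmit task is kicked if the
 * socket is currently writable.  Requests this path does not handle
 * (reads, or writes with the tx_zcopy knob off) are declined with
 * EOPNOTSUPP so the caller can fall back to the normal socket AIO path.
 */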
int
t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct adapter *sc = td_adapter(toep->td);

	/* This only handles writes. */
	if (job->uaiocb.aio_lio_opcode != LIO_WRITE)
		return (EOPNOTSUPP);

	if (!sc->tt.tx_zcopy)
		return (EOPNOTSUPP);

	SOCKBUF_LOCK(&so->so_snd);
#ifdef VERBOSE_TRACES
	CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job);
#endif
	if (!aio_set_cancel_function(job, t4_aiotx_cancel))
		panic("new job was cancelled");
	TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list);
	if (sowriteable(so))
		t4_aiotx_queue_toep(toep);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
}

void
aiotx_init_toep(struct toepcb *toep)
{

	TAILQ_INIT(&toep->aiotx_jobq);
	TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep);
}
#endif