t4_cpl_io.c revision 276597
/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26237263Snp */ 27237263Snp 28237263Snp#include <sys/cdefs.h> 29237263Snp__FBSDID("$FreeBSD: head/sys/dev/cxgbe/tom/t4_cpl_io.c 276597 2015-01-03 00:09:21Z np $"); 30237263Snp 31237263Snp#include "opt_inet.h" 32237263Snp 33237263Snp#ifdef TCP_OFFLOAD 34237263Snp#include <sys/param.h> 35237263Snp#include <sys/types.h> 36237263Snp#include <sys/kernel.h> 37237263Snp#include <sys/ktr.h> 38237263Snp#include <sys/module.h> 39237263Snp#include <sys/protosw.h> 40237263Snp#include <sys/domain.h> 41237263Snp#include <sys/socket.h> 42237263Snp#include <sys/socketvar.h> 43237263Snp#include <sys/sglist.h> 44237263Snp#include <netinet/in.h> 45237263Snp#include <netinet/in_pcb.h> 46237263Snp#include <netinet/ip.h> 47276574Snp#include <netinet/ip6.h> 48237263Snp#include <netinet/tcp_var.h> 49237263Snp#define TCPSTATES 50237263Snp#include <netinet/tcp_fsm.h> 51237263Snp#include <netinet/tcp_seq.h> 52237263Snp#include <netinet/toecore.h> 53237263Snp 54237263Snp#include "common/common.h" 55237263Snp#include "common/t4_msg.h" 56237263Snp#include "common/t4_regs.h" 57239344Snp#include "common/t4_tcb.h" 58237263Snp#include "tom/t4_tom_l2t.h" 59237263Snp#include "tom/t4_tom.h" 60237263Snp 61237263SnpVNET_DECLARE(int, tcp_do_autosndbuf); 62237263Snp#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) 63237263SnpVNET_DECLARE(int, tcp_autosndbuf_inc); 64237263Snp#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) 65237263SnpVNET_DECLARE(int, tcp_autosndbuf_max); 66237263Snp#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) 67237263SnpVNET_DECLARE(int, tcp_do_autorcvbuf); 68237263Snp#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) 69237263SnpVNET_DECLARE(int, tcp_autorcvbuf_inc); 70237263Snp#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) 71237263SnpVNET_DECLARE(int, tcp_autorcvbuf_max); 72237263Snp#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) 73237263Snp 74269076Snp/* 75269076Snp * For ULP connections HW may add headers, e.g., for digests, that aren't part 76269076Snp * 
of the messages sent by the host but that are part of the TCP payload and 77269076Snp * therefore consume TCP sequence space. Tx connection parameters that 78269076Snp * operate in TCP sequence space are affected by the HW additions and need to 79269076Snp * compensate for them to accurately track TCP sequence numbers. This array 80269076Snp * contains the compensating extra lengths for ULP packets. It is indexed by 81269076Snp * a packet's ULP submode. 82269076Snp */ 83269076Snpconst unsigned int t4_ulp_extra_len[] = {0, 4, 4, 8}; 84269076Snp 85269076Snp/* 86269076Snp * Return the length of any HW additions that will be made to a Tx packet. 87269076Snp * Such additions can happen for some types of ULP packets. 88269076Snp */ 89269076Snpstatic inline unsigned int 90269076Snpulp_extra_len(struct mbuf *m, int *ulp_mode) 91269076Snp{ 92269076Snp struct m_tag *mtag; 93269076Snp 94269076Snp if ((mtag = m_tag_find(m, CXGBE_ISCSI_MBUF_TAG, NULL)) == NULL) 95269076Snp return (0); 96269076Snp *ulp_mode = *((int *)(mtag + 1)); 97269076Snp 98269076Snp return (t4_ulp_extra_len[*ulp_mode & 3]); 99269076Snp} 100269076Snp 101237263Snpvoid 102237263Snpsend_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) 103237263Snp{ 104241626Snp struct wrqe *wr; 105241626Snp struct fw_flowc_wr *flowc; 106241642Snp unsigned int nparams = ftxp ? 
8 : 6, flowclen; 107237263Snp struct port_info *pi = toep->port; 108237263Snp struct adapter *sc = pi->adapter; 109237263Snp unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; 110237263Snp struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 111237263Snp 112239514Snp KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), 113237263Snp ("%s: flowc for tid %u sent already", __func__, toep->tid)); 114237263Snp 115237263Snp flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 116237263Snp 117248925Snp wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); 118237263Snp if (wr == NULL) { 119237263Snp /* XXX */ 120237263Snp panic("%s: allocation failure.", __func__); 121237263Snp } 122237263Snp flowc = wrtod(wr); 123237263Snp memset(flowc, 0, wr->wr_len); 124237263Snp 125237263Snp flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 126237263Snp V_FW_FLOWC_WR_NPARAMS(nparams)); 127237263Snp flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 128237263Snp V_FW_WR_FLOWID(toep->tid)); 129237263Snp 130237263Snp flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 131241626Snp flowc->mnemval[0].val = htobe32(pfvf); 132241626Snp flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 133241626Snp flowc->mnemval[1].val = htobe32(pi->tx_chan); 134241626Snp flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 135241626Snp flowc->mnemval[2].val = htobe32(pi->tx_chan); 136241626Snp flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 137241626Snp flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id); 138237263Snp if (ftxp) { 139237263Snp uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); 140237263Snp 141237263Snp flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; 142237263Snp flowc->mnemval[4].val = htobe32(ftxp->snd_nxt); 143237263Snp flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; 144237263Snp flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt); 145237263Snp flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; 146237263Snp flowc->mnemval[6].val = 
htobe32(sndbuf); 147237263Snp flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 148237263Snp flowc->mnemval[7].val = htobe32(ftxp->mss); 149276570Snp 150276570Snp CTR6(KTR_CXGBE, 151276570Snp "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", 152276570Snp __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, 153276570Snp ftxp->rcv_nxt); 154241642Snp } else { 155241642Snp flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; 156241642Snp flowc->mnemval[4].val = htobe32(512); 157241642Snp flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; 158241642Snp flowc->mnemval[5].val = htobe32(512); 159276570Snp 160276570Snp CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); 161237263Snp } 162237263Snp 163237263Snp txsd->tx_credits = howmany(flowclen, 16); 164237263Snp txsd->plen = 0; 165237263Snp KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, 166237263Snp ("%s: not enough credits (%d)", __func__, toep->tx_credits)); 167237263Snp toep->tx_credits -= txsd->tx_credits; 168237263Snp if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 169237263Snp toep->txsd_pidx = 0; 170237263Snp toep->txsd_avail--; 171237263Snp 172239514Snp toep->flags |= TPF_FLOWC_WR_SENT; 173237263Snp t4_wrq_tx(sc, wr); 174237263Snp} 175237263Snp 176237263Snpvoid 177237263Snpsend_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) 178237263Snp{ 179237263Snp struct wrqe *wr; 180237263Snp struct cpl_abort_req *req; 181237263Snp int tid = toep->tid; 182237263Snp struct inpcb *inp = toep->inp; 183237263Snp struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ 184237263Snp 185237263Snp INP_WLOCK_ASSERT(inp); 186237263Snp 187237263Snp CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", 188237263Snp __func__, toep->tid, 189237263Snp inp->inp_flags & INP_DROPPED ? "inp dropped" : 190237263Snp tcpstates[tp->t_state], 191237263Snp toep->flags, inp->inp_flags, 192239514Snp toep->flags & TPF_ABORT_SHUTDOWN ? 
193237263Snp " (abort already in progress)" : ""); 194237263Snp 195239514Snp if (toep->flags & TPF_ABORT_SHUTDOWN) 196237263Snp return; /* abort already in progress */ 197237263Snp 198239514Snp toep->flags |= TPF_ABORT_SHUTDOWN; 199237263Snp 200239514Snp KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 201237263Snp ("%s: flowc_wr not sent for tid %d.", __func__, tid)); 202237263Snp 203237263Snp wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 204237263Snp if (wr == NULL) { 205237263Snp /* XXX */ 206237263Snp panic("%s: allocation failure.", __func__); 207237263Snp } 208237263Snp req = wrtod(wr); 209237263Snp 210237263Snp INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); 211237263Snp if (inp->inp_flags & INP_DROPPED) 212237263Snp req->rsvd0 = htobe32(snd_nxt); 213237263Snp else 214237263Snp req->rsvd0 = htobe32(tp->snd_nxt); 215239514Snp req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); 216237263Snp req->cmd = CPL_ABORT_SEND_RST; 217237263Snp 218237263Snp /* 219237263Snp * XXX: What's the correct way to tell that the inp hasn't been detached 220237263Snp * from its socket? Should I even be flushing the snd buffer here? 221237263Snp */ 222237263Snp if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 223237263Snp struct socket *so = inp->inp_socket; 224237263Snp 225237263Snp if (so != NULL) /* because I'm not sure. See comment above */ 226237263Snp sbflush(&so->so_snd); 227237263Snp } 228237263Snp 229237263Snp t4_l2t_send(sc, wr, toep->l2te); 230237263Snp} 231237263Snp 232237263Snp/* 233237263Snp * Called when a connection is established to translate the TCP options 234237263Snp * reported by HW to FreeBSD's native format. 
235237263Snp */ 236237263Snpstatic void 237237263Snpassign_rxopt(struct tcpcb *tp, unsigned int opt) 238237263Snp{ 239237263Snp struct toepcb *toep = tp->t_toe; 240276574Snp struct inpcb *inp = tp->t_inpcb; 241237263Snp struct adapter *sc = td_adapter(toep->td); 242276574Snp int n; 243237263Snp 244276574Snp INP_LOCK_ASSERT(inp); 245237263Snp 246276574Snp if (inp->inp_inc.inc_flags & INC_ISIPV6) 247276574Snp n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 248276574Snp else 249276574Snp n = sizeof(struct ip) + sizeof(struct tcphdr); 250276574Snp tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; 251237263Snp 252276574Snp CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid, 253276574Snp G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]); 254276574Snp 255237263Snp if (G_TCPOPT_TSTAMP(opt)) { 256237263Snp tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ 257237263Snp tp->ts_recent = 0; /* hmmm */ 258237263Snp tp->ts_recent_age = tcp_ts_getticks(); 259237263Snp tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; 260237263Snp } 261237263Snp 262237263Snp if (G_TCPOPT_SACK(opt)) 263237263Snp tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ 264237263Snp else 265237263Snp tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ 266237263Snp 267237263Snp if (G_TCPOPT_WSCALE_OK(opt)) 268237263Snp tp->t_flags |= TF_RCVD_SCALE; 269237263Snp 270237263Snp /* Doing window scaling? */ 271237263Snp if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == 272237263Snp (TF_RCVD_SCALE | TF_REQ_SCALE)) { 273237263Snp tp->rcv_scale = tp->request_r_scale; 274237263Snp tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); 275237263Snp } 276237263Snp} 277237263Snp 278237263Snp/* 279237263Snp * Completes some final bits of initialization for just established connections 280237263Snp * and changes their state to TCPS_ESTABLISHED. 281237263Snp * 282237263Snp * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. 
283237263Snp */ 284237263Snpvoid 285237263Snpmake_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, 286237263Snp uint16_t opt) 287237263Snp{ 288237263Snp struct inpcb *inp = toep->inp; 289237263Snp struct socket *so = inp->inp_socket; 290237263Snp struct tcpcb *tp = intotcpcb(inp); 291237263Snp long bufsize; 292237263Snp uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ 293237263Snp uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ 294237263Snp uint16_t tcpopt = be16toh(opt); 295237263Snp struct flowc_tx_params ftxp; 296237263Snp 297237263Snp INP_WLOCK_ASSERT(inp); 298237263Snp KASSERT(tp->t_state == TCPS_SYN_SENT || 299237263Snp tp->t_state == TCPS_SYN_RECEIVED, 300237263Snp ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); 301237263Snp 302237263Snp CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p", 303237263Snp __func__, toep->tid, toep, inp); 304237263Snp 305237263Snp tp->t_state = TCPS_ESTABLISHED; 306237263Snp tp->t_starttime = ticks; 307237263Snp TCPSTAT_INC(tcps_connects); 308237263Snp 309237263Snp tp->irs = irs; 310237263Snp tcp_rcvseqinit(tp); 311237263Snp tp->rcv_wnd = toep->rx_credits << 10; 312237263Snp tp->rcv_adv += tp->rcv_wnd; 313237263Snp tp->last_ack_sent = tp->rcv_nxt; 314237263Snp 315237263Snp /* 316237263Snp * If we were unable to send all rx credits via opt0, save the remainder 317237263Snp * in rx_credits so that they can be handed over with the next credit 318237263Snp * update. 
319237263Snp */ 320237263Snp SOCKBUF_LOCK(&so->so_rcv); 321237263Snp bufsize = select_rcv_wnd(so); 322237263Snp SOCKBUF_UNLOCK(&so->so_rcv); 323237263Snp toep->rx_credits = bufsize - tp->rcv_wnd; 324237263Snp 325237263Snp tp->iss = iss; 326237263Snp tcp_sendseqinit(tp); 327237263Snp tp->snd_una = iss + 1; 328237263Snp tp->snd_nxt = iss + 1; 329237263Snp tp->snd_max = iss + 1; 330237263Snp 331237263Snp assign_rxopt(tp, tcpopt); 332237263Snp 333237263Snp SOCKBUF_LOCK(&so->so_snd); 334237263Snp if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) 335237263Snp bufsize = V_tcp_autosndbuf_max; 336237263Snp else 337237263Snp bufsize = sbspace(&so->so_snd); 338237263Snp SOCKBUF_UNLOCK(&so->so_snd); 339237263Snp 340237263Snp ftxp.snd_nxt = tp->snd_nxt; 341237263Snp ftxp.rcv_nxt = tp->rcv_nxt; 342237263Snp ftxp.snd_space = bufsize; 343237263Snp ftxp.mss = tp->t_maxseg; 344237263Snp send_flowc_wr(toep, &ftxp); 345237263Snp 346237263Snp soisconnected(so); 347237263Snp} 348237263Snp 349237263Snpstatic int 350239344Snpsend_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) 351237263Snp{ 352237263Snp struct wrqe *wr; 353237263Snp struct cpl_rx_data_ack *req; 354237263Snp uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 355237263Snp 356239344Snp KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); 357239344Snp 358237263Snp wr = alloc_wrqe(sizeof(*req), toep->ctrlq); 359237263Snp if (wr == NULL) 360237263Snp return (0); 361237263Snp req = wrtod(wr); 362237263Snp 363237263Snp INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); 364237263Snp req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); 365237263Snp 366237263Snp t4_wrq_tx(sc, wr); 367237263Snp return (credits); 368237263Snp} 369237263Snp 370237263Snpvoid 371237263Snpt4_rcvd(struct toedev *tod, struct tcpcb *tp) 372237263Snp{ 373237263Snp struct adapter *sc = tod->tod_softc; 374237263Snp struct inpcb *inp = tp->t_inpcb; 375237263Snp struct socket *so = inp->inp_socket; 376239344Snp 
struct sockbuf *sb = &so->so_rcv; 377237263Snp struct toepcb *toep = tp->t_toe; 378239344Snp int credits; 379237263Snp 380237263Snp INP_WLOCK_ASSERT(inp); 381237263Snp 382239344Snp SOCKBUF_LOCK(sb); 383274421Sglebius KASSERT(toep->sb_cc >= sbused(sb), 384239344Snp ("%s: sb %p has more data (%d) than last time (%d).", 385274421Sglebius __func__, sb, sbused(sb), toep->sb_cc)); 386269076Snp if (toep->ulp_mode == ULP_MODE_ISCSI) { 387269076Snp toep->rx_credits += toep->sb_cc; 388269076Snp toep->sb_cc = 0; 389269076Snp } else { 390274421Sglebius toep->rx_credits += toep->sb_cc - sbused(sb); 391274421Sglebius toep->sb_cc = sbused(sb); 392269076Snp } 393239344Snp credits = toep->rx_credits; 394239344Snp SOCKBUF_UNLOCK(sb); 395237263Snp 396239344Snp if (credits > 0 && 397239344Snp (credits + 16384 >= tp->rcv_wnd || credits >= 15 * 1024)) { 398237263Snp 399239344Snp credits = send_rx_credits(sc, toep, credits); 400239344Snp SOCKBUF_LOCK(sb); 401237263Snp toep->rx_credits -= credits; 402239344Snp SOCKBUF_UNLOCK(sb); 403237263Snp tp->rcv_wnd += credits; 404237263Snp tp->rcv_adv += credits; 405237263Snp } 406237263Snp} 407237263Snp 408237263Snp/* 409237263Snp * Close a connection by sending a CPL_CLOSE_CON_REQ message. 410237263Snp */ 411237263Snpstatic int 412237263Snpclose_conn(struct adapter *sc, struct toepcb *toep) 413237263Snp{ 414237263Snp struct wrqe *wr; 415237263Snp struct cpl_close_con_req *req; 416237263Snp unsigned int tid = toep->tid; 417237263Snp 418237263Snp CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, 419239514Snp toep->flags & TPF_FIN_SENT ? 
", IGNORED" : ""); 420237263Snp 421239514Snp if (toep->flags & TPF_FIN_SENT) 422237263Snp return (0); 423237263Snp 424239514Snp KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 425237263Snp ("%s: flowc_wr not sent for tid %u.", __func__, tid)); 426237263Snp 427237263Snp wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 428237263Snp if (wr == NULL) { 429237263Snp /* XXX */ 430237263Snp panic("%s: allocation failure.", __func__); 431237263Snp } 432237263Snp req = wrtod(wr); 433237263Snp 434237263Snp req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | 435237263Snp V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); 436237263Snp req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | 437237263Snp V_FW_WR_FLOWID(tid)); 438237263Snp req->wr.wr_lo = cpu_to_be64(0); 439237263Snp OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 440237263Snp req->rsvd = 0; 441237263Snp 442239514Snp toep->flags |= TPF_FIN_SENT; 443239514Snp toep->flags &= ~TPF_SEND_FIN; 444237263Snp t4_l2t_send(sc, wr, toep->l2te); 445237263Snp 446237263Snp return (0); 447237263Snp} 448237263Snp 449237263Snp#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) 450237263Snp#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) 451237263Snp 452237263Snp/* Maximum amount of immediate data we could stuff in a WR */ 453237263Snpstatic inline int 454237263Snpmax_imm_payload(int tx_credits) 455237263Snp{ 456237263Snp const int n = 2; /* Use only up to 2 desc for imm. 
data WR */ 457237263Snp 458237263Snp KASSERT(tx_credits >= 0 && 459237263Snp tx_credits <= MAX_OFLD_TX_CREDITS, 460237263Snp ("%s: %d credits", __func__, tx_credits)); 461237263Snp 462237263Snp if (tx_credits < MIN_OFLD_TX_CREDITS) 463237263Snp return (0); 464237263Snp 465237263Snp if (tx_credits >= (n * EQ_ESIZE) / 16) 466237263Snp return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); 467237263Snp else 468237263Snp return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); 469237263Snp} 470237263Snp 471237263Snp/* Maximum number of SGL entries we could stuff in a WR */ 472237263Snpstatic inline int 473237263Snpmax_dsgl_nsegs(int tx_credits) 474237263Snp{ 475237263Snp int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ 476237263Snp int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; 477237263Snp 478237263Snp KASSERT(tx_credits >= 0 && 479237263Snp tx_credits <= MAX_OFLD_TX_CREDITS, 480237263Snp ("%s: %d credits", __func__, tx_credits)); 481237263Snp 482237263Snp if (tx_credits < MIN_OFLD_TX_CREDITS) 483237263Snp return (0); 484237263Snp 485237263Snp nseg += 2 * (sge_pair_credits * 16 / 24); 486237263Snp if ((sge_pair_credits * 16) % 24 == 16) 487237263Snp nseg++; 488237263Snp 489237263Snp return (nseg); 490237263Snp} 491237263Snp 492237263Snpstatic inline void 493237263Snpwrite_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, 494276597Snp unsigned int plen, uint8_t credits, int shove, int ulp_mode, int txalign) 495237263Snp{ 496237263Snp struct fw_ofld_tx_data_wr *txwr = dst; 497269076Snp unsigned int wr_ulp_mode; 498237263Snp 499237263Snp txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | 500255411Snp V_FW_WR_IMMDLEN(immdlen)); 501237263Snp txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | 502237263Snp V_FW_WR_LEN16(credits)); 503269076Snp 504269076Snp /* for iscsi, the mode & submode setting is per-packet */ 505269076Snp if (toep->ulp_mode == ULP_MODE_ISCSI) 506269076Snp wr_ulp_mode = 
V_FW_OFLD_TX_DATA_WR_ULPMODE(ulp_mode >> 4) | 507269076Snp V_FW_OFLD_TX_DATA_WR_ULPSUBMODE(ulp_mode & 3); 508269076Snp else 509269076Snp wr_ulp_mode = V_FW_OFLD_TX_DATA_WR_ULPMODE(toep->ulp_mode); 510269076Snp 511256459Snp txwr->lsodisable_to_proxy = 512269076Snp htobe32(wr_ulp_mode | 513237263Snp V_FW_OFLD_TX_DATA_WR_URGENT(0) | /* XXX */ 514237263Snp V_FW_OFLD_TX_DATA_WR_SHOVE(shove)); 515237263Snp txwr->plen = htobe32(plen); 516276597Snp 517276597Snp if (txalign > 0) { 518276597Snp struct tcpcb *tp = intotcpcb(toep->inp); 519276597Snp 520276597Snp if (plen < 2 * tp->t_maxseg || is_10G_port(toep->port)) 521276597Snp txwr->lsodisable_to_proxy |= 522276597Snp htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); 523276597Snp else 524276597Snp txwr->lsodisable_to_proxy |= 525276597Snp htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | 526276597Snp (tp->t_flags & TF_NODELAY ? 0 : 527276597Snp F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); 528276597Snp } 529237263Snp} 530237263Snp 531237263Snp/* 532237263Snp * Generate a DSGL from a starting mbuf. The total number of segments and the 533237263Snp * maximum segments in any one mbuf are provided. 
534237263Snp */ 535237263Snpstatic void 536237263Snpwrite_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) 537237263Snp{ 538237263Snp struct mbuf *m; 539237263Snp struct ulptx_sgl *usgl = dst; 540237263Snp int i, j, rc; 541237263Snp struct sglist sg; 542237263Snp struct sglist_seg segs[n]; 543237263Snp 544237263Snp KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); 545237263Snp 546237263Snp sglist_init(&sg, n, segs); 547237263Snp usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 548237263Snp V_ULPTX_NSGE(nsegs)); 549237263Snp 550237263Snp i = -1; 551237263Snp for (m = start; m != stop; m = m->m_next) { 552237263Snp rc = sglist_append(&sg, mtod(m, void *), m->m_len); 553237263Snp if (__predict_false(rc != 0)) 554237263Snp panic("%s: sglist_append %d", __func__, rc); 555237263Snp 556237263Snp for (j = 0; j < sg.sg_nseg; i++, j++) { 557237263Snp if (i < 0) { 558237263Snp usgl->len0 = htobe32(segs[j].ss_len); 559237263Snp usgl->addr0 = htobe64(segs[j].ss_paddr); 560237263Snp } else { 561237263Snp usgl->sge[i / 2].len[i & 1] = 562237263Snp htobe32(segs[j].ss_len); 563237263Snp usgl->sge[i / 2].addr[i & 1] = 564237263Snp htobe64(segs[j].ss_paddr); 565237263Snp } 566237263Snp#ifdef INVARIANTS 567237263Snp nsegs--; 568237263Snp#endif 569237263Snp } 570237263Snp sglist_reset(&sg); 571237263Snp } 572237263Snp if (i & 1) 573237263Snp usgl->sge[i / 2].len[1] = htobe32(0); 574237263Snp KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", 575237263Snp __func__, nsegs, start, stop)); 576237263Snp} 577237263Snp 578237263Snp/* 579237263Snp * Max number of SGL entries an offload tx work request can have. This is 41 580237263Snp * (1 + 40) for a full 512B work request. 581237263Snp * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) 582237263Snp */ 583237263Snp#define OFLD_SGL_LEN (41) 584237263Snp 585237263Snp/* 586237263Snp * Send data and/or a FIN to the peer. 
587237263Snp * 588237263Snp * The socket's so_snd buffer consists of a stream of data starting with sb_mb 589237263Snp * and linked together with m_next. sb_sndptr, if set, is the last mbuf that 590237263Snp * was transmitted. 591255411Snp * 592255411Snp * drop indicates the number of bytes that should be dropped from the head of 593255411Snp * the send buffer. It is an optimization that lets do_fw4_ack avoid creating 594255411Snp * contention on the send buffer lock (before this change it used to do 595255411Snp * sowwakeup and then t4_push_frames right after that when recovering from tx 596255411Snp * stalls). When drop is set this function MUST drop the bytes and wake up any 597255411Snp * writers. 598237263Snp */ 599269076Snpvoid 600255411Snpt4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) 601237263Snp{ 602237263Snp struct mbuf *sndptr, *m, *sb_sndptr; 603237263Snp struct fw_ofld_tx_data_wr *txwr; 604237263Snp struct wrqe *wr; 605255411Snp u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 606237263Snp struct inpcb *inp = toep->inp; 607237263Snp struct tcpcb *tp = intotcpcb(inp); 608237263Snp struct socket *so = inp->inp_socket; 609237263Snp struct sockbuf *sb = &so->so_snd; 610255411Snp int tx_credits, shove, compl, space, sowwakeup; 611237263Snp struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 612237263Snp 613237263Snp INP_WLOCK_ASSERT(inp); 614239514Snp KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 615237263Snp ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 616237263Snp 617255005Snp KASSERT(toep->ulp_mode == ULP_MODE_NONE || 618255005Snp toep->ulp_mode == ULP_MODE_TCPDDP || 619255005Snp toep->ulp_mode == ULP_MODE_RDMA, 620255005Snp ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 621237263Snp 622237263Snp /* 623237263Snp * This function doesn't resume by itself. Someone else must clear the 624237263Snp * flag and call this function. 
625237263Snp */ 626255411Snp if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 627255411Snp KASSERT(drop == 0, 628255411Snp ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 629237263Snp return; 630255411Snp } 631237263Snp 632237263Snp do { 633237263Snp tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 634237263Snp max_imm = max_imm_payload(tx_credits); 635237263Snp max_nsegs = max_dsgl_nsegs(tx_credits); 636237263Snp 637237263Snp SOCKBUF_LOCK(sb); 638255411Snp sowwakeup = drop; 639255411Snp if (drop) { 640255411Snp sbdrop_locked(sb, drop); 641255411Snp drop = 0; 642255411Snp } 643237263Snp sb_sndptr = sb->sb_sndptr; 644237263Snp sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb; 645237263Snp plen = 0; 646237263Snp nsegs = 0; 647237263Snp max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 648237263Snp for (m = sndptr; m != NULL; m = m->m_next) { 649237263Snp int n = sglist_count(mtod(m, void *), m->m_len); 650237263Snp 651237263Snp nsegs += n; 652237263Snp plen += m->m_len; 653237263Snp 654237263Snp /* This mbuf sent us _over_ the nsegs limit, back out */ 655237263Snp if (plen > max_imm && nsegs > max_nsegs) { 656237263Snp nsegs -= n; 657237263Snp plen -= m->m_len; 658237263Snp if (plen == 0) { 659237263Snp /* Too few credits */ 660239514Snp toep->flags |= TPF_TX_SUSPENDED; 661255411Snp if (sowwakeup) 662255411Snp sowwakeup_locked(so); 663255411Snp else 664255411Snp SOCKBUF_UNLOCK(sb); 665255411Snp SOCKBUF_UNLOCK_ASSERT(sb); 666237263Snp return; 667237263Snp } 668237263Snp break; 669237263Snp } 670237263Snp 671237263Snp if (max_nsegs_1mbuf < n) 672237263Snp max_nsegs_1mbuf = n; 673237263Snp sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ 674237263Snp 675237263Snp /* This mbuf put us right at the max_nsegs limit */ 676237263Snp if (plen > max_imm && nsegs == max_nsegs) { 677237263Snp m = m->m_next; 678237263Snp break; 679237263Snp } 680237263Snp } 681237263Snp 682255411Snp shove = m == NULL && !(tp->t_flags & 
TF_MORETOCOME); 683255411Snp space = sbspace(sb); 684255411Snp 685255411Snp if (space <= sb->sb_hiwat * 3 / 8 && 686255411Snp toep->plen_nocompl + plen >= sb->sb_hiwat / 4) 687255411Snp compl = 1; 688255411Snp else 689255411Snp compl = 0; 690255411Snp 691237263Snp if (sb->sb_flags & SB_AUTOSIZE && 692237263Snp V_tcp_do_autosndbuf && 693237263Snp sb->sb_hiwat < V_tcp_autosndbuf_max && 694255411Snp space < sb->sb_hiwat / 8) { 695237263Snp int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, 696237263Snp V_tcp_autosndbuf_max); 697237263Snp 698237263Snp if (!sbreserve_locked(sb, newsize, so, NULL)) 699237263Snp sb->sb_flags &= ~SB_AUTOSIZE; 700255411Snp else 701255411Snp sowwakeup = 1; /* room available */ 702237263Snp } 703255411Snp if (sowwakeup) 704255411Snp sowwakeup_locked(so); 705255411Snp else 706255411Snp SOCKBUF_UNLOCK(sb); 707255411Snp SOCKBUF_UNLOCK_ASSERT(sb); 708237263Snp 709237263Snp /* nothing to send */ 710237263Snp if (plen == 0) { 711237263Snp KASSERT(m == NULL, 712237263Snp ("%s: nothing to send, but m != NULL", __func__)); 713237263Snp break; 714237263Snp } 715237263Snp 716239514Snp if (__predict_false(toep->flags & TPF_FIN_SENT)) 717237263Snp panic("%s: excess tx.", __func__); 718237263Snp 719237263Snp if (plen <= max_imm) { 720237263Snp 721237263Snp /* Immediate data tx */ 722237263Snp 723248925Snp wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 724237263Snp toep->ofld_txq); 725237263Snp if (wr == NULL) { 726237263Snp /* XXX: how will we recover from this? 
*/ 727239514Snp toep->flags |= TPF_TX_SUSPENDED; 728237263Snp return; 729237263Snp } 730237263Snp txwr = wrtod(wr); 731237263Snp credits = howmany(wr->wr_len, 16); 732276597Snp write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, 733276597Snp sc->tt.tx_align); 734237263Snp m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 735255411Snp nsegs = 0; 736237263Snp } else { 737237263Snp int wr_len; 738237263Snp 739237263Snp /* DSGL tx */ 740237263Snp 741237263Snp wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 742237263Snp ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 743248925Snp wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 744237263Snp if (wr == NULL) { 745237263Snp /* XXX: how will we recover from this? */ 746239514Snp toep->flags |= TPF_TX_SUSPENDED; 747237263Snp return; 748237263Snp } 749237263Snp txwr = wrtod(wr); 750237263Snp credits = howmany(wr_len, 16); 751276597Snp write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, 752276597Snp sc->tt.tx_align); 753237263Snp write_tx_sgl(txwr + 1, sndptr, m, nsegs, 754237263Snp max_nsegs_1mbuf); 755237263Snp if (wr_len & 0xf) { 756237263Snp uint64_t *pad = (uint64_t *) 757237263Snp ((uintptr_t)txwr + wr_len); 758237263Snp *pad = 0; 759237263Snp } 760237263Snp } 761237263Snp 762237263Snp KASSERT(toep->tx_credits >= credits, 763237263Snp ("%s: not enough credits", __func__)); 764237263Snp 765237263Snp toep->tx_credits -= credits; 766255411Snp toep->tx_nocompl += credits; 767255411Snp toep->plen_nocompl += plen; 768255411Snp if (toep->tx_credits <= toep->tx_total * 3 / 8 && 769255411Snp toep->tx_nocompl >= toep->tx_total / 4) 770255411Snp compl = 1; 771237263Snp 772273797Snp if (compl || toep->ulp_mode == ULP_MODE_RDMA) { 773255411Snp txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 774255411Snp toep->tx_nocompl = 0; 775255411Snp toep->plen_nocompl = 0; 776255411Snp } 777255411Snp 778237263Snp tp->snd_nxt += plen; 779237263Snp tp->snd_max += plen; 780237263Snp 781237263Snp SOCKBUF_LOCK(sb); 782237263Snp 
KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); 783237263Snp sb->sb_sndptr = sb_sndptr; 784237263Snp SOCKBUF_UNLOCK(sb); 785237263Snp 786239514Snp toep->flags |= TPF_TX_DATA_SENT; 787255411Snp if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 788255411Snp toep->flags |= TPF_TX_SUSPENDED; 789237263Snp 790237263Snp KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 791237263Snp txsd->plen = plen; 792237263Snp txsd->tx_credits = credits; 793237263Snp txsd++; 794237263Snp if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 795237263Snp toep->txsd_pidx = 0; 796237263Snp txsd = &toep->txsd[0]; 797237263Snp } 798237263Snp toep->txsd_avail--; 799237263Snp 800237263Snp t4_l2t_send(sc, wr, toep->l2te); 801237263Snp } while (m != NULL); 802237263Snp 803237263Snp /* Send a FIN if requested, but only if there's no more data to send */ 804239514Snp if (m == NULL && toep->flags & TPF_SEND_FIN) 805237263Snp close_conn(sc, toep); 806237263Snp} 807237263Snp 808269076Snp/* Send ULP data over TOE using TX_DATA_WR. 
We send whole mbuf at once */ 809269076Snpvoid 810269076Snpt4_ulp_push_frames(struct adapter *sc, struct toepcb *toep, int drop) 811269076Snp{ 812269076Snp struct mbuf *sndptr, *m = NULL; 813269076Snp struct fw_ofld_tx_data_wr *txwr; 814269076Snp struct wrqe *wr; 815269076Snp unsigned int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 816269076Snp struct inpcb *inp = toep->inp; 817269076Snp struct tcpcb *tp; 818269076Snp struct socket *so; 819269076Snp struct sockbuf *sb; 820269076Snp int tx_credits, ulp_len = 0, ulp_mode = 0, qlen = 0; 821269076Snp int shove, compl; 822269076Snp struct ofld_tx_sdesc *txsd; 823269076Snp 824269076Snp INP_WLOCK_ASSERT(inp); 825269076Snp if (toep->flags & TPF_ABORT_SHUTDOWN) 826269076Snp return; 827269076Snp 828269076Snp tp = intotcpcb(inp); 829269076Snp so = inp->inp_socket; 830269076Snp sb = &so->so_snd; 831269076Snp txsd = &toep->txsd[toep->txsd_pidx]; 832269076Snp 833269076Snp KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 834269076Snp ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 835269076Snp 836269076Snp /* 837269076Snp * This function doesn't resume by itself. Someone else must clear the 838269076Snp * flag and call this function. 
839269076Snp */ 840269076Snp if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) 841269076Snp return; 842269076Snp 843269076Snp sndptr = t4_queue_iscsi_callback(so, toep, 1, &qlen); 844269076Snp if (!qlen) 845269076Snp return; 846269076Snp 847269076Snp do { 848269076Snp tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 849269076Snp max_imm = max_imm_payload(tx_credits); 850269076Snp max_nsegs = max_dsgl_nsegs(tx_credits); 851269076Snp 852269076Snp if (drop) { 853269076Snp t4_cpl_iscsi_callback(toep->td, toep, &drop, 854269076Snp CPL_FW4_ACK); 855269076Snp drop = 0; 856269076Snp } 857269076Snp 858269076Snp plen = 0; 859269076Snp nsegs = 0; 860269076Snp max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 861269076Snp for (m = sndptr; m != NULL; m = m->m_next) { 862269076Snp int n = sglist_count(mtod(m, void *), m->m_len); 863269076Snp 864269076Snp nsegs += n; 865269076Snp plen += m->m_len; 866269076Snp 867269076Snp /* This mbuf sent us _over_ the nsegs limit, return */ 868269076Snp if (plen > max_imm && nsegs > max_nsegs) { 869269076Snp toep->flags |= TPF_TX_SUSPENDED; 870269076Snp return; 871269076Snp } 872269076Snp 873269076Snp if (max_nsegs_1mbuf < n) 874269076Snp max_nsegs_1mbuf = n; 875269076Snp 876269076Snp /* This mbuf put us right at the max_nsegs limit */ 877269076Snp if (plen > max_imm && nsegs == max_nsegs) { 878269076Snp toep->flags |= TPF_TX_SUSPENDED; 879269076Snp return; 880269076Snp } 881269076Snp } 882269076Snp 883269076Snp shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); 884269076Snp /* nothing to send */ 885269076Snp if (plen == 0) { 886269076Snp KASSERT(m == NULL, 887269076Snp ("%s: nothing to send, but m != NULL", __func__)); 888269076Snp break; 889269076Snp } 890269076Snp 891269076Snp if (__predict_false(toep->flags & TPF_FIN_SENT)) 892269076Snp panic("%s: excess tx.", __func__); 893269076Snp 894269076Snp ulp_len = plen + ulp_extra_len(sndptr, &ulp_mode); 895269076Snp if (plen <= max_imm) { 896269076Snp 897269076Snp /* 
Immediate data tx */ 898269076Snp wr = alloc_wrqe(roundup(sizeof(*txwr) + plen, 16), 899269076Snp toep->ofld_txq); 900269076Snp if (wr == NULL) { 901269076Snp /* XXX: how will we recover from this? */ 902269076Snp toep->flags |= TPF_TX_SUSPENDED; 903269076Snp return; 904269076Snp } 905269076Snp txwr = wrtod(wr); 906269076Snp credits = howmany(wr->wr_len, 16); 907269076Snp write_tx_wr(txwr, toep, plen, ulp_len, credits, shove, 908276597Snp ulp_mode, 0); 909269076Snp m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 910269076Snp } else { 911269076Snp int wr_len; 912269076Snp 913269076Snp /* DSGL tx */ 914269076Snp wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 915269076Snp ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 916269076Snp wr = alloc_wrqe(roundup(wr_len, 16), toep->ofld_txq); 917269076Snp if (wr == NULL) { 918269076Snp /* XXX: how will we recover from this? */ 919269076Snp toep->flags |= TPF_TX_SUSPENDED; 920269076Snp return; 921269076Snp } 922269076Snp txwr = wrtod(wr); 923269076Snp credits = howmany(wr_len, 16); 924269076Snp write_tx_wr(txwr, toep, 0, ulp_len, credits, shove, 925276597Snp ulp_mode, 0); 926269076Snp write_tx_sgl(txwr + 1, sndptr, m, nsegs, 927269076Snp max_nsegs_1mbuf); 928269076Snp if (wr_len & 0xf) { 929269076Snp uint64_t *pad = (uint64_t *) 930269076Snp ((uintptr_t)txwr + wr_len); 931269076Snp *pad = 0; 932269076Snp } 933269076Snp } 934269076Snp 935269076Snp KASSERT(toep->tx_credits >= credits, 936269076Snp ("%s: not enough credits", __func__)); 937269076Snp 938269076Snp toep->tx_credits -= credits; 939269076Snp toep->tx_nocompl += credits; 940269076Snp toep->plen_nocompl += plen; 941269076Snp if (toep->tx_credits <= toep->tx_total * 3 / 8 && 942269076Snp toep->tx_nocompl >= toep->tx_total / 4) 943269076Snp compl = 1; 944269076Snp 945269076Snp if (compl) { 946269076Snp txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 947269076Snp toep->tx_nocompl = 0; 948269076Snp toep->plen_nocompl = 0; 949269076Snp } 950269076Snp tp->snd_nxt += 
ulp_len; 951269076Snp tp->snd_max += ulp_len; 952269076Snp 953269076Snp /* goto next mbuf */ 954269076Snp sndptr = m = t4_queue_iscsi_callback(so, toep, 2, &qlen); 955269076Snp 956269076Snp toep->flags |= TPF_TX_DATA_SENT; 957269076Snp if (toep->tx_credits < MIN_OFLD_TX_CREDITS) { 958269076Snp toep->flags |= TPF_TX_SUSPENDED; 959269076Snp } 960269076Snp 961269076Snp KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 962269076Snp txsd->plen = plen; 963269076Snp txsd->tx_credits = credits; 964269076Snp txsd++; 965269076Snp if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 966269076Snp toep->txsd_pidx = 0; 967269076Snp txsd = &toep->txsd[0]; 968269076Snp } 969269076Snp toep->txsd_avail--; 970269076Snp 971269076Snp t4_l2t_send(sc, wr, toep->l2te); 972269076Snp } while (m != NULL); 973269076Snp 974269076Snp /* Send a FIN if requested, but only if there's no more data to send */ 975269076Snp if (m == NULL && toep->flags & TPF_SEND_FIN) 976269076Snp close_conn(sc, toep); 977269076Snp} 978269076Snp 979237263Snpint 980237263Snpt4_tod_output(struct toedev *tod, struct tcpcb *tp) 981237263Snp{ 982237263Snp struct adapter *sc = tod->tod_softc; 983237263Snp#ifdef INVARIANTS 984237263Snp struct inpcb *inp = tp->t_inpcb; 985237263Snp#endif 986237263Snp struct toepcb *toep = tp->t_toe; 987237263Snp 988237263Snp INP_WLOCK_ASSERT(inp); 989237263Snp KASSERT((inp->inp_flags & INP_DROPPED) == 0, 990237263Snp ("%s: inp %p dropped.", __func__, inp)); 991237263Snp KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 992237263Snp 993255411Snp t4_push_frames(sc, toep, 0); 994237263Snp 995237263Snp return (0); 996237263Snp} 997237263Snp 998237263Snpint 999237263Snpt4_send_fin(struct toedev *tod, struct tcpcb *tp) 1000237263Snp{ 1001237263Snp struct adapter *sc = tod->tod_softc; 1002237263Snp#ifdef INVARIANTS 1003237263Snp struct inpcb *inp = tp->t_inpcb; 1004237263Snp#endif 1005237263Snp struct toepcb *toep = tp->t_toe; 1006237263Snp 1007237263Snp INP_WLOCK_ASSERT(inp); 
1008237263Snp KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1009237263Snp ("%s: inp %p dropped.", __func__, inp)); 1010237263Snp KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1011237263Snp 1012239514Snp toep->flags |= TPF_SEND_FIN; 1013269076Snp if (tp->t_state >= TCPS_ESTABLISHED) { 1014269076Snp if (toep->ulp_mode == ULP_MODE_ISCSI) 1015269076Snp t4_ulp_push_frames(sc, toep, 0); 1016269076Snp else 1017269076Snp t4_push_frames(sc, toep, 0); 1018269076Snp } 1019237263Snp 1020237263Snp return (0); 1021237263Snp} 1022237263Snp 1023237263Snpint 1024237263Snpt4_send_rst(struct toedev *tod, struct tcpcb *tp) 1025237263Snp{ 1026237263Snp struct adapter *sc = tod->tod_softc; 1027237263Snp#if defined(INVARIANTS) 1028237263Snp struct inpcb *inp = tp->t_inpcb; 1029237263Snp#endif 1030237263Snp struct toepcb *toep = tp->t_toe; 1031237263Snp 1032237263Snp INP_WLOCK_ASSERT(inp); 1033237263Snp KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1034237263Snp ("%s: inp %p dropped.", __func__, inp)); 1035237263Snp KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1036237263Snp 1037237263Snp /* hmmmm */ 1038239514Snp KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 1039237263Snp ("%s: flowc for tid %u [%s] not sent already", 1040237263Snp __func__, toep->tid, tcpstates[tp->t_state])); 1041237263Snp 1042237263Snp send_reset(sc, toep, 0); 1043237263Snp return (0); 1044237263Snp} 1045237263Snp 1046237263Snp/* 1047237263Snp * Peer has sent us a FIN. 
1048237263Snp */ 1049237263Snpstatic int 1050237263Snpdo_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1051237263Snp{ 1052237263Snp struct adapter *sc = iq->adapter; 1053237263Snp const struct cpl_peer_close *cpl = (const void *)(rss + 1); 1054237263Snp unsigned int tid = GET_TID(cpl); 1055237263Snp struct toepcb *toep = lookup_tid(sc, tid); 1056237263Snp struct inpcb *inp = toep->inp; 1057237263Snp struct tcpcb *tp = NULL; 1058239344Snp struct socket *so; 1059239344Snp struct sockbuf *sb; 1060237263Snp#ifdef INVARIANTS 1061237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1062237263Snp#endif 1063237263Snp 1064237263Snp KASSERT(opcode == CPL_PEER_CLOSE, 1065237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1066237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1067243680Snp 1068243680Snp if (__predict_false(toep->flags & TPF_SYNQE)) { 1069243680Snp#ifdef INVARIANTS 1070243680Snp struct synq_entry *synqe = (void *)toep; 1071243680Snp 1072243680Snp INP_WLOCK(synqe->lctx->inp); 1073243680Snp if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1074243680Snp KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1075243680Snp ("%s: listen socket closed but tid %u not aborted.", 1076243680Snp __func__, tid)); 1077243680Snp } else { 1078243680Snp /* 1079243680Snp * do_pass_accept_req is still running and will 1080243680Snp * eventually take care of this tid. 
1081243680Snp */ 1082243680Snp } 1083243680Snp INP_WUNLOCK(synqe->lctx->inp); 1084243680Snp#endif 1085243680Snp CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1086243680Snp toep, toep->flags); 1087243680Snp return (0); 1088243680Snp } 1089243680Snp 1090237263Snp KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1091237263Snp 1092237263Snp INP_INFO_WLOCK(&V_tcbinfo); 1093237263Snp INP_WLOCK(inp); 1094237263Snp tp = intotcpcb(inp); 1095237263Snp 1096237263Snp CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, 1097237263Snp tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); 1098237263Snp 1099239514Snp if (toep->flags & TPF_ABORT_SHUTDOWN) 1100237263Snp goto done; 1101237263Snp 1102239344Snp tp->rcv_nxt++; /* FIN */ 1103239344Snp 1104237263Snp so = inp->inp_socket; 1105239344Snp sb = &so->so_rcv; 1106239344Snp SOCKBUF_LOCK(sb); 1107239344Snp if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) { 1108250218Snp m = get_ddp_mbuf(be32toh(cpl->rcv_nxt) - tp->rcv_nxt); 1109239344Snp tp->rcv_nxt = be32toh(cpl->rcv_nxt); 1110239344Snp toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); 1111239344Snp 1112274421Sglebius KASSERT(toep->sb_cc >= sbused(sb), 1113239344Snp ("%s: sb %p has more data (%d) than last time (%d).", 1114274421Sglebius __func__, sb, sbused(sb), toep->sb_cc)); 1115274421Sglebius toep->rx_credits += toep->sb_cc - sbused(sb); 1116239344Snp#ifdef USE_DDP_RX_FLOW_CONTROL 1117239344Snp toep->rx_credits -= m->m_len; /* adjust for F_RX_FC_DDP */ 1118239344Snp#endif 1119275329Sglebius sbappendstream_locked(sb, m, 0); 1120274421Sglebius toep->sb_cc = sbused(sb); 1121239344Snp } 1122239344Snp socantrcvmore_locked(so); /* unlocks the sockbuf */ 1123239344Snp 1124255005Snp if (toep->ulp_mode != ULP_MODE_RDMA) { 1125255005Snp KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), 1126255005Snp ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, 1127255005Snp be32toh(cpl->rcv_nxt))); 
1128255005Snp } 1129237263Snp 1130237263Snp switch (tp->t_state) { 1131237263Snp case TCPS_SYN_RECEIVED: 1132237263Snp tp->t_starttime = ticks; 1133237263Snp /* FALLTHROUGH */ 1134237263Snp 1135237263Snp case TCPS_ESTABLISHED: 1136237263Snp tp->t_state = TCPS_CLOSE_WAIT; 1137237263Snp break; 1138237263Snp 1139237263Snp case TCPS_FIN_WAIT_1: 1140237263Snp tp->t_state = TCPS_CLOSING; 1141237263Snp break; 1142237263Snp 1143237263Snp case TCPS_FIN_WAIT_2: 1144237263Snp tcp_twstart(tp); 1145237263Snp INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1146237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1147237263Snp 1148237263Snp INP_WLOCK(inp); 1149237263Snp final_cpl_received(toep); 1150237263Snp return (0); 1151237263Snp 1152237263Snp default: 1153237263Snp log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", 1154237263Snp __func__, tid, tp->t_state); 1155237263Snp } 1156237263Snpdone: 1157237263Snp INP_WUNLOCK(inp); 1158237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1159237263Snp return (0); 1160237263Snp} 1161237263Snp 1162237263Snp/* 1163237263Snp * Peer has ACK'd our FIN. 
1164237263Snp */ 1165237263Snpstatic int 1166237263Snpdo_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, 1167237263Snp struct mbuf *m) 1168237263Snp{ 1169237263Snp struct adapter *sc = iq->adapter; 1170237263Snp const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); 1171237263Snp unsigned int tid = GET_TID(cpl); 1172237263Snp struct toepcb *toep = lookup_tid(sc, tid); 1173237263Snp struct inpcb *inp = toep->inp; 1174237263Snp struct tcpcb *tp = NULL; 1175237263Snp struct socket *so = NULL; 1176237263Snp#ifdef INVARIANTS 1177237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1178237263Snp#endif 1179237263Snp 1180237263Snp KASSERT(opcode == CPL_CLOSE_CON_RPL, 1181237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1182237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1183237263Snp KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1184237263Snp 1185237263Snp INP_INFO_WLOCK(&V_tcbinfo); 1186237263Snp INP_WLOCK(inp); 1187237263Snp tp = intotcpcb(inp); 1188237263Snp 1189237263Snp CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", 1190237263Snp __func__, tid, tp ? 
tcpstates[tp->t_state] : "no tp", toep->flags); 1191237263Snp 1192239514Snp if (toep->flags & TPF_ABORT_SHUTDOWN) 1193237263Snp goto done; 1194237263Snp 1195237263Snp so = inp->inp_socket; 1196237263Snp tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ 1197237263Snp 1198237263Snp switch (tp->t_state) { 1199237263Snp case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ 1200237263Snp tcp_twstart(tp); 1201237263Snprelease: 1202237263Snp INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1203237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1204237263Snp 1205237263Snp INP_WLOCK(inp); 1206237263Snp final_cpl_received(toep); /* no more CPLs expected */ 1207237263Snp 1208237263Snp return (0); 1209237263Snp case TCPS_LAST_ACK: 1210237263Snp if (tcp_close(tp)) 1211237263Snp INP_WUNLOCK(inp); 1212237263Snp goto release; 1213237263Snp 1214237263Snp case TCPS_FIN_WAIT_1: 1215237263Snp if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 1216237263Snp soisdisconnected(so); 1217237263Snp tp->t_state = TCPS_FIN_WAIT_2; 1218237263Snp break; 1219237263Snp 1220237263Snp default: 1221237263Snp log(LOG_ERR, 1222237263Snp "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", 1223237263Snp __func__, tid, tcpstates[tp->t_state]); 1224237263Snp } 1225237263Snpdone: 1226237263Snp INP_WUNLOCK(inp); 1227237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1228237263Snp return (0); 1229237263Snp} 1230237263Snp 1231237263Snpvoid 1232237263Snpsend_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, 1233237263Snp int rst_status) 1234237263Snp{ 1235237263Snp struct wrqe *wr; 1236237263Snp struct cpl_abort_rpl *cpl; 1237237263Snp 1238237263Snp wr = alloc_wrqe(sizeof(*cpl), ofld_txq); 1239237263Snp if (wr == NULL) { 1240237263Snp /* XXX */ 1241237263Snp panic("%s: allocation failure.", __func__); 1242237263Snp } 1243237263Snp cpl = wrtod(wr); 1244237263Snp 1245237263Snp INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); 1246237263Snp cpl->cmd = rst_status; 1247237263Snp 1248237263Snp 
t4_wrq_tx(sc, wr); 1249237263Snp} 1250237263Snp 1251237263Snpstatic int 1252237263Snpabort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) 1253237263Snp{ 1254237263Snp switch (abort_reason) { 1255237263Snp case CPL_ERR_BAD_SYN: 1256237263Snp case CPL_ERR_CONN_RESET: 1257237263Snp return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); 1258237263Snp case CPL_ERR_XMIT_TIMEDOUT: 1259237263Snp case CPL_ERR_PERSIST_TIMEDOUT: 1260237263Snp case CPL_ERR_FINWAIT2_TIMEDOUT: 1261237263Snp case CPL_ERR_KEEPALIVE_TIMEDOUT: 1262237263Snp return (ETIMEDOUT); 1263237263Snp default: 1264237263Snp return (EIO); 1265237263Snp } 1266237263Snp} 1267237263Snp 1268269076Snpint 1269269076Snpcpl_not_handled(struct sge_iq *, const struct rss_header *, struct mbuf *); 1270237263Snp/* 1271269076Snp * tom_cpl_iscsi_callback - 1272269076Snp * iscsi and tom would share the following cpl messages, so when any of these 1273269076Snp * message is received, after tom is done with processing it, the messages 1274269076Snp * needs to be forwarded to iscsi for further processing: 1275269076Snp * - CPL_SET_TCB_RPL 1276269076Snp * - CPL_RX_DATA_DDP 1277269076Snp */ 1278269076Snpvoid (*tom_cpl_iscsi_callback)(struct tom_data *, struct socket *, void *, 1279269076Snp unsigned int); 1280269076Snp 1281269076Snpstruct mbuf *(*tom_queue_iscsi_callback)(struct socket *, unsigned int, int *); 1282269076Snp/* 1283269076Snp * Check if the handler function is set for a given CPL 1284269076Snp * return 0 if the function is NULL or cpl_not_handled, 1 otherwise. 
1285269076Snp */ 1286269076Snpint 1287269076Snpt4tom_cpl_handler_registered(struct adapter *sc, unsigned int opcode) 1288269076Snp{ 1289269076Snp 1290269076Snp MPASS(opcode < nitems(sc->cpl_handler)); 1291269076Snp 1292269076Snp return (sc->cpl_handler[opcode] && 1293269076Snp sc->cpl_handler[opcode] != cpl_not_handled); 1294269076Snp} 1295269076Snp 1296269076Snp/* 1297269076Snp * set the tom_cpl_iscsi_callback function, this function should be used 1298269076Snp * whenever both toe and iscsi need to process the same cpl msg. 1299269076Snp */ 1300269076Snpvoid 1301269076Snpt4tom_register_cpl_iscsi_callback(void (*fp)(struct tom_data *, struct socket *, 1302269076Snp void *, unsigned int)) 1303269076Snp{ 1304269076Snp 1305269076Snp tom_cpl_iscsi_callback = fp; 1306269076Snp} 1307269076Snp 1308269076Snpvoid 1309269076Snpt4tom_register_queue_iscsi_callback(struct mbuf *(*fp)(struct socket *, 1310269076Snp unsigned int, int *qlen)) 1311269076Snp{ 1312269076Snp 1313269076Snp tom_queue_iscsi_callback = fp; 1314269076Snp} 1315269076Snp 1316269076Snpint 1317269076Snpt4_cpl_iscsi_callback(struct tom_data *td, struct toepcb *toep, void *m, 1318269076Snp unsigned int opcode) 1319269076Snp{ 1320269076Snp struct socket *so; 1321269076Snp 1322269076Snp if (opcode == CPL_FW4_ACK) 1323269076Snp so = toep->inp->inp_socket; 1324269076Snp else { 1325269076Snp INP_WLOCK(toep->inp); 1326269076Snp so = toep->inp->inp_socket; 1327269076Snp INP_WUNLOCK(toep->inp); 1328269076Snp } 1329269076Snp 1330269076Snp if (tom_cpl_iscsi_callback && so) { 1331269076Snp if (toep->ulp_mode == ULP_MODE_ISCSI) { 1332269076Snp tom_cpl_iscsi_callback(td, so, m, opcode); 1333269076Snp return (0); 1334269076Snp } 1335269076Snp } 1336269076Snp 1337269076Snp return (1); 1338269076Snp} 1339269076Snp 1340269076Snpstruct mbuf * 1341269076Snpt4_queue_iscsi_callback(struct socket *so, struct toepcb *toep, 1342269076Snp unsigned int cmd, int *qlen) 1343269076Snp{ 1344269076Snp 1345269076Snp if 
(tom_queue_iscsi_callback && so) { 1346269076Snp if (toep->ulp_mode == ULP_MODE_ISCSI) 1347269076Snp return (tom_queue_iscsi_callback(so, cmd, qlen)); 1348269076Snp } 1349269076Snp 1350269076Snp return (NULL); 1351269076Snp} 1352269076Snp 1353269076Snp/* 1354237263Snp * TCP RST from the peer, timeout, or some other such critical error. 1355237263Snp */ 1356237263Snpstatic int 1357237263Snpdo_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1358237263Snp{ 1359237263Snp struct adapter *sc = iq->adapter; 1360237263Snp const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 1361237263Snp unsigned int tid = GET_TID(cpl); 1362237263Snp struct toepcb *toep = lookup_tid(sc, tid); 1363237263Snp struct sge_wrq *ofld_txq = toep->ofld_txq; 1364237263Snp struct inpcb *inp; 1365237263Snp struct tcpcb *tp; 1366237263Snp#ifdef INVARIANTS 1367237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1368237263Snp#endif 1369237263Snp 1370237263Snp KASSERT(opcode == CPL_ABORT_REQ_RSS, 1371237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1372237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1373237263Snp 1374239514Snp if (toep->flags & TPF_SYNQE) 1375237263Snp return (do_abort_req_synqe(iq, rss, m)); 1376237263Snp 1377237263Snp KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1378237263Snp 1379245935Snp if (negative_advice(cpl->status)) { 1380237263Snp CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", 1381237263Snp __func__, cpl->status, tid, toep->flags); 1382237263Snp return (0); /* Ignore negative advice */ 1383237263Snp } 1384237263Snp 1385237263Snp inp = toep->inp; 1386237263Snp INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */ 1387237263Snp INP_WLOCK(inp); 1388237263Snp 1389237263Snp tp = intotcpcb(inp); 1390237263Snp 1391237263Snp CTR6(KTR_CXGBE, 1392237263Snp "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", 1393239528Snp __func__, tid, tp ? 
tcpstates[tp->t_state] : "no tp", toep->flags, 1394239528Snp inp->inp_flags, cpl->status); 1395237263Snp 1396237263Snp /* 1397237263Snp * If we'd initiated an abort earlier the reply to it is responsible for 1398237263Snp * cleaning up resources. Otherwise we tear everything down right here 1399237263Snp * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 1400237263Snp */ 1401239514Snp if (toep->flags & TPF_ABORT_SHUTDOWN) { 1402237263Snp INP_WUNLOCK(inp); 1403237263Snp goto done; 1404237263Snp } 1405239514Snp toep->flags |= TPF_ABORT_SHUTDOWN; 1406237263Snp 1407242671Snp if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 1408242671Snp struct socket *so = inp->inp_socket; 1409237263Snp 1410242671Snp if (so != NULL) 1411242671Snp so_error_set(so, abort_status_to_errno(tp, 1412242671Snp cpl->status)); 1413242671Snp tp = tcp_close(tp); 1414242671Snp if (tp == NULL) 1415242671Snp INP_WLOCK(inp); /* re-acquire */ 1416242671Snp } 1417242671Snp 1418237263Snp final_cpl_received(toep); 1419237263Snpdone: 1420237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1421237263Snp send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 1422237263Snp return (0); 1423237263Snp} 1424237263Snp 1425237263Snp/* 1426237263Snp * Reply to the CPL_ABORT_REQ (send_reset) 1427237263Snp */ 1428237263Snpstatic int 1429237263Snpdo_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1430237263Snp{ 1431237263Snp struct adapter *sc = iq->adapter; 1432237263Snp const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 1433237263Snp unsigned int tid = GET_TID(cpl); 1434237263Snp struct toepcb *toep = lookup_tid(sc, tid); 1435237263Snp struct inpcb *inp = toep->inp; 1436237263Snp#ifdef INVARIANTS 1437237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1438237263Snp#endif 1439237263Snp 1440237263Snp KASSERT(opcode == CPL_ABORT_RPL_RSS, 1441237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1442237263Snp KASSERT(m == NULL, ("%s: wasn't expecting 
payload", __func__)); 1443237263Snp 1444239514Snp if (toep->flags & TPF_SYNQE) 1445237263Snp return (do_abort_rpl_synqe(iq, rss, m)); 1446237263Snp 1447237263Snp KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1448237263Snp 1449237263Snp CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", 1450237263Snp __func__, tid, toep, inp, cpl->status); 1451237263Snp 1452239514Snp KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1453237263Snp ("%s: wasn't expecting abort reply", __func__)); 1454237263Snp 1455237263Snp INP_WLOCK(inp); 1456237263Snp final_cpl_received(toep); 1457237263Snp 1458237263Snp return (0); 1459237263Snp} 1460237263Snp 1461237263Snpstatic int 1462237263Snpdo_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1463237263Snp{ 1464237263Snp struct adapter *sc = iq->adapter; 1465237263Snp const struct cpl_rx_data *cpl = mtod(m, const void *); 1466237263Snp unsigned int tid = GET_TID(cpl); 1467237263Snp struct toepcb *toep = lookup_tid(sc, tid); 1468237263Snp struct inpcb *inp = toep->inp; 1469237263Snp struct tcpcb *tp; 1470237263Snp struct socket *so; 1471239344Snp struct sockbuf *sb; 1472239344Snp int len; 1473243681Snp uint32_t ddp_placed = 0; 1474237263Snp 1475239514Snp if (__predict_false(toep->flags & TPF_SYNQE)) { 1476243680Snp#ifdef INVARIANTS 1477243680Snp struct synq_entry *synqe = (void *)toep; 1478243680Snp 1479243680Snp INP_WLOCK(synqe->lctx->inp); 1480243680Snp if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1481243680Snp KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1482243680Snp ("%s: listen socket closed but tid %u not aborted.", 1483243680Snp __func__, tid)); 1484243680Snp } else { 1485243680Snp /* 1486243680Snp * do_pass_accept_req is still running and will 1487243680Snp * eventually take care of this tid. 
1488243680Snp */ 1489243680Snp } 1490243680Snp INP_WUNLOCK(synqe->lctx->inp); 1491243680Snp#endif 1492243680Snp CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1493243680Snp toep, toep->flags); 1494237263Snp m_freem(m); 1495237263Snp return (0); 1496237263Snp } 1497237263Snp 1498237263Snp KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1499237263Snp 1500237263Snp /* strip off CPL header */ 1501237263Snp m_adj(m, sizeof(*cpl)); 1502239344Snp len = m->m_pkthdr.len; 1503237263Snp 1504237263Snp INP_WLOCK(inp); 1505237263Snp if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { 1506237263Snp CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", 1507239344Snp __func__, tid, len, inp->inp_flags); 1508237263Snp INP_WUNLOCK(inp); 1509237263Snp m_freem(m); 1510237263Snp return (0); 1511237263Snp } 1512237263Snp 1513237263Snp tp = intotcpcb(inp); 1514237263Snp 1515243681Snp if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) 1516243681Snp ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; 1517237263Snp 1518239344Snp tp->rcv_nxt += len; 1519239344Snp KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); 1520239344Snp tp->rcv_wnd -= len; 1521237263Snp tp->t_rcvtime = ticks; 1522237263Snp 1523237263Snp so = inp_inpcbtosocket(inp); 1524239344Snp sb = &so->so_rcv; 1525239344Snp SOCKBUF_LOCK(sb); 1526237263Snp 1527239344Snp if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { 1528237263Snp CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", 1529239344Snp __func__, tid, len); 1530237263Snp m_freem(m); 1531239344Snp SOCKBUF_UNLOCK(sb); 1532237263Snp INP_WUNLOCK(inp); 1533237263Snp 1534237263Snp INP_INFO_WLOCK(&V_tcbinfo); 1535237263Snp INP_WLOCK(inp); 1536237263Snp tp = tcp_drop(tp, ECONNRESET); 1537237263Snp if (tp) 1538237263Snp INP_WUNLOCK(inp); 1539237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1540237263Snp 1541237263Snp return (0); 1542237263Snp } 1543237263Snp 1544237263Snp /* receive buffer autosize */ 1545239344Snp if (sb->sb_flags 
& SB_AUTOSIZE && 1546237263Snp V_tcp_do_autorcvbuf && 1547239344Snp sb->sb_hiwat < V_tcp_autorcvbuf_max && 1548239344Snp len > (sbspace(sb) / 8 * 7)) { 1549239344Snp unsigned int hiwat = sb->sb_hiwat; 1550237263Snp unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, 1551237263Snp V_tcp_autorcvbuf_max); 1552237263Snp 1553239344Snp if (!sbreserve_locked(sb, newsize, so, NULL)) 1554239344Snp sb->sb_flags &= ~SB_AUTOSIZE; 1555237263Snp else 1556237263Snp toep->rx_credits += newsize - hiwat; 1557237263Snp } 1558239344Snp 1559239344Snp if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1560239344Snp int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off; 1561239344Snp 1562239344Snp if (changed) { 1563243681Snp if (toep->ddp_flags & DDP_SC_REQ) 1564243681Snp toep->ddp_flags ^= DDP_ON | DDP_SC_REQ; 1565243681Snp else { 1566243681Snp KASSERT(cpl->ddp_off == 1, 1567243681Snp ("%s: DDP switched on by itself.", 1568243681Snp __func__)); 1569243681Snp 1570243681Snp /* Fell out of DDP mode */ 1571243681Snp toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE | 1572243681Snp DDP_BUF1_ACTIVE); 1573243681Snp 1574243681Snp if (ddp_placed) 1575243681Snp insert_ddp_data(toep, ddp_placed); 1576239344Snp } 1577239344Snp } 1578239344Snp 1579239344Snp if ((toep->ddp_flags & DDP_OK) == 0 && 1580239344Snp time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) { 1581239344Snp toep->ddp_score = DDP_LOW_SCORE; 1582239344Snp toep->ddp_flags |= DDP_OK; 1583239344Snp CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u", 1584239344Snp __func__, tid, time_uptime); 1585239344Snp } 1586239344Snp 1587239344Snp if (toep->ddp_flags & DDP_ON) { 1588239344Snp 1589239344Snp /* 1590239344Snp * CPL_RX_DATA with DDP on can only be an indicate. Ask 1591239344Snp * soreceive to post a buffer or disable DDP. The 1592239344Snp * payload that arrived in this indicate is appended to 1593239344Snp * the socket buffer as usual. 
1594239344Snp */ 1595239344Snp 1596239344Snp#if 0 1597239344Snp CTR5(KTR_CXGBE, 1598239344Snp "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)", 1599239344Snp __func__, tid, toep->flags, be32toh(cpl->seq), len); 1600239344Snp#endif 1601239344Snp sb->sb_flags |= SB_DDP_INDICATE; 1602239344Snp } else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK && 1603239344Snp tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) { 1604239344Snp 1605239344Snp /* 1606239344Snp * DDP allowed but isn't on (and a request to switch it 1607239344Snp * on isn't pending either), and conditions are ripe for 1608239344Snp * it to work. Switch it on. 1609239344Snp */ 1610239344Snp 1611239344Snp enable_ddp(sc, toep); 1612239344Snp } 1613239344Snp } 1614239344Snp 1615274421Sglebius KASSERT(toep->sb_cc >= sbused(sb), 1616239344Snp ("%s: sb %p has more data (%d) than last time (%d).", 1617274421Sglebius __func__, sb, sbused(sb), toep->sb_cc)); 1618274421Sglebius toep->rx_credits += toep->sb_cc - sbused(sb); 1619275329Sglebius sbappendstream_locked(sb, m, 0); 1620274421Sglebius toep->sb_cc = sbused(sb); 1621237263Snp sorwakeup_locked(so); 1622239344Snp SOCKBUF_UNLOCK_ASSERT(sb); 1623237263Snp 1624237263Snp INP_WUNLOCK(inp); 1625237263Snp return (0); 1626237263Snp} 1627237263Snp 1628237263Snp#define S_CPL_FW4_ACK_OPCODE 24 1629237263Snp#define M_CPL_FW4_ACK_OPCODE 0xff 1630237263Snp#define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) 1631237263Snp#define G_CPL_FW4_ACK_OPCODE(x) \ 1632237263Snp (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) 1633259382Snp 1634237263Snp#define S_CPL_FW4_ACK_FLOWID 0 1635237263Snp#define M_CPL_FW4_ACK_FLOWID 0xffffff 1636237263Snp#define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) 1637237263Snp#define G_CPL_FW4_ACK_FLOWID(x) \ 1638237263Snp (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) 1639259382Snp 1640237263Snp#define S_CPL_FW4_ACK_CR 24 1641237263Snp#define M_CPL_FW4_ACK_CR 0xff 1642237263Snp#define V_CPL_FW4_ACK_CR(x) 
((x) << S_CPL_FW4_ACK_CR) 1643237263Snp#define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) 1644259382Snp 1645237263Snp#define S_CPL_FW4_ACK_SEQVAL 0 1646237263Snp#define M_CPL_FW4_ACK_SEQVAL 0x1 1647237263Snp#define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) 1648237263Snp#define G_CPL_FW4_ACK_SEQVAL(x) \ 1649237263Snp (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) 1650237263Snp#define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) 1651237263Snp 1652237263Snpstatic int 1653237263Snpdo_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1654237263Snp{ 1655237263Snp struct adapter *sc = iq->adapter; 1656237263Snp const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 1657237263Snp unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 1658237263Snp struct toepcb *toep = lookup_tid(sc, tid); 1659237263Snp struct inpcb *inp; 1660237263Snp struct tcpcb *tp; 1661237263Snp struct socket *so; 1662237263Snp uint8_t credits = cpl->credits; 1663237263Snp struct ofld_tx_sdesc *txsd; 1664237263Snp int plen; 1665237263Snp#ifdef INVARIANTS 1666237263Snp unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); 1667237263Snp#endif 1668237263Snp 1669237263Snp /* 1670237263Snp * Very unusual case: we'd sent a flowc + abort_req for a synq entry and 1671237263Snp * now this comes back carrying the credits for the flowc. 
1672237263Snp */ 1673239514Snp if (__predict_false(toep->flags & TPF_SYNQE)) { 1674239514Snp KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1675237263Snp ("%s: credits for a synq entry %p", __func__, toep)); 1676237263Snp return (0); 1677237263Snp } 1678237263Snp 1679237263Snp inp = toep->inp; 1680237263Snp 1681237263Snp KASSERT(opcode == CPL_FW4_ACK, 1682237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1683237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1684237263Snp KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1685237263Snp 1686237263Snp INP_WLOCK(inp); 1687237263Snp 1688239514Snp if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { 1689237263Snp INP_WUNLOCK(inp); 1690237263Snp return (0); 1691237263Snp } 1692237263Snp 1693237263Snp KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, 1694237263Snp ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); 1695237263Snp 1696237263Snp tp = intotcpcb(inp); 1697237263Snp 1698237436Snp if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { 1699237263Snp tcp_seq snd_una = be32toh(cpl->snd_una); 1700237263Snp 1701237263Snp#ifdef INVARIANTS 1702237263Snp if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 1703237263Snp log(LOG_ERR, 1704237263Snp "%s: unexpected seq# %x for TID %u, snd_una %x\n", 1705237263Snp __func__, snd_una, toep->tid, tp->snd_una); 1706237263Snp } 1707237263Snp#endif 1708237263Snp 1709237263Snp if (tp->snd_una != snd_una) { 1710237263Snp tp->snd_una = snd_una; 1711237263Snp tp->ts_recent_age = tcp_ts_getticks(); 1712237263Snp } 1713237263Snp } 1714237263Snp 1715237263Snp so = inp->inp_socket; 1716237263Snp txsd = &toep->txsd[toep->txsd_cidx]; 1717237263Snp plen = 0; 1718237263Snp while (credits) { 1719237263Snp KASSERT(credits >= txsd->tx_credits, 1720237263Snp ("%s: too many (or partial) credits", __func__)); 1721237263Snp credits -= txsd->tx_credits; 1722237263Snp toep->tx_credits += txsd->tx_credits; 1723237263Snp plen += txsd->plen; 1724237263Snp 
txsd++; 1725237263Snp toep->txsd_avail++; 1726237263Snp KASSERT(toep->txsd_avail <= toep->txsd_total, 1727237263Snp ("%s: txsd avail > total", __func__)); 1728237263Snp if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { 1729237263Snp txsd = &toep->txsd[0]; 1730237263Snp toep->txsd_cidx = 0; 1731237263Snp } 1732237263Snp } 1733237263Snp 1734255411Snp if (toep->tx_credits == toep->tx_total) { 1735255411Snp toep->tx_nocompl = 0; 1736255411Snp toep->plen_nocompl = 0; 1737255411Snp } 1738255411Snp 1739255411Snp if (toep->flags & TPF_TX_SUSPENDED && 1740255411Snp toep->tx_credits >= toep->tx_total / 4) { 1741255411Snp toep->flags &= ~TPF_TX_SUSPENDED; 1742269076Snp if (toep->ulp_mode == ULP_MODE_ISCSI) 1743269076Snp t4_ulp_push_frames(sc, toep, plen); 1744269076Snp else 1745269076Snp t4_push_frames(sc, toep, plen); 1746255411Snp } else if (plen > 0) { 1747237263Snp struct sockbuf *sb = &so->so_snd; 1748237263Snp 1749269076Snp if (toep->ulp_mode == ULP_MODE_ISCSI) 1750269076Snp t4_cpl_iscsi_callback(toep->td, toep, &plen, 1751269076Snp CPL_FW4_ACK); 1752269076Snp else { 1753269076Snp SOCKBUF_LOCK(sb); 1754269076Snp sbdrop_locked(sb, plen); 1755269076Snp sowwakeup_locked(so); 1756269076Snp SOCKBUF_UNLOCK_ASSERT(sb); 1757269076Snp } 1758237263Snp } 1759237263Snp 1760237263Snp INP_WUNLOCK(inp); 1761237263Snp 1762237263Snp return (0); 1763237263Snp} 1764237263Snp 1765239338Snpstatic int 1766239338Snpdo_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1767239338Snp{ 1768239338Snp struct adapter *sc = iq->adapter; 1769239338Snp const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 1770239338Snp unsigned int tid = GET_TID(cpl); 1771239338Snp#ifdef INVARIANTS 1772239338Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1773239338Snp#endif 1774239338Snp 1775239338Snp KASSERT(opcode == CPL_SET_TCB_RPL, 1776239338Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1777239338Snp KASSERT(m == NULL, ("%s: wasn't expecting 
payload", __func__)); 1778239338Snp 1779259382Snp if (is_ftid(sc, tid)) 1780239338Snp return (t4_filter_rpl(iq, rss, m)); /* TCB is a filter */ 1781269076Snp else { 1782269076Snp struct toepcb *toep = lookup_tid(sc, tid); 1783239338Snp 1784269076Snp t4_cpl_iscsi_callback(toep->td, toep, m, CPL_SET_TCB_RPL); 1785269076Snp return (0); 1786269076Snp } 1787269076Snp 1788239338Snp CXGBE_UNIMPLEMENTED(__func__); 1789239338Snp} 1790239338Snp 1791237263Snpvoid 1792251638Snpt4_set_tcb_field(struct adapter *sc, struct toepcb *toep, int ctrl, 1793251638Snp uint16_t word, uint64_t mask, uint64_t val) 1794239338Snp{ 1795239338Snp struct wrqe *wr; 1796239338Snp struct cpl_set_tcb_field *req; 1797239338Snp 1798251638Snp wr = alloc_wrqe(sizeof(*req), ctrl ? toep->ctrlq : toep->ofld_txq); 1799239338Snp if (wr == NULL) { 1800239338Snp /* XXX */ 1801239338Snp panic("%s: allocation failure.", __func__); 1802239338Snp } 1803239338Snp req = wrtod(wr); 1804239338Snp 1805239338Snp INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); 1806239338Snp req->reply_ctrl = htobe16(V_NO_REPLY(1) | 1807239338Snp V_QUEUENO(toep->ofld_rxq->iq.abs_id)); 1808239338Snp req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); 1809239338Snp req->mask = htobe64(mask); 1810239338Snp req->val = htobe64(val); 1811239338Snp 1812239338Snp t4_wrq_tx(sc, wr); 1813239338Snp} 1814239338Snp 1815239338Snpvoid 1816237263Snpt4_init_cpl_io_handlers(struct adapter *sc) 1817237263Snp{ 1818237263Snp 1819237263Snp t4_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close); 1820237263Snp t4_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl); 1821237263Snp t4_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req); 1822237263Snp t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl); 1823237263Snp t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data); 1824237263Snp t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack); 1825239338Snp t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl); 1826237263Snp} 
1827239338Snp 1828239338Snpvoid 1829239338Snpt4_uninit_cpl_io_handlers(struct adapter *sc) 1830239338Snp{ 1831239338Snp 1832239338Snp t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl); 1833239338Snp} 1834237263Snp#endif 1835