t4_cpl_io.c revision 300895
1/*-
2 * Copyright (c) 2012, 2015 Chelsio Communications, Inc.
3 * All rights reserved.
4 * Written by: Navdeep Parhar <np@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/cxgbe/tom/t4_cpl_io.c 300895 2016-05-28 00:38:17Z np $");
30
31#include "opt_inet.h"
32
33#ifdef TCP_OFFLOAD
34#include <sys/param.h>
35#include <sys/types.h>
36#include <sys/kernel.h>
37#include <sys/ktr.h>
38#include <sys/module.h>
39#include <sys/protosw.h>
40#include <sys/domain.h>
41#include <sys/socket.h>
42#include <sys/socketvar.h>
43#include <sys/sglist.h>
44#include <netinet/in.h>
45#include <netinet/in_pcb.h>
46#include <netinet/ip.h>
47#include <netinet/ip6.h>
48#define TCPSTATES
49#include <netinet/tcp_fsm.h>
50#include <netinet/tcp_seq.h>
51#include <netinet/tcp_var.h>
52#include <netinet/toecore.h>
53
54#include "common/common.h"
55#include "common/t4_msg.h"
56#include "common/t4_regs.h"
57#include "common/t4_tcb.h"
58#include "tom/t4_tom_l2t.h"
59#include "tom/t4_tom.h"
60
61VNET_DECLARE(int, tcp_do_autosndbuf);
62#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
63VNET_DECLARE(int, tcp_autosndbuf_inc);
64#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
65VNET_DECLARE(int, tcp_autosndbuf_max);
66#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
67VNET_DECLARE(int, tcp_do_autorcvbuf);
68#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
69VNET_DECLARE(int, tcp_autorcvbuf_inc);
70#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
71VNET_DECLARE(int, tcp_autorcvbuf_max);
72#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
73
74void
75send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
76{
77	struct wrqe *wr;
78	struct fw_flowc_wr *flowc;
79	unsigned int nparams = ftxp ? 8 : 6, flowclen;
80	struct vi_info *vi = toep->vi;
81	struct port_info *pi = vi->pi;
82	struct adapter *sc = pi->adapter;
83	unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
84	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
85
86	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
87	    ("%s: flowc for tid %u sent already", __func__, toep->tid));
88
89	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
90
91	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
92	if (wr == NULL) {
93		/* XXX */
94		panic("%s: allocation failure.", __func__);
95	}
96	flowc = wrtod(wr);
97	memset(flowc, 0, wr->wr_len);
98
99	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
100	    V_FW_FLOWC_WR_NPARAMS(nparams));
101	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
102	    V_FW_WR_FLOWID(toep->tid));
103
104	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
105	flowc->mnemval[0].val = htobe32(pfvf);
106	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
107	flowc->mnemval[1].val = htobe32(pi->tx_chan);
108	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
109	flowc->mnemval[2].val = htobe32(pi->tx_chan);
110	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
111	flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id);
112	if (ftxp) {
113		uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf);
114
115		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
116		flowc->mnemval[4].val = htobe32(ftxp->snd_nxt);
117		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
118		flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt);
119		flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
120		flowc->mnemval[6].val = htobe32(sndbuf);
121		flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
122		flowc->mnemval[7].val = htobe32(ftxp->mss);
123
124		CTR6(KTR_CXGBE,
125		    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
126		    __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt,
127		    ftxp->rcv_nxt);
128	} else {
129		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
130		flowc->mnemval[4].val = htobe32(512);
131		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
132		flowc->mnemval[5].val = htobe32(512);
133
134		CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
135	}
136
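	/*
	 * The flowc consumes tx credits (16-byte units) and a send descriptor
	 * slot just like any other offload tx work request.
	 */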
137	txsd->tx_credits = howmany(flowclen, 16);
138	txsd->plen = 0;
139	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
140	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
141	toep->tx_credits -= txsd->tx_credits;
142	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
143		toep->txsd_pidx = 0;
144	toep->txsd_avail--;
145
146	toep->flags |= TPF_FLOWC_WR_SENT;
147	t4_wrq_tx(sc, wr);
148}
149
150void
151send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
152{
153	struct wrqe *wr;
154	struct cpl_abort_req *req;
155	int tid = toep->tid;
156	struct inpcb *inp = toep->inp;
157	struct tcpcb *tp = intotcpcb(inp);	/* don't use if INP_DROPPED */
158
159	INP_WLOCK_ASSERT(inp);
160
161	CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
162	    __func__, toep->tid,
163	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
164	    tcpstates[tp->t_state],
165	    toep->flags, inp->inp_flags,
166	    toep->flags & TPF_ABORT_SHUTDOWN ?
167	    " (abort already in progress)" : "");
168
169	if (toep->flags & TPF_ABORT_SHUTDOWN)
170		return;	/* abort already in progress */
171
172	toep->flags |= TPF_ABORT_SHUTDOWN;
173
174	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
175	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));
176
177	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
178	if (wr == NULL) {
179		/* XXX */
180		panic("%s: allocation failure.", __func__);
181	}
182	req = wrtod(wr);
183
184	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
185	if (inp->inp_flags & INP_DROPPED)
186		req->rsvd0 = htobe32(snd_nxt);
187	else
188		req->rsvd0 = htobe32(tp->snd_nxt);
189	req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
190	req->cmd = CPL_ABORT_SEND_RST;
191
192	/*
193	 * XXX: What's the correct way to tell that the inp hasn't been detached
194	 * from its socket?  Should I even be flushing the snd buffer here?
195	 */
196	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
197		struct socket *so = inp->inp_socket;
198
199		if (so != NULL)	/* because I'm not sure.  See comment above */
200			sbflush(&so->so_snd);
201	}
202
203	t4_l2t_send(sc, wr, toep->l2te);
204}
205
206/*
207 * Called when a connection is established to translate the TCP options
208 * reported by HW to FreeBSD's native format.
209 */
210static void
211assign_rxopt(struct tcpcb *tp, unsigned int opt)
212{
213	struct toepcb *toep = tp->t_toe;
214	struct inpcb *inp = tp->t_inpcb;
215	struct adapter *sc = td_adapter(toep->td);
216	int n;
217
218	INP_LOCK_ASSERT(inp);
219
220	if (inp->inp_inc.inc_flags & INC_ISIPV6)
221		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
222	else
223		n = sizeof(struct ip) + sizeof(struct tcphdr);
224	tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;
225
226	CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
227	    G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);
228
229	if (G_TCPOPT_TSTAMP(opt)) {
230		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
231		tp->ts_recent = 0;		/* hmmm */
232		tp->ts_recent_age = tcp_ts_getticks();
233	}
234
235	if (G_TCPOPT_SACK(opt))
236		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
237	else
238		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */
239
240	if (G_TCPOPT_WSCALE_OK(opt))
241		tp->t_flags |= TF_RCVD_SCALE;
242
243	/* Doing window scaling? */
244	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
245	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
246		tp->rcv_scale = tp->request_r_scale;
247		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
248	}
249}
250
251/*
252 * Completes some final bits of initialization for just established connections
253 * and changes their state to TCPS_ESTABLISHED.
254 *
255 * The ISNs are as seen after the exchange of SYNs, i.e., the true ISN + 1.
256 */
257void
258make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
259    uint16_t opt)
260{
261	struct inpcb *inp = toep->inp;
262	struct socket *so = inp->inp_socket;
263	struct tcpcb *tp = intotcpcb(inp);
264	long bufsize;
265	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS */
266	uint32_t irs = be32toh(rcv_isn) - 1;	/* true IRS */
267	uint16_t tcpopt = be16toh(opt);
268	struct flowc_tx_params ftxp;
269
270	CURVNET_SET(so->so_vnet);
271	INP_WLOCK_ASSERT(inp);
272	KASSERT(tp->t_state == TCPS_SYN_SENT ||
273	    tp->t_state == TCPS_SYN_RECEIVED,
274	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));
275
276	CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p",
277	    __func__, toep->tid, toep, inp);
278
279	tp->t_state = TCPS_ESTABLISHED;
280	tp->t_starttime = ticks;
281	TCPSTAT_INC(tcps_connects);
282
283	tp->irs = irs;
284	tcp_rcvseqinit(tp);
285	tp->rcv_wnd = toep->rx_credits << 10;
286	tp->rcv_adv += tp->rcv_wnd;
287	tp->last_ack_sent = tp->rcv_nxt;
288
289	/*
290	 * If we were unable to send all rx credits via opt0, save the remainder
291	 * in rx_credits so that they can be handed over with the next credit
292	 * update.
293	 */
294	SOCKBUF_LOCK(&so->so_rcv);
295	bufsize = select_rcv_wnd(so);
296	SOCKBUF_UNLOCK(&so->so_rcv);
297	toep->rx_credits = bufsize - tp->rcv_wnd;
298
299	tp->iss = iss;
300	tcp_sendseqinit(tp);
301	tp->snd_una = iss + 1;
302	tp->snd_nxt = iss + 1;
303	tp->snd_max = iss + 1;
304
305	assign_rxopt(tp, tcpopt);
306
307	SOCKBUF_LOCK(&so->so_snd);
308	if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf)
309		bufsize = V_tcp_autosndbuf_max;
310	else
311		bufsize = sbspace(&so->so_snd);
312	SOCKBUF_UNLOCK(&so->so_snd);
313
314	ftxp.snd_nxt = tp->snd_nxt;
315	ftxp.rcv_nxt = tp->rcv_nxt;
316	ftxp.snd_space = bufsize;
317	ftxp.mss = tp->t_maxseg;
318	send_flowc_wr(toep, &ftxp);
319
320	soisconnected(so);
321	CURVNET_RESTORE();
322}
323
324static int
325send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
326{
327	struct wrqe *wr;
328	struct cpl_rx_data_ack *req;
329	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
330
331	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
332
333	wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
334	if (wr == NULL)
335		return (0);
336	req = wrtod(wr);
337
338	INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
339	req->credit_dack = htobe32(dack | V_RX_CREDITS(credits));
340
341	t4_wrq_tx(sc, wr);
342	return (credits);
343}
344
345void
346t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
347{
348	struct adapter *sc = tod->tod_softc;
349	struct inpcb *inp = tp->t_inpcb;
350	struct socket *so = inp->inp_socket;
351	struct sockbuf *sb = &so->so_rcv;
352	struct toepcb *toep = tp->t_toe;
353	int credits;
354
355	INP_WLOCK_ASSERT(inp);
356
357	SOCKBUF_LOCK_ASSERT(sb);
358	KASSERT(toep->sb_cc >= sbused(sb),
359	    ("%s: sb %p has more data (%d) than last time (%d).",
360	    __func__, sb, sbused(sb), toep->sb_cc));
361
362	toep->rx_credits += toep->sb_cc - sbused(sb);
363	toep->sb_cc = sbused(sb);
364
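	/*
	 * Return rx credits to the chip when the advertised window has shrunk
	 * to 32KB or less, when a large enough batch of credits has built up,
	 * or when what's left of the window can no longer satisfy the socket's
	 * receive low-water mark.
	 */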
365	if (toep->rx_credits > 0 &&
366	    (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 ||
367	    (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
368	    toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) {
369
370		credits = send_rx_credits(sc, toep, toep->rx_credits);
371		toep->rx_credits -= credits;
372		tp->rcv_wnd += credits;
373		tp->rcv_adv += credits;
374	}
375}
376
377void
378t4_rcvd(struct toedev *tod, struct tcpcb *tp)
379{
380	struct inpcb *inp = tp->t_inpcb;
381	struct socket *so = inp->inp_socket;
382	struct sockbuf *sb = &so->so_rcv;
383
384	SOCKBUF_LOCK(sb);
385	t4_rcvd_locked(tod, tp);
386	SOCKBUF_UNLOCK(sb);
387}
388
389/*
390 * Close a connection by sending a CPL_CLOSE_CON_REQ message.
391 */
392static int
393close_conn(struct adapter *sc, struct toepcb *toep)
394{
395	struct wrqe *wr;
396	struct cpl_close_con_req *req;
397	unsigned int tid = toep->tid;
398
399	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
400	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");
401
402	if (toep->flags & TPF_FIN_SENT)
403		return (0);
404
405	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
406	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));
407
408	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
409	if (wr == NULL) {
410		/* XXX */
411		panic("%s: allocation failure.", __func__);
412	}
413	req = wrtod(wr);
414
415	req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |
416	    V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
417	req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
418	    V_FW_WR_FLOWID(tid));
419	req->wr.wr_lo = cpu_to_be64(0);
420	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
421	req->rsvd = 0;
422
423	toep->flags |= TPF_FIN_SENT;
424	toep->flags &= ~TPF_SEND_FIN;
425	t4_l2t_send(sc, wr, toep->l2te);
426
427	return (0);
428}
429
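/*
 * Offload tx credits are in units of 16 bytes.  MIN_OFLD_TX_CREDITS is the
 * smallest work request that can carry at least one byte of payload.
 */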
430#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
431#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
432
433/* Maximum amount of immediate data we could stuff in a WR */
434static inline int
435max_imm_payload(int tx_credits)
436{
437	const int n = 2;	/* Use only up to 2 desc for imm. data WR */
438
439	KASSERT(tx_credits >= 0 &&
440		tx_credits <= MAX_OFLD_TX_CREDITS,
441		("%s: %d credits", __func__, tx_credits));
442
443	if (tx_credits < MIN_OFLD_TX_CREDITS)
444		return (0);
445
446	if (tx_credits >= (n * EQ_ESIZE) / 16)
447		return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr));
448	else
449		return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr));
450}
451
452/* Maximum number of SGL entries we could stuff in a WR */
453static inline int
454max_dsgl_nsegs(int tx_credits)
455{
456	int nseg = 1;	/* ulptx_sgl has room for 1, rest ulptx_sge_pair */
457	int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS;
458
459	KASSERT(tx_credits >= 0 &&
460		tx_credits <= MAX_OFLD_TX_CREDITS,
461		("%s: %d credits", __func__, tx_credits));
462
463	if (tx_credits < MIN_OFLD_TX_CREDITS)
464		return (0);
465
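	/*
	 * Each 24-byte ulptx_sge_pair describes two more segments; a 16-byte
	 * remainder has room for one more length/address.
	 */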
466	nseg += 2 * (sge_pair_credits * 16 / 24);
467	if ((sge_pair_credits * 16) % 24 == 16)
468		nseg++;
469
470	return (nseg);
471}
472
473static inline void
474write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
475    unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign)
476{
477	struct fw_ofld_tx_data_wr *txwr = dst;
478
479	txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) |
480	    V_FW_WR_IMMDLEN(immdlen));
481	txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
482	    V_FW_WR_LEN16(credits));
483	txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) |
484	    V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove));
485	txwr->plen = htobe32(plen);
486
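	/*
	 * If tx_align is enabled: disable LSO for small payloads (and on 10G
	 * ports) so the data goes out as-is, otherwise request payload
	 * alignment (with the shove variant unless TCP_NODELAY is set).
	 */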
487	if (txalign > 0) {
488		struct tcpcb *tp = intotcpcb(toep->inp);
489
490		if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi))
491			txwr->lsodisable_to_flags |=
492			    htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE);
493		else
494			txwr->lsodisable_to_flags |=
495			    htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD |
496				(tp->t_flags & TF_NODELAY ? 0 :
497				F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE));
498	}
499}
500
501/*
502 * Generate a DSGL from a starting mbuf.  The total number of segments and the
503 * maximum segments in any one mbuf are provided.
504 */
505static void
506write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
507{
508	struct mbuf *m;
509	struct ulptx_sgl *usgl = dst;
510	int i, j, rc;
511	struct sglist sg;
512	struct sglist_seg segs[n];
513
514	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
515
516	sglist_init(&sg, n, segs);
517	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
518	    V_ULPTX_NSGE(nsegs));
519
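	/*
	 * The first segment lands in the ulptx_sgl header itself (len0/addr0);
	 * the rest are packed two per ulptx_sge_pair.  i starts at -1 so that
	 * sge[] is indexed only from the second segment onwards.
	 */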
520	i = -1;
521	for (m = start; m != stop; m = m->m_next) {
522		rc = sglist_append(&sg, mtod(m, void *), m->m_len);
523		if (__predict_false(rc != 0))
524			panic("%s: sglist_append %d", __func__, rc);
525
526		for (j = 0; j < sg.sg_nseg; i++, j++) {
527			if (i < 0) {
528				usgl->len0 = htobe32(segs[j].ss_len);
529				usgl->addr0 = htobe64(segs[j].ss_paddr);
530			} else {
531				usgl->sge[i / 2].len[i & 1] =
532				    htobe32(segs[j].ss_len);
533				usgl->sge[i / 2].addr[i & 1] =
534				    htobe64(segs[j].ss_paddr);
535			}
536#ifdef INVARIANTS
537			nsegs--;
538#endif
539		}
540		sglist_reset(&sg);
541	}
542	if (i & 1)
543		usgl->sge[i / 2].len[1] = htobe32(0);
544	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
545	    __func__, nsegs, start, stop));
546}
547
548/*
549 * Max number of SGL entries an offload tx work request can have.  This is 41
550 * (1 + 40) for a full 512B work request.
551 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40)
552 */
553#define OFLD_SGL_LEN (41)
554
555/*
556 * Send data and/or a FIN to the peer.
557 *
558 * The socket's so_snd buffer consists of a stream of data starting with sb_mb
559 * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
560 * was transmitted.
561 *
562 * drop indicates the number of bytes that should be dropped from the head of
563 * the send buffer.  It is an optimization that lets do_fw4_ack avoid creating
564 * contention on the send buffer lock (before this change it used to do
565 * sowwakeup and then t4_push_frames right after that when recovering from tx
566 * stalls).  When drop is set this function MUST drop the bytes and wake up any
567 * writers.
568 */
569void
570t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
571{
572	struct mbuf *sndptr, *m, *sb_sndptr;
573	struct fw_ofld_tx_data_wr *txwr;
574	struct wrqe *wr;
575	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
576	struct inpcb *inp = toep->inp;
577	struct tcpcb *tp = intotcpcb(inp);
578	struct socket *so = inp->inp_socket;
579	struct sockbuf *sb = &so->so_snd;
580	int tx_credits, shove, compl, space, sowwakeup;
581	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
582
583	INP_WLOCK_ASSERT(inp);
584	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
585	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
586
587	KASSERT(toep->ulp_mode == ULP_MODE_NONE ||
588	    toep->ulp_mode == ULP_MODE_TCPDDP ||
589	    toep->ulp_mode == ULP_MODE_RDMA,
590	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));
591
592	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
593		return;
594
595	/*
596	 * This function doesn't resume by itself.  Someone else must clear the
597	 * flag and call this function.
598	 */
599	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
600		KASSERT(drop == 0,
601		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
602		return;
603	}
604
605	do {
606		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
607		max_imm = max_imm_payload(tx_credits);
608		max_nsegs = max_dsgl_nsegs(tx_credits);
609
610		SOCKBUF_LOCK(sb);
611		sowwakeup = drop;
612		if (drop) {
613			sbdrop_locked(sb, drop);
614			drop = 0;
615		}
616		sb_sndptr = sb->sb_sndptr;
617		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
618		plen = 0;
619		nsegs = 0;
620		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
621		for (m = sndptr; m != NULL; m = m->m_next) {
622			int n = sglist_count(mtod(m, void *), m->m_len);
623
624			nsegs += n;
625			plen += m->m_len;
626
627			/* This mbuf sent us _over_ the nsegs limit, back out */
628			if (plen > max_imm && nsegs > max_nsegs) {
629				nsegs -= n;
630				plen -= m->m_len;
631				if (plen == 0) {
632					/* Too few credits */
633					toep->flags |= TPF_TX_SUSPENDED;
634					if (sowwakeup)
635						sowwakeup_locked(so);
636					else
637						SOCKBUF_UNLOCK(sb);
638					SOCKBUF_UNLOCK_ASSERT(sb);
639					return;
640				}
641				break;
642			}
643
644			if (max_nsegs_1mbuf < n)
645				max_nsegs_1mbuf = n;
646			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */
647
648			/* This mbuf put us right at the max_nsegs limit */
649			if (plen > max_imm && nsegs == max_nsegs) {
650				m = m->m_next;
651				break;
652			}
653		}
654
655		space = sbspace(sb);
656
657		if (space <= sb->sb_hiwat * 3 / 8 &&
658		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
659			compl = 1;
660		else
661			compl = 0;
662
663		if (sb->sb_flags & SB_AUTOSIZE &&
664		    V_tcp_do_autosndbuf &&
665		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
666		    space < sb->sb_hiwat / 8) {
667			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
668			    V_tcp_autosndbuf_max);
669
670			if (!sbreserve_locked(sb, newsize, so, NULL))
671				sb->sb_flags &= ~SB_AUTOSIZE;
672			else
673				sowwakeup = 1;	/* room available */
674		}
675		if (sowwakeup)
676			sowwakeup_locked(so);
677		else
678			SOCKBUF_UNLOCK(sb);
679		SOCKBUF_UNLOCK_ASSERT(sb);
680
681		/* nothing to send */
682		if (plen == 0) {
683			KASSERT(m == NULL,
684			    ("%s: nothing to send, but m != NULL", __func__));
685			break;
686		}
687
688		if (__predict_false(toep->flags & TPF_FIN_SENT))
689			panic("%s: excess tx.", __func__);
690
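		/*
		 * Ask the hardware to shove the data out only if the send
		 * buffer has been drained and the kernel doesn't expect more
		 * data to follow (TF_MORETOCOME).
		 */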
691		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
692		if (plen <= max_imm) {
693
694			/* Immediate data tx */
695
696			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
697					toep->ofld_txq);
698			if (wr == NULL) {
699				/* XXX: how will we recover from this? */
700				toep->flags |= TPF_TX_SUSPENDED;
701				return;
702			}
703			txwr = wrtod(wr);
704			credits = howmany(wr->wr_len, 16);
705			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0,
706			    sc->tt.tx_align);
707			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
708			nsegs = 0;
709		} else {
710			int wr_len;
711
712			/* DSGL tx */
713
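			/*
			 * 16B WR header + 16B ulptx_sgl (first segment) + 24B
			 * for each additional pair of segments, or 16B if a
			 * single segment is left over.
			 */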
714			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
715			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
716			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
717			if (wr == NULL) {
718				/* XXX: how will we recover from this? */
719				toep->flags |= TPF_TX_SUSPENDED;
720				return;
721			}
722			txwr = wrtod(wr);
723			credits = howmany(wr_len, 16);
724			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0,
725			    sc->tt.tx_align);
726			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
727			    max_nsegs_1mbuf);
728			if (wr_len & 0xf) {
729				uint64_t *pad = (uint64_t *)
730				    ((uintptr_t)txwr + wr_len);
731				*pad = 0;
732			}
733		}
734
735		KASSERT(toep->tx_credits >= credits,
736			("%s: not enough credits", __func__));
737
738		toep->tx_credits -= credits;
739		toep->tx_nocompl += credits;
740		toep->plen_nocompl += plen;
741		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
742		    toep->tx_nocompl >= toep->tx_total / 4)
743			compl = 1;
744
745		if (compl || toep->ulp_mode == ULP_MODE_RDMA) {
746			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
747			toep->tx_nocompl = 0;
748			toep->plen_nocompl = 0;
749		}
750
751		tp->snd_nxt += plen;
752		tp->snd_max += plen;
753
754		SOCKBUF_LOCK(sb);
755		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
756		sb->sb_sndptr = sb_sndptr;
757		SOCKBUF_UNLOCK(sb);
758
759		toep->flags |= TPF_TX_DATA_SENT;
760		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
761			toep->flags |= TPF_TX_SUSPENDED;
762
763		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
764		txsd->plen = plen;
765		txsd->tx_credits = credits;
766		txsd++;
767		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
768			toep->txsd_pidx = 0;
769			txsd = &toep->txsd[0];
770		}
771		toep->txsd_avail--;
772
773		t4_l2t_send(sc, wr, toep->l2te);
774	} while (m != NULL);
775
776	/* Send a FIN if requested, but only if there's no more data to send */
777	if (m == NULL && toep->flags & TPF_SEND_FIN)
778		close_conn(sc, toep);
779}
780
781static inline void
782rqdrop_locked(struct mbufq *q, int plen)
783{
784	struct mbuf *m;
785
786	while (plen > 0) {
787		m = mbufq_dequeue(q);
788
789		/* Too many credits. */
790		MPASS(m != NULL);
791		M_ASSERTPKTHDR(m);
792
793		/* Partial credits. */
794		MPASS(plen >= m->m_pkthdr.len);
795
796		plen -= m->m_pkthdr.len;
797		m_freem(m);
798	}
799}
800
801void
802t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
803{
804	struct mbuf *sndptr, *m;
805	struct fw_ofld_tx_data_wr *txwr;
806	struct wrqe *wr;
807	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
808	u_int adjusted_plen, ulp_submode;
809	struct inpcb *inp = toep->inp;
810	struct tcpcb *tp = intotcpcb(inp);
811	int tx_credits, shove;
812	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
813	struct mbufq *pduq = &toep->ulp_pduq;
814	static const u_int ulp_extra_len[] = {0, 4, 4, 8};
815
816	INP_WLOCK_ASSERT(inp);
817	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
818	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
819	KASSERT(toep->ulp_mode == ULP_MODE_ISCSI,
820	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));
821
822	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
823		return;
824
825	/*
826	 * This function doesn't resume by itself.  Someone else must clear the
827	 * flag and call this function.
828	 */
829	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
830		KASSERT(drop == 0,
831		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
832		return;
833	}
834
835	if (drop)
836		rqdrop_locked(&toep->ulp_pdu_reclaimq, drop);
837
838	while ((sndptr = mbufq_first(pduq)) != NULL) {
839		M_ASSERTPKTHDR(sndptr);
840
841		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
842		max_imm = max_imm_payload(tx_credits);
843		max_nsegs = max_dsgl_nsegs(tx_credits);
844
845		plen = 0;
846		nsegs = 0;
847		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
848		for (m = sndptr; m != NULL; m = m->m_next) {
849			int n = sglist_count(mtod(m, void *), m->m_len);
850
851			nsegs += n;
852			plen += m->m_len;
853
854			/*
855			 * This mbuf would send us _over_ the nsegs limit.
856			 * Suspend tx because the PDU can't be sent out.
857			 */
858			if (plen > max_imm && nsegs > max_nsegs) {
859				toep->flags |= TPF_TX_SUSPENDED;
860				return;
861			}
862
863			if (max_nsegs_1mbuf < n)
864				max_nsegs_1mbuf = n;
865		}
866
867		if (__predict_false(toep->flags & TPF_FIN_SENT))
868			panic("%s: excess tx.", __func__);
869
870		/*
871		 * We have a PDU to send.  All of it goes out in one WR so 'm'
872		 * is NULL.  A PDU's length is always a multiple of 4.
873		 */
874		MPASS(m == NULL);
875		MPASS((plen & 3) == 0);
876		MPASS(sndptr->m_pkthdr.len == plen);
877
878		shove = !(tp->t_flags & TF_MORETOCOME);
879		ulp_submode = mbuf_ulp_submode(sndptr);
880		MPASS(ulp_submode < nitems(ulp_extra_len));
881
882		/*
883		 * plen doesn't include header and data digests, which are
884		 * generated and inserted in the right places by the TOE, but
885		 * they do occupy TCP sequence space and need to be accounted
886		 * for.
887		 */
888		adjusted_plen = plen + ulp_extra_len[ulp_submode];
889		if (plen <= max_imm) {
890
891			/* Immediate data tx */
892
893			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
894					toep->ofld_txq);
895			if (wr == NULL) {
896				/* XXX: how will we recover from this? */
897				toep->flags |= TPF_TX_SUSPENDED;
898				return;
899			}
900			txwr = wrtod(wr);
901			credits = howmany(wr->wr_len, 16);
902			write_tx_wr(txwr, toep, plen, adjusted_plen, credits,
903			    shove, ulp_submode, sc->tt.tx_align);
904			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
905			nsegs = 0;
906		} else {
907			int wr_len;
908
909			/* DSGL tx */
910			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
911			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
912			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
913			if (wr == NULL) {
914				/* XXX: how will we recover from this? */
915				toep->flags |= TPF_TX_SUSPENDED;
916				return;
917			}
918			txwr = wrtod(wr);
919			credits = howmany(wr_len, 16);
920			write_tx_wr(txwr, toep, 0, adjusted_plen, credits,
921			    shove, ulp_submode, sc->tt.tx_align);
922			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
923			    max_nsegs_1mbuf);
924			if (wr_len & 0xf) {
925				uint64_t *pad = (uint64_t *)
926				    ((uintptr_t)txwr + wr_len);
927				*pad = 0;
928			}
929		}
930
931		KASSERT(toep->tx_credits >= credits,
932			("%s: not enough credits", __func__));
933
934		m = mbufq_dequeue(pduq);
935		MPASS(m == sndptr);
936		mbufq_enqueue(&toep->ulp_pdu_reclaimq, m);
937
938		toep->tx_credits -= credits;
939		toep->tx_nocompl += credits;
940		toep->plen_nocompl += plen;
941		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
942		    toep->tx_nocompl >= toep->tx_total / 4) {
943			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
944			toep->tx_nocompl = 0;
945			toep->plen_nocompl = 0;
946		}
947
948		tp->snd_nxt += adjusted_plen;
949		tp->snd_max += adjusted_plen;
950
951		toep->flags |= TPF_TX_DATA_SENT;
952		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
953			toep->flags |= TPF_TX_SUSPENDED;
954
955		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
956		txsd->plen = plen;
957		txsd->tx_credits = credits;
958		txsd++;
959		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
960			toep->txsd_pidx = 0;
961			txsd = &toep->txsd[0];
962		}
963		toep->txsd_avail--;
964
965		t4_l2t_send(sc, wr, toep->l2te);
966	}
967
968	/* Send a FIN if requested, but only if there are no more PDUs to send */
969	if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN)
970		close_conn(sc, toep);
971}
972
973int
974t4_tod_output(struct toedev *tod, struct tcpcb *tp)
975{
976	struct adapter *sc = tod->tod_softc;
977#ifdef INVARIANTS
978	struct inpcb *inp = tp->t_inpcb;
979#endif
980	struct toepcb *toep = tp->t_toe;
981
982	INP_WLOCK_ASSERT(inp);
983	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
984	    ("%s: inp %p dropped.", __func__, inp));
985	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
986
987	if (toep->ulp_mode == ULP_MODE_ISCSI)
988		t4_push_pdus(sc, toep, 0);
989	else
990		t4_push_frames(sc, toep, 0);
991
992	return (0);
993}
994
995int
996t4_send_fin(struct toedev *tod, struct tcpcb *tp)
997{
998	struct adapter *sc = tod->tod_softc;
999#ifdef INVARIANTS
1000	struct inpcb *inp = tp->t_inpcb;
1001#endif
1002	struct toepcb *toep = tp->t_toe;
1003
1004	INP_WLOCK_ASSERT(inp);
1005	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1006	    ("%s: inp %p dropped.", __func__, inp));
1007	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
1008
1009	toep->flags |= TPF_SEND_FIN;
1010	if (tp->t_state >= TCPS_ESTABLISHED) {
1011		if (toep->ulp_mode == ULP_MODE_ISCSI)
1012			t4_push_pdus(sc, toep, 0);
1013		else
1014			t4_push_frames(sc, toep, 0);
1015	}
1016
1017	return (0);
1018}
1019
1020int
1021t4_send_rst(struct toedev *tod, struct tcpcb *tp)
1022{
1023	struct adapter *sc = tod->tod_softc;
1024#if defined(INVARIANTS)
1025	struct inpcb *inp = tp->t_inpcb;
1026#endif
1027	struct toepcb *toep = tp->t_toe;
1028
1029	INP_WLOCK_ASSERT(inp);
1030	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1031	    ("%s: inp %p dropped.", __func__, inp));
1032	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
1033
1034	/* hmmmm */
1035	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
1036	    ("%s: flowc_wr not sent for tid %u [%s]",
1037	    __func__, toep->tid, tcpstates[tp->t_state]));
1038
1039	send_reset(sc, toep, 0);
1040	return (0);
1041}
1042
1043/*
1044 * Peer has sent us a FIN.
1045 */
1046static int
1047do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1048{
1049	struct adapter *sc = iq->adapter;
1050	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
1051	unsigned int tid = GET_TID(cpl);
1052	struct toepcb *toep = lookup_tid(sc, tid);
1053	struct inpcb *inp = toep->inp;
1054	struct tcpcb *tp = NULL;
1055	struct socket *so;
1056#ifdef INVARIANTS
1057	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1058#endif
1059
1060	KASSERT(opcode == CPL_PEER_CLOSE,
1061	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1062	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1063
1064	if (__predict_false(toep->flags & TPF_SYNQE)) {
1065#ifdef INVARIANTS
1066		struct synq_entry *synqe = (void *)toep;
1067
1068		INP_WLOCK(synqe->lctx->inp);
1069		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
1070			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
1071			    ("%s: listen socket closed but tid %u not aborted.",
1072			    __func__, tid));
1073		} else {
1074			/*
1075			 * do_pass_accept_req is still running and will
1076			 * eventually take care of this tid.
1077			 */
1078		}
1079		INP_WUNLOCK(synqe->lctx->inp);
1080#endif
1081		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
1082		    toep, toep->flags);
1083		return (0);
1084	}
1085
1086	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1087
1088	INP_INFO_RLOCK(&V_tcbinfo);
1089	INP_WLOCK(inp);
1090	tp = intotcpcb(inp);
1091
1092	CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
1093	    tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);
1094
1095	if (toep->flags & TPF_ABORT_SHUTDOWN)
1096		goto done;
1097
1098	tp->rcv_nxt++;	/* FIN */
1099
1100	so = inp->inp_socket;
1101	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
1102		DDP_LOCK(toep);
1103		if (__predict_false(toep->ddp_flags &
1104		    (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)))
1105			handle_ddp_close(toep, tp, cpl->rcv_nxt);
1106		DDP_UNLOCK(toep);
1107	}
1108	socantrcvmore(so);
1109
1110	if (toep->ulp_mode != ULP_MODE_RDMA) {
1111		KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
1112	    		("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
1113	    		be32toh(cpl->rcv_nxt)));
1114	}
1115
1116	switch (tp->t_state) {
1117	case TCPS_SYN_RECEIVED:
1118		tp->t_starttime = ticks;
1119		/* FALLTHROUGH */
1120
1121	case TCPS_ESTABLISHED:
1122		tp->t_state = TCPS_CLOSE_WAIT;
1123		break;
1124
1125	case TCPS_FIN_WAIT_1:
1126		tp->t_state = TCPS_CLOSING;
1127		break;
1128
1129	case TCPS_FIN_WAIT_2:
1130		tcp_twstart(tp);
1131		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
1132		INP_INFO_RUNLOCK(&V_tcbinfo);
1133
1134		INP_WLOCK(inp);
1135		final_cpl_received(toep);
1136		return (0);
1137
1138	default:
1139		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
1140		    __func__, tid, tp->t_state);
1141	}
1142done:
1143	INP_WUNLOCK(inp);
1144	INP_INFO_RUNLOCK(&V_tcbinfo);
1145	return (0);
1146}
1147
1148/*
1149 * Peer has ACK'd our FIN.
1150 */
1151static int
1152do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
1153    struct mbuf *m)
1154{
1155	struct adapter *sc = iq->adapter;
1156	const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
1157	unsigned int tid = GET_TID(cpl);
1158	struct toepcb *toep = lookup_tid(sc, tid);
1159	struct inpcb *inp = toep->inp;
1160	struct tcpcb *tp = NULL;
1161	struct socket *so = NULL;
1162#ifdef INVARIANTS
1163	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1164#endif
1165
1166	KASSERT(opcode == CPL_CLOSE_CON_RPL,
1167	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1168	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1169	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1170
1171	INP_INFO_RLOCK(&V_tcbinfo);
1172	INP_WLOCK(inp);
1173	tp = intotcpcb(inp);
1174
1175	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
1176	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
1177
1178	if (toep->flags & TPF_ABORT_SHUTDOWN)
1179		goto done;
1180
1181	so = inp->inp_socket;
1182	tp->snd_una = be32toh(cpl->snd_nxt) - 1;	/* exclude FIN */
1183
1184	switch (tp->t_state) {
1185	case TCPS_CLOSING:	/* see TCPS_FIN_WAIT_2 in do_peer_close too */
1186		tcp_twstart(tp);
1187release:
1188		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the inp */
1189		INP_INFO_RUNLOCK(&V_tcbinfo);
1190
1191		INP_WLOCK(inp);
1192		final_cpl_received(toep);	/* no more CPLs expected */
1193
1194		return (0);
1195	case TCPS_LAST_ACK:
1196		if (tcp_close(tp))
1197			INP_WUNLOCK(inp);
1198		goto release;
1199
1200	case TCPS_FIN_WAIT_1:
1201		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
1202			soisdisconnected(so);
1203		tp->t_state = TCPS_FIN_WAIT_2;
1204		break;
1205
1206	default:
1207		log(LOG_ERR,
1208		    "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
1209		    __func__, tid, tcpstates[tp->t_state]);
1210	}
1211done:
1212	INP_WUNLOCK(inp);
1213	INP_INFO_RUNLOCK(&V_tcbinfo);
1214	return (0);
1215}
1216
1217void
1218send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid,
1219    int rst_status)
1220{
1221	struct wrqe *wr;
1222	struct cpl_abort_rpl *cpl;
1223
1224	wr = alloc_wrqe(sizeof(*cpl), ofld_txq);
1225	if (wr == NULL) {
1226		/* XXX */
1227		panic("%s: allocation failure.", __func__);
1228	}
1229	cpl = wrtod(wr);
1230
1231	INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid);
1232	cpl->cmd = rst_status;
1233
1234	t4_wrq_tx(sc, wr);
1235}
1236
1237static int
1238abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
1239{
1240	switch (abort_reason) {
1241	case CPL_ERR_BAD_SYN:
1242	case CPL_ERR_CONN_RESET:
1243		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
1244	case CPL_ERR_XMIT_TIMEDOUT:
1245	case CPL_ERR_PERSIST_TIMEDOUT:
1246	case CPL_ERR_FINWAIT2_TIMEDOUT:
1247	case CPL_ERR_KEEPALIVE_TIMEDOUT:
1248		return (ETIMEDOUT);
1249	default:
1250		return (EIO);
1251	}
1252}
1253
1254/*
1255 * TCP RST from the peer, timeout, or some other such critical error.
1256 */
1257static int
1258do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1259{
1260	struct adapter *sc = iq->adapter;
1261	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
1262	unsigned int tid = GET_TID(cpl);
1263	struct toepcb *toep = lookup_tid(sc, tid);
1264	struct sge_wrq *ofld_txq = toep->ofld_txq;
1265	struct inpcb *inp;
1266	struct tcpcb *tp;
1267#ifdef INVARIANTS
1268	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1269#endif
1270
1271	KASSERT(opcode == CPL_ABORT_REQ_RSS,
1272	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1273	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1274
1275	if (toep->flags & TPF_SYNQE)
1276		return (do_abort_req_synqe(iq, rss, m));
1277
1278	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1279
1280	if (negative_advice(cpl->status)) {
1281		CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
1282		    __func__, cpl->status, tid, toep->flags);
1283		return (0);	/* Ignore negative advice */
1284	}
1285
1286	inp = toep->inp;
1287	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
1288	INP_WLOCK(inp);
1289
1290	tp = intotcpcb(inp);
1291
1292	CTR6(KTR_CXGBE,
1293	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
1294	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
1295	    inp->inp_flags, cpl->status);
1296
1297	/*
1298	 * If we'd initiated an abort earlier, the reply to it is responsible for
1299	 * cleaning up resources.  Otherwise we tear everything down right here,
1300	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
1301	 */
1302	if (toep->flags & TPF_ABORT_SHUTDOWN) {
1303		INP_WUNLOCK(inp);
1304		goto done;
1305	}
1306	toep->flags |= TPF_ABORT_SHUTDOWN;
1307
1308	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
1309		struct socket *so = inp->inp_socket;
1310
1311		if (so != NULL)
1312			so_error_set(so, abort_status_to_errno(tp,
1313			    cpl->status));
1314		tp = tcp_close(tp);
1315		if (tp == NULL)
1316			INP_WLOCK(inp);	/* re-acquire */
1317	}
1318
1319	final_cpl_received(toep);
1320done:
1321	INP_INFO_RUNLOCK(&V_tcbinfo);
1322	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
1323	return (0);
1324}
1325
1326/*
1327 * Reply to the CPL_ABORT_REQ (send_reset)
1328 */
1329static int
1330do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1331{
1332	struct adapter *sc = iq->adapter;
1333	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
1334	unsigned int tid = GET_TID(cpl);
1335	struct toepcb *toep = lookup_tid(sc, tid);
1336	struct inpcb *inp = toep->inp;
1337#ifdef INVARIANTS
1338	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1339#endif
1340
1341	KASSERT(opcode == CPL_ABORT_RPL_RSS,
1342	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1343	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1344
1345	if (toep->flags & TPF_SYNQE)
1346		return (do_abort_rpl_synqe(iq, rss, m));
1347
1348	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1349
1350	CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
1351	    __func__, tid, toep, inp, cpl->status);
1352
1353	KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
1354	    ("%s: wasn't expecting abort reply", __func__));
1355
1356	INP_WLOCK(inp);
1357	final_cpl_received(toep);
1358
1359	return (0);
1360}
1361
1362static int
1363do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1364{
1365	struct adapter *sc = iq->adapter;
1366	const struct cpl_rx_data *cpl = mtod(m, const void *);
1367	unsigned int tid = GET_TID(cpl);
1368	struct toepcb *toep = lookup_tid(sc, tid);
1369	struct inpcb *inp = toep->inp;
1370	struct tcpcb *tp;
1371	struct socket *so;
1372	struct sockbuf *sb;
1373	int len;
1374	uint32_t ddp_placed = 0;
1375
1376	if (__predict_false(toep->flags & TPF_SYNQE)) {
1377#ifdef INVARIANTS
1378		struct synq_entry *synqe = (void *)toep;
1379
1380		INP_WLOCK(synqe->lctx->inp);
1381		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
1382			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
1383			    ("%s: listen socket closed but tid %u not aborted.",
1384			    __func__, tid));
1385		} else {
1386			/*
1387			 * do_pass_accept_req is still running and will
1388			 * eventually take care of this tid.
1389			 */
1390		}
1391		INP_WUNLOCK(synqe->lctx->inp);
1392#endif
1393		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
1394		    toep, toep->flags);
1395		m_freem(m);
1396		return (0);
1397	}
1398
1399	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1400
1401	/* strip off CPL header */
1402	m_adj(m, sizeof(*cpl));
1403	len = m->m_pkthdr.len;
1404
1405	INP_WLOCK(inp);
1406	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
1407		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
1408		    __func__, tid, len, inp->inp_flags);
1409		INP_WUNLOCK(inp);
1410		m_freem(m);
1411		return (0);
1412	}
1413
1414	tp = intotcpcb(inp);
1415
1416	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
1417		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;
1418
1419	tp->rcv_nxt += len;
1420	if (tp->rcv_wnd < len) {
1421		KASSERT(toep->ulp_mode != ULP_MODE_RDMA,
1422				("%s: negative window size", __func__));
1423	}
1424
1425	tp->rcv_wnd -= len;
1426	tp->t_rcvtime = ticks;
1427
1428	if (toep->ulp_mode == ULP_MODE_TCPDDP)
1429		DDP_LOCK(toep);
1430	so = inp_inpcbtosocket(inp);
1431	sb = &so->so_rcv;
1432	SOCKBUF_LOCK(sb);
1433
1434	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
1435		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
1436		    __func__, tid, len);
1437		m_freem(m);
1438		SOCKBUF_UNLOCK(sb);
1439		if (toep->ulp_mode == ULP_MODE_TCPDDP)
1440			DDP_UNLOCK(toep);
1441		INP_WUNLOCK(inp);
1442
1443		INP_INFO_RLOCK(&V_tcbinfo);
1444		INP_WLOCK(inp);
1445		tp = tcp_drop(tp, ECONNRESET);
1446		if (tp)
1447			INP_WUNLOCK(inp);
1448		INP_INFO_RUNLOCK(&V_tcbinfo);
1449
1450		return (0);
1451	}
1452
1453	/* receive buffer autosize */
1454	CURVNET_SET(so->so_vnet);
1455	if (sb->sb_flags & SB_AUTOSIZE &&
1456	    V_tcp_do_autorcvbuf &&
1457	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
1458	    len > (sbspace(sb) / 8 * 7)) {
1459		unsigned int hiwat = sb->sb_hiwat;
1460		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
1461		    V_tcp_autorcvbuf_max);
1462
1463		if (!sbreserve_locked(sb, newsize, so, NULL))
1464			sb->sb_flags &= ~SB_AUTOSIZE;
1465		else
1466			toep->rx_credits += newsize - hiwat;
1467	}
1468
1469	if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0)
1470		CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__,
1471		    tid, len);
1472
1473	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
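		/*
		 * ddp_off in the CPL reflects the chip's current DDP state;
		 * compare it with our DDP_ON flag to detect a transition.
		 */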
1474		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;
1475
1476		if (changed) {
1477			if (toep->ddp_flags & DDP_SC_REQ)
1478				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
1479			else {
1480				KASSERT(cpl->ddp_off == 1,
1481				    ("%s: DDP switched on by itself.",
1482				    __func__));
1483
1484				/* Fell out of DDP mode */
1485				toep->ddp_flags &= ~DDP_ON;
1486				CTR1(KTR_CXGBE, "%s: fell out of DDP mode",
1487				    __func__);
1488
1489				insert_ddp_data(toep, ddp_placed);
1490			}
1491		}
1492
1493		if (toep->ddp_flags & DDP_ON) {
1494			/*
1495			 * CPL_RX_DATA with DDP on can only be an indicate.
1496			 * Start posting queued AIO requests via DDP.  The
1497			 * payload that arrived in this indicate is appended
1498			 * to the socket buffer as usual.
1499			 */
1500			handle_ddp_indicate(toep);
1501		}
1502	}
1503
1504	KASSERT(toep->sb_cc >= sbused(sb),
1505	    ("%s: sb %p has more data (%d) than last time (%d).",
1506	    __func__, sb, sbused(sb), toep->sb_cc));
1507	toep->rx_credits += toep->sb_cc - sbused(sb);
1508	sbappendstream_locked(sb, m, 0);
1509	toep->sb_cc = sbused(sb);
1510	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
1511		int credits;
1512
1513		credits = send_rx_credits(sc, toep, toep->rx_credits);
1514		toep->rx_credits -= credits;
1515		tp->rcv_wnd += credits;
1516		tp->rcv_adv += credits;
1517	}
1518
1519	if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) {
1520		CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__,
1521		    tid);
1522		ddp_queue_toep(toep);
1523	}
1524	sorwakeup_locked(so);
1525	SOCKBUF_UNLOCK_ASSERT(sb);
1526	if (toep->ulp_mode == ULP_MODE_TCPDDP)
1527		DDP_UNLOCK(toep);
1528
1529	INP_WUNLOCK(inp);
1530	CURVNET_RESTORE();
1531	return (0);
1532}
1533
1534#define S_CPL_FW4_ACK_OPCODE    24
1535#define M_CPL_FW4_ACK_OPCODE    0xff
1536#define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE)
1537#define G_CPL_FW4_ACK_OPCODE(x) \
1538    (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE)
1539
1540#define S_CPL_FW4_ACK_FLOWID    0
1541#define M_CPL_FW4_ACK_FLOWID    0xffffff
1542#define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID)
1543#define G_CPL_FW4_ACK_FLOWID(x) \
1544    (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID)
1545
1546#define S_CPL_FW4_ACK_CR        24
1547#define M_CPL_FW4_ACK_CR        0xff
1548#define V_CPL_FW4_ACK_CR(x)     ((x) << S_CPL_FW4_ACK_CR)
1549#define G_CPL_FW4_ACK_CR(x)     (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR)
1550
1551#define S_CPL_FW4_ACK_SEQVAL    0
1552#define M_CPL_FW4_ACK_SEQVAL    0x1
1553#define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL)
1554#define G_CPL_FW4_ACK_SEQVAL(x) \
1555    (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL)
1556#define F_CPL_FW4_ACK_SEQVAL    V_CPL_FW4_ACK_SEQVAL(1U)
1557
1558static int
1559do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1560{
1561	struct adapter *sc = iq->adapter;
1562	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
1563	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
1564	struct toepcb *toep = lookup_tid(sc, tid);
1565	struct inpcb *inp;
1566	struct tcpcb *tp;
1567	struct socket *so;
1568	uint8_t credits = cpl->credits;
1569	struct ofld_tx_sdesc *txsd;
1570	int plen;
1571#ifdef INVARIANTS
1572	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
1573#endif
1574
1575	/*
1576	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
1577	 * now this comes back carrying the credits for the flowc.
1578	 */
1579	if (__predict_false(toep->flags & TPF_SYNQE)) {
1580		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
1581		    ("%s: credits for a synq entry %p", __func__, toep));
1582		return (0);
1583	}
1584
1585	inp = toep->inp;
1586
1587	KASSERT(opcode == CPL_FW4_ACK,
1588	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1589	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1590	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
1591
1592	INP_WLOCK(inp);
1593
1594	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
1595		INP_WUNLOCK(inp);
1596		return (0);
1597	}
1598
1599	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
1600	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));
1601
1602	tp = intotcpcb(inp);
1603
1604	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
1605		tcp_seq snd_una = be32toh(cpl->snd_una);
1606
1607#ifdef INVARIANTS
1608		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
1609			log(LOG_ERR,
1610			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
1611			    __func__, snd_una, toep->tid, tp->snd_una);
1612		}
1613#endif
1614
1615		if (tp->snd_una != snd_una) {
1616			tp->snd_una = snd_una;
1617			tp->ts_recent_age = tcp_ts_getticks();
1618		}
1619	}
1620
1621	so = inp->inp_socket;
1622	txsd = &toep->txsd[toep->txsd_cidx];
1623	plen = 0;
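	/*
	 * The firmware returns credits for whole work requests only, so walk
	 * the send descriptor ring and retire every txsd covered by this ACK.
	 */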
1624	while (credits) {
1625		KASSERT(credits >= txsd->tx_credits,
1626		    ("%s: too many (or partial) credits", __func__));
1627		credits -= txsd->tx_credits;
1628		toep->tx_credits += txsd->tx_credits;
1629		plen += txsd->plen;
1630		txsd++;
1631		toep->txsd_avail++;
1632		KASSERT(toep->txsd_avail <= toep->txsd_total,
1633		    ("%s: txsd avail > total", __func__));
1634		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
1635			txsd = &toep->txsd[0];
1636			toep->txsd_cidx = 0;
1637		}
1638	}
1639
1640	if (toep->tx_credits == toep->tx_total) {
1641		toep->tx_nocompl = 0;
1642		toep->plen_nocompl = 0;
1643	}
1644
1645	if (toep->flags & TPF_TX_SUSPENDED &&
1646	    toep->tx_credits >= toep->tx_total / 4) {
1647		toep->flags &= ~TPF_TX_SUSPENDED;
1648		if (toep->ulp_mode == ULP_MODE_ISCSI)
1649			t4_push_pdus(sc, toep, plen);
1650		else
1651			t4_push_frames(sc, toep, plen);
1652	} else if (plen > 0) {
1653		struct sockbuf *sb = &so->so_snd;
1654		int sbu;
1655
1656		SOCKBUF_LOCK(sb);
1657		sbu = sbused(sb);
1658		if (toep->ulp_mode == ULP_MODE_ISCSI) {
1659
1660			if (__predict_false(sbu > 0)) {
1661				/*
1662				 * The data transmitted before the tid's ULP mode
1663				 * changed to ISCSI is still in so_snd.
1664				 * Incoming credits should account for so_snd
1665				 * first.
1666				 */
1667				sbdrop_locked(sb, min(sbu, plen));
1668				plen -= min(sbu, plen);
1669			}
1670			sowwakeup_locked(so);	/* unlocks so_snd */
1671			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
1672		} else {
1673			sbdrop_locked(sb, plen);
1674			sowwakeup_locked(so);	/* unlocks so_snd */
1675		}
1676		SOCKBUF_UNLOCK_ASSERT(sb);
1677	}
1678
1679	INP_WUNLOCK(inp);
1680
1681	return (0);
1682}
1683
1684static int
1685do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1686{
1687	struct adapter *sc = iq->adapter;
1688	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
1689	unsigned int tid = GET_TID(cpl);
1690	struct toepcb *toep;
1691#ifdef INVARIANTS
1692	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1693#endif
1694
1695	KASSERT(opcode == CPL_SET_TCB_RPL,
1696	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1697	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1698
1699	if (is_ftid(sc, tid))
1700		return (t4_filter_rpl(iq, rss, m)); /* TCB is a filter */
1701
1702	toep = lookup_tid(sc, tid);
1703	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
1704		handle_ddp_tcb_rpl(toep, cpl);
1705		return (0);
1706	}
1707
1708	/*
1709	 * TOM and/or other ULPs don't request replies for CPL_SET_TCB or
1710	 * CPL_SET_TCB_FIELD requests.  This can easily change and when it does,
1711	 * the dispatch code will go here.
1712	 */
1713#ifdef INVARIANTS
1714	panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__,
1715	    tid, iq);
1716#else
1717	log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n",
1718	    __func__, tid, iq);
1719#endif
1720
1721	return (0);
1722}
1723
1724void
1725t4_set_tcb_field(struct adapter *sc, struct toepcb *toep, int ctrl,
1726    uint16_t word, uint64_t mask, uint64_t val)
1727{
1728	struct wrqe *wr;
1729	struct cpl_set_tcb_field *req;
1730
1731	wr = alloc_wrqe(sizeof(*req), ctrl ? toep->ctrlq : toep->ofld_txq);
1732	if (wr == NULL) {
1733		/* XXX */
1734		panic("%s: allocation failure.", __func__);
1735	}
1736	req = wrtod(wr);
1737
1738	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
1739	req->reply_ctrl = htobe16(V_NO_REPLY(1) |
1740	    V_QUEUENO(toep->ofld_rxq->iq.abs_id));
1741	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
1742	req->mask = htobe64(mask);
1743	req->val = htobe64(val);
1744
1745	t4_wrq_tx(sc, wr);
1746}
1747
1748void
1749t4_set_tcb_field_rpl(struct adapter *sc, struct toepcb *toep, int ctrl,
1750    uint16_t word, uint64_t mask, uint64_t val, uint8_t cookie)
1751{
1752	struct wrqe *wr;
1753	struct cpl_set_tcb_field *req;
1754
1755	KASSERT((cookie & ~M_COOKIE) == 0, ("%s: invalid cookie %#x", __func__,
1756	    cookie));
1757	wr = alloc_wrqe(sizeof(*req), ctrl ? toep->ctrlq : toep->ofld_txq);
1758	if (wr == NULL) {
1759		/* XXX */
1760		panic("%s: allocation failure.", __func__);
1761	}
1762	req = wrtod(wr);
1763
1764	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
1765	req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id));
1766	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
1767	req->mask = htobe64(mask);
1768	req->val = htobe64(val);
1769
1770	t4_wrq_tx(sc, wr);
1771}
1772
1773void
1774t4_init_cpl_io_handlers(struct adapter *sc)
1775{
1776
1777	t4_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
1778	t4_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
1779	t4_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
1780	t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
1781	t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
1782	t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack);
1783	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
1784}
1785
1786void
1787t4_uninit_cpl_io_handlers(struct adapter *sc)
1788{
1789
1790	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
1791}
1792#endif
1793