1237263Snp/*-
2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc.
3237263Snp * All rights reserved.
4237263Snp * Written by: Navdeep Parhar <np@FreeBSD.org>
5237263Snp *
6237263Snp * Redistribution and use in source and binary forms, with or without
7237263Snp * modification, are permitted provided that the following conditions
8237263Snp * are met:
9237263Snp * 1. Redistributions of source code must retain the above copyright
10237263Snp *    notice, this list of conditions and the following disclaimer.
11237263Snp * 2. Redistributions in binary form must reproduce the above copyright
12237263Snp *    notice, this list of conditions and the following disclaimer in the
13237263Snp *    documentation and/or other materials provided with the distribution.
14237263Snp *
15237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18237263Snp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25237263Snp * SUCH DAMAGE.
26237263Snp */
27237263Snp
28237263Snp#include <sys/cdefs.h>
29237263Snp__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/tom/t4_tom.c 355242 2019-11-30 19:33:14Z np $");
30237263Snp
31237263Snp#include "opt_inet.h"
32245441Snp#include "opt_inet6.h"
33237263Snp
34237263Snp#include <sys/param.h>
35237263Snp#include <sys/types.h>
36237263Snp#include <sys/systm.h>
37237263Snp#include <sys/kernel.h>
38237263Snp#include <sys/ktr.h>
39286001Sae#include <sys/lock.h>
40292736Snp#include <sys/limits.h>
41237263Snp#include <sys/module.h>
42237263Snp#include <sys/protosw.h>
43237263Snp#include <sys/domain.h>
44299210Sjhb#include <sys/refcount.h>
45286001Sae#include <sys/rmlock.h>
46237263Snp#include <sys/socket.h>
47237263Snp#include <sys/socketvar.h>
48249627Snp#include <sys/taskqueue.h>
49245448Snp#include <net/if.h>
50257241Sglebius#include <net/if_var.h>
51346805Snp#include <net/if_types.h>
52346805Snp#include <net/if_vlan_var.h>
53237263Snp#include <netinet/in.h>
54237263Snp#include <netinet/in_pcb.h>
55245448Snp#include <netinet/in_var.h>
56237263Snp#include <netinet/ip.h>
57245441Snp#include <netinet/ip6.h>
58245448Snp#include <netinet6/scope6_var.h>
59237263Snp#define TCPSTATES
60237263Snp#include <netinet/tcp_fsm.h>
61330303Sjhb#include <netinet/tcp_timer.h>
62294889Sglebius#include <netinet/tcp_var.h>
63237263Snp#include <netinet/toecore.h>
64237263Snp
65237263Snp#ifdef TCP_OFFLOAD
66237263Snp#include "common/common.h"
67237263Snp#include "common/t4_msg.h"
68237263Snp#include "common/t4_regs.h"
69252705Snp#include "common/t4_regs_values.h"
70252716Snp#include "common/t4_tcb.h"
71346934Snp#include "t4_clip.h"
72237263Snp#include "tom/t4_tom_l2t.h"
73237263Snp#include "tom/t4_tom.h"
74345664Sjhb#include "tom/t4_tls.h"
75237263Snp
76306661Sjhbstatic struct protosw toe_protosw;
77306661Sjhbstatic struct pr_usrreqs toe_usrreqs;
78239344Snp
79306661Sjhbstatic struct protosw toe6_protosw;
80306661Sjhbstatic struct pr_usrreqs toe6_usrreqs;
81245441Snp
82237263Snp/* Module ops */
83237263Snpstatic int t4_tom_mod_load(void);
84237263Snpstatic int t4_tom_mod_unload(void);
85237263Snpstatic int t4_tom_modevent(module_t, int, void *);
86237263Snp
87237263Snp/* ULD ops and helpers */
88237263Snpstatic int t4_tom_activate(struct adapter *);
89237263Snpstatic int t4_tom_deactivate(struct adapter *);
90237263Snp
91237263Snpstatic struct uld_info tom_uld_info = {
92237263Snp	.uld_id = ULD_TOM,
93237263Snp	.activate = t4_tom_activate,
94237263Snp	.deactivate = t4_tom_deactivate,
95237263Snp};
96237263Snp
97237263Snpstatic void release_offload_resources(struct toepcb *);
98237263Snpstatic int alloc_tid_tabs(struct tid_info *);
99237263Snpstatic void free_tid_tabs(struct tid_info *);
100237263Snpstatic void free_tom_data(struct adapter *, struct tom_data *);
101272719Snpstatic void reclaim_wr_resources(void *, int);
102237263Snp
103237263Snpstruct toepcb *
104291665Sjhballoc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags)
105237263Snp{
106291665Sjhb	struct port_info *pi = vi->pi;
107237263Snp	struct adapter *sc = pi->adapter;
108237263Snp	struct toepcb *toep;
109237263Snp	int tx_credits, txsd_total, len;
110237263Snp
111237263Snp	/*
112237263Snp	 * The firmware counts tx work request credits in units of 16 bytes
113237263Snp	 * each.  Reserve room for an ABORT_REQ so the driver never has to worry
114237263Snp	 * about tx credits if it wants to abort a connection.
115237263Snp	 */
116237263Snp	tx_credits = sc->params.ofldq_wr_cred;
117237263Snp	tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);
118237263Snp
119237263Snp	/*
120237263Snp	 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte
121237263Snp	 * immediate payload, and firmware counts tx work request credits in
122237263Snp	 * units of 16 byte.  Calculate the maximum work requests possible.
123237263Snp	 */
124237263Snp	txsd_total = tx_credits /
125298482Spfg	    howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);
126237263Snp
127291665Sjhb	KASSERT(txqid >= vi->first_ofld_txq &&
128291665Sjhb	    txqid < vi->first_ofld_txq + vi->nofldtxq,
129291665Sjhb	    ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi,
130291665Sjhb		vi->first_ofld_txq, vi->nofldtxq));
131237263Snp
132291665Sjhb	KASSERT(rxqid >= vi->first_ofld_rxq &&
133291665Sjhb	    rxqid < vi->first_ofld_rxq + vi->nofldrxq,
134291665Sjhb	    ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi,
135291665Sjhb		vi->first_ofld_rxq, vi->nofldrxq));
136237263Snp
137237263Snp	len = offsetof(struct toepcb, txsd) +
138237263Snp	    txsd_total * sizeof(struct ofld_tx_sdesc);
139237263Snp
140237263Snp	toep = malloc(len, M_CXGBE, M_ZERO | flags);
141237263Snp	if (toep == NULL)
142237263Snp		return (NULL);
143237263Snp
144299210Sjhb	refcount_init(&toep->refcount, 1);
145237263Snp	toep->td = sc->tom_softc;
146291665Sjhb	toep->vi = vi;
147346805Snp	toep->tc_idx = -1;
148255411Snp	toep->tx_total = tx_credits;
149237263Snp	toep->tx_credits = tx_credits;
150237263Snp	toep->ofld_txq = &sc->sge.ofld_txq[txqid];
151237263Snp	toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
152237263Snp	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
153292736Snp	mbufq_init(&toep->ulp_pduq, INT_MAX);
154292736Snp	mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX);
155237263Snp	toep->txsd_total = txsd_total;
156237263Snp	toep->txsd_avail = txsd_total;
157237263Snp	toep->txsd_pidx = 0;
158237263Snp	toep->txsd_cidx = 0;
159306661Sjhb	aiotx_init_toep(toep);
160237263Snp
161237263Snp	return (toep);
162237263Snp}
163237263Snp
164299210Sjhbstruct toepcb *
165299210Sjhbhold_toepcb(struct toepcb *toep)
166299210Sjhb{
167299210Sjhb
168299210Sjhb	refcount_acquire(&toep->refcount);
169299210Sjhb	return (toep);
170299210Sjhb}
171299210Sjhb
172237263Snpvoid
173237263Snpfree_toepcb(struct toepcb *toep)
174237263Snp{
175237263Snp
176299210Sjhb	if (refcount_release(&toep->refcount) == 0)
177299210Sjhb		return;
178299210Sjhb
179239514Snp	KASSERT(!(toep->flags & TPF_ATTACHED),
180237263Snp	    ("%s: attached to an inpcb", __func__));
181239514Snp	KASSERT(!(toep->flags & TPF_CPL_PENDING),
182237263Snp	    ("%s: CPL pending", __func__));
183237263Snp
184331645Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP)
185331645Sjhb		ddp_uninit_toep(toep);
186345664Sjhb	tls_uninit_toep(toep);
187237263Snp	free(toep, M_CXGBE);
188237263Snp}
189237263Snp
190237263Snp/*
191237263Snp * Set up the socket for TCP offload.
192237263Snp */
193237263Snpvoid
194237263Snpoffload_socket(struct socket *so, struct toepcb *toep)
195237263Snp{
196237263Snp	struct tom_data *td = toep->td;
197237263Snp	struct inpcb *inp = sotoinpcb(so);
198237263Snp	struct tcpcb *tp = intotcpcb(inp);
199237263Snp	struct sockbuf *sb;
200237263Snp
201237263Snp	INP_WLOCK_ASSERT(inp);
202237263Snp
203237263Snp	/* Update socket */
204237263Snp	sb = &so->so_snd;
205237263Snp	SOCKBUF_LOCK(sb);
206237263Snp	sb->sb_flags |= SB_NOCOALESCE;
207237263Snp	SOCKBUF_UNLOCK(sb);
208237263Snp	sb = &so->so_rcv;
209237263Snp	SOCKBUF_LOCK(sb);
210237263Snp	sb->sb_flags |= SB_NOCOALESCE;
211306661Sjhb	if (inp->inp_vflag & INP_IPV6)
212306661Sjhb		so->so_proto = &toe6_protosw;
213306661Sjhb	else
214306661Sjhb		so->so_proto = &toe_protosw;
215237263Snp	SOCKBUF_UNLOCK(sb);
216237263Snp
217237263Snp	/* Update TCP PCB */
218237263Snp	tp->tod = &td->tod;
219237263Snp	tp->t_toe = toep;
220237263Snp	tp->t_flags |= TF_TOE;
221237263Snp
222237263Snp	/* Install an extra hold on inp */
223237263Snp	toep->inp = inp;
224239514Snp	toep->flags |= TPF_ATTACHED;
225237263Snp	in_pcbref(inp);
226237263Snp
227237263Snp	/* Add the TOE PCB to the active list */
228237263Snp	mtx_lock(&td->toep_list_lock);
229237263Snp	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
230237263Snp	mtx_unlock(&td->toep_list_lock);
231237263Snp}
232237263Snp
233237263Snp/* This is _not_ the normal way to "unoffload" a socket. */
234237263Snpvoid
235237263Snpundo_offload_socket(struct socket *so)
236237263Snp{
237237263Snp	struct inpcb *inp = sotoinpcb(so);
238237263Snp	struct tcpcb *tp = intotcpcb(inp);
239237263Snp	struct toepcb *toep = tp->t_toe;
240237263Snp	struct tom_data *td = toep->td;
241237263Snp	struct sockbuf *sb;
242237263Snp
243237263Snp	INP_WLOCK_ASSERT(inp);
244237263Snp
245237263Snp	sb = &so->so_snd;
246237263Snp	SOCKBUF_LOCK(sb);
247237263Snp	sb->sb_flags &= ~SB_NOCOALESCE;
248237263Snp	SOCKBUF_UNLOCK(sb);
249237263Snp	sb = &so->so_rcv;
250237263Snp	SOCKBUF_LOCK(sb);
251237263Snp	sb->sb_flags &= ~SB_NOCOALESCE;
252237263Snp	SOCKBUF_UNLOCK(sb);
253237263Snp
254237263Snp	tp->tod = NULL;
255237263Snp	tp->t_toe = NULL;
256237263Snp	tp->t_flags &= ~TF_TOE;
257237263Snp
258237263Snp	toep->inp = NULL;
259239514Snp	toep->flags &= ~TPF_ATTACHED;
260237263Snp	if (in_pcbrele_wlocked(inp))
261237263Snp		panic("%s: inp freed.", __func__);
262237263Snp
263237263Snp	mtx_lock(&td->toep_list_lock);
264237263Snp	TAILQ_REMOVE(&td->toep_list, toep, link);
265237263Snp	mtx_unlock(&td->toep_list_lock);
266237263Snp}
267237263Snp
268237263Snpstatic void
269237263Snprelease_offload_resources(struct toepcb *toep)
270237263Snp{
271237263Snp	struct tom_data *td = toep->td;
272237263Snp	struct adapter *sc = td_adapter(td);
273237263Snp	int tid = toep->tid;
274237263Snp
275239514Snp	KASSERT(!(toep->flags & TPF_CPL_PENDING),
276237263Snp	    ("%s: %p has CPL pending.", __func__, toep));
277239514Snp	KASSERT(!(toep->flags & TPF_ATTACHED),
278237263Snp	    ("%s: %p is still attached.", __func__, toep));
279237263Snp
280245448Snp	CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
281245448Snp	    __func__, toep, tid, toep->l2te, toep->ce);
282237263Snp
283292736Snp	/*
284292736Snp	 * These queues should have been emptied at approximately the same time
285292736Snp	 * that a normal connection's socket's so_snd would have been purged or
286292736Snp	 * drained.  Do _not_ clean up here.
287292736Snp	 */
288292736Snp	MPASS(mbufq_len(&toep->ulp_pduq) == 0);
289292736Snp	MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0);
290299210Sjhb#ifdef INVARIANTS
291331645Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP)
292331645Sjhb		ddp_assert_empty(toep);
293299210Sjhb#endif
294292736Snp
295237263Snp	if (toep->l2te)
296237263Snp		t4_l2t_release(toep->l2te);
297237263Snp
298237263Snp	if (tid >= 0) {
299312116Snp		remove_tid(sc, tid, toep->ce ? 2 : 1);
300237263Snp		release_tid(sc, tid, toep->ctrlq);
301237263Snp	}
302237263Snp
303245448Snp	if (toep->ce)
304346934Snp		t4_release_lip(sc, toep->ce);
305245448Snp
306346805Snp	if (toep->tc_idx != -1)
307346871Snp		t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->tc_idx);
308346871Snp
309237263Snp	mtx_lock(&td->toep_list_lock);
310237263Snp	TAILQ_REMOVE(&td->toep_list, toep, link);
311237263Snp	mtx_unlock(&td->toep_list_lock);
312237263Snp
313237263Snp	free_toepcb(toep);
314237263Snp}
315237263Snp
316237263Snp/*
317237263Snp * The kernel is done with the TCP PCB and this is our opportunity to unhook the
318237263Snp * toepcb hanging off of it.  If the TOE driver is also done with the toepcb (no
319237263Snp * pending CPL) then it is time to release all resources tied to the toepcb.
320237263Snp *
321237263Snp * Also gets called when an offloaded active open fails and the TOM wants the
322237263Snp * kernel to take the TCP PCB back.
323237263Snp */
324237263Snpstatic void
325237263Snpt4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
326237263Snp{
327237263Snp#if defined(KTR) || defined(INVARIANTS)
328237263Snp	struct inpcb *inp = tp->t_inpcb;
329237263Snp#endif
330237263Snp	struct toepcb *toep = tp->t_toe;
331237263Snp
332237263Snp	INP_WLOCK_ASSERT(inp);
333237263Snp
334237263Snp	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
335239514Snp	KASSERT(toep->flags & TPF_ATTACHED,
336237263Snp	    ("%s: not attached", __func__));
337237263Snp
338237263Snp#ifdef KTR
339237263Snp	if (tp->t_state == TCPS_SYN_SENT) {
340237263Snp		CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
341237263Snp		    __func__, toep->tid, toep, toep->flags, inp,
342237263Snp		    inp->inp_flags);
343237263Snp	} else {
344237263Snp		CTR6(KTR_CXGBE,
345237263Snp		    "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
346237263Snp		    toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
347237263Snp		    inp->inp_flags);
348237263Snp	}
349237263Snp#endif
350237263Snp
351237263Snp	tp->t_toe = NULL;
352237263Snp	tp->t_flags &= ~TF_TOE;
353239514Snp	toep->flags &= ~TPF_ATTACHED;
354237263Snp
355239514Snp	if (!(toep->flags & TPF_CPL_PENDING))
356237263Snp		release_offload_resources(toep);
357237263Snp}
358237263Snp
359237263Snp/*
360252716Snp * setsockopt handler.
361252716Snp */
362252716Snpstatic void
363252716Snpt4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name)
364252716Snp{
365252716Snp	struct adapter *sc = tod->tod_softc;
366252716Snp	struct toepcb *toep = tp->t_toe;
367252716Snp
368252716Snp	if (dir == SOPT_GET)
369252716Snp		return;
370252716Snp
371252716Snp	CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name);
372252716Snp
373252716Snp	switch (name) {
374252716Snp	case TCP_NODELAY:
375330307Snp		if (tp->t_state != TCPS_ESTABLISHED)
376330307Snp			break;
377345664Sjhb		t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS,
378302339Snp		    V_TF_NAGLE(1), V_TF_NAGLE(tp->t_flags & TF_NODELAY ? 0 : 1),
379345664Sjhb		    0, 0);
380252716Snp		break;
381252716Snp	default:
382252716Snp		break;
383252716Snp	}
384252716Snp}
385252716Snp
/*
 * Return bit number 'bit' (0 or 1) of the TCB image.  Bits are numbered from
 * the end of the buffer: bits 0-7 are in tcb[127], bits 8-15 in tcb[126], and
 * so on, with the lowest numbered bit of each group in the byte's least
 * significant position.
 */
static inline int
get_tcb_bit(u_char *tcb, int bit)
{
	const u_char byte = tcb[127 - (bit >> 3)];

	return ((byte >> (bit & 0x7)) & 1);
}
396346848Snp
/*
 * Return the value of the TCB field that occupies bits lo through hi
 * (inclusive), with bit hi as the most significant bit of the result.
 * Returns 0 if hi < lo.
 */
static inline uint64_t
get_tcb_bits(u_char *tcb, int hi, int lo)
{
	uint64_t val;
	int bit;

	val = 0;
	for (bit = hi; bit >= lo; bit--)
		val = (val << 1) | get_tcb_bit(tcb, bit);

	return (val);
}
409346848Snp
410252716Snp/*
411346848Snp * Called by the kernel to allow the TOE driver to "refine" values filled up in
412346848Snp * the tcp_info for an offloaded connection.
413346848Snp */
414346848Snpstatic void
415346848Snpt4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
416346848Snp{
417346848Snp	int i, j, k, rc;
418346848Snp	struct adapter *sc = tod->tod_softc;
419346848Snp	struct toepcb *toep = tp->t_toe;
420346848Snp	uint32_t addr, v;
421346848Snp	uint32_t buf[TCB_SIZE / sizeof(uint32_t)];
422346848Snp	u_char *tcb, tmp;
423346848Snp
424346848Snp	INP_WLOCK_ASSERT(tp->t_inpcb);
425346848Snp	MPASS(ti != NULL);
426346848Snp
427346848Snp	addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + toep->tid * TCB_SIZE;
428346848Snp	rc = read_via_memwin(sc, 2, addr, &buf[0], TCB_SIZE);
429346848Snp	if (rc != 0)
430346848Snp		return;
431346848Snp
432346848Snp	tcb = (u_char *)&buf[0];
433346848Snp	for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) {
434346848Snp		for (k = 0; k < 16; k++) {
435346848Snp			tmp = tcb[i + k];
436346848Snp			tcb[i + k] = tcb[j + k];
437346848Snp			tcb[j + k] = tmp;
438346848Snp		}
439346848Snp	}
440346848Snp
441346848Snp	ti->tcpi_state = get_tcb_bits(tcb, 115, 112);
442346848Snp
443346848Snp	v = get_tcb_bits(tcb, 271, 256);
444346848Snp	ti->tcpi_rtt = tcp_ticks_to_us(sc, v);
445346848Snp
446346848Snp	v = get_tcb_bits(tcb, 287, 272);
447346848Snp	ti->tcpi_rttvar = tcp_ticks_to_us(sc, v);
448346848Snp
449346848Snp	ti->tcpi_snd_ssthresh = get_tcb_bits(tcb, 487, 460);
450346848Snp	ti->tcpi_snd_cwnd = get_tcb_bits(tcb, 459, 432);
451346848Snp	ti->tcpi_rcv_nxt = get_tcb_bits(tcb, 553, 522);
452346848Snp
453346848Snp	ti->tcpi_snd_nxt = get_tcb_bits(tcb, 319, 288) -
454346848Snp	    get_tcb_bits(tcb, 375, 348);
455346848Snp
456346848Snp	/* Receive window being advertised by us. */
457346848Snp	ti->tcpi_rcv_space = get_tcb_bits(tcb, 581, 554);
458346848Snp
459346848Snp	/* Send window ceiling. */
460346848Snp	v = get_tcb_bits(tcb, 159, 144) << get_tcb_bits(tcb, 131, 128);
461346848Snp	ti->tcpi_snd_wnd = min(v, ti->tcpi_snd_cwnd);
462346848Snp}
463346848Snp
464346848Snp/*
465237263Snp * The TOE driver will not receive any more CPLs for the tid associated with the
466237263Snp * toepcb; release the hold on the inpcb.
467237263Snp */
468237263Snpvoid
469237263Snpfinal_cpl_received(struct toepcb *toep)
470237263Snp{
471237263Snp	struct inpcb *inp = toep->inp;
472237263Snp
473237263Snp	KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
474237263Snp	INP_WLOCK_ASSERT(inp);
475239514Snp	KASSERT(toep->flags & TPF_CPL_PENDING,
476237263Snp	    ("%s: CPL not pending already?", __func__));
477237263Snp
478237263Snp	CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
479237263Snp	    __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);
480237263Snp
481299210Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP)
482299210Sjhb		release_ddp_resources(toep);
483237263Snp	toep->inp = NULL;
484239514Snp	toep->flags &= ~TPF_CPL_PENDING;
485292736Snp	mbufq_drain(&toep->ulp_pdu_reclaimq);
486237263Snp
487239514Snp	if (!(toep->flags & TPF_ATTACHED))
488237263Snp		release_offload_resources(toep);
489237263Snp
490237263Snp	if (!in_pcbrele_wlocked(inp))
491237263Snp		INP_WUNLOCK(inp);
492237263Snp}
493237263Snp
494237263Snpvoid
495312116Snpinsert_tid(struct adapter *sc, int tid, void *ctx, int ntids)
496237263Snp{
497237263Snp	struct tid_info *t = &sc->tids;
498237263Snp
499346874Snp	MPASS(tid >= t->tid_base);
500346874Snp	MPASS(tid - t->tid_base < t->ntids);
501346874Snp
502346874Snp	t->tid_tab[tid - t->tid_base] = ctx;
503312116Snp	atomic_add_int(&t->tids_in_use, ntids);
504237263Snp}
505237263Snp
506237263Snpvoid *
507237263Snplookup_tid(struct adapter *sc, int tid)
508237263Snp{
509237263Snp	struct tid_info *t = &sc->tids;
510237263Snp
511346874Snp	return (t->tid_tab[tid - t->tid_base]);
512237263Snp}
513237263Snp
514237263Snpvoid
515237263Snpupdate_tid(struct adapter *sc, int tid, void *ctx)
516237263Snp{
517237263Snp	struct tid_info *t = &sc->tids;
518237263Snp
519346874Snp	t->tid_tab[tid - t->tid_base] = ctx;
520237263Snp}
521237263Snp
522237263Snpvoid
523312116Snpremove_tid(struct adapter *sc, int tid, int ntids)
524237263Snp{
525237263Snp	struct tid_info *t = &sc->tids;
526237263Snp
527346874Snp	t->tid_tab[tid - t->tid_base] = NULL;
528312116Snp	atomic_subtract_int(&t->tids_in_use, ntids);
529237263Snp}
530237263Snp
531237263Snp/*
532346805Snp * What mtu_idx to use, given a 4-tuple.  Note that both s->mss and tcp_mssopt
533346805Snp * have the MSS that we should advertise in our SYN.  Advertised MSS doesn't
534346805Snp * account for any TCP options so the effective MSS (only payload, no headers or
535355242Snp * options) could be different.
536237263Snp */
537237263Snpint
538346805Snpfind_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc,
539346805Snp    struct offload_settings *s)
540237263Snp{
541237263Snp	unsigned short *mtus = &sc->params.mtus[0];
542346805Snp	int i, mss, mtu;
543237263Snp
544346805Snp	MPASS(inc != NULL);
545237263Snp
546346805Snp	mss = s->mss > 0 ? s->mss : tcp_mssopt(inc);
547245441Snp	if (inc->inc_flags & INC_ISIPV6)
548346805Snp		mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
549245441Snp	else
550346805Snp		mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr);
551237263Snp
552346805Snp	for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++)
553245441Snp		continue;
554245441Snp
555237263Snp	return (i);
556237263Snp}
557237263Snp
558237263Snp/*
559237263Snp * Determine the receive window size for a socket.
560237263Snp */
561237263Snpu_long
562237263Snpselect_rcv_wnd(struct socket *so)
563237263Snp{
564237263Snp	unsigned long wnd;
565237263Snp
566237263Snp	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
567237263Snp
568237263Snp	wnd = sbspace(&so->so_rcv);
569237263Snp	if (wnd < MIN_RCV_WND)
570237263Snp		wnd = MIN_RCV_WND;
571237263Snp
572237263Snp	return min(wnd, MAX_RCV_WND);
573237263Snp}
574237263Snp
575237263Snpint
576237263Snpselect_rcv_wscale(void)
577237263Snp{
578237263Snp	int wscale = 0;
579237263Snp	unsigned long space = sb_max;
580237263Snp
581237263Snp	if (space > MAX_RCV_WND)
582237263Snp		space = MAX_RCV_WND;
583237263Snp
584237263Snp	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
585237263Snp		wscale++;
586237263Snp
587237263Snp	return (wscale);
588237263Snp}
589237263Snp
590237263Snp/*
591237263Snp * socket so could be a listening socket too.
592237263Snp */
593237263Snpuint64_t
594291665Sjhbcalc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e,
595346805Snp    int mtu_idx, int rscale, int rx_credits, int ulp_mode,
596346805Snp    struct offload_settings *s)
597237263Snp{
598346805Snp	int keepalive;
599237263Snp	uint64_t opt0;
600237263Snp
601346805Snp	MPASS(so != NULL);
602346805Snp	MPASS(vi != NULL);
603237263Snp	KASSERT(rx_credits <= M_RCV_BUFSIZ,
604237263Snp	    ("%s: rcv_bufsiz too high", __func__));
605237263Snp
606237263Snp	opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
607346805Snp	    V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits) |
608346805Snp	    V_L2T_IDX(e->idx) | V_SMAC_SEL(vi->smt_idx) |
609346805Snp	    V_TX_CHAN(vi->pi->tx_chan);
610237263Snp
611346805Snp	keepalive = tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE;
612346805Snp	opt0 |= V_KEEP_ALIVE(keepalive != 0);
613346805Snp
614346805Snp	if (s->nagle < 0) {
615237263Snp		struct inpcb *inp = sotoinpcb(so);
616237263Snp		struct tcpcb *tp = intotcpcb(inp);
617237263Snp
618237263Snp		opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
619346805Snp	} else
620346805Snp		opt0 |= V_NAGLE(s->nagle != 0);
621237263Snp
622237263Snp	return htobe64(opt0);
623237263Snp}
624237263Snp
625248925Snpuint64_t
626291665Sjhbselect_ntuple(struct vi_info *vi, struct l2t_entry *e)
627237263Snp{
628291665Sjhb	struct adapter *sc = vi->pi->adapter;
629252705Snp	struct tp_params *tp = &sc->params.tp;
630252705Snp	uint64_t ntuple = 0;
631237263Snp
632252705Snp	/*
633252705Snp	 * Initialize each of the fields which we care about which are present
634252705Snp	 * in the Compressed Filter Tuple.
635252705Snp	 */
636252705Snp	if (tp->vlan_shift >= 0 && e->vlan != CPL_L2T_VLAN_NONE)
637252705Snp		ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift;
638237263Snp
639252705Snp	if (tp->port_shift >= 0)
640252705Snp		ntuple |= (uint64_t)e->lport << tp->port_shift;
641252705Snp
642252705Snp	if (tp->protocol_shift >= 0)
643252705Snp		ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;
644252705Snp
645346855Snp	if (tp->vnic_shift >= 0 && tp->ingress_config & F_VNIC) {
646346967Snp		ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vi->vin) |
647346967Snp		    V_FT_VNID_ID_PF(sc->pf) | V_FT_VNID_ID_VLD(vi->vfvld)) <<
648346967Snp		    tp->vnic_shift;
649252705Snp	}
650252705Snp
651252705Snp	if (is_t4(sc))
652252705Snp		return (htobe32((uint32_t)ntuple));
653248925Snp	else
654248925Snp		return (htobe64(V_FILTER_TUPLE(ntuple)));
655237263Snp}
656237263Snp
657345664Sjhbstatic int
658345664Sjhbis_tls_sock(struct socket *so, struct adapter *sc)
659345664Sjhb{
660345664Sjhb	struct inpcb *inp = sotoinpcb(so);
661345664Sjhb	int i, rc;
662345664Sjhb
663345664Sjhb	/* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */
664345664Sjhb	rc = 0;
665345664Sjhb	ADAPTER_LOCK(sc);
666345664Sjhb	for (i = 0; i < sc->tt.num_tls_rx_ports; i++) {
667345664Sjhb		if (inp->inp_lport == htons(sc->tt.tls_rx_ports[i]) ||
668345664Sjhb		    inp->inp_fport == htons(sc->tt.tls_rx_ports[i])) {
669345664Sjhb			rc = 1;
670345664Sjhb			break;
671345664Sjhb		}
672345664Sjhb	}
673345664Sjhb	ADAPTER_UNLOCK(sc);
674345664Sjhb	return (rc);
675345664Sjhb}
676345664Sjhb
677345664Sjhbint
678346805Snpselect_ulp_mode(struct socket *so, struct adapter *sc,
679346805Snp    struct offload_settings *s)
680345664Sjhb{
681345664Sjhb
682346805Snp	if (can_tls_offload(sc) &&
683346805Snp	    (s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc))))
684345664Sjhb		return (ULP_MODE_TLS);
685346805Snp	else if (s->ddp > 0 ||
686346805Snp	    (s->ddp < 0 && sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0))
687345664Sjhb		return (ULP_MODE_TCPDDP);
688345664Sjhb	else
689345664Sjhb		return (ULP_MODE_NONE);
690345664Sjhb}
691345664Sjhb
692245441Snpvoid
693345664Sjhbset_ulp_mode(struct toepcb *toep, int ulp_mode)
694245441Snp{
695245441Snp
696345664Sjhb	CTR4(KTR_CXGBE, "%s: toep %p (tid %d) ulp_mode %d",
697345664Sjhb	    __func__, toep, toep->tid, ulp_mode);
698345664Sjhb	toep->ulp_mode = ulp_mode;
699345664Sjhb	tls_init_toep(toep);
700345664Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP)
701345664Sjhb		ddp_init_toep(toep);
702245441Snp}
703245441Snp
704245935Snpint
705245935Snpnegative_advice(int status)
706245935Snp{
707245935Snp
708245935Snp	return (status == CPL_ERR_RTX_NEG_ADVICE ||
709245935Snp	    status == CPL_ERR_PERSIST_NEG_ADVICE ||
710245935Snp	    status == CPL_ERR_KEEPALV_NEG_ADVICE);
711245935Snp}
712245935Snp
713237263Snpstatic int
714346849Snpalloc_tid_tab(struct tid_info *t, int flags)
715237263Snp{
716237263Snp
717346849Snp	MPASS(t->ntids > 0);
718346849Snp	MPASS(t->tid_tab == NULL);
719237263Snp
720346849Snp	t->tid_tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE,
721346849Snp	    M_ZERO | flags);
722237263Snp	if (t->tid_tab == NULL)
723237263Snp		return (ENOMEM);
724346849Snp	atomic_store_rel_int(&t->tids_in_use, 0);
725237263Snp
726346849Snp	return (0);
727346849Snp}
728237263Snp
729346849Snpstatic void
730346849Snpfree_tid_tab(struct tid_info *t)
731346849Snp{
732346849Snp
733346849Snp	KASSERT(t->tids_in_use == 0,
734346849Snp	    ("%s: %d tids still in use.", __func__, t->tids_in_use));
735346849Snp
736346849Snp	free(t->tid_tab, M_CXGBE);
737346849Snp	t->tid_tab = NULL;
738346849Snp}
739346849Snp
740346849Snpstatic int
741346849Snpalloc_stid_tab(struct tid_info *t, int flags)
742346849Snp{
743346849Snp
744346849Snp	MPASS(t->nstids > 0);
745346849Snp	MPASS(t->stid_tab == NULL);
746346849Snp
747346849Snp	t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
748346849Snp	    M_ZERO | flags);
749346849Snp	if (t->stid_tab == NULL)
750346849Snp		return (ENOMEM);
751237263Snp	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
752237263Snp	t->stids_in_use = 0;
753245276Snp	TAILQ_INIT(&t->stids);
754245276Snp	t->nstids_free_head = t->nstids;
755237263Snp
756237263Snp	return (0);
757237263Snp}
758237263Snp
759237263Snpstatic void
760346849Snpfree_stid_tab(struct tid_info *t)
761237263Snp{
762346849Snp
763237263Snp	KASSERT(t->stids_in_use == 0,
764237263Snp	    ("%s: %d tids still in use.", __func__, t->stids_in_use));
765237263Snp
766237263Snp	if (mtx_initialized(&t->stid_lock))
767237263Snp		mtx_destroy(&t->stid_lock);
768346849Snp	free(t->stid_tab, M_CXGBE);
769346849Snp	t->stid_tab = NULL;
770237263Snp}
771237263Snp
/*
 * Free all three tid tables (tid, atid, stid).  alloc_tid_tabs also calls
 * this to clean up after a partial failure, so each of the individual free
 * routines has to cope with a table that was never allocated.
 */
static void
free_tid_tabs(struct tid_info *t)
{

	free_tid_tab(t);	/* connection tids */
	free_atid_tab(t);	/* atids */
	free_stid_tab(t);	/* stids */
}
780346849Snp
781245448Snpstatic int
782346849Snpalloc_tid_tabs(struct tid_info *t)
783346849Snp{
784346849Snp	int rc;
785346849Snp
786346849Snp	rc = alloc_tid_tab(t, M_NOWAIT);
787346849Snp	if (rc != 0)
788346849Snp		goto failed;
789346849Snp
790346849Snp	rc = alloc_atid_tab(t, M_NOWAIT);
791346849Snp	if (rc != 0)
792346849Snp		goto failed;
793346849Snp
794346849Snp	rc = alloc_stid_tab(t, M_NOWAIT);
795346849Snp	if (rc != 0)
796346849Snp		goto failed;
797346849Snp
798346849Snp	return (0);
799346849Snpfailed:
800346849Snp	free_tid_tabs(t);
801346849Snp	return (rc);
802346849Snp}
803346849Snp
804237263Snpstatic void
805237263Snpfree_tom_data(struct adapter *sc, struct tom_data *td)
806237263Snp{
807245448Snp
808245448Snp	ASSERT_SYNCHRONIZED_OP(sc);
809245448Snp
810237263Snp	KASSERT(TAILQ_EMPTY(&td->toep_list),
811237263Snp	    ("%s: TOE PCB list is not empty.", __func__));
812237263Snp	KASSERT(td->lctx_count == 0,
813237263Snp	    ("%s: lctx hash table is not empty.", __func__));
814237263Snp
815309555Sjhb	t4_free_ppod_region(&td->pr);
816237263Snp
817237263Snp	if (td->listen_mask != 0)
818237263Snp		hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
819237263Snp
820272719Snp	if (mtx_initialized(&td->unsent_wr_lock))
821272719Snp		mtx_destroy(&td->unsent_wr_lock);
822237263Snp	if (mtx_initialized(&td->lctx_hash_lock))
823237263Snp		mtx_destroy(&td->lctx_hash_lock);
824237263Snp	if (mtx_initialized(&td->toep_list_lock))
825237263Snp		mtx_destroy(&td->toep_list_lock);
826237263Snp
827237263Snp	free_tid_tabs(&sc->tids);
828237263Snp	free(td, M_CXGBE);
829237263Snp}
830237263Snp
831346805Snpstatic char *
832346805Snpprepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen,
833346805Snp    int *buflen)
834346805Snp{
835346805Snp	char *pkt;
836346805Snp	struct tcphdr *th;
837346805Snp	int ipv6, len;
838346805Snp	const int maxlen =
839346805Snp	    max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) +
840346805Snp	    max(sizeof(struct ip), sizeof(struct ip6_hdr)) +
841346805Snp	    sizeof(struct tcphdr);
842346805Snp
843346805Snp	MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN);
844346805Snp
845346805Snp	pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT);
846346805Snp	if (pkt == NULL)
847346805Snp		return (NULL);
848346805Snp
849346805Snp	ipv6 = inp->inp_vflag & INP_IPV6;
850346805Snp	len = 0;
851346805Snp
852346805Snp	if (vtag == 0xffff) {
853346805Snp		struct ether_header *eh = (void *)pkt;
854346805Snp
855346805Snp		if (ipv6)
856346805Snp			eh->ether_type = htons(ETHERTYPE_IPV6);
857346805Snp		else
858346805Snp			eh->ether_type = htons(ETHERTYPE_IP);
859346805Snp
860346805Snp		len += sizeof(*eh);
861346805Snp	} else {
862346805Snp		struct ether_vlan_header *evh = (void *)pkt;
863346805Snp
864346805Snp		evh->evl_encap_proto = htons(ETHERTYPE_VLAN);
865346805Snp		evh->evl_tag = htons(vtag);
866346805Snp		if (ipv6)
867346805Snp			evh->evl_proto = htons(ETHERTYPE_IPV6);
868346805Snp		else
869346805Snp			evh->evl_proto = htons(ETHERTYPE_IP);
870346805Snp
871346805Snp		len += sizeof(*evh);
872346805Snp	}
873346805Snp
874346805Snp	if (ipv6) {
875346805Snp		struct ip6_hdr *ip6 = (void *)&pkt[len];
876346805Snp
877346805Snp		ip6->ip6_vfc = IPV6_VERSION;
878346805Snp		ip6->ip6_plen = htons(sizeof(struct tcphdr));
879346805Snp		ip6->ip6_nxt = IPPROTO_TCP;
880346805Snp		if (open_type == OPEN_TYPE_ACTIVE) {
881346805Snp			ip6->ip6_src = inp->in6p_laddr;
882346805Snp			ip6->ip6_dst = inp->in6p_faddr;
883346805Snp		} else if (open_type == OPEN_TYPE_LISTEN) {
884346805Snp			ip6->ip6_src = inp->in6p_laddr;
885346805Snp			ip6->ip6_dst = ip6->ip6_src;
886346805Snp		}
887346805Snp
888346805Snp		len += sizeof(*ip6);
889346805Snp	} else {
890346805Snp		struct ip *ip = (void *)&pkt[len];
891346805Snp
892346805Snp		ip->ip_v = IPVERSION;
893346805Snp		ip->ip_hl = sizeof(*ip) >> 2;
894346805Snp		ip->ip_tos = inp->inp_ip_tos;
895346805Snp		ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
896346805Snp		ip->ip_ttl = inp->inp_ip_ttl;
897346805Snp		ip->ip_p = IPPROTO_TCP;
898346805Snp		if (open_type == OPEN_TYPE_ACTIVE) {
899346805Snp			ip->ip_src = inp->inp_laddr;
900346805Snp			ip->ip_dst = inp->inp_faddr;
901346805Snp		} else if (open_type == OPEN_TYPE_LISTEN) {
902346805Snp			ip->ip_src = inp->inp_laddr;
903346805Snp			ip->ip_dst = ip->ip_src;
904346805Snp		}
905346805Snp
906346805Snp		len += sizeof(*ip);
907346805Snp	}
908346805Snp
909346805Snp	th = (void *)&pkt[len];
910346805Snp	if (open_type == OPEN_TYPE_ACTIVE) {
911346805Snp		th->th_sport = inp->inp_lport;	/* network byte order already */
912346805Snp		th->th_dport = inp->inp_fport;	/* ditto */
913346805Snp	} else if (open_type == OPEN_TYPE_LISTEN) {
914346805Snp		th->th_sport = inp->inp_lport;	/* network byte order already */
915346805Snp		th->th_dport = th->th_sport;
916346805Snp	}
917346805Snp	len += sizeof(th);
918346805Snp
919346805Snp	*pktlen = *buflen = len;
920346805Snp	return (pkt);
921346805Snp}
922346805Snp
923346805Snpconst struct offload_settings *
924346805Snplookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m,
925346805Snp    uint16_t vtag, struct inpcb *inp)
926346805Snp{
927346805Snp	const struct t4_offload_policy *op;
928346805Snp	char *pkt;
929346805Snp	struct offload_rule *r;
930346805Snp	int i, matched, pktlen, buflen;
931346805Snp	static const struct offload_settings allow_offloading_settings = {
932346805Snp		.offload = 1,
933346805Snp		.rx_coalesce = -1,
934346805Snp		.cong_algo = -1,
935346805Snp		.sched_class = -1,
936346805Snp		.tstamp = -1,
937346805Snp		.sack = -1,
938346805Snp		.nagle = -1,
939346805Snp		.ecn = -1,
940346805Snp		.ddp = -1,
941346805Snp		.tls = -1,
942346805Snp		.txq = -1,
943346805Snp		.rxq = -1,
944346805Snp		.mss = -1,
945346805Snp	};
946346805Snp	static const struct offload_settings disallow_offloading_settings = {
947346805Snp		.offload = 0,
948346805Snp		/* rest is irrelevant when offload is off. */
949346805Snp	};
950346805Snp
951346805Snp	rw_assert(&sc->policy_lock, RA_LOCKED);
952346805Snp
953346805Snp	/*
954346805Snp	 * If there's no Connection Offloading Policy attached to the device
955346805Snp	 * then we need to return a default static policy.  If
956346805Snp	 * "cop_managed_offloading" is true, then we need to disallow
957346805Snp	 * offloading until a COP is attached to the device.  Otherwise we
958346805Snp	 * allow offloading ...
959346805Snp	 */
960346805Snp	op = sc->policy;
961346805Snp	if (op == NULL) {
962346805Snp		if (sc->tt.cop_managed_offloading)
963346805Snp			return (&disallow_offloading_settings);
964346805Snp		else
965346805Snp			return (&allow_offloading_settings);
966346805Snp	}
967346805Snp
968346805Snp	switch (open_type) {
969346805Snp	case OPEN_TYPE_ACTIVE:
970346805Snp	case OPEN_TYPE_LISTEN:
971346805Snp		pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen);
972346805Snp		break;
973346805Snp	case OPEN_TYPE_PASSIVE:
974346805Snp		MPASS(m != NULL);
975346805Snp		pkt = mtod(m, char *);
976346805Snp		MPASS(*pkt == CPL_PASS_ACCEPT_REQ);
977346805Snp		pkt += sizeof(struct cpl_pass_accept_req);
978346805Snp		pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req);
979346805Snp		buflen = m->m_len - sizeof(struct cpl_pass_accept_req);
980346805Snp		break;
981346805Snp	default:
982346805Snp		MPASS(0);
983346805Snp		return (&disallow_offloading_settings);
984346805Snp	}
985346805Snp
986346805Snp	if (pkt == NULL || pktlen == 0 || buflen == 0)
987346805Snp		return (&disallow_offloading_settings);
988346805Snp
989346882Snp	matched = 0;
990346805Snp	r = &op->rule[0];
991346805Snp	for (i = 0; i < op->nrules; i++, r++) {
992346805Snp		if (r->open_type != open_type &&
993346805Snp		    r->open_type != OPEN_TYPE_DONTCARE) {
994346805Snp			continue;
995346805Snp		}
996346805Snp		matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen);
997346805Snp		if (matched)
998346805Snp			break;
999346805Snp	}
1000346805Snp
1001346805Snp	if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN)
1002346805Snp		free(pkt, M_CXGBE);
1003346805Snp
1004346805Snp	return (matched ? &r->settings : &disallow_offloading_settings);
1005346805Snp}
1006346805Snp
1007272719Snpstatic void
1008272719Snpreclaim_wr_resources(void *arg, int count)
1009272719Snp{
1010272719Snp	struct tom_data *td = arg;
1011272719Snp	STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list);
1012272719Snp	struct cpl_act_open_req *cpl;
1013346970Snp	u_int opcode, atid, tid;
1014272719Snp	struct wrqe *wr;
1015346970Snp	struct adapter *sc = td_adapter(td);
1016272719Snp
1017272719Snp	mtx_lock(&td->unsent_wr_lock);
1018272719Snp	STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe);
1019272719Snp	mtx_unlock(&td->unsent_wr_lock);
1020272719Snp
1021272719Snp	while ((wr = STAILQ_FIRST(&twr_list)) != NULL) {
1022272719Snp		STAILQ_REMOVE_HEAD(&twr_list, link);
1023272719Snp
1024272719Snp		cpl = wrtod(wr);
1025272719Snp		opcode = GET_OPCODE(cpl);
1026272719Snp
1027272719Snp		switch (opcode) {
1028272719Snp		case CPL_ACT_OPEN_REQ:
1029272719Snp		case CPL_ACT_OPEN_REQ6:
1030272719Snp			atid = G_TID_TID(be32toh(OPCODE_TID(cpl)));
1031272719Snp			CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid);
1032272719Snp			act_open_failure_cleanup(sc, atid, EHOSTUNREACH);
1033272719Snp			free(wr, M_CXGBE);
1034272719Snp			break;
1035346970Snp		case CPL_PASS_ACCEPT_RPL:
1036346970Snp			tid = GET_TID(cpl);
1037346970Snp			CTR2(KTR_CXGBE, "%s: tid %u ", __func__, tid);
1038346970Snp			synack_failure_cleanup(sc, tid);
1039346970Snp			free(wr, M_CXGBE);
1040346970Snp			break;
1041272719Snp		default:
1042272719Snp			log(LOG_ERR, "%s: leaked work request %p, wr_len %d, "
1043272719Snp			    "opcode %x\n", __func__, wr, wr->wr_len, opcode);
1044272719Snp			/* WR not freed here; go look at it with a debugger.  */
1045272719Snp		}
1046272719Snp	}
1047272719Snp}
1048272719Snp
1049237263Snp/*
1050237263Snp * Ground control to Major TOM
1051237263Snp * Commencing countdown, engines on
1052237263Snp */
1053237263Snpstatic int
1054237263Snpt4_tom_activate(struct adapter *sc)
1055237263Snp{
1056237263Snp	struct tom_data *td;
1057237263Snp	struct toedev *tod;
1058291665Sjhb	struct vi_info *vi;
1059346852Snp	int i, rc, v;
1060237263Snp
1061245274Snp	ASSERT_SYNCHRONIZED_OP(sc);
1062237263Snp
1063237263Snp	/* per-adapter softc for TOM */
1064237263Snp	td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
1065237263Snp	if (td == NULL)
1066237263Snp		return (ENOMEM);
1067237263Snp
1068237263Snp	/* List of TOE PCBs and associated lock */
1069237263Snp	mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
1070237263Snp	TAILQ_INIT(&td->toep_list);
1071237263Snp
1072237263Snp	/* Listen context */
1073237263Snp	mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
1074237263Snp	td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
1075237263Snp	    &td->listen_mask, HASH_NOWAIT);
1076237263Snp
1077272719Snp	/* List of WRs for which L2 resolution failed */
1078272719Snp	mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF);
1079272719Snp	STAILQ_INIT(&td->unsent_wr_list);
1080272719Snp	TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td);
1081272719Snp
1082237263Snp	/* TID tables */
1083237263Snp	rc = alloc_tid_tabs(&sc->tids);
1084237263Snp	if (rc != 0)
1085237263Snp		goto done;
1086237263Snp
1087309555Sjhb	rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp,
1088309555Sjhb	    t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods");
1089309555Sjhb	if (rc != 0)
1090309555Sjhb		goto done;
1091309555Sjhb	t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
1092309555Sjhb	    V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);
1093239344Snp
1094237263Snp	/* toedev ops */
1095237263Snp	tod = &td->tod;
1096237263Snp	init_toedev(tod);
1097237263Snp	tod->tod_softc = sc;
1098237263Snp	tod->tod_connect = t4_connect;
1099237263Snp	tod->tod_listen_start = t4_listen_start;
1100237263Snp	tod->tod_listen_stop = t4_listen_stop;
1101237263Snp	tod->tod_rcvd = t4_rcvd;
1102237263Snp	tod->tod_output = t4_tod_output;
1103237263Snp	tod->tod_send_rst = t4_send_rst;
1104237263Snp	tod->tod_send_fin = t4_send_fin;
1105237263Snp	tod->tod_pcb_detach = t4_pcb_detach;
1106237263Snp	tod->tod_l2_update = t4_l2_update;
1107237263Snp	tod->tod_syncache_added = t4_syncache_added;
1108237263Snp	tod->tod_syncache_removed = t4_syncache_removed;
1109237263Snp	tod->tod_syncache_respond = t4_syncache_respond;
1110237263Snp	tod->tod_offload_socket = t4_offload_socket;
1111252716Snp	tod->tod_ctloutput = t4_ctloutput;
1112346848Snp#if 0
1113346848Snp	tod->tod_tcp_info = t4_tcp_info;
1114346848Snp#else
1115346848Snp	(void)&t4_tcp_info;
1116346848Snp#endif
1117237263Snp
1118291665Sjhb	for_each_port(sc, i) {
1119291665Sjhb		for_each_vi(sc->port[i], v, vi) {
1120291665Sjhb			TOEDEV(vi->ifp) = &td->tod;
1121291665Sjhb		}
1122291665Sjhb	}
1123237263Snp
1124237263Snp	sc->tom_softc = td;
1125237263Snp	register_toedev(sc->tom_softc);
1126237263Snp
1127237263Snpdone:
1128237263Snp	if (rc != 0)
1129237263Snp		free_tom_data(sc, td);
1130237263Snp	return (rc);
1131237263Snp}
1132237263Snp
1133237263Snpstatic int
1134237263Snpt4_tom_deactivate(struct adapter *sc)
1135237263Snp{
1136237263Snp	int rc = 0;
1137237263Snp	struct tom_data *td = sc->tom_softc;
1138237263Snp
1139245274Snp	ASSERT_SYNCHRONIZED_OP(sc);
1140237263Snp
1141237263Snp	if (td == NULL)
1142237263Snp		return (0);	/* XXX. KASSERT? */
1143237263Snp
1144237263Snp	if (sc->offload_map != 0)
1145237263Snp		return (EBUSY);	/* at least one port has IFCAP_TOE enabled */
1146237263Snp
1147278374Snp	if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI))
1148278374Snp		return (EBUSY);	/* both iWARP and iSCSI rely on the TOE. */
1149278374Snp
1150237263Snp	mtx_lock(&td->toep_list_lock);
1151237263Snp	if (!TAILQ_EMPTY(&td->toep_list))
1152237263Snp		rc = EBUSY;
1153237263Snp	mtx_unlock(&td->toep_list_lock);
1154237263Snp
1155237263Snp	mtx_lock(&td->lctx_hash_lock);
1156237263Snp	if (td->lctx_count > 0)
1157237263Snp		rc = EBUSY;
1158237263Snp	mtx_unlock(&td->lctx_hash_lock);
1159237263Snp
1160272719Snp	taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources);
1161272719Snp	mtx_lock(&td->unsent_wr_lock);
1162272719Snp	if (!STAILQ_EMPTY(&td->unsent_wr_list))
1163272719Snp		rc = EBUSY;
1164272719Snp	mtx_unlock(&td->unsent_wr_lock);
1165272719Snp
1166237263Snp	if (rc == 0) {
1167237263Snp		unregister_toedev(sc->tom_softc);
1168237263Snp		free_tom_data(sc, td);
1169237263Snp		sc->tom_softc = NULL;
1170237263Snp	}
1171237263Snp
1172237263Snp	return (rc);
1173237263Snp}
1174237263Snp
1175237263Snpstatic int
1176306661Sjhbt4_aio_queue_tom(struct socket *so, struct kaiocb *job)
1177306661Sjhb{
1178306661Sjhb	struct tcpcb *tp = so_sototcpcb(so);
1179306661Sjhb	struct toepcb *toep = tp->t_toe;
1180306661Sjhb	int error;
1181306661Sjhb
1182306661Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
1183306661Sjhb		error = t4_aio_queue_ddp(so, job);
1184306661Sjhb		if (error != EOPNOTSUPP)
1185306661Sjhb			return (error);
1186306661Sjhb	}
1187306661Sjhb
1188306661Sjhb	return (t4_aio_queue_aiotx(so, job));
1189306661Sjhb}
1190306661Sjhb
1191306661Sjhbstatic int
1192345664Sjhbt4_ctloutput_tom(struct socket *so, struct sockopt *sopt)
1193345664Sjhb{
1194345664Sjhb
1195345664Sjhb	if (sopt->sopt_level != IPPROTO_TCP)
1196345664Sjhb		return (tcp_ctloutput(so, sopt));
1197345664Sjhb
1198345664Sjhb	switch (sopt->sopt_name) {
1199345664Sjhb	case TCP_TLSOM_SET_TLS_CONTEXT:
1200345664Sjhb	case TCP_TLSOM_GET_TLS_TOM:
1201345664Sjhb	case TCP_TLSOM_CLR_TLS_TOM:
1202345664Sjhb	case TCP_TLSOM_CLR_QUIES:
1203345664Sjhb		return (t4_ctloutput_tls(so, sopt));
1204345664Sjhb	default:
1205345664Sjhb		return (tcp_ctloutput(so, sopt));
1206345664Sjhb	}
1207345664Sjhb}
1208345664Sjhb
1209345664Sjhbstatic int
1210237263Snpt4_tom_mod_load(void)
1211237263Snp{
1212245441Snp	struct protosw *tcp_protosw, *tcp6_protosw;
1213237263Snp
1214302339Snp	/* CPL handlers */
1215346852Snp	t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2,
1216346852Snp	    CPL_COOKIE_TOM);
1217302339Snp	t4_init_connect_cpl_handlers();
1218302339Snp	t4_init_listen_cpl_handlers();
1219302339Snp	t4_init_cpl_io_handlers();
1220302339Snp
1221344856Sjhb	t4_ddp_mod_load();
1222345664Sjhb	t4_tls_mod_load();
1223299210Sjhb
1224239344Snp	tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
1225239344Snp	if (tcp_protosw == NULL)
1226239344Snp		return (ENOPROTOOPT);
1227306661Sjhb	bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw));
1228306661Sjhb	bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs));
1229306661Sjhb	toe_usrreqs.pru_aio_queue = t4_aio_queue_tom;
1230345664Sjhb	toe_protosw.pr_ctloutput = t4_ctloutput_tom;
1231306661Sjhb	toe_protosw.pr_usrreqs = &toe_usrreqs;
1232239344Snp
1233245441Snp	tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
1234245441Snp	if (tcp6_protosw == NULL)
1235245441Snp		return (ENOPROTOOPT);
1236306661Sjhb	bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
1237306661Sjhb	bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs));
1238306661Sjhb	toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom;
1239345664Sjhb	toe6_protosw.pr_ctloutput = t4_ctloutput_tom;
1240306661Sjhb	toe6_protosw.pr_usrreqs = &toe6_usrreqs;
1241245441Snp
1242344856Sjhb	return (t4_register_uld(&tom_uld_info));
1243237263Snp}
1244237263Snp
1245237263Snpstatic void
1246237263Snptom_uninit(struct adapter *sc, void *arg __unused)
1247237263Snp{
1248255006Snp	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun"))
1249245274Snp		return;
1250245274Snp
1251237263Snp	/* Try to free resources (works only if no port has IFCAP_TOE) */
1252278374Snp	if (uld_active(sc, ULD_TOM))
1253237263Snp		t4_deactivate_uld(sc, ULD_TOM);
1254245274Snp
1255255006Snp	end_synchronized_op(sc, 0);
1256237263Snp}
1257237263Snp
1258237263Snpstatic int
1259237263Snpt4_tom_mod_unload(void)
1260237263Snp{
1261237263Snp	t4_iterate(tom_uninit, NULL);
1262237263Snp
1263237263Snp	if (t4_unregister_uld(&tom_uld_info) == EBUSY)
1264237263Snp		return (EBUSY);
1265237263Snp
1266345664Sjhb	t4_tls_mod_unload();
1267299210Sjhb	t4_ddp_mod_unload();
1268299210Sjhb
1269313178Sjhb	t4_uninit_connect_cpl_handlers();
1270313178Sjhb	t4_uninit_listen_cpl_handlers();
1271313178Sjhb	t4_uninit_cpl_io_handlers();
1272346852Snp	t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, NULL, CPL_COOKIE_TOM);
1273313178Sjhb
1274237263Snp	return (0);
1275237263Snp}
1276237263Snp#endif	/* TCP_OFFLOAD */
1277237263Snp
1278237263Snpstatic int
1279237263Snpt4_tom_modevent(module_t mod, int cmd, void *arg)
1280237263Snp{
1281237263Snp	int rc = 0;
1282237263Snp
1283237263Snp#ifdef TCP_OFFLOAD
1284237263Snp	switch (cmd) {
1285237263Snp	case MOD_LOAD:
1286237263Snp		rc = t4_tom_mod_load();
1287237263Snp		break;
1288237263Snp
1289237263Snp	case MOD_UNLOAD:
1290237263Snp		rc = t4_tom_mod_unload();
1291237263Snp		break;
1292237263Snp
1293237263Snp	default:
1294237263Snp		rc = EINVAL;
1295237263Snp	}
1296237263Snp#else
1297237263Snp	printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
1298237263Snp	rc = EOPNOTSUPP;
1299237263Snp#endif
1300237263Snp	return (rc);
1301237263Snp}
1302237263Snp
/*
 * Module descriptor for t4_tom, handed to DECLARE_MODULE: the module's name,
 * the handler for its module events (t4_tom_modevent), and a zero third
 * member (presumably private data for the handler -- confirm against the
 * moduledata_t definition).
 */
1303237263Snpstatic moduledata_t t4_tom_moddata= {
1304237263Snp	"t4_tom",
1305237263Snp	t4_tom_modevent,
1306241394Skevlo	0
1307237263Snp};
1308237263Snp
/*
 * Kernel module boilerplate: t4_tom is version 1, depends on the toecore and
 * t4nex modules, and is declared with the descriptor above at
 * SI_SUB_EXEC / SI_ORDER_ANY.  The three numbers in each MODULE_DEPEND are
 * presumably the minimum, preferred, and maximum acceptable versions of the
 * dependency -- confirm against the macro's documentation.
 */
1309237263SnpMODULE_VERSION(t4_tom, 1);
1310237263SnpMODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
1311237263SnpMODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
1312237263SnpDECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
1313