t4_tom.c revision 331645
1237263Snp/*-
2330897Seadler * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3330897Seadler *
4237263Snp * Copyright (c) 2012 Chelsio Communications, Inc.
5237263Snp * All rights reserved.
6237263Snp * Written by: Navdeep Parhar <np@FreeBSD.org>
7237263Snp *
8237263Snp * Redistribution and use in source and binary forms, with or without
9237263Snp * modification, are permitted provided that the following conditions
10237263Snp * are met:
11237263Snp * 1. Redistributions of source code must retain the above copyright
12237263Snp *    notice, this list of conditions and the following disclaimer.
13237263Snp * 2. Redistributions in binary form must reproduce the above copyright
14237263Snp *    notice, this list of conditions and the following disclaimer in the
15237263Snp *    documentation and/or other materials provided with the distribution.
16237263Snp *
17237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20237263Snp * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27237263Snp * SUCH DAMAGE.
28237263Snp */
29237263Snp
30237263Snp#include <sys/cdefs.h>
31237263Snp__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/tom/t4_tom.c 331645 2018-03-27 20:14:22Z jhb $");
32237263Snp
33237263Snp#include "opt_inet.h"
34245441Snp#include "opt_inet6.h"
35237263Snp
36237263Snp#include <sys/param.h>
37237263Snp#include <sys/types.h>
38237263Snp#include <sys/systm.h>
39237263Snp#include <sys/kernel.h>
40237263Snp#include <sys/ktr.h>
41286001Sae#include <sys/lock.h>
42292736Snp#include <sys/limits.h>
43237263Snp#include <sys/module.h>
44237263Snp#include <sys/protosw.h>
45237263Snp#include <sys/domain.h>
46299210Sjhb#include <sys/refcount.h>
47286001Sae#include <sys/rmlock.h>
48237263Snp#include <sys/socket.h>
49237263Snp#include <sys/socketvar.h>
50249627Snp#include <sys/taskqueue.h>
51245448Snp#include <net/if.h>
52257241Sglebius#include <net/if_var.h>
53237263Snp#include <netinet/in.h>
54237263Snp#include <netinet/in_pcb.h>
55245448Snp#include <netinet/in_var.h>
56237263Snp#include <netinet/ip.h>
57245441Snp#include <netinet/ip6.h>
58245448Snp#include <netinet6/scope6_var.h>
59237263Snp#define TCPSTATES
60237263Snp#include <netinet/tcp_fsm.h>
61330303Sjhb#include <netinet/tcp_timer.h>
62294889Sglebius#include <netinet/tcp_var.h>
63237263Snp#include <netinet/toecore.h>
64237263Snp
65237263Snp#ifdef TCP_OFFLOAD
66237263Snp#include "common/common.h"
67237263Snp#include "common/t4_msg.h"
68237263Snp#include "common/t4_regs.h"
69252705Snp#include "common/t4_regs_values.h"
70252716Snp#include "common/t4_tcb.h"
71237263Snp#include "tom/t4_tom_l2t.h"
72237263Snp#include "tom/t4_tom.h"
73237263Snp
74306661Sjhbstatic struct protosw toe_protosw;
75306661Sjhbstatic struct pr_usrreqs toe_usrreqs;
76239344Snp
77306661Sjhbstatic struct protosw toe6_protosw;
78306661Sjhbstatic struct pr_usrreqs toe6_usrreqs;
79245441Snp
80237263Snp/* Module ops */
81237263Snpstatic int t4_tom_mod_load(void);
82237263Snpstatic int t4_tom_mod_unload(void);
83237263Snpstatic int t4_tom_modevent(module_t, int, void *);
84237263Snp
85237263Snp/* ULD ops and helpers */
86237263Snpstatic int t4_tom_activate(struct adapter *);
87237263Snpstatic int t4_tom_deactivate(struct adapter *);
88237263Snp
89237263Snpstatic struct uld_info tom_uld_info = {
90237263Snp	.uld_id = ULD_TOM,
91237263Snp	.activate = t4_tom_activate,
92237263Snp	.deactivate = t4_tom_deactivate,
93237263Snp};
94237263Snp
95237263Snpstatic void queue_tid_release(struct adapter *, int);
96237263Snpstatic void release_offload_resources(struct toepcb *);
97237263Snpstatic int alloc_tid_tabs(struct tid_info *);
98237263Snpstatic void free_tid_tabs(struct tid_info *);
99245448Snpstatic int add_lip(struct adapter *, struct in6_addr *);
100245448Snpstatic int delete_lip(struct adapter *, struct in6_addr *);
101245448Snpstatic struct clip_entry *search_lip(struct tom_data *, struct in6_addr *);
102245448Snpstatic void init_clip_table(struct adapter *, struct tom_data *);
103249627Snpstatic void update_clip(struct adapter *, void *);
104249627Snpstatic void t4_clip_task(void *, int);
105249627Snpstatic void update_clip_table(struct adapter *, struct tom_data *);
106245448Snpstatic void destroy_clip_table(struct adapter *, struct tom_data *);
107237263Snpstatic void free_tom_data(struct adapter *, struct tom_data *);
108272719Snpstatic void reclaim_wr_resources(void *, int);
109237263Snp
110249627Snpstatic int in6_ifaddr_gen;
111249627Snpstatic eventhandler_tag ifaddr_evhandler;
112249627Snpstatic struct timeout_task clip_task;
113249627Snp
114237263Snpstruct toepcb *
115291665Sjhballoc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags)
116237263Snp{
117291665Sjhb	struct port_info *pi = vi->pi;
118237263Snp	struct adapter *sc = pi->adapter;
119237263Snp	struct toepcb *toep;
120237263Snp	int tx_credits, txsd_total, len;
121237263Snp
122237263Snp	/*
123237263Snp	 * The firmware counts tx work request credits in units of 16 bytes
124237263Snp	 * each.  Reserve room for an ABORT_REQ so the driver never has to worry
125237263Snp	 * about tx credits if it wants to abort a connection.
126237263Snp	 */
127237263Snp	tx_credits = sc->params.ofldq_wr_cred;
128237263Snp	tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);
129237263Snp
130237263Snp	/*
131237263Snp	 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte
132237263Snp	 * immediate payload, and firmware counts tx work request credits in
133237263Snp	 * units of 16 byte.  Calculate the maximum work requests possible.
134237263Snp	 */
135237263Snp	txsd_total = tx_credits /
136298482Spfg	    howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);
137237263Snp
138237263Snp	if (txqid < 0)
139291665Sjhb		txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq;
140291665Sjhb	KASSERT(txqid >= vi->first_ofld_txq &&
141291665Sjhb	    txqid < vi->first_ofld_txq + vi->nofldtxq,
142291665Sjhb	    ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi,
143291665Sjhb		vi->first_ofld_txq, vi->nofldtxq));
144237263Snp
145237263Snp	if (rxqid < 0)
146291665Sjhb		rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq;
147291665Sjhb	KASSERT(rxqid >= vi->first_ofld_rxq &&
148291665Sjhb	    rxqid < vi->first_ofld_rxq + vi->nofldrxq,
149291665Sjhb	    ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi,
150291665Sjhb		vi->first_ofld_rxq, vi->nofldrxq));
151237263Snp
152237263Snp	len = offsetof(struct toepcb, txsd) +
153237263Snp	    txsd_total * sizeof(struct ofld_tx_sdesc);
154237263Snp
155237263Snp	toep = malloc(len, M_CXGBE, M_ZERO | flags);
156237263Snp	if (toep == NULL)
157237263Snp		return (NULL);
158237263Snp
159299210Sjhb	refcount_init(&toep->refcount, 1);
160237263Snp	toep->td = sc->tom_softc;
161291665Sjhb	toep->vi = vi;
162255411Snp	toep->tx_total = tx_credits;
163237263Snp	toep->tx_credits = tx_credits;
164237263Snp	toep->ofld_txq = &sc->sge.ofld_txq[txqid];
165237263Snp	toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
166237263Snp	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
167292736Snp	mbufq_init(&toep->ulp_pduq, INT_MAX);
168292736Snp	mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX);
169237263Snp	toep->txsd_total = txsd_total;
170237263Snp	toep->txsd_avail = txsd_total;
171237263Snp	toep->txsd_pidx = 0;
172237263Snp	toep->txsd_cidx = 0;
173306661Sjhb	aiotx_init_toep(toep);
174237263Snp
175237263Snp	return (toep);
176237263Snp}
177237263Snp
178299210Sjhbstruct toepcb *
179299210Sjhbhold_toepcb(struct toepcb *toep)
180299210Sjhb{
181299210Sjhb
182299210Sjhb	refcount_acquire(&toep->refcount);
183299210Sjhb	return (toep);
184299210Sjhb}
185299210Sjhb
186237263Snpvoid
187237263Snpfree_toepcb(struct toepcb *toep)
188237263Snp{
189237263Snp
190299210Sjhb	if (refcount_release(&toep->refcount) == 0)
191299210Sjhb		return;
192299210Sjhb
193239514Snp	KASSERT(!(toep->flags & TPF_ATTACHED),
194237263Snp	    ("%s: attached to an inpcb", __func__));
195239514Snp	KASSERT(!(toep->flags & TPF_CPL_PENDING),
196237263Snp	    ("%s: CPL pending", __func__));
197237263Snp
198331645Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP)
199331645Sjhb		ddp_uninit_toep(toep);
200237263Snp	free(toep, M_CXGBE);
201237263Snp}
202237263Snp
203237263Snp/*
204237263Snp * Set up the socket for TCP offload.
205237263Snp */
206237263Snpvoid
207237263Snpoffload_socket(struct socket *so, struct toepcb *toep)
208237263Snp{
209237263Snp	struct tom_data *td = toep->td;
210237263Snp	struct inpcb *inp = sotoinpcb(so);
211237263Snp	struct tcpcb *tp = intotcpcb(inp);
212237263Snp	struct sockbuf *sb;
213237263Snp
214237263Snp	INP_WLOCK_ASSERT(inp);
215237263Snp
216237263Snp	/* Update socket */
217237263Snp	sb = &so->so_snd;
218237263Snp	SOCKBUF_LOCK(sb);
219237263Snp	sb->sb_flags |= SB_NOCOALESCE;
220237263Snp	SOCKBUF_UNLOCK(sb);
221237263Snp	sb = &so->so_rcv;
222237263Snp	SOCKBUF_LOCK(sb);
223237263Snp	sb->sb_flags |= SB_NOCOALESCE;
224306661Sjhb	if (inp->inp_vflag & INP_IPV6)
225306661Sjhb		so->so_proto = &toe6_protosw;
226306661Sjhb	else
227306661Sjhb		so->so_proto = &toe_protosw;
228237263Snp	SOCKBUF_UNLOCK(sb);
229237263Snp
230237263Snp	/* Update TCP PCB */
231237263Snp	tp->tod = &td->tod;
232237263Snp	tp->t_toe = toep;
233237263Snp	tp->t_flags |= TF_TOE;
234237263Snp
235237263Snp	/* Install an extra hold on inp */
236237263Snp	toep->inp = inp;
237239514Snp	toep->flags |= TPF_ATTACHED;
238237263Snp	in_pcbref(inp);
239237263Snp
240237263Snp	/* Add the TOE PCB to the active list */
241237263Snp	mtx_lock(&td->toep_list_lock);
242237263Snp	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
243237263Snp	mtx_unlock(&td->toep_list_lock);
244237263Snp}
245237263Snp
246237263Snp/* This is _not_ the normal way to "unoffload" a socket. */
247237263Snpvoid
248237263Snpundo_offload_socket(struct socket *so)
249237263Snp{
250237263Snp	struct inpcb *inp = sotoinpcb(so);
251237263Snp	struct tcpcb *tp = intotcpcb(inp);
252237263Snp	struct toepcb *toep = tp->t_toe;
253237263Snp	struct tom_data *td = toep->td;
254237263Snp	struct sockbuf *sb;
255237263Snp
256237263Snp	INP_WLOCK_ASSERT(inp);
257237263Snp
258237263Snp	sb = &so->so_snd;
259237263Snp	SOCKBUF_LOCK(sb);
260237263Snp	sb->sb_flags &= ~SB_NOCOALESCE;
261237263Snp	SOCKBUF_UNLOCK(sb);
262237263Snp	sb = &so->so_rcv;
263237263Snp	SOCKBUF_LOCK(sb);
264237263Snp	sb->sb_flags &= ~SB_NOCOALESCE;
265237263Snp	SOCKBUF_UNLOCK(sb);
266237263Snp
267237263Snp	tp->tod = NULL;
268237263Snp	tp->t_toe = NULL;
269237263Snp	tp->t_flags &= ~TF_TOE;
270237263Snp
271237263Snp	toep->inp = NULL;
272239514Snp	toep->flags &= ~TPF_ATTACHED;
273237263Snp	if (in_pcbrele_wlocked(inp))
274237263Snp		panic("%s: inp freed.", __func__);
275237263Snp
276237263Snp	mtx_lock(&td->toep_list_lock);
277237263Snp	TAILQ_REMOVE(&td->toep_list, toep, link);
278237263Snp	mtx_unlock(&td->toep_list_lock);
279237263Snp}
280237263Snp
281237263Snpstatic void
282237263Snprelease_offload_resources(struct toepcb *toep)
283237263Snp{
284237263Snp	struct tom_data *td = toep->td;
285237263Snp	struct adapter *sc = td_adapter(td);
286237263Snp	int tid = toep->tid;
287237263Snp
288239514Snp	KASSERT(!(toep->flags & TPF_CPL_PENDING),
289237263Snp	    ("%s: %p has CPL pending.", __func__, toep));
290239514Snp	KASSERT(!(toep->flags & TPF_ATTACHED),
291237263Snp	    ("%s: %p is still attached.", __func__, toep));
292237263Snp
293245448Snp	CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
294245448Snp	    __func__, toep, tid, toep->l2te, toep->ce);
295237263Snp
296292736Snp	/*
297292736Snp	 * These queues should have been emptied at approximately the same time
298292736Snp	 * that a normal connection's socket's so_snd would have been purged or
299292736Snp	 * drained.  Do _not_ clean up here.
300292736Snp	 */
301292736Snp	MPASS(mbufq_len(&toep->ulp_pduq) == 0);
302292736Snp	MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0);
303299210Sjhb#ifdef INVARIANTS
304331645Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP)
305331645Sjhb		ddp_assert_empty(toep);
306299210Sjhb#endif
307292736Snp
308237263Snp	if (toep->l2te)
309237263Snp		t4_l2t_release(toep->l2te);
310237263Snp
311237263Snp	if (tid >= 0) {
312312116Snp		remove_tid(sc, tid, toep->ce ? 2 : 1);
313237263Snp		release_tid(sc, tid, toep->ctrlq);
314237263Snp	}
315237263Snp
316245448Snp	if (toep->ce)
317245448Snp		release_lip(td, toep->ce);
318245448Snp
319237263Snp	mtx_lock(&td->toep_list_lock);
320237263Snp	TAILQ_REMOVE(&td->toep_list, toep, link);
321237263Snp	mtx_unlock(&td->toep_list_lock);
322237263Snp
323237263Snp	free_toepcb(toep);
324237263Snp}
325237263Snp
326237263Snp/*
327237263Snp * The kernel is done with the TCP PCB and this is our opportunity to unhook the
328237263Snp * toepcb hanging off of it.  If the TOE driver is also done with the toepcb (no
329237263Snp * pending CPL) then it is time to release all resources tied to the toepcb.
330237263Snp *
331237263Snp * Also gets called when an offloaded active open fails and the TOM wants the
332237263Snp * kernel to take the TCP PCB back.
333237263Snp */
334237263Snpstatic void
335237263Snpt4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
336237263Snp{
337237263Snp#if defined(KTR) || defined(INVARIANTS)
338237263Snp	struct inpcb *inp = tp->t_inpcb;
339237263Snp#endif
340237263Snp	struct toepcb *toep = tp->t_toe;
341237263Snp
342237263Snp	INP_WLOCK_ASSERT(inp);
343237263Snp
344237263Snp	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
345239514Snp	KASSERT(toep->flags & TPF_ATTACHED,
346237263Snp	    ("%s: not attached", __func__));
347237263Snp
348237263Snp#ifdef KTR
349237263Snp	if (tp->t_state == TCPS_SYN_SENT) {
350237263Snp		CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
351237263Snp		    __func__, toep->tid, toep, toep->flags, inp,
352237263Snp		    inp->inp_flags);
353237263Snp	} else {
354237263Snp		CTR6(KTR_CXGBE,
355237263Snp		    "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
356237263Snp		    toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
357237263Snp		    inp->inp_flags);
358237263Snp	}
359237263Snp#endif
360237263Snp
361237263Snp	tp->t_toe = NULL;
362237263Snp	tp->t_flags &= ~TF_TOE;
363239514Snp	toep->flags &= ~TPF_ATTACHED;
364237263Snp
365239514Snp	if (!(toep->flags & TPF_CPL_PENDING))
366237263Snp		release_offload_resources(toep);
367237263Snp}
368237263Snp
369237263Snp/*
370252716Snp * setsockopt handler.
371252716Snp */
372252716Snpstatic void
373252716Snpt4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name)
374252716Snp{
375252716Snp	struct adapter *sc = tod->tod_softc;
376252716Snp	struct toepcb *toep = tp->t_toe;
377252716Snp
378252716Snp	if (dir == SOPT_GET)
379252716Snp		return;
380252716Snp
381252716Snp	CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name);
382252716Snp
383252716Snp	switch (name) {
384252716Snp	case TCP_NODELAY:
385330307Snp		if (tp->t_state != TCPS_ESTABLISHED)
386330307Snp			break;
387302339Snp		t4_set_tcb_field(sc, toep->ctrlq, toep->tid, W_TCB_T_FLAGS,
388302339Snp		    V_TF_NAGLE(1), V_TF_NAGLE(tp->t_flags & TF_NODELAY ? 0 : 1),
389302339Snp		    0, 0, toep->ofld_rxq->iq.abs_id);
390252716Snp		break;
391252716Snp	default:
392252716Snp		break;
393252716Snp	}
394252716Snp}
395252716Snp
396252716Snp/*
397237263Snp * The TOE driver will not receive any more CPLs for the tid associated with the
398237263Snp * toepcb; release the hold on the inpcb.
399237263Snp */
400237263Snpvoid
401237263Snpfinal_cpl_received(struct toepcb *toep)
402237263Snp{
403237263Snp	struct inpcb *inp = toep->inp;
404237263Snp
405237263Snp	KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
406237263Snp	INP_WLOCK_ASSERT(inp);
407239514Snp	KASSERT(toep->flags & TPF_CPL_PENDING,
408237263Snp	    ("%s: CPL not pending already?", __func__));
409237263Snp
410237263Snp	CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
411237263Snp	    __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);
412237263Snp
413299210Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP)
414299210Sjhb		release_ddp_resources(toep);
415237263Snp	toep->inp = NULL;
416239514Snp	toep->flags &= ~TPF_CPL_PENDING;
417292736Snp	mbufq_drain(&toep->ulp_pdu_reclaimq);
418237263Snp
419239514Snp	if (!(toep->flags & TPF_ATTACHED))
420237263Snp		release_offload_resources(toep);
421237263Snp
422237263Snp	if (!in_pcbrele_wlocked(inp))
423237263Snp		INP_WUNLOCK(inp);
424237263Snp}
425237263Snp
426237263Snpvoid
427312116Snpinsert_tid(struct adapter *sc, int tid, void *ctx, int ntids)
428237263Snp{
429237263Snp	struct tid_info *t = &sc->tids;
430237263Snp
431237263Snp	t->tid_tab[tid] = ctx;
432312116Snp	atomic_add_int(&t->tids_in_use, ntids);
433237263Snp}
434237263Snp
435237263Snpvoid *
436237263Snplookup_tid(struct adapter *sc, int tid)
437237263Snp{
438237263Snp	struct tid_info *t = &sc->tids;
439237263Snp
440237263Snp	return (t->tid_tab[tid]);
441237263Snp}
442237263Snp
443237263Snpvoid
444237263Snpupdate_tid(struct adapter *sc, int tid, void *ctx)
445237263Snp{
446237263Snp	struct tid_info *t = &sc->tids;
447237263Snp
448237263Snp	t->tid_tab[tid] = ctx;
449237263Snp}
450237263Snp
451237263Snpvoid
452312116Snpremove_tid(struct adapter *sc, int tid, int ntids)
453237263Snp{
454237263Snp	struct tid_info *t = &sc->tids;
455237263Snp
456237263Snp	t->tid_tab[tid] = NULL;
457312116Snp	atomic_subtract_int(&t->tids_in_use, ntids);
458237263Snp}
459237263Snp
460237263Snpvoid
461237263Snprelease_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
462237263Snp{
463237263Snp	struct wrqe *wr;
464237263Snp	struct cpl_tid_release *req;
465237263Snp
466237263Snp	wr = alloc_wrqe(sizeof(*req), ctrlq);
467237263Snp	if (wr == NULL) {
468237263Snp		queue_tid_release(sc, tid);	/* defer */
469237263Snp		return;
470237263Snp	}
471237263Snp	req = wrtod(wr);
472237263Snp
473237263Snp	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
474237263Snp
475237263Snp	t4_wrq_tx(sc, wr);
476237263Snp}
477237263Snp
/*
 * Placeholder for releasing a tid at a later time.  Deferred release has not
 * been implemented; this only invokes CXGBE_UNIMPLEMENTED.
 */
static void
queue_tid_release(struct adapter *sc, int tid)
{

	CXGBE_UNIMPLEMENTED("deferred tid release");
}
484237263Snp
485237263Snp/*
486237263Snp * What mtu_idx to use, given a 4-tuple and/or an MSS cap
487237263Snp */
488237263Snpint
489237263Snpfind_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
490237263Snp{
491237263Snp	unsigned short *mtus = &sc->params.mtus[0];
492245441Snp	int i, mss, n;
493237263Snp
494237263Snp	KASSERT(inc != NULL || pmss > 0,
495237263Snp	    ("%s: at least one of inc/pmss must be specified", __func__));
496237263Snp
497237263Snp	mss = inc ? tcp_mssopt(inc) : pmss;
498237263Snp	if (pmss > 0 && mss > pmss)
499237263Snp		mss = pmss;
500237263Snp
501245441Snp	if (inc->inc_flags & INC_ISIPV6)
502245441Snp		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
503245441Snp	else
504245441Snp		n = sizeof(struct ip) + sizeof(struct tcphdr);
505237263Snp
506245441Snp	for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++)
507245441Snp		continue;
508245441Snp
509237263Snp	return (i);
510237263Snp}
511237263Snp
512237263Snp/*
513237263Snp * Determine the receive window size for a socket.
514237263Snp */
515237263Snpu_long
516237263Snpselect_rcv_wnd(struct socket *so)
517237263Snp{
518237263Snp	unsigned long wnd;
519237263Snp
520237263Snp	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
521237263Snp
522237263Snp	wnd = sbspace(&so->so_rcv);
523237263Snp	if (wnd < MIN_RCV_WND)
524237263Snp		wnd = MIN_RCV_WND;
525237263Snp
526237263Snp	return min(wnd, MAX_RCV_WND);
527237263Snp}
528237263Snp
529237263Snpint
530237263Snpselect_rcv_wscale(void)
531237263Snp{
532237263Snp	int wscale = 0;
533237263Snp	unsigned long space = sb_max;
534237263Snp
535237263Snp	if (space > MAX_RCV_WND)
536237263Snp		space = MAX_RCV_WND;
537237263Snp
538237263Snp	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
539237263Snp		wscale++;
540237263Snp
541237263Snp	return (wscale);
542237263Snp}
543237263Snp
544237263Snp/*
545237263Snp * socket so could be a listening socket too.
546237263Snp */
547237263Snpuint64_t
548291665Sjhbcalc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e,
549237263Snp    int mtu_idx, int rscale, int rx_credits, int ulp_mode)
550237263Snp{
551237263Snp	uint64_t opt0;
552237263Snp
553237263Snp	KASSERT(rx_credits <= M_RCV_BUFSIZ,
554237263Snp	    ("%s: rcv_bufsiz too high", __func__));
555237263Snp
556237263Snp	opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
557237263Snp	    V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits);
558237263Snp
559237263Snp	if (so != NULL) {
560237263Snp		struct inpcb *inp = sotoinpcb(so);
561237263Snp		struct tcpcb *tp = intotcpcb(inp);
562330303Sjhb		int keepalive = tcp_always_keepalive ||
563237263Snp		    so_options_get(so) & SO_KEEPALIVE;
564237263Snp
565237263Snp		opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
566237263Snp		opt0 |= V_KEEP_ALIVE(keepalive != 0);
567237263Snp	}
568237263Snp
569237263Snp	if (e != NULL)
570237263Snp		opt0 |= V_L2T_IDX(e->idx);
571237263Snp
572291665Sjhb	if (vi != NULL) {
573309560Sjhb		opt0 |= V_SMAC_SEL(vi->smt_idx);
574291665Sjhb		opt0 |= V_TX_CHAN(vi->pi->tx_chan);
575237263Snp	}
576237263Snp
577237263Snp	return htobe64(opt0);
578237263Snp}
579237263Snp
580248925Snpuint64_t
581291665Sjhbselect_ntuple(struct vi_info *vi, struct l2t_entry *e)
582237263Snp{
583291665Sjhb	struct adapter *sc = vi->pi->adapter;
584252705Snp	struct tp_params *tp = &sc->params.tp;
585291665Sjhb	uint16_t viid = vi->viid;
586252705Snp	uint64_t ntuple = 0;
587237263Snp
588252705Snp	/*
589252705Snp	 * Initialize each of the fields which we care about which are present
590252705Snp	 * in the Compressed Filter Tuple.
591252705Snp	 */
592252705Snp	if (tp->vlan_shift >= 0 && e->vlan != CPL_L2T_VLAN_NONE)
593252705Snp		ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift;
594237263Snp
595252705Snp	if (tp->port_shift >= 0)
596252705Snp		ntuple |= (uint64_t)e->lport << tp->port_shift;
597252705Snp
598252705Snp	if (tp->protocol_shift >= 0)
599252705Snp		ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;
600252705Snp
601252705Snp	if (tp->vnic_shift >= 0) {
602252705Snp		uint32_t vf = G_FW_VIID_VIN(viid);
603252705Snp		uint32_t pf = G_FW_VIID_PFN(viid);
604252705Snp		uint32_t vld = G_FW_VIID_VIVLD(viid);
605252705Snp
606252705Snp		ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vf) | V_FT_VNID_ID_PF(pf) |
607252705Snp		    V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift;
608252705Snp	}
609252705Snp
610252705Snp	if (is_t4(sc))
611252705Snp		return (htobe32((uint32_t)ntuple));
612248925Snp	else
613248925Snp		return (htobe64(V_FILTER_TUPLE(ntuple)));
614237263Snp}
615237263Snp
616245441Snpvoid
617245441Snpset_tcpddp_ulp_mode(struct toepcb *toep)
618245441Snp{
619245441Snp
620245441Snp	toep->ulp_mode = ULP_MODE_TCPDDP;
621331645Sjhb	ddp_init_toep(toep);
622245441Snp}
623245441Snp
624245935Snpint
625245935Snpnegative_advice(int status)
626245935Snp{
627245935Snp
628245935Snp	return (status == CPL_ERR_RTX_NEG_ADVICE ||
629245935Snp	    status == CPL_ERR_PERSIST_NEG_ADVICE ||
630245935Snp	    status == CPL_ERR_KEEPALV_NEG_ADVICE);
631245935Snp}
632245935Snp
633237263Snpstatic int
634237263Snpalloc_tid_tabs(struct tid_info *t)
635237263Snp{
636237263Snp	size_t size;
637237263Snp	unsigned int i;
638237263Snp
639237263Snp	size = t->ntids * sizeof(*t->tid_tab) +
640237263Snp	    t->natids * sizeof(*t->atid_tab) +
641237263Snp	    t->nstids * sizeof(*t->stid_tab);
642237263Snp
643237263Snp	t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT);
644237263Snp	if (t->tid_tab == NULL)
645237263Snp		return (ENOMEM);
646237263Snp
647237263Snp	mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
648237263Snp	t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
649237263Snp	t->afree = t->atid_tab;
650237263Snp	t->atids_in_use = 0;
651237263Snp	for (i = 1; i < t->natids; i++)
652237263Snp		t->atid_tab[i - 1].next = &t->atid_tab[i];
653237263Snp	t->atid_tab[t->natids - 1].next = NULL;
654237263Snp
655237263Snp	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
656245276Snp	t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids];
657237263Snp	t->stids_in_use = 0;
658245276Snp	TAILQ_INIT(&t->stids);
659245276Snp	t->nstids_free_head = t->nstids;
660237263Snp
661237263Snp	atomic_store_rel_int(&t->tids_in_use, 0);
662237263Snp
663237263Snp	return (0);
664237263Snp}
665237263Snp
666237263Snpstatic void
667237263Snpfree_tid_tabs(struct tid_info *t)
668237263Snp{
669237263Snp	KASSERT(t->tids_in_use == 0,
670237263Snp	    ("%s: %d tids still in use.", __func__, t->tids_in_use));
671237263Snp	KASSERT(t->atids_in_use == 0,
672237263Snp	    ("%s: %d atids still in use.", __func__, t->atids_in_use));
673237263Snp	KASSERT(t->stids_in_use == 0,
674237263Snp	    ("%s: %d tids still in use.", __func__, t->stids_in_use));
675237263Snp
676237263Snp	free(t->tid_tab, M_CXGBE);
677237263Snp	t->tid_tab = NULL;
678237263Snp
679237263Snp	if (mtx_initialized(&t->atid_lock))
680237263Snp		mtx_destroy(&t->atid_lock);
681237263Snp	if (mtx_initialized(&t->stid_lock))
682237263Snp		mtx_destroy(&t->stid_lock);
683237263Snp}
684237263Snp
685245448Snpstatic int
686245448Snpadd_lip(struct adapter *sc, struct in6_addr *lip)
687245448Snp{
688245448Snp        struct fw_clip_cmd c;
689245448Snp
690245448Snp	ASSERT_SYNCHRONIZED_OP(sc);
691245448Snp	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */
692245448Snp
693245448Snp        memset(&c, 0, sizeof(c));
694245448Snp	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
695245448Snp	    F_FW_CMD_WRITE);
696245448Snp        c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
697245448Snp        c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
698245448Snp        c.ip_lo = *(uint64_t *)&lip->s6_addr[8];
699245448Snp
700249627Snp	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
701245448Snp}
702245448Snp
703245448Snpstatic int
704245448Snpdelete_lip(struct adapter *sc, struct in6_addr *lip)
705245448Snp{
706245448Snp	struct fw_clip_cmd c;
707245448Snp
708245448Snp	ASSERT_SYNCHRONIZED_OP(sc);
709245448Snp	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */
710245448Snp
711245448Snp	memset(&c, 0, sizeof(c));
712245448Snp	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
713245448Snp	    F_FW_CMD_READ);
714245448Snp        c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
715245448Snp        c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
716245448Snp        c.ip_lo = *(uint64_t *)&lip->s6_addr[8];
717245448Snp
718249627Snp	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
719245448Snp}
720245448Snp
721245448Snpstatic struct clip_entry *
722245448Snpsearch_lip(struct tom_data *td, struct in6_addr *lip)
723245448Snp{
724245448Snp	struct clip_entry *ce;
725245448Snp
726245448Snp	mtx_assert(&td->clip_table_lock, MA_OWNED);
727245448Snp
728245448Snp	TAILQ_FOREACH(ce, &td->clip_table, link) {
729245448Snp		if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
730245448Snp			return (ce);
731245448Snp	}
732245448Snp
733245448Snp	return (NULL);
734245448Snp}
735245448Snp
736245448Snpstruct clip_entry *
737318803Snphold_lip(struct tom_data *td, struct in6_addr *lip, struct clip_entry *ce)
738245448Snp{
739245448Snp
740245448Snp	mtx_lock(&td->clip_table_lock);
741318803Snp	if (ce == NULL)
742318803Snp		ce = search_lip(td, lip);
743245448Snp	if (ce != NULL)
744245448Snp		ce->refcount++;
745245448Snp	mtx_unlock(&td->clip_table_lock);
746245448Snp
747245448Snp	return (ce);
748245448Snp}
749245448Snp
750245448Snpvoid
751245448Snprelease_lip(struct tom_data *td, struct clip_entry *ce)
752245448Snp{
753245448Snp
754245448Snp	mtx_lock(&td->clip_table_lock);
755245448Snp	KASSERT(search_lip(td, &ce->lip) == ce,
756245448Snp	    ("%s: CLIP entry %p p not in CLIP table.", __func__, ce));
757245448Snp	KASSERT(ce->refcount > 0,
758245448Snp	    ("%s: CLIP entry %p has refcount 0", __func__, ce));
759245448Snp	--ce->refcount;
760245448Snp	mtx_unlock(&td->clip_table_lock);
761245448Snp}
762245448Snp
763237263Snpstatic void
764245448Snpinit_clip_table(struct adapter *sc, struct tom_data *td)
765245448Snp{
766245448Snp
767245448Snp	ASSERT_SYNCHRONIZED_OP(sc);
768245448Snp
769245448Snp	mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF);
770245448Snp	TAILQ_INIT(&td->clip_table);
771249627Snp	td->clip_gen = -1;
772245448Snp
773249627Snp	update_clip_table(sc, td);
774249627Snp}
775249627Snp
776249627Snpstatic void
777249627Snpupdate_clip(struct adapter *sc, void *arg __unused)
778249627Snp{
779249627Snp
780249627Snp	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomuc"))
781249627Snp		return;
782249627Snp
783278374Snp	if (uld_active(sc, ULD_TOM))
784249627Snp		update_clip_table(sc, sc->tom_softc);
785249627Snp
786249627Snp	end_synchronized_op(sc, LOCK_HELD);
787249627Snp}
788249627Snp
789249627Snpstatic void
790249627Snpt4_clip_task(void *arg, int count)
791249627Snp{
792249627Snp
793249627Snp	t4_iterate(update_clip, NULL);
794249627Snp}
795249627Snp
796249627Snpstatic void
797249627Snpupdate_clip_table(struct adapter *sc, struct tom_data *td)
798249627Snp{
799286001Sae	struct rm_priotracker in6_ifa_tracker;
800249627Snp	struct in6_ifaddr *ia;
801249627Snp	struct in6_addr *lip, tlip;
802249627Snp	struct clip_head stale;
803249627Snp	struct clip_entry *ce, *ce_temp;
804312116Snp	struct vi_info *vi;
805312116Snp	int rc, gen, i, j;
806312116Snp	uintptr_t last_vnet;
807249627Snp
808249627Snp	ASSERT_SYNCHRONIZED_OP(sc);
809249627Snp
810286001Sae	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
811249627Snp	mtx_lock(&td->clip_table_lock);
812249627Snp
813312116Snp	gen = atomic_load_acq_int(&in6_ifaddr_gen);
814249627Snp	if (gen == td->clip_gen)
815249627Snp		goto done;
816249627Snp
817249627Snp	TAILQ_INIT(&stale);
818249627Snp	TAILQ_CONCAT(&stale, &td->clip_table, link);
819249627Snp
820312116Snp	/*
821312116Snp	 * last_vnet optimizes the common cases where all if_vnet = NULL (no
822312116Snp	 * VIMAGE) or all if_vnet = vnet0.
823312116Snp	 */
824312116Snp	last_vnet = (uintptr_t)(-1);
825312116Snp	for_each_port(sc, i)
826312116Snp	for_each_vi(sc->port[i], j, vi) {
827312116Snp		if (last_vnet == (uintptr_t)vi->ifp->if_vnet)
828312116Snp			continue;
829245448Snp
830312116Snp		/* XXX: races with if_vmove */
831312116Snp		CURVNET_SET(vi->ifp->if_vnet);
832312116Snp		TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
833312116Snp			lip = &ia->ia_addr.sin6_addr;
834245448Snp
835312116Snp			KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
836312116Snp			    ("%s: mcast address in in6_ifaddr list", __func__));
837245448Snp
838312116Snp			if (IN6_IS_ADDR_LOOPBACK(lip))
839312116Snp				continue;
840312116Snp			if (IN6_IS_SCOPE_EMBED(lip)) {
841312116Snp				/* Remove the embedded scope */
842312116Snp				tlip = *lip;
843312116Snp				lip = &tlip;
844312116Snp				in6_clearscope(lip);
845312116Snp			}
846312116Snp			/*
847312116Snp			 * XXX: how to weed out the link local address for the
848312116Snp			 * loopback interface?  It's fe80::1 usually (always?).
849312116Snp			 */
850249627Snp
851312116Snp			/*
852312116Snp			 * If it's in the main list then we already know it's
853312116Snp			 * not stale.
854312116Snp			 */
855312116Snp			TAILQ_FOREACH(ce, &td->clip_table, link) {
856312116Snp				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
857312116Snp					goto next;
858249627Snp			}
859249627Snp
860312116Snp			/*
861312116Snp			 * If it's in the stale list we should move it to the
862312116Snp			 * main list.
863312116Snp			 */
864312116Snp			TAILQ_FOREACH(ce, &stale, link) {
865312116Snp				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
866312116Snp					TAILQ_REMOVE(&stale, ce, link);
867312116Snp					TAILQ_INSERT_TAIL(&td->clip_table, ce,
868312116Snp					    link);
869312116Snp					goto next;
870312116Snp				}
871312116Snp			}
872249627Snp
873312116Snp			/* A new IP6 address; add it to the CLIP table */
874312116Snp			ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
875312116Snp			memcpy(&ce->lip, lip, sizeof(ce->lip));
876312116Snp			ce->refcount = 0;
877312116Snp			rc = add_lip(sc, lip);
878312116Snp			if (rc == 0)
879312116Snp				TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
880312116Snp			else {
881312116Snp				char ip[INET6_ADDRSTRLEN];
882312116Snp
883312116Snp				inet_ntop(AF_INET6, &ce->lip, &ip[0],
884312116Snp				    sizeof(ip));
885312116Snp				log(LOG_ERR, "%s: could not add %s (%d)\n",
886312116Snp				    __func__, ip, rc);
887312116Snp				free(ce, M_CXGBE);
888312116Snp			}
889312116Snpnext:
890312116Snp			continue;
891249627Snp		}
892312116Snp		CURVNET_RESTORE();
893312116Snp		last_vnet = (uintptr_t)vi->ifp->if_vnet;
894249627Snp	}
895249627Snp
896249627Snp	/*
897249627Snp	 * Remove stale addresses (those no longer in V_in6_ifaddrhead) that are
898249627Snp	 * no longer referenced by the driver.
899249627Snp	 */
900249627Snp	TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) {
901249627Snp		if (ce->refcount == 0) {
902249627Snp			rc = delete_lip(sc, &ce->lip);
903249627Snp			if (rc == 0) {
904249627Snp				TAILQ_REMOVE(&stale, ce, link);
905245448Snp				free(ce, M_CXGBE);
906249627Snp			} else {
907249627Snp				char ip[INET6_ADDRSTRLEN];
908249627Snp
909249627Snp				inet_ntop(AF_INET6, &ce->lip, &ip[0],
910249627Snp				    sizeof(ip));
911249627Snp				log(LOG_ERR, "%s: could not delete %s (%d)\n",
912249627Snp				    __func__, ip, rc);
913249627Snp			}
914245448Snp		}
915245448Snp	}
916249627Snp	/* The ones that are still referenced need to stay in the CLIP table */
917249627Snp	TAILQ_CONCAT(&td->clip_table, &stale, link);
918249627Snp
919249627Snp	td->clip_gen = gen;
920249627Snpdone:
921249627Snp	mtx_unlock(&td->clip_table_lock);
922286001Sae	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
923245448Snp}
924245448Snp
925245448Snpstatic void
926245448Snpdestroy_clip_table(struct adapter *sc, struct tom_data *td)
927245448Snp{
928245448Snp	struct clip_entry *ce, *ce_temp;
929245448Snp
930245448Snp	if (mtx_initialized(&td->clip_table_lock)) {
931245448Snp		mtx_lock(&td->clip_table_lock);
932245448Snp		TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) {
933245448Snp			KASSERT(ce->refcount == 0,
934245448Snp			    ("%s: CLIP entry %p still in use (%d)", __func__,
935245448Snp			    ce, ce->refcount));
936245448Snp			TAILQ_REMOVE(&td->clip_table, ce, link);
937245448Snp			delete_lip(sc, &ce->lip);
938245448Snp			free(ce, M_CXGBE);
939245448Snp		}
940245448Snp		mtx_unlock(&td->clip_table_lock);
941245448Snp		mtx_destroy(&td->clip_table_lock);
942245448Snp	}
943245448Snp}
944245448Snp
945245448Snpstatic void
946237263Snpfree_tom_data(struct adapter *sc, struct tom_data *td)
947237263Snp{
948245448Snp
949245448Snp	ASSERT_SYNCHRONIZED_OP(sc);
950245448Snp
951237263Snp	KASSERT(TAILQ_EMPTY(&td->toep_list),
952237263Snp	    ("%s: TOE PCB list is not empty.", __func__));
953237263Snp	KASSERT(td->lctx_count == 0,
954237263Snp	    ("%s: lctx hash table is not empty.", __func__));
955237263Snp
956309555Sjhb	t4_free_ppod_region(&td->pr);
957245448Snp	destroy_clip_table(sc, td);
958237263Snp
959237263Snp	if (td->listen_mask != 0)
960237263Snp		hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
961237263Snp
962272719Snp	if (mtx_initialized(&td->unsent_wr_lock))
963272719Snp		mtx_destroy(&td->unsent_wr_lock);
964237263Snp	if (mtx_initialized(&td->lctx_hash_lock))
965237263Snp		mtx_destroy(&td->lctx_hash_lock);
966237263Snp	if (mtx_initialized(&td->toep_list_lock))
967237263Snp		mtx_destroy(&td->toep_list_lock);
968237263Snp
969237263Snp	free_tid_tabs(&sc->tids);
970237263Snp	free(td, M_CXGBE);
971237263Snp}
972237263Snp
973272719Snpstatic void
974272719Snpreclaim_wr_resources(void *arg, int count)
975272719Snp{
976272719Snp	struct tom_data *td = arg;
977272719Snp	STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list);
978272719Snp	struct cpl_act_open_req *cpl;
979272719Snp	u_int opcode, atid;
980272719Snp	struct wrqe *wr;
981272719Snp	struct adapter *sc;
982272719Snp
983272719Snp	mtx_lock(&td->unsent_wr_lock);
984272719Snp	STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe);
985272719Snp	mtx_unlock(&td->unsent_wr_lock);
986272719Snp
987272719Snp	while ((wr = STAILQ_FIRST(&twr_list)) != NULL) {
988272719Snp		STAILQ_REMOVE_HEAD(&twr_list, link);
989272719Snp
990272719Snp		cpl = wrtod(wr);
991272719Snp		opcode = GET_OPCODE(cpl);
992272719Snp
993272719Snp		switch (opcode) {
994272719Snp		case CPL_ACT_OPEN_REQ:
995272719Snp		case CPL_ACT_OPEN_REQ6:
996272719Snp			atid = G_TID_TID(be32toh(OPCODE_TID(cpl)));
997272719Snp			sc = td_adapter(td);
998272719Snp
999272719Snp			CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid);
1000272719Snp			act_open_failure_cleanup(sc, atid, EHOSTUNREACH);
1001272719Snp			free(wr, M_CXGBE);
1002272719Snp			break;
1003272719Snp		default:
1004272719Snp			log(LOG_ERR, "%s: leaked work request %p, wr_len %d, "
1005272719Snp			    "opcode %x\n", __func__, wr, wr->wr_len, opcode);
1006272719Snp			/* WR not freed here; go look at it with a debugger.  */
1007272719Snp		}
1008272719Snp	}
1009272719Snp}
1010272719Snp
1011237263Snp/*
1012237263Snp * Ground control to Major TOM
1013237263Snp * Commencing countdown, engines on
1014237263Snp */
1015237263Snpstatic int
1016237263Snpt4_tom_activate(struct adapter *sc)
1017237263Snp{
1018237263Snp	struct tom_data *td;
1019237263Snp	struct toedev *tod;
1020291665Sjhb	struct vi_info *vi;
1021302339Snp	struct sge_ofld_rxq *ofld_rxq;
1022302339Snp	int i, j, rc, v;
1023237263Snp
1024245274Snp	ASSERT_SYNCHRONIZED_OP(sc);
1025237263Snp
1026237263Snp	/* per-adapter softc for TOM */
1027237263Snp	td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
1028237263Snp	if (td == NULL)
1029237263Snp		return (ENOMEM);
1030237263Snp
1031237263Snp	/* List of TOE PCBs and associated lock */
1032237263Snp	mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
1033237263Snp	TAILQ_INIT(&td->toep_list);
1034237263Snp
1035237263Snp	/* Listen context */
1036237263Snp	mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
1037237263Snp	td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
1038237263Snp	    &td->listen_mask, HASH_NOWAIT);
1039237263Snp
1040272719Snp	/* List of WRs for which L2 resolution failed */
1041272719Snp	mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF);
1042272719Snp	STAILQ_INIT(&td->unsent_wr_list);
1043272719Snp	TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td);
1044272719Snp
1045237263Snp	/* TID tables */
1046237263Snp	rc = alloc_tid_tabs(&sc->tids);
1047237263Snp	if (rc != 0)
1048237263Snp		goto done;
1049237263Snp
1050309555Sjhb	rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp,
1051309555Sjhb	    t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods");
1052309555Sjhb	if (rc != 0)
1053309555Sjhb		goto done;
1054309555Sjhb	t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
1055309555Sjhb	    V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);
1056239344Snp
1057245448Snp	/* CLIP table for IPv6 offload */
1058245448Snp	init_clip_table(sc, td);
1059245448Snp
1060237263Snp	/* toedev ops */
1061237263Snp	tod = &td->tod;
1062237263Snp	init_toedev(tod);
1063237263Snp	tod->tod_softc = sc;
1064237263Snp	tod->tod_connect = t4_connect;
1065237263Snp	tod->tod_listen_start = t4_listen_start;
1066237263Snp	tod->tod_listen_stop = t4_listen_stop;
1067237263Snp	tod->tod_rcvd = t4_rcvd;
1068237263Snp	tod->tod_output = t4_tod_output;
1069237263Snp	tod->tod_send_rst = t4_send_rst;
1070237263Snp	tod->tod_send_fin = t4_send_fin;
1071237263Snp	tod->tod_pcb_detach = t4_pcb_detach;
1072237263Snp	tod->tod_l2_update = t4_l2_update;
1073237263Snp	tod->tod_syncache_added = t4_syncache_added;
1074237263Snp	tod->tod_syncache_removed = t4_syncache_removed;
1075237263Snp	tod->tod_syncache_respond = t4_syncache_respond;
1076237263Snp	tod->tod_offload_socket = t4_offload_socket;
1077252716Snp	tod->tod_ctloutput = t4_ctloutput;
1078237263Snp
1079291665Sjhb	for_each_port(sc, i) {
1080291665Sjhb		for_each_vi(sc->port[i], v, vi) {
1081291665Sjhb			TOEDEV(vi->ifp) = &td->tod;
1082302339Snp			for_each_ofld_rxq(vi, j, ofld_rxq) {
1083302339Snp				ofld_rxq->iq.set_tcb_rpl = do_set_tcb_rpl;
1084302339Snp				ofld_rxq->iq.l2t_write_rpl = do_l2t_write_rpl2;
1085302339Snp			}
1086291665Sjhb		}
1087291665Sjhb	}
1088237263Snp
1089237263Snp	sc->tom_softc = td;
1090237263Snp	register_toedev(sc->tom_softc);
1091237263Snp
1092237263Snpdone:
1093237263Snp	if (rc != 0)
1094237263Snp		free_tom_data(sc, td);
1095237263Snp	return (rc);
1096237263Snp}
1097237263Snp
1098237263Snpstatic int
1099237263Snpt4_tom_deactivate(struct adapter *sc)
1100237263Snp{
1101237263Snp	int rc = 0;
1102237263Snp	struct tom_data *td = sc->tom_softc;
1103237263Snp
1104245274Snp	ASSERT_SYNCHRONIZED_OP(sc);
1105237263Snp
1106237263Snp	if (td == NULL)
1107237263Snp		return (0);	/* XXX. KASSERT? */
1108237263Snp
1109237263Snp	if (sc->offload_map != 0)
1110237263Snp		return (EBUSY);	/* at least one port has IFCAP_TOE enabled */
1111237263Snp
1112278374Snp	if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI))
1113278374Snp		return (EBUSY);	/* both iWARP and iSCSI rely on the TOE. */
1114278374Snp
1115237263Snp	mtx_lock(&td->toep_list_lock);
1116237263Snp	if (!TAILQ_EMPTY(&td->toep_list))
1117237263Snp		rc = EBUSY;
1118237263Snp	mtx_unlock(&td->toep_list_lock);
1119237263Snp
1120237263Snp	mtx_lock(&td->lctx_hash_lock);
1121237263Snp	if (td->lctx_count > 0)
1122237263Snp		rc = EBUSY;
1123237263Snp	mtx_unlock(&td->lctx_hash_lock);
1124237263Snp
1125272719Snp	taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources);
1126272719Snp	mtx_lock(&td->unsent_wr_lock);
1127272719Snp	if (!STAILQ_EMPTY(&td->unsent_wr_list))
1128272719Snp		rc = EBUSY;
1129272719Snp	mtx_unlock(&td->unsent_wr_lock);
1130272719Snp
1131237263Snp	if (rc == 0) {
1132237263Snp		unregister_toedev(sc->tom_softc);
1133237263Snp		free_tom_data(sc, td);
1134237263Snp		sc->tom_softc = NULL;
1135237263Snp	}
1136237263Snp
1137237263Snp	return (rc);
1138237263Snp}
1139237263Snp
1140249627Snpstatic void
1141249627Snpt4_tom_ifaddr_event(void *arg __unused, struct ifnet *ifp)
1142249627Snp{
1143249627Snp
1144249627Snp	atomic_add_rel_int(&in6_ifaddr_gen, 1);
1145249627Snp	taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4);
1146249627Snp}
1147249627Snp
1148237263Snpstatic int
1149306661Sjhbt4_aio_queue_tom(struct socket *so, struct kaiocb *job)
1150306661Sjhb{
1151306661Sjhb	struct tcpcb *tp = so_sototcpcb(so);
1152306661Sjhb	struct toepcb *toep = tp->t_toe;
1153306661Sjhb	int error;
1154306661Sjhb
1155306661Sjhb	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
1156306661Sjhb		error = t4_aio_queue_ddp(so, job);
1157306661Sjhb		if (error != EOPNOTSUPP)
1158306661Sjhb			return (error);
1159306661Sjhb	}
1160306661Sjhb
1161306661Sjhb	return (t4_aio_queue_aiotx(so, job));
1162306661Sjhb}
1163306661Sjhb
1164306661Sjhbstatic int
1165237263Snpt4_tom_mod_load(void)
1166237263Snp{
1167237263Snp	int rc;
1168245441Snp	struct protosw *tcp_protosw, *tcp6_protosw;
1169237263Snp
1170302339Snp	/* CPL handlers */
1171302339Snp	t4_init_connect_cpl_handlers();
1172302339Snp	t4_init_listen_cpl_handlers();
1173302339Snp	t4_init_cpl_io_handlers();
1174302339Snp
1175299210Sjhb	rc = t4_ddp_mod_load();
1176299210Sjhb	if (rc != 0)
1177299210Sjhb		return (rc);
1178299210Sjhb
1179239344Snp	tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
1180239344Snp	if (tcp_protosw == NULL)
1181239344Snp		return (ENOPROTOOPT);
1182306661Sjhb	bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw));
1183306661Sjhb	bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs));
1184306661Sjhb	toe_usrreqs.pru_aio_queue = t4_aio_queue_tom;
1185306661Sjhb	toe_protosw.pr_usrreqs = &toe_usrreqs;
1186239344Snp
1187245441Snp	tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
1188245441Snp	if (tcp6_protosw == NULL)
1189245441Snp		return (ENOPROTOOPT);
1190306661Sjhb	bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
1191306661Sjhb	bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs));
1192306661Sjhb	toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom;
1193306661Sjhb	toe6_protosw.pr_usrreqs = &toe6_usrreqs;
1194245441Snp
1195249627Snp	TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL);
1196249627Snp	ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event,
1197249627Snp	    t4_tom_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
1198249627Snp
1199237263Snp	rc = t4_register_uld(&tom_uld_info);
1200237263Snp	if (rc != 0)
1201237263Snp		t4_tom_mod_unload();
1202237263Snp
1203237263Snp	return (rc);
1204237263Snp}
1205237263Snp
1206237263Snpstatic void
1207237263Snptom_uninit(struct adapter *sc, void *arg __unused)
1208237263Snp{
1209255006Snp	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun"))
1210245274Snp		return;
1211245274Snp
1212237263Snp	/* Try to free resources (works only if no port has IFCAP_TOE) */
1213278374Snp	if (uld_active(sc, ULD_TOM))
1214237263Snp		t4_deactivate_uld(sc, ULD_TOM);
1215245274Snp
1216255006Snp	end_synchronized_op(sc, 0);
1217237263Snp}
1218237263Snp
1219237263Snpstatic int
1220237263Snpt4_tom_mod_unload(void)
1221237263Snp{
1222237263Snp	t4_iterate(tom_uninit, NULL);
1223237263Snp
1224237263Snp	if (t4_unregister_uld(&tom_uld_info) == EBUSY)
1225237263Snp		return (EBUSY);
1226237263Snp
1227249627Snp	if (ifaddr_evhandler) {
1228249627Snp		EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_evhandler);
1229249627Snp		taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL);
1230249627Snp	}
1231249627Snp
1232299210Sjhb	t4_ddp_mod_unload();
1233299210Sjhb
1234313178Sjhb	t4_uninit_connect_cpl_handlers();
1235313178Sjhb	t4_uninit_listen_cpl_handlers();
1236313178Sjhb	t4_uninit_cpl_io_handlers();
1237313178Sjhb
1238237263Snp	return (0);
1239237263Snp}
1240237263Snp#endif	/* TCP_OFFLOAD */
1241237263Snp
1242237263Snpstatic int
1243237263Snpt4_tom_modevent(module_t mod, int cmd, void *arg)
1244237263Snp{
1245237263Snp	int rc = 0;
1246237263Snp
1247237263Snp#ifdef TCP_OFFLOAD
1248237263Snp	switch (cmd) {
1249237263Snp	case MOD_LOAD:
1250237263Snp		rc = t4_tom_mod_load();
1251237263Snp		break;
1252237263Snp
1253237263Snp	case MOD_UNLOAD:
1254237263Snp		rc = t4_tom_mod_unload();
1255237263Snp		break;
1256237263Snp
1257237263Snp	default:
1258237263Snp		rc = EINVAL;
1259237263Snp	}
1260237263Snp#else
1261237263Snp	printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
1262237263Snp	rc = EOPNOTSUPP;
1263237263Snp#endif
1264237263Snp	return (rc);
1265237263Snp}
1266237263Snp
1267237263Snpstatic moduledata_t t4_tom_moddata= {
1268237263Snp	"t4_tom",
1269237263Snp	t4_tom_modevent,
1270241394Skevlo	0
1271237263Snp};
1272237263Snp
1273237263SnpMODULE_VERSION(t4_tom, 1);
1274237263SnpMODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
1275237263SnpMODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
1276237263SnpDECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
1277