/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/t4_sge.c 281253 2015-04-08 01:43:29Z np $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <sys/counter.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#ifdef DEV_NETMAP
#include <machine/bus.h>
#include <sys/selinfo.h>
#include <net/if_var.h>
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#endif

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
int fl_pktshift = 2;
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 *  0: disable padding.
 *  Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);

/*
 * Status page length.
 * -1: driver should figure out a good value.
 *  64 or 128 are the only other valid values.
 */
int spg_len = -1;
TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 *  0: backpressure the channel instead of dropping packets right away.
 *  1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 *  0: disable buffer packing.
 *  1: enable buffer packing.
 */
static int buffer_packing = -1;
TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4: driver will ignore this and use the same value as fl_pad above.
 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);

/*
 * Allow the driver to create mbuf(s) in a cluster allocated for rx.
 * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
 * 1: ok to create mbuf(s) within a cluster if there is room.
 */
static int allow_mbufs_in_cluster = 1;
TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster);

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster);

/*
 * Size of cluster allocation that's most likely to succeed.  The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);
/* Used to track coalesced tx work request */
struct txpkts {
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	int nflits;		/* # of flits needed for the SGL */
	bus_dma_segment_t seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
    char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
#ifdef DEV_NETMAP
static int alloc_nm_rxq(struct port_info *, struct sge_nm_rxq *, int, int,
    struct sysctl_oid *);
static int free_nm_rxq(struct port_info *, struct sge_nm_rxq *);
static int alloc_nm_txq(struct port_info *, struct sge_nm_txq *, int, int,
    struct sysctl_oid *);
static int free_nm_txq(struct port_info *, struct sge_nm_txq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct adapter *, struct sge_fl *);
static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
static void find_safe_refill_source(struct adapter *, struct sge_fl *);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);

static counter_u64_t extfree_refs;
static counter_u64_t extfree_rels;

/*
 * Called on MOD_LOAD.  Validates and calculates the SGE tunables.
 */
void
t4_sge_modload(void)
{

	if (fl_pktshift < 0 || fl_pktshift > 7) {
		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
		    " using 2 instead.\n", fl_pktshift);
		fl_pktshift = 2;
	}

	if (spg_len != 64 && spg_len != 128) {
		int len;

#if defined(__i386__) || defined(__amd64__)
		len = cpu_clflush_line_size > 64 ? 128 : 64;
#else
		len = 64;
#endif
		if (spg_len != -1) {
			printf("Invalid hw.cxgbe.spg_len value (%d),"
			    " using %d instead.\n", spg_len, len);
		}
		spg_len = len;
	}

	if (cong_drop < -1 || cong_drop > 1) {
		printf("Invalid hw.cxgbe.cong_drop value (%d),"
		    " using 0 instead.\n", cong_drop);
		cong_drop = 0;
	}

	extfree_refs = counter_u64_alloc(M_WAITOK);
	extfree_rels = counter_u64_alloc(M_WAITOK);
	counter_u64_zero(extfree_refs);
	counter_u64_zero(extfree_rels);
}

void
t4_sge_modunload(void)
{

	counter_u64_free(extfree_refs);
	counter_u64_free(extfree_rels);
}

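/*
 * Number of rx cluster references handed out (extfree_refs) minus the number
 * released back (extfree_rels).  A nonzero result means some rx clusters are
 * still in use, e.g. by mbufs sitting in the network stack.
 */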
uint64_t
t4_sge_extfree_refs(void)
{
	uint64_t refs, rels;

	rels = counter_u64_fetch(extfree_rels);
	refs = counter_u64_fetch(extfree_refs);

	return (refs - rels);
}

void
t4_init_sge_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
}

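/*
 * Validates fl_pad and fl_pack (falling back to sane defaults if they are
 * invalid) and programs the pad boundary in SGE_CONTROL and, on T5, the pack
 * boundary in SGE_CONTROL2.  On T4 a single boundary serves both purposes.
 */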
static inline void
setup_pad_and_pack_boundaries(struct adapter *sc)
{
	uint32_t v, m;
	int pad, pack;

	pad = fl_pad;
	if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
		/*
		 * If there is any chance that we might use buffer packing and
		 * the chip is a T4, then pick 64 as the pad/pack boundary.  Set
		 * it to 32 in all other cases.
		 */
		pad = is_t4(sc) && buffer_packing ? 64 : 32;

		/*
		 * For fl_pad = 0 we'll still write a reasonable value to the
		 * register but all the freelists will opt out of padding.
		 * We'll complain here only if the user tried to set it to a
		 * value greater than 0 that was invalid.
		 */
		if (fl_pad > 0) {
			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
			    " (%d), using %d instead.\n", fl_pad, pad);
		}
	}
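	/* The register field holds log2(boundary) - 5, so 32B encodes as 0. */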
	m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
	v = V_INGPADBOUNDARY(ilog2(pad) - 5);
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	if (is_t4(sc)) {
		if (fl_pack != -1 && fl_pack != pad) {
			/* Complain but carry on. */
			device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
			    " using %d instead.\n", fl_pack, pad);
		}
		return;
	}

	pack = fl_pack;
	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
	    !powerof2(fl_pack)) {
		pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
		MPASS(powerof2(pack));
		if (pack < 16)
			pack = 16;
		if (pack == 32)
			pack = 64;
		if (pack > 4096)
			pack = 4096;
		if (fl_pack != -1) {
			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
			    " (%d), using %d instead.\n", fl_pack, pack);
		}
	}
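	/*
	 * T5 encoding: 0 selects a 16B pack boundary; any other value n
	 * selects a boundary of 2^(n + 5) bytes, same as the pad encoding.
	 */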
	m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
	if (pack == 16)
		v = V_INGPACKBOUNDARY(0);
	else
		v = V_INGPACKBOUNDARY(ilog2(pack) - 5);

	MPASS(!is_t4(sc));	/* T4 doesn't have SGE_CONTROL2 */
	t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
}

/*
 * adap->params.vpd.cclk must be set up before this is called.
 */
void
t4_tweak_chip_settings(struct adapter *sc)
{
	int i;
	uint32_t v, m;
	int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
	int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
	int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
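	/*
	 * Candidate hw buffer sizes.  The entries with MSIZE and/or
	 * CL_METADATA_SIZE subtracted leave room in the cluster for inlined
	 * mbufs and the software's per-cluster metadata.
	 */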
	static int sge_flbuf_sizes[] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
		MJUMPAGESIZE - CL_METADATA_SIZE,
		MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES,
		MCLBYTES - MSIZE - CL_METADATA_SIZE,
		MJUM9BYTES - CL_METADATA_SIZE,
		MJUM16BYTES - CL_METADATA_SIZE,
	};

	KASSERT(sc->flags & MASTER_PF,
	    ("%s: trying to change chip settings when not master.", __func__));

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	setup_pad_and_pack_boundaries(sc);

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);

	KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES,
	    ("%s: hw buffer size table too big", __func__));
	for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) {
		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
		    sge_flbuf_sizes[i]);
	}

	v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
	    V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);

	KASSERT(intr_timer[0] <= timer_max,
	    ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
	    timer_max));
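	/*
	 * Pull any timer that exceeds the chip's maximum back into range by
	 * repeatedly averaging it with its (already valid) predecessor; the
	 * last timer is simply clamped.  Increasing order is preserved.
	 */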
	for (i = 1; i < nitems(intr_timer); i++) {
		KASSERT(intr_timer[i] >= intr_timer[i - 1],
		    ("%s: timers not listed in increasing order (%d)",
		    __func__, i));

		while (intr_timer[i] > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

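	/* Backpressure the channel instead of dropping on congestion. */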
	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		t4_set_reg_field(sc, A_TP_PARA_REG3, m, 0);
	}

	/* 4K, 16K, 64K, 256K DDP "page sizes" */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	m = v = F_TDDPTAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * SGE wants the buffer to be at least 64B and then a multiple of 16.  If
 * padding is in use, the buffer's start and end need to be aligned to the pad
 * boundary as well.  We'll just make sure that the size is a multiple of the
 * boundary here; it is up to the buffer allocation code to make sure the start
 * of the buffer is aligned as well.
 */
static inline int
hwsz_ok(struct adapter *sc, int hwsz)
{
	int mask = fl_pad ? sc->sge.pad_boundary - 1 : 16 - 1;

	return (hwsz >= 64 && (hwsz & mask) == 0);
}

/*
 * XXX: driver really should be able to deal with unexpected settings.
 */
int
t4_read_chip_settings(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	int i, j, n, rc = 0;
	uint32_t m, v, r;
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sw_buf_sizes[] = {	/* Sorted by size */
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};
	struct sw_zone_info *swz, *safe_swz;
	struct hw_buf_info *hwb;

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	r = t4_read_reg(sc, A_SGE_CONTROL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
		rc = EINVAL;
	}
	s->pad_boundary = 1 << (G_INGPADBOUNDARY(r) + 5);

	if (is_t4(sc))
		s->pack_boundary = s->pad_boundary;
	else {
		r = t4_read_reg(sc, A_SGE_CONTROL2);
		if (G_INGPACKBOUNDARY(r) == 0)
			s->pack_boundary = 16;
		else
			s->pack_boundary = 1 << (G_INGPACKBOUNDARY(r) + 5);
	}

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	r = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
	if (r != v) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
		rc = EINVAL;
	}

	/* Filter out unusable hw buffer sizes entirely (mark with -2). */
	hwb = &s->hw_buf_info[0];
	for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
		r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
		hwb->size = r;
		hwb->zidx = hwsz_ok(sc, r) ? -1 : -2;
		hwb->next = -1;
	}

	/*
	 * Create a sorted list in decreasing order of hw buffer sizes (and so
	 * increasing order of spare area) for each software zone.
	 *
	 * If padding is enabled then the start and end of the buffer must align
	 * to the pad boundary; if packing is enabled then they must align with
	 * the pack boundary as well.  Allocations from the cluster zones are
	 * aligned to min(size, 4K), so the buffer starts at that alignment and
	 * ends at hwb->size alignment.  If mbuf inlining is allowed the
	 * starting alignment will be reduced to MSIZE and the driver will
	 * exercise appropriate caution when deciding on the best buffer layout
	 * to use.
	 */
	n = 0;	/* no usable buffer size to begin with */
	swz = &s->sw_zone_info[0];
	safe_swz = NULL;
	for (i = 0; i < SW_ZONE_SIZES; i++, swz++) {
		int8_t head = -1, tail = -1;

		swz->size = sw_buf_sizes[i];
		swz->zone = m_getzone(swz->size);
		swz->type = m_gettype(swz->size);

		if (swz->size < PAGE_SIZE) {
			MPASS(powerof2(swz->size));
			if (fl_pad && (swz->size % sc->sge.pad_boundary != 0))
				continue;
		}

		if (swz->size == safest_rx_cluster)
			safe_swz = swz;

		hwb = &s->hw_buf_info[0];
		for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
			if (hwb->zidx != -1 || hwb->size > swz->size)
				continue;
#ifdef INVARIANTS
			if (fl_pad)
				MPASS(hwb->size % sc->sge.pad_boundary == 0);
#endif
			hwb->zidx = i;
			if (head == -1)
				head = tail = j;
			else if (hwb->size < s->hw_buf_info[tail].size) {
				s->hw_buf_info[tail].next = j;
				tail = j;
			} else {
				int8_t *cur;
				struct hw_buf_info *t;

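				/*
				 * Insert j before the first smaller entry;
				 * a hw size already on the list is a
				 * duplicate and is marked unusable (-2).
				 */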
				for (cur = &head; *cur != -1; cur = &t->next) {
					t = &s->hw_buf_info[*cur];
					if (hwb->size == t->size) {
						hwb->zidx = -2;
						break;
					}
					if (hwb->size > t->size) {
						hwb->next = *cur;
						*cur = j;
						break;
					}
				}
			}
		}
		swz->head_hwidx = head;
		swz->tail_hwidx = tail;

		if (tail != -1) {
			n++;
			if (swz->size - s->hw_buf_info[tail].size >=
			    CL_METADATA_SIZE)
				sc->flags |= BUF_PACKING_OK;
		}
	}
	if (n == 0) {
		device_printf(sc->dev, "no usable SGE FL buffer size.\n");
		rc = EINVAL;
	}

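	/*
	 * safe_hwidx1 is the largest hw buffer size that fits in the safe
	 * zone; safe_hwidx2 is the largest one that also leaves at least
	 * CL_METADATA_SIZE of spare room in the cluster.
	 */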
	s->safe_hwidx1 = -1;
	s->safe_hwidx2 = -1;
	if (safe_swz != NULL) {
		s->safe_hwidx1 = safe_swz->head_hwidx;
		for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) {
			int spare;

			hwb = &s->hw_buf_info[i];
#ifdef INVARIANTS
			if (fl_pad)
				MPASS(hwb->size % sc->sge.pad_boundary == 0);
#endif
			spare = safe_swz->size - hwb->size;
			if (spare >= CL_METADATA_SIZE) {
				s->safe_hwidx2 = i;
				break;
			}
		}
	}

	r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
	s->counter_val[0] = G_THRESHOLD_0(r);
	s->counter_val[1] = G_THRESHOLD_1(r);
	s->counter_val[2] = G_THRESHOLD_2(r);
	s->counter_val[3] = G_THRESHOLD_3(r);

	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
	s->timer_val[0] = G_TIMERVALUE0(r) / core_ticks_per_usec(sc);
	s->timer_val[1] = G_TIMERVALUE1(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
	s->timer_val[2] = G_TIMERVALUE2(r) / core_ticks_per_usec(sc);
	s->timer_val[3] = G_TIMERVALUE3(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
	s->timer_val[4] = G_TIMERVALUE4(r) / core_ticks_per_usec(sc);
	s->timer_val[5] = G_TIMERVALUE5(r) / core_ticks_per_usec(sc);

	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		r = t4_read_reg(sc, A_TP_PARA_REG3);
		if (r & m) {
			device_printf(sc->dev,
			    "invalid TP_PARA_REG3(0x%x)\n", r);
			rc = EINVAL;
		}
	}

	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
	if (r != v) {
		device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
		rc = EINVAL;
	}

	m = v = F_TDDPTAGTCB;
	r = t4_read_reg(sc, A_ULP_RX_CTL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
		rc = EINVAL;
	}

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	r = t4_read_reg(sc, A_TP_PARA_REG5);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
		rc = EINVAL;
	}

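	/*
	 * Freelists with fewer than this many free descriptors are considered
	 * starving.  The threshold register appears to be in units of 2
	 * descriptors, hence the doubling here.
	 */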
	r = t4_read_reg(sc, A_SGE_CONM_CTRL);
	s->fl_starve_threshold = G_EGRTHRESHOLD(r) * 2 + 1;
	if (is_t4(sc))
		s->fl_starve_threshold2 = s->fl_starve_threshold;
	else
		s->fl_starve_threshold2 = G_EGRTHRESHOLDPACKING(r) * 2 + 1;

	/* egress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_EGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->eq_s_qpp = r & M_QUEUESPERPAGEPF0;

	/* ingress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_INGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->iq_s_qpp = r & M_QUEUESPERPAGEPF0;

	t4_init_tp_params(sc);

	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
	t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);

	return (rc);
}

int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

void
t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
	    CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A",
	    "freelist buffer sizes");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
	    NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
	    NULL, sc->sge.pad_boundary, "payload pad boundary (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
	    NULL, spg_len, "status page size (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
	    NULL, cong_drop, "congestion drop setting");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
	    NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)");
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	sysctl_ctx_init(&sc->ctx);
	sc->flags |= ADAP_SYSCTL_CTX;

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

	/*
	 * Management queue.  This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queue */
	if (sc->flags & ADAP_SYSCTL_CTX) {
		sysctl_ctx_free(&sc->ctx);
		sc->flags &= ~ADAP_SYSCTL_CTX;
	}

	free_mgmtq(sc);
	free_fwq(sc);

	return (0);
}

static inline int
port_intr_count(struct port_info *pi)
{
	int rc = 0;

	if (pi->flags & INTR_RXQ)
		rc += pi->nrxq;
#ifdef TCP_OFFLOAD
	if (pi->flags & INTR_OFLD_RXQ)
		rc += pi->nofldrxq;
#endif
#ifdef DEV_NETMAP
	if (pi->flags & INTR_NM_RXQ)
		rc += pi->nnmrxq;
#endif
	return (rc);
}

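/*
 * The index of the first MSI-X vector that belongs to this port: the
 * T4_EXTRA_INTR vectors that the adapter keeps for itself, plus the vectors
 * taken by all lower-numbered ports.
 */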
static inline int
first_vector(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc = T4_EXTRA_INTR, i;

	if (sc->intr_count == 1)
		return (0);

	for_each_port(sc, i) {
		if (i == pi->port_id)
			break;

		rc += port_intr_count(sc->port[i]);
	}

	return (rc);
}

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
	struct adapter *sc = pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;
	int nintr, i;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

	nintr = port_intr_count(pi);
	KASSERT(nintr != 0,
	    ("%s: pi %p has no exclusive interrupts, total interrupts = %d",
	    __func__, pi, sc->intr_count));
#ifdef DEV_NETMAP
	/* Exclude netmap queues as they can't take anyone else's interrupts */
	if (pi->flags & INTR_NM_RXQ)
		nintr -= pi->nnmrxq;
	KASSERT(nintr > 0,
	    ("%s: pi %p has nintr %d after netmap adjustment of %d", __func__,
	    pi, nintr, pi->nnmrxq));
#endif
	i = idx % nintr;

	if (pi->flags & INTR_RXQ) {
		if (i < pi->nrxq) {
			iq = &s->rxq[pi->first_rxq + i].iq;
			goto done;
		}
		i -= pi->nrxq;
	}
#ifdef TCP_OFFLOAD
	if (pi->flags & INTR_OFLD_RXQ) {
		if (i < pi->nofldrxq) {
			iq = &s->ofld_rxq[pi->first_ofld_rxq + i].iq;
			goto done;
		}
		i -= pi->nofldrxq;
	}
#endif
	panic("%s: pi %p, intr_flags 0x%lx, idx %d, total intr %d\n", __func__,
	    pi, pi->flags & INTR_ALL, idx, nintr);
done:
	MPASS(iq != NULL);
	KASSERT(iq->flags & IQ_INTR,
	    ("%s: iq %p (port %p, intr_flags 0x%lx, idx %d)", __func__, iq, pi,
	    pi->flags & INTR_ALL, idx));
	return (iq);
}

/* Maximum payload that can be delivered with a single iq descriptor */
static inline int
mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
{
	int payload;

#ifdef TCP_OFFLOAD
	if (toe) {
		payload = sc->tt.rx_coalesce ?
		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu;
	} else {
#endif
		/* large enough even when hw VLAN extraction is disabled */
		payload = fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
		    mtu;
#ifdef TCP_OFFLOAD
	}
#endif

	return (payload);
}

int
t4_setup_port_queues(struct port_info *pi)
{
	int rc = 0, i, j, intr_idx, iqid;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
	struct sge_wrq *ctrlq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif
#ifdef DEV_NETMAP
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif
	char name[16];
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;
	struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int maxp, mtu = ifp->if_mtu;

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = first_vector(pi);

	/*
	 * First pass over all NIC and TOE rx queues:
	 * a) initialize iq and fl
	 * b) allocate queue iff it will take direct interrupts.
	 */
	maxp = mtu_to_max_payload(sc, mtu, 0);
	if (pi->flags & INTR_RXQ) {
		oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq",
		    CTLFLAG_RD, NULL, "rx queues");
	}
	for_each_rxq(pi, i, rxq) {

		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq);

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, name);

		if (pi->flags & INTR_RXQ) {
			rxq->iq.flags |= IQ_INTR;
			rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#ifdef TCP_OFFLOAD
	maxp = mtu_to_max_payload(sc, mtu, 1);
	if (is_offload(sc) && pi->flags & INTR_OFLD_RXQ) {
		oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
	}
	for_each_ofld_rxq(pi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
		    pi->qsize_rxq);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, name);

		if (pi->flags & INTR_OFLD_RXQ) {
			ofld_rxq->iq.flags |= IQ_INTR;
			rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#endif
#ifdef DEV_NETMAP
	/*
	 * We don't have buffers to back the netmap rx queues right now so we
	 * create the queues in a way that doesn't set off any congestion signal
	 * in the chip.
	 */
	if (pi->flags & INTR_NM_RXQ) {
		oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "nm_rxq",
		    CTLFLAG_RD, NULL, "rx queues for netmap");
		for_each_nm_rxq(pi, i, nm_rxq) {
			rc = alloc_nm_rxq(pi, nm_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#endif

	/*
	 * Second pass over all NIC and TOE rx queues.  The queues forwarding
	 * their interrupts are allocated now.
	 */
	j = 0;
	if (!(pi->flags & INTR_RXQ)) {
		oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq",
		    CTLFLAG_RD, NULL, "rx queues");
		for_each_rxq(pi, i, rxq) {
			MPASS(!(rxq->iq.flags & IQ_INTR));

			intr_idx = port_intr_iq(pi, j)->abs_id;

			rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			j++;
		}
	}
#ifdef TCP_OFFLOAD
	if (is_offload(sc) && !(pi->flags & INTR_OFLD_RXQ)) {
		oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
		for_each_ofld_rxq(pi, i, ofld_rxq) {
			MPASS(!(ofld_rxq->iq.flags & IQ_INTR));

			intr_idx = port_intr_iq(pi, j)->abs_id;

			rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			j++;
		}
	}
#endif
#ifdef DEV_NETMAP
	if (!(pi->flags & INTR_NM_RXQ))
		CXGBE_UNIMPLEMENTED(__func__);
#endif

	/*
	 * Now the tx queues.  Only one pass needed.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
	    NULL, "tx queues");
	j = 0;
	for_each_txq(pi, i, txq) {
		iqid = port_intr_iq(pi, j)->cntxt_id;
		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
		    name);

		rc = alloc_txq(pi, txq, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}
#ifdef TCP_OFFLOAD
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
	for_each_ofld_txq(pi, i, ofld_txq) {
		struct sysctl_oid *oid2;

		iqid = port_intr_iq(pi, j)->cntxt_id;
		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
		    iqid, name);

		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "offload tx queue");

		rc = alloc_wrq(sc, pi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif
#ifdef DEV_NETMAP
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "nm_txq",
	    CTLFLAG_RD, NULL, "tx queues for netmap use");
	for_each_nm_txq(pi, i, nm_txq) {
		iqid = pi->first_nm_rxq + (j % pi->nnmrxq);
		rc = alloc_nm_txq(pi, nm_txq, iqid, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Finally, the control queue.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
	    NULL, "ctrl queue");
	ctrlq = &sc->sge.ctrlq[pi->port_id];
	iqid = port_intr_iq(pi, 0)->cntxt_id;
	snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
	init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
	rc = alloc_wrq(sc, pi, ctrlq, oid);

done:
	if (rc)
		t4_teardown_port_queues(pi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_port_queues(struct port_info *pi)
{
	int i;
	struct adapter *sc = pi->adapter;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif
#ifdef DEV_NETMAP
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif

	/* Do this before freeing the queues */
	if (pi->flags & PORT_SYSCTL_CTX) {
		sysctl_ctx_free(&pi->ctx);
		pi->flags &= ~PORT_SYSCTL_CTX;
	}

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

	for_each_txq(pi, i, txq) {
		free_txq(pi, txq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_txq(pi, i, ofld_txq) {
		free_wrq(sc, ofld_txq);
	}
#endif
#ifdef DEV_NETMAP
	for_each_nm_txq(pi, i, nm_txq)
	    free_nm_txq(pi, nm_txq);
#endif

	/*
	 * Then take down the rx queues that forward their interrupts, as they
	 * reference other rx queues.
	 */

	for_each_rxq(pi, i, rxq) {
		if ((rxq->iq.flags & IQ_INTR) == 0)
			free_rxq(pi, rxq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif
#ifdef DEV_NETMAP
	for_each_nm_rxq(pi, i, nm_rxq)
	    free_nm_rxq(pi, nm_rxq);
#endif

	/*
	 * Then take down the rx queues that take direct interrupts.
	 */

	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			free_rxq(pi, rxq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif
#ifdef DEV_NETMAP
	CXGBE_UNIMPLEMENTED(__func__);
#endif

	return (0);
}

/*
 * Deals with errors and the firmware event queue.  All data rx queues forward
 * their interrupt to the firmware event queue.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	t4_intr_err(arg);
	if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(fwq, 0);
		atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
	}
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;

	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
	t4_slow_intr_handler(sc);
}

void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

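/*
 * The IQS_IDLE -> IQS_BUSY cmpset guarantees that only one thread services a
 * given ingress queue at a time; a lost race simply leaves the queue to
 * whoever is already working on it.
 */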
void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

/*
 * Deals with anything and everything on the given ingress queue.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */
	struct sge_fl *fl;			/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct iq_desc *d = &iq->desc[iq->cidx];
	int ndescs = 0, limit;
	int rsp_type, refill;
	uint32_t lq;
	uint16_t fl_hw_cidx;
	struct mbuf *m0;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
#if defined(INET) || defined(INET6)
	const struct timeval lro_timeout = {0, sc->lro_timeout};
#endif

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));

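	/*
	 * With a budget, that's the hard cap on work before this call bails
	 * out with EINPROGRESS; otherwise process qsize/16 descriptors per
	 * doorbell update and keep going until the queue is empty.
	 */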
	limit = budget ? budget : iq->qsize / 16;

	if (iq->flags & IQ_HAS_FL) {
		fl = &rxq->fl;
		fl_hw_cidx = fl->hw_cidx;	/* stable snapshot */
	} else {
		fl = NULL;
		fl_hw_cidx = 0;			/* to silence gcc warning */
	}

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {

			rmb();

			refill = 0;
			m0 = NULL;
			rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
			lq = be32toh(d->rsp.pldbuflen_qid);

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:

				KASSERT(iq->flags & IQ_HAS_FL,
				    ("%s: data for an iq (%p) with no freelist",
				    __func__, iq));

				m0 = get_fl_payload(sc, fl, lq);
				if (__predict_false(m0 == NULL))
					goto process_iql;
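				/*
				 * Refill when the hardware has consumed more
				 * than 2 fl descriptors since the last
				 * snapshot of fl->hw_cidx.
				 */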
				refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2;
#ifdef T4_PKT_TIMESTAMP
				/*
				 * 60 bit timestamp for the payload is
				 * *(uint64_t *)m0->m_pktdat.  Note that it is
				 * in the leading free-space in the mbuf.  The
				 * kernel can clobber it during a pullup,
				 * m_copymdata, etc.  You need to make sure that
				 * the mbuf reaches you unmolested if you care
				 * about the timestamp.
				 */
				*(uint64_t *)m0->m_pktdat =
				    be64toh(d->rsp.u.last_flit) &
				    0xfffffffffffffff;
#endif

				/* fall through */

			case X_RSPD_TYPE_CPL:
				KASSERT(d->rss.opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    d->rss.opcode));
				sc->cpl_handler[d->rss.opcode](iq, &d->rss, m0);
				break;

			case X_RSPD_TYPE_INTR:

				/*
				 * Interrupts should be forwarded only to queues
				 * that are not forwarding their interrupts.
				 * This means service_iq can recurse but only 1
				 * level deep.
				 */
				KASSERT(budget == 0,
				    ("%s: budget %u, rsp_type %u", __func__,
				    budget, rsp_type));

				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (lq >= 1024) {
					sc->an_handler(iq, &d->rsp);
					break;
				}

				q = sc->sge.iqmap[lq - sc->sge.iq_start];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq(q, q->qsize / 16) == 0) {
						atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			d++;
			if (__predict_false(++iq->cidx == iq->sidx)) {
				iq->cidx = 0;
				iq->gen ^= F_RSPD_GEN;
				d = &iq->desc[0];
			}
			if (__predict_false(++ndescs == limit)) {
				t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

#if defined(INET) || defined(INET6)
				if (iq->flags & IQ_LRO_ENABLED &&
				    sc->lro_timeout != 0) {
					tcp_lro_flush_inactive(&rxq->lro,
					    &lro_timeout);
				}
#endif

				if (budget) {
					if (iq->flags & IQ_HAS_FL) {
						FL_LOCK(fl);
						refill_fl(sc, fl, 32);
						FL_UNLOCK(fl);
					}
					return (EINPROGRESS);
				}
			}
			if (refill) {
				FL_LOCK(fl);
				refill_fl(sc, fl, 32);
				FL_UNLOCK(fl);
				fl_hw_cidx = fl->hw_cidx;
			}
		}

process_iql:
		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq(q, q->qsize / 8) == 0)
			atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		struct lro_ctrl *lro = &rxq->lro;
		struct lro_entry *l;

		while (!SLIST_EMPTY(&lro->lro_active)) {
			l = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, l);
		}
	}
#endif

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	if (iq->flags & IQ_HAS_FL) {
		int starved;

		FL_LOCK(fl);
1504228561Snp		FL_LOCK(fl);
1505265425Snp		starved = refill_fl(sc, fl, 64);
1506228561Snp		FL_UNLOCK(fl);
1507228561Snp		if (__predict_false(starved != 0))
1508228561Snp			add_fl_to_sfl(sc, fl);
1509228561Snp	}
1510228561Snp
1511228561Snp	return (0);
1512218792Snp}
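
/*
 * Editor's note: a minimal, compiled-out sketch of the generation-bit
 * handshake service_iq relies on.  The toy_* names are hypothetical; only
 * the gen-bit logic itself mirrors the loop above.  The hardware stamps
 * every descriptor it writes with the current generation bit; the driver
 * consumes entries while the stamped bit matches its own copy and flips
 * that copy each time cidx wraps, so leftover descriptors from the
 * previous lap around the ring can never look new.
 */
#if 0
struct toy_desc {
	volatile uint8_t gen;	/* written last by the producer */
	/* ... payload ... */
};

struct toy_ring {
	struct toy_desc *desc;	/* 'size' entries */
	u_int size, cidx;	/* ring size, consumer index */
	uint8_t gen;		/* generation the consumer expects */
};

static void
toy_drain(struct toy_ring *r)
{
	while (r->desc[r->cidx].gen == r->gen) {
		rmb();		/* read the payload only after gen matches */
		/* ... process r->desc[r->cidx] ... */
		if (__predict_false(++r->cidx == r->size)) {
			r->cidx = 0;
			r->gen ^= 1;	/* expect the flipped bit next lap */
		}
	}
}
#endif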
1513218792Snp
1514265425Snpstatic inline int
1515265425Snpcl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll)
1516255050Snp{
1517265425Snp	int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0;
1518255050Snp
1519265425Snp	if (rc)
1520265425Snp		MPASS(cll->region3 >= CL_METADATA_SIZE);
1521255050Snp
1522265425Snp	return (rc);
1523255050Snp}
1524255050Snp
1525265425Snpstatic inline struct cluster_metadata *
1526265425Snpcl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll,
1527265425Snp    caddr_t cl)
1528255050Snp{
1529255050Snp
1530265425Snp	if (cl_has_metadata(fl, cll)) {
1531265425Snp		struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
1532255050Snp
1533265425Snp		return ((struct cluster_metadata *)(cl + swz->size) - 1);
1534255050Snp	}
1535265425Snp	return (NULL);
1536255050Snp}
1537255050Snp
1538255050Snpstatic int
1539255050Snprxb_free(struct mbuf *m, void *arg1, void *arg2)
1540255050Snp{
1541255050Snp	uma_zone_t zone = arg1;
1542255050Snp	caddr_t cl = arg2;
1543255050Snp
1544255050Snp	uma_zfree(zone, cl);
1545269356Snp	counter_u64_add(extfree_rels, 1);
1546255050Snp
1547255050Snp	return (EXT_FREE_OK);
1548255050Snp}
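
/*
 * rxb_free above is the ext_free handler for clusters handed up with
 * m_extaddref; every extfree_refs bump in get_scatter_segment below is
 * eventually balanced by the extfree_rels bump here, once the last mbuf
 * referencing the cluster has been freed.
 */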
1549255050Snp
1550265425Snp/*
1551265425Snp * The mbuf returned by this function could be allocated from zone_mbuf or
1552265425Snp * constructed in spare room in the cluster.
1553265425Snp *
1554265425Snp * The mbuf carries the payload in one of these ways:
1555265425Snp * a) frame inside the mbuf (mbuf allocated from zone_mbuf)
1556265425Snp * b) m_cljset (cluster without metadata, mbuf from zone_mbuf)
1557265425Snp * c) m_extaddref (cluster with metadata, mbuf inlined in the cluster)
1558265425Snp * d) m_extaddref (cluster with metadata, mbuf from zone_mbuf)
1559265425Snp */
1560255050Snpstatic struct mbuf *
1561281212Snpget_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
1562281212Snp    int remaining)
1563218792Snp{
1564265425Snp	struct mbuf *m;
1565228561Snp	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1566265425Snp	struct cluster_layout *cll = &sd->cll;
1567265425Snp	struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
1568265425Snp	struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
1569265425Snp	struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
1570281212Snp	int len, blen;
1571265425Snp	caddr_t payload;
1572218792Snp
1573281212Snp	blen = hwb->size - fl->rx_offset;	/* max possible in this buf */
1574281212Snp	len = min(remaining, blen);
1575265425Snp	payload = sd->cl + cll->region1 + fl->rx_offset;
1576281212Snp	if (fl->flags & FL_BUF_PACKING) {
1577281212Snp		const u_int l = fr_offset + len;
1578281212Snp		const u_int pad = roundup2(l, fl->buf_boundary) - l;
1579219290Snp
1580281212Snp		if (fl->rx_offset + len + pad < hwb->size)
1581281212Snp			blen = len + pad;
1582281212Snp		MPASS(fl->rx_offset + blen <= hwb->size);
1583281212Snp	} else {
1584281212Snp		MPASS(fl->rx_offset == 0);	/* not packing */
1585281212Snp	}
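
	/*
	 * Worked example of the padding math above (illustrative numbers):
	 * with buf_boundary = 64 and fr_offset + len = 130, roundup2(130, 64)
	 * is 192, so pad = 62.  If those 62 bytes still fit in this hw buffer
	 * then blen becomes len + 62 and the next segment of the frame will
	 * start at a 64-byte aligned offset.
	 */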
1586281212Snp
1588265425Snp	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
1589255050Snp
1590265425Snp		/*
1591265425Snp		 * Copy payload into a freshly allocated mbuf.
1592265425Snp		 */
1593255050Snp
1594281212Snp		m = fr_offset == 0 ?
1595265425Snp		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
1596265425Snp		if (m == NULL)
1597255050Snp			return (NULL);
1598265425Snp		fl->mbuf_allocated++;
1599255050Snp#ifdef T4_PKT_TIMESTAMP
1600265425Snp		/* Leave room for a timestamp */
1601265425Snp		m->m_data += 8;
1602255050Snp#endif
1603265425Snp		/* copy data to mbuf */
1604265425Snp		bcopy(payload, mtod(m, caddr_t), len);
1605255050Snp
1606269356Snp	} else if (sd->nmbuf * MSIZE < cll->region1) {
1607255050Snp
1608265425Snp		/*
1609265425Snp		 * There's spare room in the cluster for an mbuf.  Create one
1610267694Snp		 * and associate it with the payload that's in the cluster.
1611265425Snp		 */
1612255050Snp
1613265425Snp		MPASS(clm != NULL);
1614269356Snp		m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE);
1615265425Snp		/* No bzero required */
1616281212Snp		if (m_init(m, NULL, 0, M_NOWAIT, MT_DATA,
1617281212Snp		    fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE))
1618265425Snp			return (NULL);
1619265425Snp		fl->mbuf_inlined++;
1620281212Snp		m_extaddref(m, payload, blen, &clm->refcount, rxb_free,
1621265425Snp		    swz->zone, sd->cl);
1622269356Snp		if (sd->nmbuf++ == 0)
1623269356Snp			counter_u64_add(extfree_refs, 1);
1624255050Snp
1625265425Snp	} else {
1626255050Snp
1627265425Snp		/*
1628265425Snp		 * Grab an mbuf from zone_mbuf and associate it with the
1629265425Snp		 * payload in the cluster.
1630265425Snp		 */
1631255050Snp
1632281212Snp		m = fr_offset == 0 ?
1633265425Snp		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
1634265425Snp		if (m == NULL)
1635265425Snp			return (NULL);
1636265425Snp		fl->mbuf_allocated++;
1637267694Snp		if (clm != NULL) {
1638281212Snp			m_extaddref(m, payload, blen, &clm->refcount,
1639265425Snp			    rxb_free, swz->zone, sd->cl);
1640269356Snp			if (sd->nmbuf++ == 0)
1641269356Snp				counter_u64_add(extfree_refs, 1);
1642267694Snp		} else {
1643265425Snp			m_cljset(m, sd->cl, swz->type);
1644265425Snp			sd->cl = NULL;	/* consumed, not a recycle candidate */
1645255050Snp		}
1646255050Snp	}
1647281212Snp	if (fr_offset == 0)
1648281212Snp		m->m_pkthdr.len = remaining;
1649265425Snp	m->m_len = len;
1650255050Snp
1651265425Snp	if (fl->flags & FL_BUF_PACKING) {
1652281212Snp		fl->rx_offset += blen;
1653265425Snp		MPASS(fl->rx_offset <= hwb->size);
1654265425Snp		if (fl->rx_offset < hwb->size)
1655265425Snp			return (m);	/* without advancing the cidx */
1656265425Snp	}
1657255050Snp
1658270297Snp	if (__predict_false(++fl->cidx % 8 == 0)) {
1659270297Snp		uint16_t cidx = fl->cidx / 8;
1660270297Snp
1661270297Snp		if (__predict_false(cidx == fl->sidx))
1662270297Snp			fl->cidx = cidx = 0;
1663270297Snp		fl->hw_cidx = cidx;
1664270297Snp	}
1665265425Snp	fl->rx_offset = 0;
1666255050Snp
1667265425Snp	return (m);
1668255050Snp}
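
/*
 * Editor's note on the consumer-index bookkeeping above: fl->cidx counts
 * individual buffers, while fl->hw_cidx, the value the hardware cares about,
 * advances once per group of 8 fl descriptors.  E.g. after 19 buffers have
 * been consumed, cidx = 19 but hw_cidx = 2; both wrap to 0 together once
 * cidx reaches fl->sidx * 8 buffers.
 */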
1669255050Snp
1670255050Snpstatic struct mbuf *
1671270297Snpget_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf)
1672255050Snp{
1673265425Snp	struct mbuf *m0, *m, **pnext;
1674281212Snp	u_int remaining;
1675281212Snp	const u_int total = G_RSPD_LEN(len_newbuf);
1676255050Snp
1677270297Snp	if (__predict_false(fl->flags & FL_BUF_RESUME)) {
1678266965Snp		M_ASSERTPKTHDR(fl->m0);
1679281212Snp		MPASS(fl->m0->m_pkthdr.len == total);
1680281212Snp		MPASS(fl->remaining < total);
1681218792Snp
1682265425Snp		m0 = fl->m0;
1683265425Snp		pnext = fl->pnext;
1684281212Snp		remaining = fl->remaining;
1685270297Snp		fl->flags &= ~FL_BUF_RESUME;
1686265425Snp		goto get_segment;
1687255050Snp	}
1688255050Snp
1689265425Snp	if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
1690265425Snp		fl->rx_offset = 0;
1691270297Snp		if (__predict_false(++fl->cidx % 8 == 0)) {
1692270297Snp			uint16_t cidx = fl->cidx / 8;
1693270297Snp
1694270297Snp			if (__predict_false(cidx == fl->sidx))
1695270297Snp				fl->cidx = cidx = 0;
1696270297Snp			fl->hw_cidx = cidx;
1697270297Snp		}
1698228561Snp	}
1699218792Snp
1700265425Snp	/*
1701265425Snp	 * Payload starts at rx_offset in the current hw buffer.  Its length is
1702265425Snp	 * 'total' and it may span multiple hw buffers.
1703265425Snp	 */
1704218792Snp
1705281212Snp	m0 = get_scatter_segment(sc, fl, 0, total);
1706266965Snp	if (m0 == NULL)
1707270297Snp		return (NULL);
1708281212Snp	remaining = total - m0->m_len;
1709265425Snp	pnext = &m0->m_next;
1710281212Snp	while (remaining > 0) {
1711265425Snpget_segment:
1712265425Snp		MPASS(fl->rx_offset == 0);
1713281212Snp		m = get_scatter_segment(sc, fl, total - remaining, remaining);
1714270297Snp		if (__predict_false(m == NULL)) {
1715265425Snp			fl->m0 = m0;
1716265425Snp			fl->pnext = pnext;
1717281212Snp			fl->remaining = remaining;
1718270297Snp			fl->flags |= FL_BUF_RESUME;
1719270297Snp			return (NULL);
1720218792Snp		}
1721265425Snp		*pnext = m;
1722265425Snp		pnext = &m->m_next;
1723281212Snp		remaining -= m->m_len;
1724265425Snp	}
1725265425Snp	*pnext = NULL;
1726270297Snp
1727228561Snp	return (m0);
1728228561Snp}
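
/*
 * Editor's note: a compiled-out sketch of the calling convention for
 * get_fl_payload, mirroring the process_iql path in service_iq above (this
 * is not additional driver code).  A NULL return means the freelist ran dry
 * mid-frame: the partial chain is parked on the fl under FL_BUF_RESUME and
 * the very same call is simply repeated once buffers have been refilled.
 */
#if 0
	m0 = get_fl_payload(sc, fl, lq);
	if (m0 == NULL) {
		FL_LOCK(fl);
		refill_fl(sc, fl, 32);
		FL_UNLOCK(fl);
		/* back off; call again later with the same lq */
	}
#endif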
1729218792Snp
1730228561Snpstatic int
1731228561Snpt4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
1732228561Snp{
1733237463Snp	struct sge_rxq *rxq = iq_to_rxq(iq);
1734228561Snp	struct ifnet *ifp = rxq->ifp;
1735228561Snp	const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
1736237819Snp#if defined(INET) || defined(INET6)
1737228561Snp	struct lro_ctrl *lro = &rxq->lro;
1738228561Snp#endif
1739219290Snp
1740228561Snp	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
1741228561Snp	    rss->opcode));
1742219290Snp
1743239258Snp	m0->m_pkthdr.len -= fl_pktshift;
1744239258Snp	m0->m_len -= fl_pktshift;
1745239258Snp	m0->m_data += fl_pktshift;
1746219290Snp
1747228561Snp	m0->m_pkthdr.rcvif = ifp;
1748228561Snp	m0->m_flags |= M_FLOWID;
1749259142Snp	m0->m_pkthdr.flowid = be32toh(rss->hash_val);
1750219290Snp
1751237799Snp	if (cpl->csum_calc && !cpl->err_vec) {
1752237799Snp		if (ifp->if_capenable & IFCAP_RXCSUM &&
1753237799Snp		    cpl->l2info & htobe32(F_RXF_IP)) {
1754237831Snp			m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
1755237799Snp			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1756237799Snp			rxq->rxcsum++;
1757237799Snp		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
1758237799Snp		    cpl->l2info & htobe32(F_RXF_IP6)) {
1759237831Snp			m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
1760237799Snp			    CSUM_PSEUDO_HDR);
1761237799Snp			rxq->rxcsum++;
1762237799Snp		}
1763237799Snp
1764237799Snp		if (__predict_false(cpl->ip_frag))
1765228561Snp			m0->m_pkthdr.csum_data = be16toh(cpl->csum);
1766228561Snp		else
1767228561Snp			m0->m_pkthdr.csum_data = 0xffff;
1768228561Snp	}
1769219290Snp
1770228561Snp	if (cpl->vlan_ex) {
1771228561Snp		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
1772228561Snp		m0->m_flags |= M_VLANTAG;
1773228561Snp		rxq->vlan_extraction++;
1774228561Snp	}
1775219290Snp
1776237819Snp#if defined(INET) || defined(INET6)
1777228561Snp	if (cpl->l2info & htobe32(F_RXF_LRO) &&
1778228561Snp	    iq->flags & IQ_LRO_ENABLED &&
1779228561Snp	    tcp_lro_rx(lro, m0, 0) == 0) {
1780228561Snp		/* queued for LRO */
1781228561Snp	} else
1782218792Snp#endif
1783228561Snp	ifp->if_input(ifp, m0);
1784218792Snp
1785228561Snp	return (0);
1786228561Snp}
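
/*
 * Editor's note: a compiled-out sketch of how the stack consumes the
 * checksum flags t4_eth_rx sets above.  This is the generic mbuf(9)
 * convention rather than anything cxgbe-specific: with CSUM_PSEUDO_HDR set,
 * a csum_data of 0xffff means the hardware verified the L4 checksum; for IP
 * fragments csum_data instead carries the raw partial sum for the stack to
 * finish.
 */
#if 0
static int
rx_l4_csum_verified(struct mbuf *m)
{
	const int f = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

	return ((m->m_pkthdr.csum_flags & f) == f &&
	    m->m_pkthdr.csum_data == 0xffff);
}
#endif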
1787218792Snp
1788228561Snp/*
1789228561Snp * Doesn't fail.  Holds on to work requests it can't send right away.
1790228561Snp */
1791237263Snpvoid
1792237263Snpt4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
1793228561Snp{
1794228561Snp	struct sge_eq *eq = &wrq->eq;
1795228561Snp	int can_reclaim;
1796228561Snp	caddr_t dst;
1797228561Snp
1798228561Snp	TXQ_LOCK_ASSERT_OWNED(wrq);
1799237263Snp#ifdef TCP_OFFLOAD
1800228561Snp	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
1801228561Snp	    (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1802228561Snp	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1803237263Snp#else
1804237263Snp	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1805237263Snp	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1806237263Snp#endif
1807228561Snp
1808237263Snp	if (__predict_true(wr != NULL))
1809237263Snp		STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
1810218792Snp
1811228561Snp	can_reclaim = reclaimable(eq);
1812228561Snp	if (__predict_false(eq->flags & EQ_STALLED)) {
1813267764Snp		if (eq->avail + can_reclaim < tx_resume_threshold(eq))
1814237263Snp			return;
1815228561Snp		eq->flags &= ~EQ_STALLED;
1816228561Snp		eq->unstalled++;
1817218792Snp	}
1818228561Snp	eq->cidx += can_reclaim;
1819228561Snp	eq->avail += can_reclaim;
1820228561Snp	if (__predict_false(eq->cidx >= eq->cap))
1821228561Snp		eq->cidx -= eq->cap;
1822228561Snp
1823237263Snp	while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
1824228561Snp		int ndesc;
1825228561Snp
1826237263Snp		if (__predict_false(wr->wr_len < 0 ||
1827237263Snp		    wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {
1828228561Snp
1829228561Snp#ifdef INVARIANTS
1830237263Snp			panic("%s: work request with length %d", __func__,
1831237263Snp			    wr->wr_len);
1832237263Snp#endif
1833237263Snp#ifdef KDB
1834237263Snp			kdb_backtrace();
1835237263Snp#endif
1836237263Snp			log(LOG_ERR, "%s: %s work request with length %d",
1837237263Snp			    device_get_nameunit(sc->dev), __func__, wr->wr_len);
1838237263Snp			STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1839237263Snp			free_wrqe(wr);
1840228561Snp			continue;
1841228561Snp		}
1842218792Snp
1843237263Snp		ndesc = howmany(wr->wr_len, EQ_ESIZE);
1844228561Snp		if (eq->avail < ndesc) {
1845228561Snp			wrq->no_desc++;
1846228561Snp			break;
1847228561Snp		}
1848218792Snp
1849228561Snp		dst = (void *)&eq->desc[eq->pidx];
1850237263Snp		copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);
1851218792Snp
1852228561Snp		eq->pidx += ndesc;
1853228561Snp		eq->avail -= ndesc;
1854228561Snp		if (__predict_false(eq->pidx >= eq->cap))
1855228561Snp			eq->pidx -= eq->cap;
1856228561Snp
1857228561Snp		eq->pending += ndesc;
1858252715Snp		if (eq->pending >= 8)
1859228561Snp			ring_eq_db(sc, eq);
1860228561Snp
1861228561Snp		wrq->tx_wrs++;
1862237263Snp		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1863237263Snp		free_wrqe(wr);
1864228561Snp
1865228561Snp		if (eq->avail < 8) {
1866228561Snp			can_reclaim = reclaimable(eq);
1867228561Snp			eq->cidx += can_reclaim;
1868228561Snp			eq->avail += can_reclaim;
1869228561Snp			if (__predict_false(eq->cidx >= eq->cap))
1870228561Snp				eq->cidx -= eq->cap;
1871228561Snp		}
1872228561Snp	}
1873228561Snp
1874228561Snp	if (eq->pending)
1875228561Snp		ring_eq_db(sc, eq);
1876228561Snp
1877237263Snp	if (wr != NULL) {
1878228561Snp		eq->flags |= EQ_STALLED;
1879228561Snp		if (callout_pending(&eq->tx_callout) == 0)
1880228561Snp			callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1881228561Snp	}
1882220873Snp}
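
/*
 * Editor's note: a compiled-out sketch of how work requests reach the
 * function above.  alloc_wrqe/wrtod/t4_wrq_tx are the driver's own helpers
 * (see adapter.h); the 16-byte all-zero "work request" is obviously made
 * up.  wr_len must be a multiple of 8 and at most SGE_MAX_WR_LEN, per the
 * sanity check above.
 */
#if 0
	struct wrqe *wr;

	wr = alloc_wrqe(16, &sc->sge.mgmtq);
	if (wr == NULL)
		return (ENOMEM);
	bzero(wrtod(wr), 16);		/* build the real WR here instead */
	t4_wrq_tx(sc, wr);		/* takes the wrq lock, queues or
					 * writes the WR; the driver frees it
					 * with free_wrqe afterwards */
#endif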
1883220873Snp
1884218792Snp/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
1885218792Snp#define TXPKTS_PKT_HDR ((\
1886218792Snp    sizeof(struct ulp_txpkt) + \
1887218792Snp    sizeof(struct ulptx_idata) + \
1888218792Snp    sizeof(struct cpl_tx_pkt_core) \
1889218792Snp    ) / 8)
1890218792Snp
1891218792Snp/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
1892218792Snp#define TXPKTS_WR_HDR (\
1893218792Snp    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
1894218792Snp    TXPKTS_PKT_HDR)
1895218792Snp
1896218792Snp/* Header of a tx WR, before SGL of first packet (in flits) */
1897218792Snp#define TXPKT_WR_HDR ((\
1898218792Snp    sizeof(struct fw_eth_tx_pkt_wr) + \
1899218792Snp    sizeof(struct cpl_tx_pkt_core) \
1900218792Snp    ) / 8 )
1901218792Snp
1902218792Snp/* Header of a tx LSO WR, before SGL of first packet (in flits) */
1903218792Snp#define TXPKT_LSO_WR_HDR ((\
1904218792Snp    sizeof(struct fw_eth_tx_pkt_wr) + \
1905237436Snp    sizeof(struct cpl_tx_pkt_lso_core) + \
1906218792Snp    sizeof(struct cpl_tx_pkt_core) \
1907218792Snp    ) / 8 )
1908218792Snp
1909218792Snpint
1910218792Snpt4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
1911218792Snp{
1912218792Snp	struct port_info *pi = (void *)ifp->if_softc;
1913218792Snp	struct adapter *sc = pi->adapter;
1914218792Snp	struct sge_eq *eq = &txq->eq;
1915220873Snp	struct buf_ring *br = txq->br;
1916218792Snp	struct mbuf *next;
1917219292Snp	int rc, coalescing, can_reclaim;
1918218792Snp	struct txpkts txpkts;
1919218792Snp	struct sgl sgl;
1920218792Snp
1921218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
1922218792Snp	KASSERT(m, ("%s: called with nothing to do.", __func__));
1923228561Snp	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
1924228561Snp	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1925218792Snp
1926219292Snp	prefetch(&eq->desc[eq->pidx]);
1927220873Snp	prefetch(&txq->sdesc[eq->pidx]);
1928219292Snp
1929218792Snp	txpkts.npkt = 0;/* indicates there's nothing in txpkts */
1930218792Snp	coalescing = 0;
1931218792Snp
1932228561Snp	can_reclaim = reclaimable(eq);
1933228561Snp	if (__predict_false(eq->flags & EQ_STALLED)) {
1934267764Snp		if (eq->avail + can_reclaim < tx_resume_threshold(eq)) {
1935228561Snp			txq->m = m;
1936228561Snp			return (0);
1937228561Snp		}
1938228561Snp		eq->flags &= ~EQ_STALLED;
1939228561Snp		eq->unstalled++;
1940228561Snp	}
1941218792Snp
1942228561Snp	if (__predict_false(eq->flags & EQ_DOOMED)) {
1943228561Snp		m_freem(m);
1944228561Snp		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
1945228561Snp			m_freem(m);
1946228561Snp		return (ENETDOWN);
1947228561Snp	}
1948228561Snp
1949228561Snp	if (eq->avail < 8 && can_reclaim)
1950228561Snp		reclaim_tx_descs(txq, can_reclaim, 32);
1951228561Snp
1952218792Snp	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
1953218792Snp
1954218792Snp		if (eq->avail < 8)
1955218792Snp			break;
1956218792Snp
1957218792Snp		next = m->m_nextpkt;
1958218792Snp		m->m_nextpkt = NULL;
1959218792Snp
1960218792Snp		if (next || buf_ring_peek(br))
1961218792Snp			coalescing = 1;
1962218792Snp
1963218792Snp		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
1964218792Snp		if (rc != 0) {
1965218792Snp			if (rc == ENOMEM) {
1966218792Snp
1967218792Snp				/* Short of resources, suspend tx */
1968218792Snp
1969218792Snp				m->m_nextpkt = next;
1970218792Snp				break;
1971218792Snp			}
1972218792Snp
1973218792Snp			/*
1974218792Snp			 * Unrecoverable error for this packet, throw it away
1975218792Snp			 * and move on to the next.  get_pkt_sgl may already
1976218792Snp			 * have freed m (it will be NULL in that case and the
1977218792Snp			 * m_freem here is still safe).
1978218792Snp			 */
1979218792Snp
1980218792Snp			m_freem(m);
1981218792Snp			continue;
1982218792Snp		}
1983218792Snp
1984218792Snp		if (coalescing &&
1985218792Snp		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
1986218792Snp
1987218792Snp			/* Successfully absorbed into txpkts */
1988218792Snp
1989218792Snp			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
1990218792Snp			goto doorbell;
1991218792Snp		}
1992218792Snp
1993218792Snp		/*
1994218792Snp		 * We weren't coalescing to begin with, or current frame could
1995218792Snp		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
1996218792Snp		 * given to it can't be coalesced).  Either way there should be
1997218792Snp		 * nothing in txpkts.
1998218792Snp		 */
1999218792Snp		KASSERT(txpkts.npkt == 0,
2000218792Snp		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
2001218792Snp
2002218792Snp		/* We're sending out individual packets now */
2003218792Snp		coalescing = 0;
2004218792Snp
2005218792Snp		if (eq->avail < 8)
2006220873Snp			reclaim_tx_descs(txq, 0, 8);
2007218792Snp		rc = write_txpkt_wr(pi, txq, m, &sgl);
2008218792Snp		if (rc != 0) {
2009218792Snp
2010218792Snp			/* Short of hardware descriptors, suspend tx */
2011218792Snp
2012218792Snp			/*
2013218792Snp			 * This is an unlikely but expensive failure.  We've
2014218792Snp			 * done all the hard work (DMA mappings etc.) and now we
2015218792Snp			 * can't send out the packet.  What's worse, we have to
2016218792Snp			 * spend even more time freeing up everything in sgl.
2017218792Snp			 */
2018218792Snp			txq->no_desc++;
2019218792Snp			free_pkt_sgl(txq, &sgl);
2020218792Snp
2021218792Snp			m->m_nextpkt = next;
2022218792Snp			break;
2023218792Snp		}
2024218792Snp
2025218792Snp		ETHER_BPF_MTAP(ifp, m);
2026218792Snp		if (sgl.nsegs == 0)
2027218792Snp			m_freem(m);
2028218792Snpdoorbell:
2029252715Snp		if (eq->pending >= 8)
2030252715Snp			ring_eq_db(sc, eq);
2031219292Snp
2032219292Snp		can_reclaim = reclaimable(eq);
2033219292Snp		if (can_reclaim >= 32)
2034228561Snp			reclaim_tx_descs(txq, can_reclaim, 64);
2035218792Snp	}
2036218792Snp
2037218792Snp	if (txpkts.npkt > 0)
2038218792Snp		write_txpkts_wr(txq, &txpkts);
2039218792Snp
2040218792Snp	/*
2041218792Snp	 * m not NULL means there was an error but we haven't thrown it away.
2042218792Snp	 * This can happen when we're short of tx descriptors (no_desc) or maybe
2043218792Snp	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
2044218792Snp	 * will get things going again.
2045218792Snp	 */
2046228561Snp	if (m && !(eq->flags & EQ_CRFLUSHED)) {
2047220873Snp		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
2048220873Snp
2049228561Snp		/*
2050228561Snp		 * If EQ_CRFLUSHED is not set then we know we have at least one
2051228561Snp		 * available descriptor because any WR that reduces eq->avail to
2052228561Snp		 * 0 also sets EQ_CRFLUSHED.
2053228561Snp		 */
2054228561Snp		KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));
2055228561Snp
2056220873Snp		txsd->desc_used = 1;
2057220873Snp		txsd->credits = 0;
2058218792Snp		write_eqflush_wr(eq);
2059220873Snp	}
2060218792Snp	txq->m = m;
2061218792Snp
2062218792Snp	if (eq->pending)
2063220873Snp		ring_eq_db(sc, eq);
2064218792Snp
2065228561Snp	reclaim_tx_descs(txq, 0, 128);
2066218792Snp
2067228561Snp	if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
2068228561Snp		callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
2069228561Snp
2070218792Snp	return (0);
2071218792Snp}
2072218792Snp
2073218792Snpvoid
2074218792Snpt4_update_fl_bufsize(struct ifnet *ifp)
2075218792Snp{
2076218792Snp	struct port_info *pi = ifp->if_softc;
2077255050Snp	struct adapter *sc = pi->adapter;
2078218792Snp	struct sge_rxq *rxq;
2079252728Snp#ifdef TCP_OFFLOAD
2080252728Snp	struct sge_ofld_rxq *ofld_rxq;
2081252728Snp#endif
2082218792Snp	struct sge_fl *fl;
2083265425Snp	int i, maxp, mtu = ifp->if_mtu;
2084218792Snp
2085265425Snp	maxp = mtu_to_max_payload(sc, mtu, 0);
2086218792Snp	for_each_rxq(pi, i, rxq) {
2087218792Snp		fl = &rxq->fl;
2088218792Snp
2089218792Snp		FL_LOCK(fl);
2090265425Snp		find_best_refill_source(sc, fl, maxp);
2091218792Snp		FL_UNLOCK(fl);
2092218792Snp	}
2093252728Snp#ifdef TCP_OFFLOAD
2094265425Snp	maxp = mtu_to_max_payload(sc, mtu, 1);
2095252728Snp	for_each_ofld_rxq(pi, i, ofld_rxq) {
2096252728Snp		fl = &ofld_rxq->fl;
2097252728Snp
2098252728Snp		FL_LOCK(fl);
2099265425Snp		find_best_refill_source(sc, fl, maxp);
2100252728Snp		FL_UNLOCK(fl);
2101252728Snp	}
2102252728Snp#endif
2103218792Snp}
2104218792Snp
2105228561Snpint
2106228561Snpcan_resume_tx(struct sge_eq *eq)
2107228561Snp{
2108267764Snp
2109267764Snp	return (eq->avail + reclaimable(eq) >= tx_resume_threshold(eq));
2110228561Snp}
2111228561Snp
2112218792Snpstatic inline void
2113218792Snpinit_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
2114270297Snp    int qsize)
2115218792Snp{
2116270297Snp
2117218792Snp	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
2118218792Snp	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
2119218792Snp	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
2120218792Snp	    ("%s: bad pktc_idx %d", __func__, pktc_idx));
2121218792Snp
2122218792Snp	iq->flags = 0;
2123218792Snp	iq->adapter = sc;
2124234833Snp	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
2125234833Snp	iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
2126234833Snp	if (pktc_idx >= 0) {
2127234833Snp		iq->intr_params |= F_QINTR_CNT_EN;
2128234833Snp		iq->intr_pktc_idx = pktc_idx;
2129234833Snp	}
2130248925Snp	iq->qsize = roundup2(qsize, 16);	/* See FW_IQ_CMD/iqsize */
2131270297Snp	iq->sidx = iq->qsize - spg_len / IQ_ESIZE;
2132218792Snp}
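
/*
 * Editor's note, with illustrative numbers: IQ_ESIZE is 64 bytes here, so
 * with a 64-byte status page (spg_len) the status page costs exactly one
 * entry and sidx = qsize - 1; a chip configured for a 128-byte status page
 * gives up two entries instead.
 */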
2133218792Snp
2134218792Snpstatic inline void
2135281212Snpinit_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name)
2136218792Snp{
2137255050Snp
2138218792Snp	fl->qsize = qsize;
2139270297Snp	fl->sidx = qsize - spg_len / EQ_ESIZE;
2140218792Snp	strlcpy(fl->lockname, name, sizeof(fl->lockname));
2141281212Snp	if (sc->flags & BUF_PACKING_OK &&
2142281212Snp	    ((!is_t4(sc) && buffer_packing) ||	/* T5+: enabled unless 0 */
2143281212Snp	    (is_t4(sc) && buffer_packing == 1)))	/* T4: disabled unless 1 */
2144255050Snp		fl->flags |= FL_BUF_PACKING;
2145265425Snp	find_best_refill_source(sc, fl, maxp);
2146265425Snp	find_safe_refill_source(sc, fl);
2147218792Snp}
2148218792Snp
2149218792Snpstatic inline void
2150228561Snpinit_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
2151228561Snp    uint16_t iqid, char *name)
2152218792Snp{
2153228561Snp	KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
2154228561Snp	KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));
2155228561Snp
2156228561Snp	eq->flags = eqtype & EQ_TYPEMASK;
2157228561Snp	eq->tx_chan = tx_chan;
2158228561Snp	eq->iqid = iqid;
2159220873Snp	eq->qsize = qsize;
2160220873Snp	strlcpy(eq->lockname, name, sizeof(eq->lockname));
2161228561Snp
2162228561Snp	TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
2163228561Snp	callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
2164218792Snp}
2165218792Snp
2166218792Snpstatic int
2167218792Snpalloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
2168218792Snp    bus_dmamap_t *map, bus_addr_t *pa, void **va)
2169218792Snp{
2170218792Snp	int rc;
2171218792Snp
2172218792Snp	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
2173218792Snp	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
2174218792Snp	if (rc != 0) {
2175218792Snp		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
2176218792Snp		goto done;
2177218792Snp	}
2178218792Snp
2179218792Snp	rc = bus_dmamem_alloc(*tag, va,
2180218792Snp	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
2181218792Snp	if (rc != 0) {
2182218792Snp		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
2183218792Snp		goto done;
2184218792Snp	}
2185218792Snp
2186218792Snp	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
2187218792Snp	if (rc != 0) {
2188218792Snp		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
2189218792Snp		goto done;
2190218792Snp	}
2191218792Snpdone:
2192218792Snp	if (rc)
2193218792Snp		free_ring(sc, *tag, *map, *pa, *va);
2194218792Snp
2195218792Snp	return (rc);
2196218792Snp}
2197218792Snp
2198218792Snpstatic int
2199218792Snpfree_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
2200218792Snp    bus_addr_t pa, void *va)
2201218792Snp{
2202218792Snp	if (pa)
2203218792Snp		bus_dmamap_unload(tag, map);
2204218792Snp	if (va)
2205218792Snp		bus_dmamem_free(tag, va, map);
2206218792Snp	if (tag)
2207218792Snp		bus_dma_tag_destroy(tag);
2208218792Snp
2209218792Snp	return (0);
2210218792Snp}
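
/*
 * Editor's note: a compiled-out sketch of the alloc_ring/free_ring pairing,
 * for a hypothetical queue "q".  alloc_ring hands back a single physically
 * contiguous, 512-byte aligned, zeroed DMA buffer plus its bus address, and
 * cleans up after itself on failure; every successful alloc_ring in this
 * file is eventually matched by a free_ring.
 */
#if 0
	rc = alloc_ring(sc, q->qsize * EQ_ESIZE, &q->desc_tag, &q->desc_map,
	    &q->ba, (void **)&q->desc);
	if (rc != 0)
		return (rc);
	/* ... program q->ba into the chip, use q->desc ... */
	free_ring(sc, q->desc_tag, q->desc_map, q->ba, q->desc);
#endif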
2211218792Snp
2212218792Snp/*
2213218792Snp * Allocates the ring for an ingress queue and an optional freelist.  If the
2214218792Snp * freelist is specified it will be allocated and then associated with the
2215218792Snp * ingress queue.
2216218792Snp *
2217218792Snp * Returns errno on failure.  Resources allocated up to the point of failure
2218218792Snp * are not freed here; the caller is responsible for that cleanup.
2219218792Snp *
2220228561Snp * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
2221218792Snp * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
2222228561Snp * the abs_id of the ingress queue to which its interrupts should be forwarded.
2223218792Snp */
2224218792Snpstatic int
2225218792Snpalloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
2226222085Snp    int intr_idx, int cong)
2227218792Snp{
2228218792Snp	int rc, i, cntxt_id;
2229218792Snp	size_t len;
2230218792Snp	struct fw_iq_cmd c;
2231218792Snp	struct adapter *sc = iq->adapter;
2232218792Snp	__be32 v = 0;
2233218792Snp
2234270297Snp	len = iq->qsize * IQ_ESIZE;
2235218792Snp	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
2236218792Snp	    (void **)&iq->desc);
2237218792Snp	if (rc != 0)
2238218792Snp		return (rc);
2239218792Snp
2240218792Snp	bzero(&c, sizeof(c));
2241218792Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
2242218792Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
2243218792Snp	    V_FW_IQ_CMD_VFN(0));
2244218792Snp
2245218792Snp	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
2246218792Snp	    FW_LEN16(c));
2247218792Snp
2248218792Snp	/* Special handling for firmware event queue */
2249218792Snp	if (iq == &sc->sge.fwq)
2250218792Snp		v |= F_FW_IQ_CMD_IQASYNCH;
2251218792Snp
2252228561Snp	if (iq->flags & IQ_INTR) {
2253218792Snp		KASSERT(intr_idx < sc->intr_count,
2254218792Snp		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
2255228561Snp	} else
2256228561Snp		v |= F_FW_IQ_CMD_IQANDST;
2257228561Snp	v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
2258218792Snp
2259218792Snp	c.type_to_iqandstindex = htobe32(v |
2260218792Snp	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
2261218792Snp	    V_FW_IQ_CMD_VIID(pi->viid) |
2262218792Snp	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
2263218792Snp	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
2264218792Snp	    F_FW_IQ_CMD_IQGTSMODE |
2265218792Snp	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
2266270297Snp	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
2267218792Snp	c.iqsize = htobe16(iq->qsize);
2268218792Snp	c.iqaddr = htobe64(iq->ba);
2269222085Snp	if (cong >= 0)
2270222085Snp		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
2271218792Snp
2272218792Snp	if (fl) {
2273218792Snp		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
2274218792Snp
2275270297Snp		len = fl->qsize * EQ_ESIZE;
2276218792Snp		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
2277218792Snp		    &fl->ba, (void **)&fl->desc);
2278218792Snp		if (rc)
2279218792Snp			return (rc);
2280218792Snp
2281218792Snp		/* Allocate space for one software descriptor per buffer. */
2282218792Snp		rc = alloc_fl_sdesc(fl);
2283218792Snp		if (rc != 0) {
2284218792Snp			device_printf(sc->dev,
2285218792Snp			    "failed to setup fl software descriptors: %d\n",
2286218792Snp			    rc);
2287218792Snp			return (rc);
2288218792Snp		}
2289218792Snp
2290270297Snp		if (fl->flags & FL_BUF_PACKING) {
2291270297Snp			fl->lowat = roundup2(sc->sge.fl_starve_threshold2, 8);
2292281212Snp			fl->buf_boundary = sc->sge.pack_boundary;
2293270297Snp		} else {
2294270297Snp			fl->lowat = roundup2(sc->sge.fl_starve_threshold, 8);
2295281212Snp			fl->buf_boundary = 16;
2296270297Snp		}
2297281212Snp		if (fl_pad && fl->buf_boundary < sc->sge.pad_boundary)
2298281212Snp			fl->buf_boundary = sc->sge.pad_boundary;
2299270297Snp
2300228491Snp		c.iqns_to_fl0congen |=
2301222085Snp		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
2302222085Snp			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
2303255050Snp			(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
2304255050Snp			(fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
2305255050Snp			    0));
2306222085Snp		if (cong >= 0) {
2307222085Snp			c.iqns_to_fl0congen |=
2308222085Snp				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
2309222085Snp				    F_FW_IQ_CMD_FL0CONGCIF |
2310222085Snp				    F_FW_IQ_CMD_FL0CONGEN);
2311222085Snp		}
2312218792Snp		c.fl0dcaen_to_fl0cidxfthresh =
2313218792Snp		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
2314218792Snp			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
2315218792Snp		c.fl0size = htobe16(fl->qsize);
2316218792Snp		c.fl0addr = htobe64(fl->ba);
2317218792Snp	}
2318218792Snp
2319218792Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2320218792Snp	if (rc != 0) {
2321218792Snp		device_printf(sc->dev,
2322218792Snp		    "failed to create ingress queue: %d\n", rc);
2323218792Snp		return (rc);
2324218792Snp	}
2325218792Snp
2326218792Snp	iq->cidx = 0;
2327270297Snp	iq->gen = F_RSPD_GEN;
2328218792Snp	iq->intr_next = iq->intr_params;
2329218792Snp	iq->cntxt_id = be16toh(c.iqid);
2330218792Snp	iq->abs_id = be16toh(c.physiqid);
2331228561Snp	iq->flags |= IQ_ALLOCATED;
2332218792Snp
2333218792Snp	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
2334228561Snp	if (cntxt_id >= sc->sge.niq) {
2335228561Snp		panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
2336228561Snp		    cntxt_id, sc->sge.niq - 1);
2337228561Snp	}
2338218792Snp	sc->sge.iqmap[cntxt_id] = iq;
2339218792Snp
2340218792Snp	if (fl) {
2341270297Snp		u_int qid;
2342270297Snp
2343270297Snp		iq->flags |= IQ_HAS_FL;
2344218792Snp		fl->cntxt_id = be16toh(c.fl0id);
2345218792Snp		fl->pidx = fl->cidx = 0;
2346218792Snp
2347219883Snp		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
2348228561Snp		if (cntxt_id >= sc->sge.neq) {
2349228561Snp			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
2350228561Snp			    __func__, cntxt_id, sc->sge.neq - 1);
2351228561Snp		}
2352218792Snp		sc->sge.eqmap[cntxt_id] = (void *)fl;
2353218792Snp
2354270297Snp		qid = fl->cntxt_id;
2355270297Snp		if (isset(&sc->doorbells, DOORBELL_UDB)) {
2356270297Snp			uint32_t s_qpp = sc->sge.eq_s_qpp;
2357270297Snp			uint32_t mask = (1 << s_qpp) - 1;
2358270297Snp			volatile uint8_t *udb;
2359270297Snp
2360270297Snp			udb = sc->udbs_base + UDBS_DB_OFFSET;
2361270297Snp			udb += (qid >> s_qpp) << PAGE_SHIFT;
2362270297Snp			qid &= mask;
2363270297Snp			if (qid < PAGE_SIZE / UDBS_SEG_SIZE) {
2364270297Snp				udb += qid << UDBS_SEG_SHIFT;
2365270297Snp				qid = 0;
2366270297Snp			}
2367270297Snp			fl->udb = (volatile void *)udb;
2368270297Snp		}
2369270297Snp		fl->dbval = F_DBPRIO | V_QID(qid);
2370270297Snp		if (is_t5(sc))
2371270297Snp			fl->dbval |= F_DBTYPE;
2372270297Snp
2373218792Snp		FL_LOCK(fl);
2374228561Snp		/* Enough to make sure the SGE doesn't think it's starved */
2375228561Snp		refill_fl(sc, fl, fl->lowat);
2376218792Snp		FL_UNLOCK(fl);
2377218792Snp	}
2378218792Snp
2379253873Snp	if (is_t5(sc) && cong >= 0) {
2380253873Snp		uint32_t param, val;
2381253873Snp
2382253873Snp		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
2383253873Snp		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
2384253873Snp		    V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
2385253889Snp		if (cong == 0)
2386253889Snp			val = 1 << 19;
2387253889Snp		else {
2388253889Snp			val = 2 << 19;
2389253889Snp			for (i = 0; i < 4; i++) {
2390253889Snp				if (cong & (1 << i))
2391253889Snp					val |= 1 << (i << 2);
2392253889Snp			}
2393253889Snp		}
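		/*
		 * Illustrative example of the encoding above: cong is a
		 * bitmap of congestion channels, and each set bit i
		 * contributes 1 << (i * 4) to val.  E.g. cong = 0x5
		 * (channels 0 and 2) yields val = (2 << 19) | 0x101.
		 */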
2394253889Snp
2395253873Snp		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2396253873Snp		if (rc != 0) {
2397253873Snp			/* report error but carry on */
2398253873Snp			device_printf(sc->dev,
2399253873Snp			    "failed to set congestion manager context for "
2400253873Snp			    "ingress queue %d: %d\n", iq->cntxt_id, rc);
2401253873Snp		}
2402253873Snp	}
2403253873Snp
2404218792Snp	/* Enable IQ interrupts */
2405228561Snp	atomic_store_rel_int(&iq->state, IQS_IDLE);
2406218792Snp	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
2407218792Snp	    V_INGRESSQID(iq->cntxt_id));
2408218792Snp
2409218792Snp	return (0);
2410218792Snp}
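
/*
 * Editor's note on the user-doorbell math in alloc_iq_fl, with illustrative
 * numbers: with 4 KB pages, 128-byte doorbell segments and s_qpp = 3 (8 qids
 * per page), qid 21 maps to page 2 of the doorbell BAR and segment 5 within
 * it; because segment 5 fits inside the page, the qid is folded into fl->udb
 * and the doorbell is written with V_QID(0).  Only when the per-page qid's
 * segment does not fit does the real qid travel in fl->dbval instead.
 */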
2411218792Snp
2412218792Snpstatic int
2413218792Snpfree_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
2414218792Snp{
2415265425Snp	int rc;
2416218792Snp	struct adapter *sc = iq->adapter;
2417218792Snp	device_t dev;
2418218792Snp
2419218792Snp	if (sc == NULL)
2420218792Snp		return (0);	/* nothing to do */
2421218792Snp
2422218792Snp	dev = pi ? pi->dev : sc->dev;
2423218792Snp
2424218792Snp	if (iq->flags & IQ_ALLOCATED) {
2425218792Snp		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
2426218792Snp		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
2427218792Snp		    fl ? fl->cntxt_id : 0xffff, 0xffff);
2428218792Snp		if (rc != 0) {
2429218792Snp			device_printf(dev,
2430218792Snp			    "failed to free queue %p: %d\n", iq, rc);
2431218792Snp			return (rc);
2432218792Snp		}
2433218792Snp		iq->flags &= ~IQ_ALLOCATED;
2434218792Snp	}
2435218792Snp
2436218792Snp	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
2437218792Snp
2438218792Snp	bzero(iq, sizeof(*iq));
2439218792Snp
2440218792Snp	if (fl) {
2441218792Snp		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
2442218792Snp		    fl->desc);
2443218792Snp
2444254727Snp		if (fl->sdesc)
2445255050Snp			free_fl_sdesc(sc, fl);
2446218792Snp
2447218792Snp		if (mtx_initialized(&fl->fl_lock))
2448218792Snp			mtx_destroy(&fl->fl_lock);
2449218792Snp
2450218792Snp		bzero(fl, sizeof(*fl));
2451218792Snp	}
2452218792Snp
2453218792Snp	return (0);
2454218792Snp}
2455218792Snp
2456265425Snpstatic void
2457265425Snpadd_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
2458265425Snp    struct sge_fl *fl)
2459265425Snp{
2460265425Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2461265425Snp
2462265425Snp	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
2463265425Snp	    "freelist");
2464265425Snp	children = SYSCTL_CHILDREN(oid);
2465265425Snp
2466265425Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
2467265425Snp	    CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
2468265425Snp	    "SGE context id of the freelist");
2469281212Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL,
2470281212Snp	    fl_pad ? 1 : 0, "padding enabled");
2471281212Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL,
2472281212Snp	    fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled");
2473265425Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
2474265425Snp	    0, "consumer index");
2475265425Snp	if (fl->flags & FL_BUF_PACKING) {
2476265425Snp		SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset",
2477265425Snp		    CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset");
2478265425Snp	}
2479265425Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx,
2480265425Snp	    0, "producer index");
2481265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated",
2482265425Snp	    CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated");
2483265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined",
2484265425Snp	    CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters");
2485265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated",
2486265425Snp	    CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated");
2487265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled",
2488265425Snp	    CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled");
2489265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled",
2490265425Snp	    CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)");
2491265425Snp}
2492265425Snp
2493218792Snpstatic int
2494228561Snpalloc_fwq(struct adapter *sc)
2495218792Snp{
2496228561Snp	int rc, intr_idx;
2497228561Snp	struct sge_iq *fwq = &sc->sge.fwq;
2498228561Snp	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
2499228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2500222510Snp
2501270297Snp	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE);
2502228561Snp	fwq->flags |= IQ_INTR;	/* always */
2503228561Snp	intr_idx = sc->intr_count > 1 ? 1 : 0;
2504228561Snp	rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
2505228561Snp	if (rc != 0) {
2506228561Snp		device_printf(sc->dev,
2507228561Snp		    "failed to create firmware event queue: %d\n", rc);
2508222510Snp		return (rc);
2509228561Snp	}
2510222510Snp
2511228561Snp	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
2512228561Snp	    NULL, "firmware event queue");
2513222510Snp	children = SYSCTL_CHILDREN(oid);
2514222510Snp
2515228561Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
2516228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
2517228561Snp	    "absolute id of the queue");
2518228561Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
2519228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
2520228561Snp	    "SGE context id of the queue");
2521222510Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
2522228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
2523222510Snp	    "consumer index");
2524222510Snp
2525228561Snp	return (0);
2526218792Snp}
2527218792Snp
2528218792Snpstatic int
2529228561Snpfree_fwq(struct adapter *sc)
2530218792Snp{
2531228561Snp	return free_iq_fl(NULL, &sc->sge.fwq, NULL);
2532218792Snp}
2533218792Snp
2534218792Snpstatic int
2535228561Snpalloc_mgmtq(struct adapter *sc)
2536222510Snp{
2537222510Snp	int rc;
2538228561Snp	struct sge_wrq *mgmtq = &sc->sge.mgmtq;
2539228561Snp	char name[16];
2540228561Snp	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
2541228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2542222510Snp
2543228561Snp	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
2544228561Snp	    NULL, "management queue");
2545228561Snp
2546228561Snp	snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
2547228561Snp	init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
2548228561Snp	    sc->sge.fwq.cntxt_id, name);
2549228561Snp	rc = alloc_wrq(sc, NULL, mgmtq, oid);
2550228561Snp	if (rc != 0) {
2551228561Snp		device_printf(sc->dev,
2552228561Snp		    "failed to create management queue: %d\n", rc);
2553222510Snp		return (rc);
2554228561Snp	}
2555222510Snp
2556228561Snp	return (0);
2557222510Snp}
2558222510Snp
2559222510Snpstatic int
2560228561Snpfree_mgmtq(struct adapter *sc)
2561222510Snp{
2562237263Snp
2563228561Snp	return free_wrq(sc, &sc->sge.mgmtq);
2564222510Snp}
2565222510Snp
2566281253Snpint
2567239258Snptnl_cong(struct port_info *pi)
2568239258Snp{
2569239258Snp
2570239258Snp	if (cong_drop == -1)
2571239258Snp		return (-1);
2572239258Snp	else if (cong_drop == 1)
2573239258Snp		return (0);
2574239258Snp	else
2575265410Snp		return (pi->rx_chan_map);
2576239258Snp}
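
/*
 * In other words: cong_drop == -1 disables the congestion context for NIC
 * rx queues entirely, cong_drop == 1 maps no channels (so the chip drops on
 * congestion instead of backpressuring), and the default backpressures the
 * port's rx channels via rx_chan_map.
 */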
2577239258Snp
2578222510Snpstatic int
2579228561Snpalloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
2580228561Snp    struct sysctl_oid *oid)
2581218792Snp{
2582218792Snp	int rc;
2583218792Snp	struct sysctl_oid_list *children;
2584218792Snp	char name[16];
2585218792Snp
2586239258Snp	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
2587218792Snp	if (rc != 0)
2588218792Snp		return (rc);
2589218792Snp
2590270297Snp	/*
2591270297Snp	 * The freelist is just barely above the starvation threshold right now,
2592270297Snp	 * so fill it up a bit more.
2593270297Snp	 */
2594222701Snp	FL_LOCK(&rxq->fl);
2595270297Snp	refill_fl(pi->adapter, &rxq->fl, 128);
2596222701Snp	FL_UNLOCK(&rxq->fl);
2597222701Snp
2598237819Snp#if defined(INET) || defined(INET6)
2599218792Snp	rc = tcp_lro_init(&rxq->lro);
2600218792Snp	if (rc != 0)
2601218792Snp		return (rc);
2602218792Snp	rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
2603218792Snp
2604218792Snp	if (pi->ifp->if_capenable & IFCAP_LRO)
2605228561Snp		rxq->iq.flags |= IQ_LRO_ENABLED;
2606218792Snp#endif
2607219289Snp	rxq->ifp = pi->ifp;
2608218792Snp
2609228561Snp	children = SYSCTL_CHILDREN(oid);
2610218792Snp
2611218792Snp	snprintf(name, sizeof(name), "%d", idx);
2612218792Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2613218792Snp	    NULL, "rx queue");
2614218792Snp	children = SYSCTL_CHILDREN(oid);
2615218792Snp
2616221911Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2617222510Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
2618221911Snp	    "absolute id of the queue");
2619222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2620222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
2621222973Snp	    "SGE context id of the queue");
2622222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2623222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
2624222973Snp	    "consumer index");
2625237819Snp#if defined(INET) || defined(INET6)
2626218792Snp	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
2627218792Snp	    &rxq->lro.lro_queued, 0, NULL);
2628218792Snp	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
2629218792Snp	    &rxq->lro.lro_flushed, 0, NULL);
2630219290Snp#endif
2631218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
2632218792Snp	    &rxq->rxcsum, "# of times hardware assisted with checksum");
2633218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
2634218792Snp	    CTLFLAG_RD, &rxq->vlan_extraction,
2635218792Snp	    "# of times hardware extracted 802.1Q tag");
2636218792Snp
2637265425Snp	add_fl_sysctls(&pi->ctx, oid, &rxq->fl);
2638222973Snp
2639218792Snp	return (rc);
2640218792Snp}
2641218792Snp
2642218792Snpstatic int
2643218792Snpfree_rxq(struct port_info *pi, struct sge_rxq *rxq)
2644218792Snp{
2645218792Snp	int rc;
2646218792Snp
2647237819Snp#if defined(INET) || defined(INET6)
2648218792Snp	if (rxq->lro.ifp) {
2649218792Snp		tcp_lro_free(&rxq->lro);
2650218792Snp		rxq->lro.ifp = NULL;
2651218792Snp	}
2652218792Snp#endif
2653218792Snp
2654218792Snp	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
2655218792Snp	if (rc == 0)
2656218792Snp		bzero(rxq, sizeof(*rxq));
2657218792Snp
2658218792Snp	return (rc);
2659218792Snp}
2660218792Snp
2661237263Snp#ifdef TCP_OFFLOAD
2662218792Snpstatic int
2663228561Snpalloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
2664228561Snp    int intr_idx, int idx, struct sysctl_oid *oid)
2665220873Snp{
2666228561Snp	int rc;
2667228561Snp	struct sysctl_oid_list *children;
2668220873Snp	char name[16];
2669220873Snp
2670228561Snp	rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
2671265410Snp	    pi->rx_chan_map);
2672228561Snp	if (rc != 0)
2673220873Snp		return (rc);
2674220873Snp
2675228561Snp	children = SYSCTL_CHILDREN(oid);
2676220873Snp
2677228561Snp	snprintf(name, sizeof(name), "%d", idx);
2678228561Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2679228561Snp	    NULL, "rx queue");
2680228561Snp	children = SYSCTL_CHILDREN(oid);
2681228561Snp
2682228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2683228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
2684228561Snp	    "I", "absolute id of the queue");
2685228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2686228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
2687228561Snp	    "I", "SGE context id of the queue");
2688228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2689228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
2690228561Snp	    "consumer index");
2691228561Snp
2692265425Snp	add_fl_sysctls(&pi->ctx, oid, &ofld_rxq->fl);
2693228561Snp
2694228561Snp	return (rc);
2695228561Snp}
2696228561Snp
2697228561Snpstatic int
2698228561Snpfree_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
2699228561Snp{
2700228561Snp	int rc;
2701228561Snp
2702228561Snp	rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
2703228561Snp	if (rc == 0)
2704228561Snp		bzero(ofld_rxq, sizeof(*ofld_rxq));
2705228561Snp
2706228561Snp	return (rc);
2707228561Snp}
2708228561Snp#endif
2709228561Snp
2710270297Snp#ifdef DEV_NETMAP
2711228561Snpstatic int
2712270297Snpalloc_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int intr_idx,
2713270297Snp    int idx, struct sysctl_oid *oid)
2714270297Snp{
2715270297Snp	int rc;
2716270297Snp	struct sysctl_oid_list *children;
2717270297Snp	struct sysctl_ctx_list *ctx;
2718270297Snp	char name[16];
2719270297Snp	size_t len;
2720270297Snp	struct adapter *sc = pi->adapter;
2721270297Snp	struct netmap_adapter *na = NA(pi->nm_ifp);
2722270297Snp
2723270297Snp	MPASS(na != NULL);
2724270297Snp
2725270297Snp	len = pi->qsize_rxq * IQ_ESIZE;
2726270297Snp	rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map,
2727270297Snp	    &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc);
2728270297Snp	if (rc != 0)
2729270297Snp		return (rc);
2730270297Snp
2731270297Snp	len = na->num_rx_desc * EQ_ESIZE + spg_len;
2732270297Snp	rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map,
2733270297Snp	    &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc);
2734270297Snp	if (rc != 0)
2735270297Snp		return (rc);
2736270297Snp
2737270297Snp	nm_rxq->pi = pi;
2738270297Snp	nm_rxq->nid = idx;
2739270297Snp	nm_rxq->iq_cidx = 0;
2740270297Snp	nm_rxq->iq_sidx = pi->qsize_rxq - spg_len / IQ_ESIZE;
2741270297Snp	nm_rxq->iq_gen = F_RSPD_GEN;
2742270297Snp	nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
2743270297Snp	nm_rxq->fl_sidx = na->num_rx_desc;
2744270297Snp	nm_rxq->intr_idx = intr_idx;
2745270297Snp
2746270297Snp	ctx = &pi->ctx;
2747270297Snp	children = SYSCTL_CHILDREN(oid);
2748270297Snp
2749270297Snp	snprintf(name, sizeof(name), "%d", idx);
2750270297Snp	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL,
2751270297Snp	    "rx queue");
2752270297Snp	children = SYSCTL_CHILDREN(oid);
2753270297Snp
2754270297Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id",
2755270297Snp	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16,
2756270297Snp	    "I", "absolute id of the queue");
2757270297Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
2758270297Snp	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16,
2759270297Snp	    "I", "SGE context id of the queue");
2760270297Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
2761270297Snp	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I",
2762270297Snp	    "consumer index");
2763270297Snp
2764270297Snp	children = SYSCTL_CHILDREN(oid);
2765270297Snp	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
2766270297Snp	    "freelist");
2767270297Snp	children = SYSCTL_CHILDREN(oid);
2768270297Snp
2769270297Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
2770270297Snp	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16,
2771270297Snp	    "I", "SGE context id of the freelist");
2772270297Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
2773270297Snp	    &nm_rxq->fl_cidx, 0, "consumer index");
2774270297Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
2775270297Snp	    &nm_rxq->fl_pidx, 0, "producer index");
2776270297Snp
2777270297Snp	return (rc);
2778270297Snp}
2779270297Snp
2780270297Snp
2781270297Snpstatic int
2782270297Snpfree_nm_rxq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
2783270297Snp{
2784270297Snp	struct adapter *sc = pi->adapter;
2785270297Snp
2786270297Snp	free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba,
2787270297Snp	    nm_rxq->iq_desc);
2788270297Snp	free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba,
2789270297Snp	    nm_rxq->fl_desc);
2790270297Snp
2791270297Snp	return (0);
2792270297Snp}
2793270297Snp
2794270297Snpstatic int
2795270297Snpalloc_nm_txq(struct port_info *pi, struct sge_nm_txq *nm_txq, int iqidx, int idx,
2796270297Snp    struct sysctl_oid *oid)
2797270297Snp{
2798270297Snp	int rc;
2799270297Snp	size_t len;
2800270297Snp	struct adapter *sc = pi->adapter;
2801270297Snp	struct netmap_adapter *na = NA(pi->nm_ifp);
2802270297Snp	char name[16];
2803270297Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2804270297Snp
2805270297Snp	len = na->num_tx_desc * EQ_ESIZE + spg_len;
2806270297Snp	rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map,
2807270297Snp	    &nm_txq->ba, (void **)&nm_txq->desc);
2808270297Snp	if (rc)
2809270297Snp		return (rc);
2810270297Snp
2811270297Snp	nm_txq->pidx = nm_txq->cidx = 0;
2812270297Snp	nm_txq->sidx = na->num_tx_desc;
2813270297Snp	nm_txq->nid = idx;
2814270297Snp	nm_txq->iqidx = iqidx;
2815270297Snp	nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2816270297Snp	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf));
2817270297Snp
2818270297Snp	snprintf(name, sizeof(name), "%d", idx);
2819270297Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2820270297Snp	    NULL, "netmap tx queue");
2821270297Snp	children = SYSCTL_CHILDREN(oid);
2822270297Snp
2823270297Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2824270297Snp	    &nm_txq->cntxt_id, 0, "SGE context id of the queue");
2825270297Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2826270297Snp	    CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I",
2827270297Snp	    "consumer index");
2828270297Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
2829270297Snp	    CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I",
2830270297Snp	    "producer index");
2831270297Snp
2832270297Snp	return (rc);
2833270297Snp}
2834270297Snp
2835270297Snpstatic int
2836270297Snpfree_nm_txq(struct port_info *pi, struct sge_nm_txq *nm_txq)
2837270297Snp{
2838270297Snp	struct adapter *sc = pi->adapter;
2839270297Snp
2840270297Snp	free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba,
2841270297Snp	    nm_txq->desc);
2842270297Snp
2843270297Snp	return (0);
2844270297Snp}
2845270297Snp#endif
2846270297Snp
2847270297Snpstatic int
2848228561Snpctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
2849228561Snp{
2850228561Snp	int rc, cntxt_id;
2851228561Snp	struct fw_eq_ctrl_cmd c;
2852228561Snp
2853220873Snp	bzero(&c, sizeof(c));
2854220873Snp
2855220873Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
2856220873Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
2857220873Snp	    V_FW_EQ_CTRL_CMD_VFN(0));
2858220873Snp	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
2859220873Snp	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
2860220873Snp	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
2861220873Snp	c.physeqid_pkd = htobe32(0);
2862220873Snp	c.fetchszm_to_iqid =
2863220873Snp	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2864228561Snp		V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
2865222510Snp		F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
2866220873Snp	c.dcaen_to_eqsize =
2867220873Snp	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2868220873Snp		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2869220873Snp		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2870220873Snp		V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
2871220873Snp	c.eqaddr = htobe64(eq->ba);
2872220873Snp
2873220873Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2874220873Snp	if (rc != 0) {
2875220873Snp		device_printf(sc->dev,
2876228561Snp		    "failed to create control queue %d: %d\n", eq->tx_chan, rc);
2877220873Snp		return (rc);
2878220873Snp	}
2879228561Snp	eq->flags |= EQ_ALLOCATED;
2880220873Snp
2881220873Snp	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
2882228561Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2883228561Snp	if (cntxt_id >= sc->sge.neq)
2884228561Snp	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2885228561Snp		cntxt_id, sc->sge.neq - 1);
2886228561Snp	sc->sge.eqmap[cntxt_id] = eq;
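
	/*
	 * Illustration of the mapping above, with hypothetical values: if the
	 * firmware reserved egress context ids starting at eq_start = 64 and
	 * this command returned cntxt_id = 70, the queue lands in
	 * sc->sge.eqmap[6].  The panic above catches a firmware id that falls
	 * outside the neq-sized map.
	 */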
2887220873Snp
2888228561Snp	return (rc);
2889228561Snp}
2890228561Snp
2891228561Snpstatic int
2892228561Snpeth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2893228561Snp{
2894228561Snp	int rc, cntxt_id;
2895228561Snp	struct fw_eq_eth_cmd c;
2896228561Snp
2897228561Snp	bzero(&c, sizeof(c));
2898228561Snp
2899228561Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
2900228561Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
2901228561Snp	    V_FW_EQ_ETH_CMD_VFN(0));
2902228561Snp	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
2903228561Snp	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
2904267849Snp	c.autoequiqe_to_viid = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
2905228561Snp	c.fetchszm_to_iqid =
2906228561Snp	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2907228561Snp		V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
2908228561Snp		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
2909228561Snp	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2910228561Snp		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2911228561Snp		      V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2912228561Snp		      V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
2913228561Snp	c.eqaddr = htobe64(eq->ba);
2914228561Snp
2915228561Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2916228561Snp	if (rc != 0) {
2917228561Snp		device_printf(pi->dev,
2918228561Snp		    "failed to create Ethernet egress queue: %d\n", rc);
2919228561Snp		return (rc);
2920228561Snp	}
2921228561Snp	eq->flags |= EQ_ALLOCATED;
2922228561Snp
2923228561Snp	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
2924220873Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2925228561Snp	if (cntxt_id >= sc->sge.neq)
2926228561Snp	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2927228561Snp		cntxt_id, sc->sge.neq - 1);
2928220873Snp	sc->sge.eqmap[cntxt_id] = eq;
2929220873Snp
2930228561Snp	return (rc);
2931228561Snp}
2932220873Snp
2933237263Snp#ifdef TCP_OFFLOAD
2934228561Snpstatic int
2935228561Snpofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2936228561Snp{
2937228561Snp	int rc, cntxt_id;
2938228561Snp	struct fw_eq_ofld_cmd c;
2939220873Snp
2940228561Snp	bzero(&c, sizeof(c));
2941220873Snp
2942228561Snp	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
2943228561Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
2944228561Snp	    V_FW_EQ_OFLD_CMD_VFN(0));
2945228561Snp	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
2946228561Snp	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
2947228561Snp	c.fetchszm_to_iqid =
2948228561Snp		htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2949228561Snp		    V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
2950228561Snp		    F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
2951228561Snp	c.dcaen_to_eqsize =
2952228561Snp	    htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2953228561Snp		V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2954228561Snp		V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2955228561Snp		V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
2956228561Snp	c.eqaddr = htobe64(eq->ba);
2957228561Snp
2958228561Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2959228561Snp	if (rc != 0) {
2960228561Snp		device_printf(pi->dev,
2961228561Snp		    "failed to create egress queue for TCP offload: %d\n", rc);
2962228561Snp		return (rc);
2963228561Snp	}
2964228561Snp	eq->flags |= EQ_ALLOCATED;
2965228561Snp
2966228561Snp	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
2967228561Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2968228561Snp	if (cntxt_id >= sc->sge.neq)
2969228561Snp	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2970228561Snp		cntxt_id, sc->sge.neq - 1);
2971228561Snp	sc->sge.eqmap[cntxt_id] = eq;
2972228561Snp
2973220873Snp	return (rc);
2974220873Snp}
2975228561Snp#endif
2976220873Snp
2977220873Snpstatic int
2978228561Snpalloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2979220873Snp{
2980220873Snp	int rc;
2981228561Snp	size_t len;
2982220873Snp
2983228561Snp	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
2984228561Snp
2985228561Snp	len = eq->qsize * EQ_ESIZE;
2986228561Snp	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
2987228561Snp	    &eq->ba, (void **)&eq->desc);
2988228561Snp	if (rc)
2989228561Snp		return (rc);
2990228561Snp
2991237512Snp	eq->cap = eq->qsize - spg_len / EQ_ESIZE;
2992228561Snp	eq->spg = (void *)&eq->desc[eq->cap];
2993228561Snp	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
2994228561Snp	eq->pidx = eq->cidx = 0;
2995248925Snp	eq->doorbells = sc->doorbells;
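
	/*
	 * A quick example of the capacity math above, assuming the usual
	 * 64-byte descriptors (EQ_ESIZE) and a 64-byte status page (spg_len):
	 * qsize = 1024 gives cap = 1024 - 64 / 64 = 1023 usable descriptors,
	 * and avail starts at 1022 so that a completely full ring never ends
	 * up with pidx == cidx (which the hardware would read as empty).
	 */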
2996228561Snp
2997228561Snp	switch (eq->flags & EQ_TYPEMASK) {
2998228561Snp	case EQ_CTRL:
2999228561Snp		rc = ctrl_eq_alloc(sc, eq);
3000228561Snp		break;
3001228561Snp
3002228561Snp	case EQ_ETH:
3003228561Snp		rc = eth_eq_alloc(sc, pi, eq);
3004228561Snp		break;
3005228561Snp
3006237263Snp#ifdef TCP_OFFLOAD
3007228561Snp	case EQ_OFLD:
3008228561Snp		rc = ofld_eq_alloc(sc, pi, eq);
3009228561Snp		break;
3010228561Snp#endif
3011228561Snp
3012228561Snp	default:
3013228561Snp		panic("%s: invalid eq type %d.", __func__,
3014228561Snp		    eq->flags & EQ_TYPEMASK);
3015228561Snp	}
3016228561Snp	if (rc != 0) {
3017228561Snp		device_printf(sc->dev,
3018269082Snp		    "failed to allocate egress queue (%d): %d\n",
3019228561Snp		    eq->flags & EQ_TYPEMASK, rc);
3020228561Snp	}
3021228561Snp
3022228561Snp	eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
3023228561Snp
3024248925Snp	if (isset(&eq->doorbells, DOORBELL_UDB) ||
3025248925Snp	    isset(&eq->doorbells, DOORBELL_UDBWC) ||
3026249392Snp	    isset(&eq->doorbells, DOORBELL_WCWR)) {
3027256794Snp		uint32_t s_qpp = sc->sge.eq_s_qpp;
3028248925Snp		uint32_t mask = (1 << s_qpp) - 1;
3029248925Snp		volatile uint8_t *udb;
3030248925Snp
3031248925Snp		udb = sc->udbs_base + UDBS_DB_OFFSET;
3032248925Snp		udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT;	/* pg offset */
3033248925Snp		eq->udb_qid = eq->cntxt_id & mask;		/* id in page */
3034270297Snp		if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE)
3035249392Snp			clrbit(&eq->doorbells, DOORBELL_WCWR);
3036248925Snp		else {
3037248925Snp			udb += eq->udb_qid << UDBS_SEG_SHIFT;	/* seg offset */
3038248925Snp			eq->udb_qid = 0;
3039248925Snp		}
3040248925Snp		eq->udb = (volatile void *)udb;
3041248925Snp	}
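
	/*
	 * Worked example of the doorbell math above, with hypothetical
	 * values: s_qpp = 3 means 8 queues per user doorbell page.  For
	 * cntxt_id = 21, the page offset is (21 >> 3) << PAGE_SHIFT (page 2)
	 * and the qid within the page is 21 & 7 = 5.  With 4KB pages and
	 * 128B segments there are 32 segments per page, so qid 5 gets its
	 * own segment: udb is advanced to it and udb_qid is reset to 0.
	 */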
3042248925Snp
3043228561Snp	return (rc);
3044228561Snp}
3045228561Snp
3046228561Snpstatic int
3047228561Snpfree_eq(struct adapter *sc, struct sge_eq *eq)
3048228561Snp{
3049228561Snp	int rc;
3050228561Snp
3051228561Snp	if (eq->flags & EQ_ALLOCATED) {
3052228561Snp		switch (eq->flags & EQ_TYPEMASK) {
3053228561Snp		case EQ_CTRL:
3054228561Snp			rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
3055228561Snp			    eq->cntxt_id);
3056228561Snp			break;
3057228561Snp
3058228561Snp		case EQ_ETH:
3059228561Snp			rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
3060228561Snp			    eq->cntxt_id);
3061228561Snp			break;
3062228561Snp
3063237263Snp#ifdef TCP_OFFLOAD
3064228561Snp		case EQ_OFLD:
3065228561Snp			rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
3066228561Snp			    eq->cntxt_id);
3067228561Snp			break;
3068228561Snp#endif
3069228561Snp
3070228561Snp		default:
3071228561Snp			panic("%s: invalid eq type %d.", __func__,
3072228561Snp			    eq->flags & EQ_TYPEMASK);
3073228561Snp		}
3074220873Snp		if (rc != 0) {
3075220873Snp			device_printf(sc->dev,
3076228561Snp			    "failed to free egress queue (%d): %d\n",
3077228561Snp			    eq->flags & EQ_TYPEMASK, rc);
3078220873Snp			return (rc);
3079220873Snp		}
3080228561Snp		eq->flags &= ~EQ_ALLOCATED;
3081220873Snp	}
3082220873Snp
3083220873Snp	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
3084220873Snp
3085220873Snp	if (mtx_initialized(&eq->eq_lock))
3086220873Snp		mtx_destroy(&eq->eq_lock);
3087220873Snp
3088228561Snp	bzero(eq, sizeof(*eq));
3089220873Snp	return (0);
3090220873Snp}
3091220873Snp
3092220873Snpstatic int
3093228561Snpalloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
3094228561Snp    struct sysctl_oid *oid)
3095218792Snp{
3096228561Snp	int rc;
3097228561Snp	struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
3098228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3099228561Snp
3100228561Snp	rc = alloc_eq(sc, pi, &wrq->eq);
3101228561Snp	if (rc)
3102228561Snp		return (rc);
3103228561Snp
3104228561Snp	wrq->adapter = sc;
3105237263Snp	STAILQ_INIT(&wrq->wr_list);
3106228561Snp
3107228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
3108228561Snp	    &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
3109228561Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
3110228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
3111228561Snp	    "consumer index");
3112228561Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
3113228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
3114228561Snp	    "producer index");
3115228561Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
3116228561Snp	    &wrq->tx_wrs, "# of work requests");
3117228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
3118228561Snp	    &wrq->no_desc, 0,
3119228561Snp	    "# of times queue ran out of hardware descriptors");
3120228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
3121228561Snp	    &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
3122228561Snp
3123228561Snp	return (rc);
3124228561Snp}
3125228561Snp
3126228561Snpstatic int
3127228561Snpfree_wrq(struct adapter *sc, struct sge_wrq *wrq)
3128228561Snp{
3129228561Snp	int rc;
3130228561Snp
3131228561Snp	rc = free_eq(sc, &wrq->eq);
3132228561Snp	if (rc)
3133228561Snp		return (rc);
3134228561Snp
3135228561Snp	bzero(wrq, sizeof(*wrq));
3136228561Snp	return (0);
3137228561Snp}
3138228561Snp
3139228561Snpstatic int
3140228561Snpalloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
3141228561Snp    struct sysctl_oid *oid)
3142228561Snp{
3143228561Snp	int rc;
3144218792Snp	struct adapter *sc = pi->adapter;
3145218792Snp	struct sge_eq *eq = &txq->eq;
3146218792Snp	char name[16];
3147228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
3148218792Snp
3149228561Snp	rc = alloc_eq(sc, pi, eq);
3150218792Snp	if (rc)
3151218792Snp		return (rc);
3152218792Snp
3153228561Snp	txq->ifp = pi->ifp;
3154228561Snp
3155220873Snp	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
3156218792Snp	    M_ZERO | M_WAITOK);
3157220873Snp	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
3158218792Snp
3159218792Snp	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
3160218792Snp	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
3161220873Snp	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
3162218792Snp	if (rc != 0) {
3163218792Snp		device_printf(sc->dev,
3164218792Snp		    "failed to create tx DMA tag: %d\n", rc);
3165218792Snp		return (rc);
3166218792Snp	}
3167218792Snp
3168228561Snp	/*
3169228561Snp	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
3170228561Snp	 * limit for any WR).  txq->no_dmamap events shouldn't occur if maps is
3171228561Snp	 * sized for the worst case.
3172228561Snp	 */
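	/*
	 * Sizing example for the 10/8 ratio above: a hypothetical qsize of
	 * 1024 descriptors yields 1024 * 10 / 8 = 1280 DMA maps, enough for
	 * every descriptor to belong to a maximally packed txpkts WR.
	 */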
3173228561Snp	rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
3174228561Snp	    M_WAITOK);
3175218792Snp	if (rc != 0) {
3176218792Snp		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
3177218792Snp		return (rc);
3178218792Snp	}
3179218792Snp
3180218792Snp	snprintf(name, sizeof(name), "%d", idx);
3181218792Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
3182218792Snp	    NULL, "tx queue");
3183218792Snp	children = SYSCTL_CHILDREN(oid);
3184218792Snp
3185222973Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
3186222973Snp	    &eq->cntxt_id, 0, "SGE context id of the queue");
3187222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
3188222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
3189222973Snp	    "consumer index");
3190222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
3191222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
3192222973Snp	    "producer index");
3193222973Snp
3194218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
3195218792Snp	    &txq->txcsum, "# of times hardware assisted with checksum");
3196218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
3197218792Snp	    CTLFLAG_RD, &txq->vlan_insertion,
3198218792Snp	    "# of times hardware inserted 802.1Q tag");
3199218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
3200237819Snp	    &txq->tso_wrs, "# of TSO work requests");
3201218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
3202218792Snp	    &txq->imm_wrs, "# of work requests with immediate data");
3203218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
3204218792Snp	    &txq->sgl_wrs, "# of work requests with direct SGL");
3205218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
3206218792Snp	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
3207218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
3208218792Snp	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
3209218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
3210218792Snp	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
3211218792Snp
3212246093Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
3213246093Snp	    &txq->br->br_drops, "# of drops in the buf_ring for this queue");
3214218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
3215218792Snp	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
3216218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
3217218792Snp	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
3218218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
3219228561Snp	    &eq->egr_update, 0, "egress update notifications from the SGE");
3220228561Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
3221228561Snp	    &eq->unstalled, 0, "# of times txq recovered after stall");
3222218792Snp
3223218792Snp	return (rc);
3224218792Snp}
3225218792Snp
3226218792Snpstatic int
3227218792Snpfree_txq(struct port_info *pi, struct sge_txq *txq)
3228218792Snp{
3229218792Snp	int rc;
3230218792Snp	struct adapter *sc = pi->adapter;
3231218792Snp	struct sge_eq *eq = &txq->eq;
3232218792Snp
3233228561Snp	rc = free_eq(sc, eq);
3234228561Snp	if (rc)
3235228561Snp		return (rc);
3236220649Snp
3237220873Snp	free(txq->sdesc, M_CXGBE);
3238218792Snp
3239228561Snp	if (txq->txmaps.maps)
3240228561Snp		t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
3241218792Snp
3242220873Snp	buf_ring_free(txq->br, M_CXGBE);
3243218792Snp
3244220873Snp	if (txq->tx_tag)
3245220873Snp		bus_dma_tag_destroy(txq->tx_tag);
3246218792Snp
3247218792Snp	bzero(txq, sizeof(*txq));
3248218792Snp	return (0);
3249218792Snp}
3250218792Snp
3251218792Snpstatic void
3252218792Snponeseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3253218792Snp{
3254218792Snp	bus_addr_t *ba = arg;
3255218792Snp
3256218792Snp	KASSERT(nseg == 1,
3257218792Snp	    ("%s meant for single segment mappings only.", __func__));
3258218792Snp
3259218792Snp	*ba = error ? 0 : segs->ds_addr;
3260218792Snp}
3261218792Snp
3262218792Snpstatic inline void
3263218792Snpring_fl_db(struct adapter *sc, struct sge_fl *fl)
3264218792Snp{
3265270297Snp	uint32_t n, v;
3266218792Snp
3267270297Snp	n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx);
3268270297Snp	MPASS(n > 0);
3269218792Snp
3270218792Snp	wmb();
3271270297Snp	v = fl->dbval | V_PIDX(n);
3272270297Snp	if (fl->udb)
3273270297Snp		*fl->udb = htole32(v);
3274270297Snp	else
3275270297Snp		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);
3276270297Snp	IDXINCR(fl->dbidx, n, fl->sidx);
3277218792Snp}
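
/*
 * Example of the index arithmetic above: fl->pidx counts buffers (8 per
 * hardware descriptor) while fl->dbidx and fl->sidx count descriptors.  With
 * a hypothetical sidx = 128, dbidx = 126, and pidx = 8 (descriptor 1),
 * IDXDIFF(1, 126, 128) = 1 + 128 - 126 = 3 descriptors are announced to the
 * hardware and IDXINCR wraps dbidx around from 126 to 1.
 */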
3278218792Snp
3279220905Snp/*
3280270297Snp * Fills up the freelist by allocating up to 'n' buffers.  Buffers that are
3281270297Snp * recycled do not count towards this allocation budget.
3282228561Snp *
3283270297Snp * Returns non-zero to indicate that this freelist should be added to the list
3284270297Snp * of starving freelists.
3285220905Snp */
3286228561Snpstatic int
3287270297Snprefill_fl(struct adapter *sc, struct sge_fl *fl, int n)
3288218792Snp{
3289270297Snp	__be64 *d;
3290270297Snp	struct fl_sdesc *sd;
3291265425Snp	uintptr_t pa;
3292218792Snp	caddr_t cl;
3293270297Snp	struct cluster_layout *cll;
3294270297Snp	struct sw_zone_info *swz;
3295265425Snp	struct cluster_metadata *clm;
3296270297Snp	uint16_t max_pidx;
3297270297Snp	uint16_t hw_cidx = fl->hw_cidx;		/* stable snapshot */
3298218792Snp
3299218792Snp	FL_LOCK_ASSERT_OWNED(fl);
3300218792Snp
3301270297Snp	/*
3302270297Snp	 * We always stop at the beginning of the hardware descriptor that's just
3303270297Snp	 * before the one with the hw cidx.  This is to avoid hw pidx = hw cidx,
3304270297Snp	 * which would mean an empty freelist to the chip.
3305270297Snp	 */
3306270297Snp	max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1;
3307270297Snp	if (fl->pidx == max_pidx * 8)
3308270297Snp		return (0);
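
	/*
	 * Example, with a hypothetical sidx of 128 descriptors: if hw_cidx is
	 * at descriptor 5 then max_pidx is 4, and filling stops once fl->pidx
	 * (counted in buffers, 8 per descriptor) reaches 4 * 8 = 32.  If
	 * hw_cidx is 0 the stopping point wraps around to descriptor 127.
	 */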
3309218792Snp
3310270297Snp	d = &fl->desc[fl->pidx];
3311270297Snp	sd = &fl->sdesc[fl->pidx];
3312270297Snp	cll = &fl->cll_def;	/* default layout */
3313270297Snp	swz = &sc->sge.sw_zone_info[cll->zidx];
3314218792Snp
3315270297Snp	while (n > 0) {
3316270297Snp
3317218792Snp		if (sd->cl != NULL) {
3318218792Snp
3319269356Snp			if (sd->nmbuf == 0) {
3320255050Snp				/*
3321265425Snp				 * Fast recycle without involving any atomics on
3322265425Snp				 * the cluster's metadata (if the cluster has
3323265425Snp				 * metadata).  This happens when all frames
3324265425Snp				 * received in the cluster were small enough to
3325265425Snp				 * fit within a single mbuf each.
3326255050Snp				 */
3327265425Snp				fl->cl_fast_recycled++;
3328267694Snp#ifdef INVARIANTS
3329267694Snp				clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
3330267694Snp				if (clm != NULL)
3331267694Snp					MPASS(clm->refcount == 1);
3332267694Snp#endif
3333265425Snp				goto recycled_fast;
3334255050Snp			}
3335218792Snp
3336218792Snp			/*
3337265425Snp			 * Cluster is guaranteed to have metadata.  Clusters
3338265425Snp			 * without metadata always take the fast recycle path
3339265425Snp			 * when they're recycled.
3340218792Snp			 */
3341265425Snp			clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
3342265425Snp			MPASS(clm != NULL);
3343265425Snp
3344265425Snp			if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
3345265425Snp				fl->cl_recycled++;
3346269356Snp				counter_u64_add(extfree_rels, 1);
3347265425Snp				goto recycled;
3348218792Snp			}
3349265425Snp			sd->cl = NULL;	/* gave up my reference */
3350218792Snp		}
3351265425Snp		MPASS(sd->cl == NULL);
3352265425Snpalloc:
3353265425Snp		cl = uma_zalloc(swz->zone, M_NOWAIT);
3354265425Snp		if (__predict_false(cl == NULL)) {
3355265425Snp			if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 ||
3356265425Snp			    fl->cll_def.zidx == fl->cll_alt.zidx)
3357265425Snp				break;
3358218792Snp
3359265425Snp			/* fall back to the safe zone */
3360265425Snp			cll = &fl->cll_alt;
3361265425Snp			swz = &sc->sge.sw_zone_info[cll->zidx];
3362265425Snp			goto alloc;
3363255050Snp		}
3364265425Snp		fl->cl_allocated++;
3365270297Snp		n--;
3366218792Snp
3367265425Snp		pa = pmap_kextract((vm_offset_t)cl);
3368265425Snp		pa += cll->region1;
3369218792Snp		sd->cl = cl;
3370265425Snp		sd->cll = *cll;
3371265425Snp		*d = htobe64(pa | cll->hwidx);
3372265425Snp		clm = cl_metadata(sc, fl, cll, cl);
3373265425Snp		if (clm != NULL) {
3374265425Snprecycled:
3375218792Snp#ifdef INVARIANTS
3376265425Snp			clm->sd = sd;
3377218792Snp#endif
3378265425Snp			clm->refcount = 1;
3379265425Snp		}
3380269356Snp		sd->nmbuf = 0;
3381265425Snprecycled_fast:
3382265425Snp		d++;
3383218792Snp		sd++;
3384270297Snp		if (__predict_false(++fl->pidx % 8 == 0)) {
3385270297Snp			uint16_t pidx = fl->pidx / 8;
3386270297Snp
3387270297Snp			if (__predict_false(pidx == fl->sidx)) {
3388270297Snp				fl->pidx = 0;
3389270297Snp				pidx = 0;
3390270297Snp				sd = fl->sdesc;
3391270297Snp				d = fl->desc;
3392270297Snp			}
3393270297Snp			if (pidx == max_pidx)
3394270297Snp				break;
3395270297Snp
3396270297Snp			if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4)
3397270297Snp				ring_fl_db(sc, fl);
3398218792Snp		}
3399218792Snp	}
3400220905Snp
3401270297Snp	if (fl->pidx / 8 != fl->dbidx)
3402220905Snp		ring_fl_db(sc, fl);
3403228561Snp
3404228561Snp	return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
3405218792Snp}
3406218792Snp
3407228561Snp/*
3408228561Snp * Attempt to refill all starving freelists.
3409228561Snp */
3410228561Snpstatic void
3411228561Snprefill_sfl(void *arg)
3412228561Snp{
3413228561Snp	struct adapter *sc = arg;
3414228561Snp	struct sge_fl *fl, *fl_temp;
3415228561Snp
3416228561Snp	mtx_lock(&sc->sfl_lock);
3417228561Snp	TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
3418228561Snp		FL_LOCK(fl);
3419228561Snp		refill_fl(sc, fl, 64);
3420228561Snp		if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
3421228561Snp			TAILQ_REMOVE(&sc->sfl, fl, link);
3422228561Snp			fl->flags &= ~FL_STARVING;
3423228561Snp		}
3424228561Snp		FL_UNLOCK(fl);
3425228561Snp	}
3426228561Snp
3427228561Snp	if (!TAILQ_EMPTY(&sc->sfl))
3428228561Snp		callout_schedule(&sc->sfl_callout, hz / 5);
3429228561Snp	mtx_unlock(&sc->sfl_lock);
3430228561Snp}
3431228561Snp
3432218792Snpstatic int
3433218792Snpalloc_fl_sdesc(struct sge_fl *fl)
3434218792Snp{
3435218792Snp
3436270297Snp	fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE,
3437218792Snp	    M_ZERO | M_WAITOK);
3438218792Snp
3439218792Snp	return (0);
3440218792Snp}
3441218792Snp
3442218792Snpstatic void
3443255050Snpfree_fl_sdesc(struct adapter *sc, struct sge_fl *fl)
3444218792Snp{
3445218792Snp	struct fl_sdesc *sd;
3446265425Snp	struct cluster_metadata *clm;
3447265425Snp	struct cluster_layout *cll;
3448218792Snp	int i;
3449218792Snp
3450218792Snp	sd = fl->sdesc;
3451270297Snp	for (i = 0; i < fl->sidx * 8; i++, sd++) {
3452265425Snp		if (sd->cl == NULL)
3453265425Snp			continue;
3454218792Snp
3455265425Snp		cll = &sd->cll;
3456265425Snp		clm = cl_metadata(sc, fl, cll, sd->cl);
3457269356Snp		if (sd->nmbuf == 0)
3458265425Snp			uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl);
3459269356Snp		else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) {
3460269356Snp			uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl);
3461269356Snp			counter_u64_add(extfree_rels, 1);
3462218792Snp		}
3463265425Snp		sd->cl = NULL;
3464218792Snp	}
3465218792Snp
3466218792Snp	free(fl->sdesc, M_CXGBE);
3467218792Snp	fl->sdesc = NULL;
3468218792Snp}
3469218792Snp
3470228561Snpint
3471228561Snpt4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
3472228561Snp    int flags)
3473218792Snp{
3474218792Snp	struct tx_map *txm;
3475228561Snp	int i, rc;
3476218792Snp
3477228561Snp	txmaps->map_total = txmaps->map_avail = count;
3478228561Snp	txmaps->map_cidx = txmaps->map_pidx = 0;
3479218792Snp
3480228561Snp	txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
3481228561Snp	    M_ZERO | flags);
	if (txmaps->maps == NULL)
		return (ENOMEM);	/* possible when called with M_NOWAIT */
3482218792Snp
3483228561Snp	txm = txmaps->maps;
3484218792Snp	for (i = 0; i < count; i++, txm++) {
3485228561Snp		rc = bus_dmamap_create(tx_tag, 0, &txm->map);
3486218792Snp		if (rc != 0)
3487218792Snp			goto failed;
3488218792Snp	}
3489218792Snp
3490218792Snp	return (0);
3491218792Snpfailed:
3492218792Snp	while (--i >= 0) {
3493218792Snp		txm--;
3494228561Snp		bus_dmamap_destroy(tx_tag, txm->map);
3495218792Snp	}
3496228561Snp	KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
3497218792Snp
3498228561Snp	free(txmaps->maps, M_CXGBE);
3499228561Snp	txmaps->maps = NULL;
3500218792Snp
3501218792Snp	return (rc);
3502218792Snp}
3503218792Snp
3504228561Snpvoid
3505228561Snpt4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
3506218792Snp{
3507218792Snp	struct tx_map *txm;
3508218792Snp	int i;
3509218792Snp
3510228561Snp	txm = txmaps->maps;
3511228561Snp	for (i = 0; i < txmaps->map_total; i++, txm++) {
3512218792Snp
3513218792Snp		if (txm->m) {
3514228561Snp			bus_dmamap_unload(tx_tag, txm->map);
3515218792Snp			m_freem(txm->m);
3516218792Snp			txm->m = NULL;
3517218792Snp		}
3518218792Snp
3519228561Snp		bus_dmamap_destroy(tx_tag, txm->map);
3520218792Snp	}
3521218792Snp
3522228561Snp	free(txmaps->maps, M_CXGBE);
3523228561Snp	txmaps->maps = NULL;
3524218792Snp}
3525218792Snp
3526218792Snp/*
3527218792Snp * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
3528218792Snp * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
3529218792Snp * of immediate data.
3530218792Snp */
3531218792Snp#define IMM_LEN ( \
3532228561Snp      2 * EQ_ESIZE \
3533218792Snp    - sizeof(struct fw_eth_tx_pkt_wr) \
3534218792Snp    - sizeof(struct cpl_tx_pkt_core))
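
/*
 * A quick sanity check of IMM_LEN, assuming 64-byte descriptors (EQ_ESIZE)
 * and the 16-byte fw_eth_tx_pkt_wr and cpl_tx_pkt_core headers:
 * 2 * 64 - 16 - 16 = 96, which is where the 96-byte figure above comes from.
 */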
3535218792Snp
3536218792Snp/*
3537218792Snp * Returns non-zero on failure, no need to cleanup anything in that case.
3538218792Snp *
3539218792Snp * Note 1: We always try to defrag the mbuf if required and return EFBIG only
3540218792Snp * if the resulting chain still won't fit in a tx descriptor.
3541218792Snp *
3542218792Snp * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
3543218792Snp * does not have the TCP header in it.
3544218792Snp */
3545218792Snpstatic int
3546218792Snpget_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
3547218792Snp    int sgl_only)
3548218792Snp{
3549218792Snp	struct mbuf *m = *fp;
3550228561Snp	struct tx_maps *txmaps;
3551218792Snp	struct tx_map *txm;
3552218792Snp	int rc, defragged = 0, n;
3553218792Snp
3554218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3555218792Snp
3556218792Snp	if (m->m_pkthdr.tso_segsz)
3557218792Snp		sgl_only = 1;	/* Do not allow immediate data with LSO */
3558218792Snp
3559218792Snpstart:	sgl->nsegs = 0;
3560218792Snp
3561218792Snp	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
3562218792Snp		return (0);	/* nsegs = 0 tells caller to use imm. tx */
3563218792Snp
3564228561Snp	txmaps = &txq->txmaps;
3565228561Snp	if (txmaps->map_avail == 0) {
3566218792Snp		txq->no_dmamap++;
3567218792Snp		return (ENOMEM);
3568218792Snp	}
3569228561Snp	txm = &txmaps->maps[txmaps->map_pidx];
3570218792Snp
3571218792Snp	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
3572218792Snp		*fp = m_pullup(m, 50);
3573218792Snp		m = *fp;
3574218792Snp		if (m == NULL)
3575218792Snp			return (ENOBUFS);
3576218792Snp	}
3577218792Snp
3578220873Snp	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
3579218792Snp	    &sgl->nsegs, BUS_DMA_NOWAIT);
3580218792Snp	if (rc == EFBIG && defragged == 0) {
3581243857Sglebius		m = m_defrag(m, M_NOWAIT);
3582218792Snp		if (m == NULL)
3583218792Snp			return (EFBIG);
3584218792Snp
3585218792Snp		defragged = 1;
3586218792Snp		*fp = m;
3587218792Snp		goto start;
3588218792Snp	}
3589218792Snp	if (rc != 0)
3590218792Snp		return (rc);
3591218792Snp
3592218792Snp	txm->m = m;
3593228561Snp	txmaps->map_avail--;
3594228561Snp	if (++txmaps->map_pidx == txmaps->map_total)
3595228561Snp		txmaps->map_pidx = 0;
3596218792Snp
3597218792Snp	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
3598218792Snp	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
3599218792Snp
3600218792Snp	/*
3601218792Snp	 * Store the # of flits required to hold this frame's SGL in nflits.  An
3602218792Snp	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
3603218792Snp	 * multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not used
3604218792Snp	 * then len1 must be set to 0.
3605218792Snp	 */
3606218792Snp	n = sgl->nsegs - 1;
3607218792Snp	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
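
	/*
	 * Worked example: the ULPTX header + (len0, addr0) take 2 flits and
	 * each later pair of segments shares a 3-flit (len0 + len1, addr0,
	 * addr1) tuple.  For nsegs = 3, n = 2 and nflits = 3 + 0 + 2 = 5.
	 * For nsegs = 2, n = 1 and nflits = 1 + 1 + 2 = 4: the trailing
	 * half-tuple carries len1 = 0 and no second address.
	 */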
3608218792Snp
3609218792Snp	return (0);
3610218792Snp}
3611218792Snp
3612218792Snp
3613218792Snp/*
3614218792Snp * Releases all the txq resources used up in the specified sgl.
3615218792Snp */
3616218792Snpstatic int
3617218792Snpfree_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
3618218792Snp{
3619228561Snp	struct tx_maps *txmaps;
3620218792Snp	struct tx_map *txm;
3621218792Snp
3622218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3623218792Snp
3624218792Snp	if (sgl->nsegs == 0)
3625218792Snp		return (0);	/* didn't use any map */
3626218792Snp
3627228561Snp	txmaps = &txq->txmaps;
3628228561Snp
3629218792Snp	/* 1 pkt uses exactly 1 map, back it out */
3630218792Snp
3631228561Snp	txmaps->map_avail++;
3632228561Snp	if (txmaps->map_pidx > 0)
3633228561Snp		txmaps->map_pidx--;
3634218792Snp	else
3635228561Snp		txmaps->map_pidx = txmaps->map_total - 1;
3636218792Snp
3637228561Snp	txm = &txmaps->maps[txmaps->map_pidx];
3638220873Snp	bus_dmamap_unload(txq->tx_tag, txm->map);
3639218792Snp	txm->m = NULL;
3640218792Snp
3641218792Snp	return (0);
3642218792Snp}
3643218792Snp
3644218792Snpstatic int
3645218792Snpwrite_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
3646218792Snp    struct sgl *sgl)
3647218792Snp{
3648218792Snp	struct sge_eq *eq = &txq->eq;
3649218792Snp	struct fw_eth_tx_pkt_wr *wr;
3650218792Snp	struct cpl_tx_pkt_core *cpl;
3651218792Snp	uint32_t ctrl;	/* used in many unrelated places */
3652218792Snp	uint64_t ctrl1;
3653219286Snp	int nflits, ndesc, pktlen;
3654218792Snp	struct tx_sdesc *txsd;
3655218792Snp	caddr_t dst;
3656218792Snp
3657218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3658218792Snp
3659219286Snp	pktlen = m->m_pkthdr.len;
3660219286Snp
3661218792Snp	/*
3662218792Snp	 * Do we have enough flits to send this frame out?
3663218792Snp	 */
3664218792Snp	ctrl = sizeof(struct cpl_tx_pkt_core);
3665218792Snp	if (m->m_pkthdr.tso_segsz) {
3666218792Snp		nflits = TXPKT_LSO_WR_HDR;
3667237436Snp		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
3668218792Snp	} else
3669218792Snp		nflits = TXPKT_WR_HDR;
3670218792Snp	if (sgl->nsegs > 0)
3671218792Snp		nflits += sgl->nflits;
3672218792Snp	else {
3673219286Snp		nflits += howmany(pktlen, 8);
3674219286Snp		ctrl += pktlen;
3675218792Snp	}
3676218792Snp	ndesc = howmany(nflits, 8);
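
	/*
	 * Example, assuming the usual 4-flit txpkt header (16-byte WR plus
	 * 16-byte CPL): a non-TSO frame whose SGL needs 5 flits uses
	 * nflits = 4 + 5 = 9, i.e. ndesc = howmany(9, 8) = 2 descriptors.
	 */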
3677218792Snp	if (ndesc > eq->avail)
3678218792Snp		return (ENOMEM);
3679218792Snp
3680218792Snp	/* Firmware work request header */
3681218792Snp	wr = (void *)&eq->desc[eq->pidx];
3682218792Snp	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
3683228561Snp	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
3684218792Snp	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
3685228561Snp	if (eq->avail == ndesc) {
3686228561Snp		if (!(eq->flags & EQ_CRFLUSHED)) {
3687228561Snp			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3688228561Snp			eq->flags |= EQ_CRFLUSHED;
3689228561Snp		}
3690228561Snp		eq->flags |= EQ_STALLED;
3691220643Snp	}
3692220643Snp
3693218792Snp	wr->equiq_to_len16 = htobe32(ctrl);
3694218792Snp	wr->r3 = 0;
3695218792Snp
3696218792Snp	if (m->m_pkthdr.tso_segsz) {
3697237436Snp		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
3698218792Snp		struct ether_header *eh;
3699237819Snp		void *l3hdr;
3700237819Snp#if defined(INET) || defined(INET6)
3701218792Snp		struct tcphdr *tcp;
3702237819Snp#endif
3703237819Snp		uint16_t eh_type;
3704218792Snp
3705218792Snp		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
3706218792Snp		    F_LSO_LAST_SLICE;
3707218792Snp
3708218792Snp		eh = mtod(m, struct ether_header *);
3709237819Snp		eh_type = ntohs(eh->ether_type);
3710237819Snp		if (eh_type == ETHERTYPE_VLAN) {
3711237819Snp			struct ether_vlan_header *evh = (void *)eh;
3712237819Snp
3713218792Snp			ctrl |= V_LSO_ETHHDR_LEN(1);
3714237819Snp			l3hdr = evh + 1;
3715237819Snp			eh_type = ntohs(evh->evl_proto);
3716218792Snp		} else
3717237819Snp			l3hdr = eh + 1;
3718218792Snp
3719237819Snp		switch (eh_type) {
3720237819Snp#ifdef INET6
3721237819Snp		case ETHERTYPE_IPV6:
3722237819Snp		{
3723237819Snp			struct ip6_hdr *ip6 = l3hdr;
3724218792Snp
3725237819Snp			/*
3726237819Snp			 * XXX-BZ For now we do not pretend to support
3727237819Snp			 * IPv6 extension headers.
3728237819Snp			 */
3729237819Snp			KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
3730237819Snp			    "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
3731237819Snp			tcp = (struct tcphdr *)(ip6 + 1);
3732237819Snp			ctrl |= F_LSO_IPV6;
3733237819Snp			ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
3734237819Snp			    V_LSO_TCPHDR_LEN(tcp->th_off);
3735237819Snp			break;
3736237819Snp		}
3737237819Snp#endif
3738237819Snp#ifdef INET
3739237819Snp		case ETHERTYPE_IP:
3740237819Snp		{
3741237819Snp			struct ip *ip = l3hdr;
3742237819Snp
3743237819Snp			tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
3744237819Snp			ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
3745237819Snp			    V_LSO_TCPHDR_LEN(tcp->th_off);
3746237819Snp			break;
3747237819Snp		}
3748237819Snp#endif
3749237819Snp		default:
3750237819Snp			panic("%s: CSUM_TSO but no supported IP version "
3751237819Snp			    "(0x%04x)", __func__, eh_type);
3752237819Snp		}
3753237819Snp
3754218792Snp		lso->lso_ctrl = htobe32(ctrl);
3755218792Snp		lso->ipid_ofst = htobe16(0);
3756218792Snp		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
3757218792Snp		lso->seqno_offset = htobe32(0);
3758219286Snp		lso->len = htobe32(pktlen);
3759218792Snp
3760218792Snp		cpl = (void *)(lso + 1);
3761218792Snp
3762218792Snp		txq->tso_wrs++;
3763218792Snp	} else
3764218792Snp		cpl = (void *)(wr + 1);
3765218792Snp
3766218792Snp	/* Checksum offload */
3767218792Snp	ctrl1 = 0;
3768247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3769218792Snp		ctrl1 |= F_TXPKT_IPCSUM_DIS;
3770237799Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3771247062Snp	    CSUM_TCP_IPV6 | CSUM_TSO)))
3772218792Snp		ctrl1 |= F_TXPKT_L4CSUM_DIS;
3773237799Snp	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3774247062Snp	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3775218792Snp		txq->txcsum++;	/* some hardware assistance provided */
3776218792Snp
3777218792Snp	/* VLAN tag insertion */
3778218792Snp	if (m->m_flags & M_VLANTAG) {
3779218792Snp		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3780218792Snp		txq->vlan_insertion++;
3781218792Snp	}
3782218792Snp
3783218792Snp	/* CPL header */
3784218792Snp	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3785218792Snp	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3786218792Snp	cpl->pack = 0;
3787219286Snp	cpl->len = htobe16(pktlen);
3788218792Snp	cpl->ctrl1 = htobe64(ctrl1);
3789218792Snp
3790218792Snp	/* Software descriptor */
3791220873Snp	txsd = &txq->sdesc[eq->pidx];
3792218792Snp	txsd->desc_used = ndesc;
3793218792Snp
3794218792Snp	eq->pending += ndesc;
3795218792Snp	eq->avail -= ndesc;
3796218792Snp	eq->pidx += ndesc;
3797218792Snp	if (eq->pidx >= eq->cap)
3798218792Snp		eq->pidx -= eq->cap;
3799218792Snp
3800218792Snp	/* SGL */
3801218792Snp	dst = (void *)(cpl + 1);
3802218792Snp	if (sgl->nsegs > 0) {
3803220873Snp		txsd->credits = 1;
3804218792Snp		txq->sgl_wrs++;
3805218792Snp		write_sgl_to_txd(eq, sgl, &dst);
3806218792Snp	} else {
3807220873Snp		txsd->credits = 0;
3808218792Snp		txq->imm_wrs++;
3809218792Snp		for (; m; m = m->m_next) {
3810218792Snp			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
3811219286Snp#ifdef INVARIANTS
3812219286Snp			pktlen -= m->m_len;
3813219286Snp#endif
3814218792Snp		}
3815219286Snp#ifdef INVARIANTS
3816219286Snp		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
3817219286Snp#endif
3818219286Snp
3819218792Snp	}
3820218792Snp
3821218792Snp	txq->txpkt_wrs++;
3822218792Snp	return (0);
3823218792Snp}
3824218792Snp
3825218792Snp/*
3826218792Snp * Returns 0 to indicate that m has been accepted into a coalesced tx work
3827218792Snp * request.  It has either been folded into txpkts or txpkts was flushed and m
3828218792Snp * has started a new coalesced work request (as the first frame in a fresh
3829218792Snp * txpkts).
3830218792Snp *
3831218792Snp * Returns non-zero to indicate a failure - caller is responsible for
3832218792Snp * transmitting m, if there was anything in txpkts it has been flushed.
3833218792Snp */
3834218792Snpstatic int
3835218792Snpadd_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
3836218792Snp    struct mbuf *m, struct sgl *sgl)
3837218792Snp{
3838218792Snp	struct sge_eq *eq = &txq->eq;
3839218792Snp	int can_coalesce;
3840218792Snp	struct tx_sdesc *txsd;
3841218792Snp	int flits;
3842218792Snp
3843218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3844218792Snp
3845228561Snp	KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
3846228561Snp
3847218792Snp	if (txpkts->npkt > 0) {
3848218792Snp		flits = TXPKTS_PKT_HDR + sgl->nflits;
3849218792Snp		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3850218792Snp		    txpkts->nflits + flits <= TX_WR_FLITS &&
3851218792Snp		    txpkts->nflits + flits <= eq->avail * 8 &&
3852218792Snp		    txpkts->plen + m->m_pkthdr.len < 65536;
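
		/*
		 * The 65536 cap exists because the txpkts WR reports the
		 * total payload length in a 16-bit field (wr->plen is
		 * written with htobe16 in write_txpkts_wr).
		 */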
3853218792Snp
3854218792Snp		if (can_coalesce) {
3855218792Snp			txpkts->npkt++;
3856218792Snp			txpkts->nflits += flits;
3857218792Snp			txpkts->plen += m->m_pkthdr.len;
3858218792Snp
3859220873Snp			txsd = &txq->sdesc[eq->pidx];
3860220873Snp			txsd->credits++;
3861218792Snp
3862218792Snp			return (0);
3863218792Snp		}
3864218792Snp
3865218792Snp		/*
3866218792Snp		 * Couldn't coalesce m into txpkts.  The first order of business
3867218792Snp		 * is to send txpkts on its way.  Then we'll revisit m.
3868218792Snp		 */
3869218792Snp		write_txpkts_wr(txq, txpkts);
3870218792Snp	}
3871218792Snp
3872218792Snp	/*
3873218792Snp	 * Check if we can start a new coalesced tx work request with m as
3874218792Snp	 * the first packet in it.
3875218792Snp	 */
3876218792Snp
3877218792Snp	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
3878218792Snp
3879218792Snp	flits = TXPKTS_WR_HDR + sgl->nflits;
3880218792Snp	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3881218792Snp	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
3882218792Snp
3883218792Snp	if (can_coalesce == 0)
3884218792Snp		return (EINVAL);
3885218792Snp
3886218792Snp	/*
3887218792Snp	 * Start a fresh coalesced tx WR with m as the first frame in it.
3888218792Snp	 */
3889218792Snp	txpkts->npkt = 1;
3890218792Snp	txpkts->nflits = flits;
3891218792Snp	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
3892218792Snp	txpkts->plen = m->m_pkthdr.len;
3893218792Snp
3894220873Snp	txsd = &txq->sdesc[eq->pidx];
3895220873Snp	txsd->credits = 1;
3896218792Snp
3897218792Snp	return (0);
3898218792Snp}
3899218792Snp
3900218792Snp/*
3901218792Snp * Note that write_txpkts_wr can never run out of hardware descriptors (but
3902218792Snp * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
3903218792Snp * coalescing only if sufficient hardware descriptors are available.
3904218792Snp */
3905218792Snpstatic void
3906218792Snpwrite_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
3907218792Snp{
3908218792Snp	struct sge_eq *eq = &txq->eq;
3909218792Snp	struct fw_eth_tx_pkts_wr *wr;
3910218792Snp	struct tx_sdesc *txsd;
3911218792Snp	uint32_t ctrl;
3912218792Snp	int ndesc;
3913218792Snp
3914218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3915218792Snp
3916218792Snp	ndesc = howmany(txpkts->nflits, 8);
3917218792Snp
3918218792Snp	wr = (void *)&eq->desc[eq->pidx];
3919228561Snp	wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
3920218792Snp	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
3921228561Snp	if (eq->avail == ndesc) {
3922228561Snp		if (!(eq->flags & EQ_CRFLUSHED)) {
3923228561Snp			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3924228561Snp			eq->flags |= EQ_CRFLUSHED;
3925228561Snp		}
3926228561Snp		eq->flags |= EQ_STALLED;
3927220643Snp	}
3928218792Snp	wr->equiq_to_len16 = htobe32(ctrl);
3929218792Snp	wr->plen = htobe16(txpkts->plen);
3930218792Snp	wr->npkt = txpkts->npkt;
3931222513Snp	wr->r3 = wr->type = 0;
3932218792Snp
3933218792Snp	/* Everything else already written */
3934218792Snp
3935220873Snp	txsd = &txq->sdesc[eq->pidx];
3936218792Snp	txsd->desc_used = ndesc;
3937218792Snp
3938220643Snp	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
3939218792Snp
3940218792Snp	eq->pending += ndesc;
3941218792Snp	eq->avail -= ndesc;
3942218792Snp	eq->pidx += ndesc;
3943218792Snp	if (eq->pidx >= eq->cap)
3944218792Snp		eq->pidx -= eq->cap;
3945218792Snp
3946218792Snp	txq->txpkts_pkts += txpkts->npkt;
3947218792Snp	txq->txpkts_wrs++;
3948218792Snp	txpkts->npkt = 0;	/* emptied */
3949218792Snp}
3950218792Snp
3951218792Snpstatic inline void
3952218792Snpwrite_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
3953218792Snp    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
3954218792Snp{
3955218792Snp	struct ulp_txpkt *ulpmc;
3956218792Snp	struct ulptx_idata *ulpsc;
3957218792Snp	struct cpl_tx_pkt_core *cpl;
3958218792Snp	struct sge_eq *eq = &txq->eq;
3959218792Snp	uintptr_t flitp, start, end;
3960218792Snp	uint64_t ctrl;
3961218792Snp	caddr_t dst;
3962218792Snp
3963218792Snp	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
3964218792Snp
3965218792Snp	start = (uintptr_t)eq->desc;
3966218792Snp	end = (uintptr_t)eq->spg;
3967218792Snp
3968218792Snp	/* Checksum offload */
3969218792Snp	ctrl = 0;
3970247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3971218792Snp		ctrl |= F_TXPKT_IPCSUM_DIS;
3972247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3973247062Snp	    CSUM_TCP_IPV6 | CSUM_TSO)))
3974218792Snp		ctrl |= F_TXPKT_L4CSUM_DIS;
3975247062Snp	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3976247062Snp	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3977218792Snp		txq->txcsum++;	/* some hardware assistance provided */
3978218792Snp
3979218792Snp	/* VLAN tag insertion */
3980218792Snp	if (m->m_flags & M_VLANTAG) {
3981218792Snp		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3982218792Snp		txq->vlan_insertion++;
3983218792Snp	}
3984218792Snp
3985218792Snp	/*
3986218792Snp	 * The previous packet's SGL must have ended at a 16 byte boundary (this
3987218792Snp	 * is required by the firmware/hardware).  It follows that flitp cannot
3988218792Snp	 * wrap around between the ULPTX master command and ULPTX subcommand (8
3989218792Snp	 * bytes each), and that it cannot wrap around in the middle of the
3990218792Snp	 * cpl_tx_pkt_core either.
3991218792Snp	 */
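	/*
	 * Concretely: ulp_txpkt and ulptx_idata are 8 bytes each, so together
	 * they fill exactly one 16 byte unit, and cpl_tx_pkt_core is 16
	 * bytes.  The ring size is a multiple of 16 bytes, so a structure
	 * starting on a 16 byte boundary can only reach the end of the ring
	 * between these units, never inside one.
	 */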
3992218792Snp	flitp = (uintptr_t)txpkts->flitp;
3993218792Snp	KASSERT((flitp & 0xf) == 0,
3994218792Snp	    ("%s: last SGL did not end at 16 byte boundary: %p",
3995218792Snp	    __func__, txpkts->flitp));
3996218792Snp
3997218792Snp	/* ULP master command */
3998218792Snp	ulpmc = (void *)flitp;
3999219288Snp	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
4000219288Snp	    V_ULP_TXPKT_FID(eq->iqid));
4001218792Snp	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
4002218792Snp	    sizeof(*cpl) + 8 * sgl->nflits, 16));
4003218792Snp
4004218792Snp	/* ULP subcommand */
4005218792Snp	ulpsc = (void *)(ulpmc + 1);
4006218792Snp	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
4007218792Snp	    F_ULP_TX_SC_MORE);
4008218792Snp	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
4009218792Snp
4010218792Snp	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
4011218792Snp	if (flitp == end)
4012218792Snp		flitp = start;
4013218792Snp
4014218792Snp	/* CPL_TX_PKT */
4015218792Snp	cpl = (void *)flitp;
4016218792Snp	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
4017218792Snp	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
4018218792Snp	cpl->pack = 0;
4019218792Snp	cpl->len = htobe16(m->m_pkthdr.len);
4020218792Snp	cpl->ctrl1 = htobe64(ctrl);
4021218792Snp
4022218792Snp	flitp += sizeof(*cpl);
4023218792Snp	if (flitp == end)
4024218792Snp		flitp = start;
4025218792Snp
4026218792Snp	/* SGL for this frame */
4027218792Snp	dst = (caddr_t)flitp;
4028218792Snp	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
4029218792Snp	txpkts->flitp = (void *)dst;
4030218792Snp
4031218792Snp	KASSERT(((uintptr_t)dst & 0xf) == 0,
4032218792Snp	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
4033218792Snp}
4034218792Snp
4035218792Snp/*
4036218792Snp * If the SGL ends on an address that is not 16 byte aligned, this function will
4037218792Snp * add a 0 filled flit at the end.  It returns 1 in that case.
4038218792Snp */
4039218792Snpstatic int
4040218792Snpwrite_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
4041218792Snp{
4042218792Snp	__be64 *flitp, *end;
4043218792Snp	struct ulptx_sgl *usgl;
4044218792Snp	bus_dma_segment_t *seg;
4045218792Snp	int i, padded;
4046218792Snp
4047218792Snp	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
4048218792Snp	    ("%s: bad SGL - nsegs=%d, nflits=%d",
4049218792Snp	    __func__, sgl->nsegs, sgl->nflits));
4050218792Snp
4051218792Snp	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
4052218792Snp	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
4053218792Snp
4054218792Snp	flitp = (__be64 *)(*to);
4055218792Snp	end = flitp + sgl->nflits;
4056218792Snp	seg = &sgl->seg[0];
4057218792Snp	usgl = (void *)flitp;
4058218792Snp
4059218792Snp	/*
4060218792Snp	 * We start at a 16 byte boundary somewhere inside the tx descriptor
4061218792Snp	 * ring, so we're at least 16 bytes away from the status page.  There is
4062218792Snp	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
4063218792Snp	 */
4064218792Snp
4065218792Snp	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
4066218792Snp	    V_ULPTX_NSGE(sgl->nsegs));
4067218792Snp	usgl->len0 = htobe32(seg->ds_len);
4068218792Snp	usgl->addr0 = htobe64(seg->ds_addr);
4069218792Snp	seg++;
4070218792Snp
4071218792Snp	if ((uintptr_t)end <= (uintptr_t)eq->spg) {
4072218792Snp
4073218792Snp		/* Won't wrap around at all */
4074218792Snp
4075218792Snp		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
4076218792Snp			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
4077218792Snp			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
4078218792Snp		}
4079218792Snp		if (i & 1)
4080218792Snp			usgl->sge[i / 2].len[1] = htobe32(0);
4081218792Snp	} else {
4082218792Snp
4083218792Snp		/* Will wrap somewhere in the rest of the SGL */
4084218792Snp
4085218792Snp		/* 2 flits already written, write the rest flit by flit */
4086218792Snp		flitp = (void *)(usgl + 1);
4087218792Snp		for (i = 0; i < sgl->nflits - 2; i++) {
4088218792Snp			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
4089218792Snp				flitp = (void *)eq->desc;
4090218792Snp			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
4091218792Snp		}
4092218792Snp		end = flitp;
4093218792Snp	}
4094218792Snp
4095218792Snp	if ((uintptr_t)end & 0xf) {
4096218792Snp		*(uint64_t *)end = 0;
4097218792Snp		end++;
4098218792Snp		padded = 1;
4099218792Snp	} else
4100218792Snp		padded = 0;
4101218792Snp
4102218792Snp	if ((uintptr_t)end == (uintptr_t)eq->spg)
4103218792Snp		*to = (void *)eq->desc;
4104218792Snp	else
4105218792Snp		*to = (void *)end;
4106218792Snp
4107218792Snp	return (padded);
4108218792Snp}
4109218792Snp
4110218792Snpstatic inline void
4111218792Snpcopy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
4112218792Snp{
4113237263Snp	if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
4114218792Snp		bcopy(from, *to, len);
4115218792Snp		(*to) += len;
4116218792Snp	} else {
4117218792Snp		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
4118218792Snp
4119218792Snp		bcopy(from, *to, portion);
4120218792Snp		from += portion;
4121218792Snp		portion = len - portion;	/* remaining */
4122218792Snp		bcopy(from, (void *)eq->desc, portion);
4123218792Snp		(*to) = (caddr_t)eq->desc + portion;
4124218792Snp	}
4125218792Snp}
4126218792Snp
4127218792Snpstatic inline void
4128220873Snpring_eq_db(struct adapter *sc, struct sge_eq *eq)
4129218792Snp{
4130248925Snp	u_int db, pending;
4131248925Snp
4132248925Snp	db = eq->doorbells;
4133248925Snp	pending = eq->pending;
4134248925Snp	if (pending > 1)
4135249392Snp		clrbit(&db, DOORBELL_WCWR);
4136248925Snp	eq->pending = 0;
4137218792Snp	wmb();
4138248925Snp
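	/*
	 * ffs() returns the 1-based index of the lowest set bit, so the
	 * switch below lands on the lowest-numbered DOORBELL_* type still
	 * enabled for this queue.  Note that WCWR was cleared above whenever
	 * more than one descriptor is pending, because a write-combined
	 * doorbell write pushes exactly one descriptor.
	 */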
4139248925Snp	switch (ffs(db) - 1) {
4140248925Snp	case DOORBELL_UDB:
4141248925Snp		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
4142248925Snp		return;
4143248925Snp
4144249392Snp	case DOORBELL_WCWR: {
4145248925Snp		volatile uint64_t *dst, *src;
4146248925Snp		int i;
4147248925Snp
4148248925Snp		/*
4149248925Snp		 * Queues whose 128B doorbell segment fits in the page do not
4150248925Snp		 * use relative qid (udb_qid is always 0).  Only queues with
4151249392Snp		 * doorbell segments can do WCWR.
4152248925Snp		 */
4153248925Snp		KASSERT(eq->udb_qid == 0 && pending == 1,
4154248925Snp		    ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
4155248925Snp		    __func__, eq->doorbells, pending, eq->pidx, eq));
4156248925Snp
4157248925Snp		dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
4158248925Snp		    UDBS_DB_OFFSET);
4159248925Snp		i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
4160248925Snp		src = (void *)&eq->desc[i];
4161248925Snp		while (src != (void *)&eq->desc[i + 1])
4162248925Snp			*dst++ = *src++;
4163248925Snp		wmb();
4164248925Snp		return;
4165248925Snp	}
4166248925Snp
4167248925Snp	case DOORBELL_UDBWC:
4168248925Snp		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
4169248925Snp		wmb();
4170248925Snp		return;
4171248925Snp
4172248925Snp	case DOORBELL_KDB:
4173248925Snp		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
4174248925Snp		    V_QID(eq->cntxt_id) | V_PIDX(pending));
4175248925Snp		return;
4176248925Snp	}
4177218792Snp}
4178218792Snp
4179219292Snpstatic inline int
4180219292Snpreclaimable(struct sge_eq *eq)
4181218792Snp{
4182219292Snp	unsigned int cidx;
4183218792Snp
4184218792Snp	cidx = eq->spg->cidx;	/* stable snapshot */
4185228561Snp	cidx = be16toh(cidx);
4186218792Snp
4187218792Snp	if (cidx >= eq->cidx)
4188219292Snp		return (cidx - eq->cidx);
4189218792Snp	else
4190219292Snp		return (cidx + eq->cap - eq->cidx);
4191219292Snp}
4192218792Snp
4193219292Snp/*
4194219292Snp * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
4195219292Snp * many as possible but stop when there are around "n" mbufs to free.
4196219292Snp *
4197219292Snp * The actual number reclaimed is provided as the return value.
4198219292Snp */
4199219292Snpstatic int
4200220873Snpreclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
4201219292Snp{
4202219292Snp	struct tx_sdesc *txsd;
4203228561Snp	struct tx_maps *txmaps;
4204219292Snp	struct tx_map *txm;
4205219292Snp	unsigned int reclaimed, maps;
4206220873Snp	struct sge_eq *eq = &txq->eq;
4207218792Snp
4208228561Snp	TXQ_LOCK_ASSERT_OWNED(txq);
4209218792Snp
4210219292Snp	if (can_reclaim == 0)
4211219292Snp		can_reclaim = reclaimable(eq);
4212219292Snp
4213218792Snp	maps = reclaimed = 0;
4214219292Snp	while (can_reclaim && maps < n) {
4215218792Snp		int ndesc;
4216218792Snp
4217220873Snp		txsd = &txq->sdesc[eq->cidx];
4218218792Snp		ndesc = txsd->desc_used;
4219218792Snp
4220218792Snp		/* Firmware doesn't return "partial" credits. */
4221218792Snp		KASSERT(can_reclaim >= ndesc,
4222218792Snp		    ("%s: unexpected number of credits: %d, %d",
4223218792Snp		    __func__, can_reclaim, ndesc));
4224218792Snp
4225220873Snp		maps += txsd->credits;
4226219292Snp
4227218792Snp		reclaimed += ndesc;
4228219292Snp		can_reclaim -= ndesc;
4229218792Snp
4230218792Snp		eq->cidx += ndesc;
4231219292Snp		if (__predict_false(eq->cidx >= eq->cap))
4232218792Snp			eq->cidx -= eq->cap;
4233219292Snp	}
4234218792Snp
4235228561Snp	txmaps = &txq->txmaps;
4236228561Snp	txm = &txmaps->maps[txmaps->map_cidx];
4237219292Snp	if (maps)
4238219292Snp		prefetch(txm->m);
4239218792Snp
4240218792Snp	eq->avail += reclaimed;
4241218792Snp	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
4242218792Snp	    ("%s: too many descriptors available", __func__));
4243218792Snp
4244228561Snp	txmaps->map_avail += maps;
4245228561Snp	KASSERT(txmaps->map_avail <= txmaps->map_total,
4246218792Snp	    ("%s: too many maps available", __func__));
4247218792Snp
4248218792Snp	while (maps--) {
4249219292Snp		struct tx_map *next;
4250218792Snp
4251219292Snp		next = txm + 1;
4252228561Snp		if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
4253228561Snp			next = txmaps->maps;
4254219292Snp		prefetch(next->m);
4255219292Snp
4256220873Snp		bus_dmamap_unload(txq->tx_tag, txm->map);
4257218792Snp		m_freem(txm->m);
4258218792Snp		txm->m = NULL;
4259218792Snp
4260219292Snp		txm = next;
4261228561Snp		if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
4262228561Snp			txmaps->map_cidx = 0;
4263218792Snp	}
4264218792Snp
4265218792Snp	return (reclaimed);
4266218792Snp}
4267218792Snp
4268218792Snpstatic void
4269218792Snpwrite_eqflush_wr(struct sge_eq *eq)
4270218792Snp{
4271218792Snp	struct fw_eq_flush_wr *wr;
4272218792Snp
4273218792Snp	EQ_LOCK_ASSERT_OWNED(eq);
4274218792Snp	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
4275228561Snp	KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
4276218792Snp
4277218792Snp	wr = (void *)&eq->desc[eq->pidx];
4278218792Snp	bzero(wr, sizeof(*wr));
4279218792Snp	wr->opcode = FW_EQ_FLUSH_WR;
4280218792Snp	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
4281218792Snp	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
4282218792Snp
4283228561Snp	eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
4284218792Snp	eq->pending++;
4285218792Snp	eq->avail--;
4286218792Snp	if (++eq->pidx == eq->cap)
4287265425Snp		eq->pidx = 0;
4288218792Snp}
4289218792Snp
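/*
 * Helper for the wrap-around path in write_sgl_to_txd(): reproduces the
 * (len0 + len1, addr0, addr1) SGL layout one flit at a time.  Relative to
 * the segments passed in, idx 0..2 produce {len[0] | len[1], addr[0],
 * addr[1]}, idx 3..5 produce {len[2] | len[3], addr[2], addr[3]}, and so on;
 * a missing trailing segment leaves the second length 0.
 */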
static __be64
get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
{
	int i = (idx / 3) * 2;

	switch (idx % 3) {
	case 0: {
		__be64 rc;

		rc = htobe32(sgl[i].ds_len);
		if (i + 1 < nsegs)
			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;

		return (rc);
	}
	case 1:
		return (htobe64(sgl[i].ds_addr));
	case 2:
		return (htobe64(sgl[i + 1].ds_addr));
	}

	return (0);
}

static void
find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp)
{
	int8_t zidx, hwidx, idx;
	uint16_t region1, region3;
	int spare, spare_needed, n;
	struct sw_zone_info *swz;
	struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0];

	/*
	 * Buffer Packing: Look for a PAGE_SIZE or larger zone which has a
	 * bufsize large enough for the max payload and cluster metadata.
	 * Otherwise settle for the largest bufsize that leaves enough room in
	 * the cluster for metadata.
	 *
	 * Without buffer packing: Look for the smallest zone which has a
	 * bufsize large enough for the max payload.  Settle for the largest
	 * bufsize available if there's nothing big enough for max payload.
	 */
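	/*
	 * Worked example (hypothetical sizes): with maxp = 2500 and packing
	 * enabled (spare_needed = CL_METADATA_SIZE), a 2KB zone can only
	 * offer a bufsize smaller than maxp, so it is remembered as the best
	 * option so far and the search moves on; a 4KB zone whose largest
	 * adequately-spared bufsize covers maxp ends the search, provided the
	 * zone is also at least safest_rx_cluster bytes.
	 */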
	spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0;
	swz = &sc->sge.sw_zone_info[0];
	hwidx = -1;
	for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) {
		if (swz->size > largest_rx_cluster) {
			if (__predict_true(hwidx != -1))
				break;

			/*
			 * This is a misconfiguration.  largest_rx_cluster is
			 * preventing us from finding a refill source.  See
			 * dev.t5nex.<n>.buffer_sizes to figure out why.
			 */
			device_printf(sc->dev, "largest_rx_cluster=%u leaves no"
			    " refill source for fl %p (dma %u).  Ignored.\n",
			    largest_rx_cluster, fl, maxp);
		}
		for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) {
			hwb = &hwb_list[idx];
			spare = swz->size - hwb->size;
			if (spare < spare_needed)
				continue;

			hwidx = idx;		/* best option so far */
			if (hwb->size >= maxp) {
				if ((fl->flags & FL_BUF_PACKING) == 0)
					goto done; /* stop looking (not packing) */

				if (swz->size >= safest_rx_cluster)
					goto done; /* stop looking (packing) */
			}
			break;		/* keep looking, next zone */
		}
	}
done:
	/* A usable hwidx has been located. */
	MPASS(hwidx != -1);
	hwb = &hwb_list[hwidx];
	zidx = hwb->zidx;
	swz = &sc->sge.sw_zone_info[zidx];
	region1 = 0;
	region3 = swz->size - hwb->size;

	/*
	 * Stay within this zone and see if there is a better match when mbuf
	 * inlining is allowed.  Remember that the hwidx's are sorted in
	 * decreasing order of size (so in increasing order of spare area).
	 */
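	/*
	 * Worked example of the arithmetic below (hypothetical values): with
	 * a 4096-byte cluster, a 3072-byte hw bufsize, MSIZE = 256, and
	 * CL_METADATA_SIZE = 64, spare = 1024 and n = (1024 - 64) / 256 = 3
	 * mbufs fit in the spare area.  The howmany() test then rejects the
	 * match if that is more inline mbufs than the number of maxp-sized
	 * payloads the hw buffer can hold.
	 */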
	for (idx = hwidx; idx != -1; idx = hwb->next) {
		hwb = &hwb_list[idx];
		spare = swz->size - hwb->size;

		if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
			break;

		/*
		 * Do not inline mbufs if doing so would violate the pad/pack
		 * boundary alignment requirement.
		 */
		if (fl_pad && (MSIZE % sc->sge.pad_boundary) != 0)
			continue;
		if (fl->flags & FL_BUF_PACKING &&
		    (MSIZE % sc->sge.pack_boundary) != 0)
			continue;

		if (spare < CL_METADATA_SIZE + MSIZE)
			continue;
		n = (spare - CL_METADATA_SIZE) / MSIZE;
		if (n > howmany(hwb->size, maxp))
			break;

		hwidx = idx;
		if (fl->flags & FL_BUF_PACKING) {
			region1 = n * MSIZE;
			region3 = spare - region1;
		} else {
			region1 = MSIZE;
			region3 = spare - region1;
			break;
		}
	}

	KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES,
	    ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp));
	KASSERT(hwidx >= 0 && hwidx < SGE_FLBUF_SIZES,
	    ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp));
	KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 ==
	    sc->sge.sw_zone_info[zidx].size,
	    ("%s: bad buffer layout for fl %p, maxp %d. "
		"cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
		sc->sge.sw_zone_info[zidx].size, region1,
		sc->sge.hw_buf_info[hwidx].size, region3));
	if (fl->flags & FL_BUF_PACKING || region1 > 0) {
		KASSERT(region3 >= CL_METADATA_SIZE,
		    ("%s: no room for metadata.  fl %p, maxp %d; "
		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
		    sc->sge.sw_zone_info[zidx].size, region1,
		    sc->sge.hw_buf_info[hwidx].size, region3));
		KASSERT(region1 % MSIZE == 0,
		    ("%s: bad mbuf region for fl %p, maxp %d. "
		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
		    sc->sge.sw_zone_info[zidx].size, region1,
		    sc->sge.hw_buf_info[hwidx].size, region3));
	}

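	/*
	 * Resulting cluster layout (the KASSERTs above verify that the three
	 * regions tile the cluster exactly):
	 *
	 * +-----------------+---------------------+--------------------+
	 * | region1         | payload             | region3            |
	 * | (inline mbufs)  | (hw buffer, DMA'd)  | (spare + metadata) |
	 * +-----------------+---------------------+--------------------+
	 */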
	fl->cll_def.zidx = zidx;
	fl->cll_def.hwidx = hwidx;
	fl->cll_def.region1 = region1;
	fl->cll_def.region3 = region3;
}

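/*
 * Picks a conservative fallback layout (cll_alt) from the precomputed "safe"
 * hw buffer sizes, intended for use when the default refill source cannot be
 * allocated.  cll_alt.hwidx is left at -1 if no fallback is available.
 */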
static void
find_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
{
	struct sge *s = &sc->sge;
	struct hw_buf_info *hwb;
	struct sw_zone_info *swz;
	int spare;
	int8_t hwidx;

	if (fl->flags & FL_BUF_PACKING)
		hwidx = s->safe_hwidx2;	/* with room for metadata */
	else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) {
		hwidx = s->safe_hwidx2;
		hwb = &s->hw_buf_info[hwidx];
		swz = &s->sw_zone_info[hwb->zidx];
		spare = swz->size - hwb->size;

		/* no good if there isn't room for an mbuf as well */
		if (spare < CL_METADATA_SIZE + MSIZE)
			hwidx = s->safe_hwidx1;
	} else
		hwidx = s->safe_hwidx1;

	if (hwidx == -1) {
		/* No fallback source */
		fl->cll_alt.hwidx = -1;
		fl->cll_alt.zidx = -1;

		return;
	}

	hwb = &s->hw_buf_info[hwidx];
	swz = &s->sw_zone_info[hwb->zidx];
	spare = swz->size - hwb->size;
	fl->cll_alt.hwidx = hwidx;
	fl->cll_alt.zidx = hwb->zidx;
	if (allow_mbufs_in_cluster &&
	    (fl_pad == 0 || (MSIZE % sc->sge.pad_boundary) == 0))
		fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
	else
		fl->cll_alt.region1 = 0;
	fl->cll_alt.region3 = spare - fl->cll_alt.region1;
}

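/*
 * The freelist could not be refilled right away; mark it starving, put it on
 * the adapter's list of starving freelists, and arm the callout that
 * replenishes them every hz / 5 ticks.
 */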
static void
add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
{
	mtx_lock(&sc->sfl_lock);
	FL_LOCK(fl);
	if ((fl->flags & FL_DOOMED) == 0) {
		fl->flags |= FL_STARVING;
		TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
	}
	FL_UNLOCK(fl);
	mtx_unlock(&sc->sfl_lock);
}

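/*
 * An egress update from the hardware: the credit-flush request written
 * earlier (EQ_CRFLUSHED) has been processed.  Wake up anyone waiting to tear
 * the queue down, or restart tx if the queue had stalled.
 */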
static int
handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
	struct adapter *sc = iq->adapter;
	struct sge *s = &sc->sge;
	struct sge_eq *eq;

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	eq = s->eqmap[qid - s->eq_start];
	EQ_LOCK(eq);
	KASSERT(eq->flags & EQ_CRFLUSHED,
	    ("%s: unsolicited egress update", __func__));
	eq->flags &= ~EQ_CRFLUSHED;
	eq->egr_update++;

	if (__predict_false(eq->flags & EQ_DOOMED))
		wakeup_one(eq);
	else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
		taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
	EQ_UNLOCK(eq);

	return (0);
}

/*
 * handle_fw_msg works for both fw4_msg and fw6_msg because the data field is
 * at the same offset in both messages.
 */
CTASSERT(offsetof(struct cpl_fw4_msg, data) ==
    offsetof(struct cpl_fw6_msg, data));

static int
handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
		const struct rss_header *rss2;

		rss2 = (const struct rss_header *)&cpl->data[0];
		return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
	}

	return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
}

static int
sysctl_uint16(SYSCTL_HANDLER_ARGS)
{
	uint16_t *id = arg1;
	int i = *id;

	return (sysctl_handle_int(oidp, &i, 0, req));
}

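/*
 * Reports the hw buffer sizes through sysctl, marking with '*' the sizes
 * that are usable given the current largest_rx_cluster setting, e.g.
 * "512* 2048* 4096* 9216 16384" (illustrative output only).
 */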
static int
sysctl_bufsizes(SYSCTL_HANDLER_ARGS)
{
	struct sge *s = arg1;
	struct hw_buf_info *hwb = &s->hw_buf_info[0];
	struct sw_zone_info *swz = &s->sw_zone_info[0];
	int i, rc;
	struct sbuf sb;
	char c;

	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
	for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
		if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster)
			c = '*';
		else
			c = ' ';	/* '\0' here would truncate the string */

		sbuf_printf(&sb, "%u%c ", hwb->size, c);
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (rc);
}