t4_sge.c revision 266965
/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/t4_sge.c 266965 2014-06-02 05:01:08Z np $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
static int fl_pktshift = 2;
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);
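/*
 * Note (illustrative, not a hardware requirement): the default of 2 puts the
 * 14-byte Ethernet header at offset 2, so the IP header that follows starts
 * on a 4-byte boundary.
 */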

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 *  0: disable padding.
 *  Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
static int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);

/*
 * Status page length.
 * -1: driver should figure out a good value.
 *  64 or 128 are the only other valid values.
 */
static int spg_len = -1;
TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 *  0: backpressure the channel instead of dropping packets right away.
 *  1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 *  0: disable buffer packing.
 *  1: enable buffer packing.
 */
static int buffer_packing = -1;
TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4:
 * ---
 * if fl_pad != 0
 * 	value specified here will be overridden by fl_pad.
 * else
 * 	power of 2 from 32 to 4096 (both inclusive) is a valid value here.
 * T5:
 * ---
 * 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
static int t4_fl_pack;
static int t5_fl_pack;
TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);
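/*
 * Example (illustrative): with hw.cxgbe.fl_pack=64 on a T5, each new frame
 * delivered into a packed buffer starts at the next 64-byte boundary after
 * the end of the previous frame.
 */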

/*
 * Allow the driver to create mbuf(s) in a cluster allocated for rx.
 * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
 * 1: ok to create mbuf(s) within a cluster if there is room.
 */
static int allow_mbufs_in_cluster = 1;
TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster);

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster);

/*
 * Size of cluster allocation that's most likely to succeed.  The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);

/* Used to track coalesced tx work request */
struct txpkts {
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	int nflits;		/* # of flits needed for the SGL */
	bus_dma_segment_t seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
    int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int);
static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
    char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
    char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct adapter *, struct sge_fl *);
static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
static void find_safe_refill_source(struct adapter *, struct sge_fl *);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);

/*
 * Called on MOD_LOAD.  Validates and calculates the SGE tunables.
 */
void
t4_sge_modload(void)
{
	int pad;

	/* set pad to a reasonable power of 2 between 16 and 4096 (inclusive) */
#if defined(__i386__) || defined(__amd64__)
	pad = max(cpu_clflush_line_size, 16);
#else
	pad = max(CACHE_LINE_SIZE, 16);
#endif
	pad = min(pad, 4096);
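	/*
	 * Example (illustrative): on a typical amd64 machine with 64-byte
	 * cache lines this yields pad = 64, so an unset fl_pad defaults to
	 * max(pad, 32) = 64 below.
	 */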

	if (fl_pktshift < 0 || fl_pktshift > 7) {
		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
		    " using 2 instead.\n", fl_pktshift);
		fl_pktshift = 2;
	}

	if (fl_pad != 0 &&
	    (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad))) {

		if (fl_pad != -1) {
			printf("Invalid hw.cxgbe.fl_pad value (%d),"
			    " using %d instead.\n", fl_pad, max(pad, 32));
		}
		fl_pad = max(pad, 32);
	}

	/*
	 * T4 has the same pad and pack boundary.  If a pad boundary is set,
	 * pack boundary must be set to the same value.  Otherwise take the
	 * specified value or auto-calculate something reasonable.
	 */
	if (fl_pad)
		t4_fl_pack = fl_pad;
	else if (fl_pack < 32 || fl_pack > 4096 || !powerof2(fl_pack))
		t4_fl_pack = max(pad, 32);
	else
		t4_fl_pack = fl_pack;

	/* T5's pack boundary is independent of the pad boundary. */
	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
	    !powerof2(fl_pack))
		t5_fl_pack = max(pad, CACHE_LINE_SIZE);
	else
		t5_fl_pack = fl_pack;

	if (spg_len != 64 && spg_len != 128) {
		int len;

#if defined(__i386__) || defined(__amd64__)
		len = cpu_clflush_line_size > 64 ? 128 : 64;
#else
		len = 64;
#endif
		if (spg_len != -1) {
			printf("Invalid hw.cxgbe.spg_len value (%d),"
			    " using %d instead.\n", spg_len, len);
		}
		spg_len = len;
	}

	if (cong_drop < -1 || cong_drop > 1) {
		printf("Invalid hw.cxgbe.cong_drop value (%d),"
		    " using 0 instead.\n", cong_drop);
		cong_drop = 0;
	}
}

void
t4_init_sge_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
}

/*
 * adap->params.vpd.cclk must be set up before this is called.
 */
void
t4_tweak_chip_settings(struct adapter *sc)
{
	int i;
	uint32_t v, m;
	int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
	int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
	int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sge_flbuf_sizes[] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
		MJUMPAGESIZE - CL_METADATA_SIZE,
		MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES,
		MCLBYTES - MSIZE - CL_METADATA_SIZE,
		MJUM9BYTES - CL_METADATA_SIZE,
		MJUM16BYTES - CL_METADATA_SIZE,
	};

	KASSERT(sc->flags & MASTER_PF,
	    ("%s: trying to change chip settings when not master.", __func__));

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	if (is_t4(sc) && (fl_pad || buffer_packing)) {
		/* t4_fl_pack has the correct value even when fl_pad = 0 */
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
	} else if (is_t5(sc) && fl_pad) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
	}
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	if (is_t5(sc) && buffer_packing) {
		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
		if (t5_fl_pack == 16)
			v = V_INGPACKBOUNDARY(0);
		else
			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
		t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
	}

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);

	KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES,
	    ("%s: hw buffer size table too big", __func__));
	for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) {
		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
		    sge_flbuf_sizes[i]);
	}

	v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
	    V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);

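	/*
	 * Holdoff timers.  If a requested timer exceeds what TIMERVALUE can
	 * encode (timer_max), it is repeatedly averaged with its predecessor
	 * until it fits; the last entry is simply clamped to timer_max.
	 */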
	KASSERT(intr_timer[0] <= timer_max,
	    ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
	    timer_max));
	for (i = 1; i < nitems(intr_timer); i++) {
		KASSERT(intr_timer[i] >= intr_timer[i - 1],
		    ("%s: timers not listed in increasing order (%d)",
		    __func__, i));

		while (intr_timer[i] > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		t4_set_reg_field(sc, A_TP_PARA_REG3, m, 0);
	}

	/* 4K, 16K, 64K, 256K DDP "page sizes" */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	m = v = F_TDDPTAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * SGE wants the buffer to be at least 64B and then a multiple of the pad
 * boundary or 16, whichever is greater.
 */
static inline int
hwsz_ok(int hwsz)
{
	int mask = max(fl_pad, 16) - 1;

	return (hwsz >= 64 && (hwsz & mask) == 0);
}
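
/*
 * Example (illustrative): with fl_pad = 32 the mask is 31, so 2048 (MCLBYTES)
 * is acceptable but 2040 is not (not a multiple of 32), and anything smaller
 * than 64 is rejected outright.
 */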

/*
 * XXX: driver really should be able to deal with unexpected settings.
 */
int
t4_read_chip_settings(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	int i, j, n, rc = 0;
	uint32_t m, v, r;
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sw_buf_sizes[] = {	/* Sorted by size */
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};
	struct sw_zone_info *swz, *safe_swz;
	struct hw_buf_info *hwb;

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	if (is_t4(sc) && (fl_pad || buffer_packing)) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
	} else if (is_t5(sc) && fl_pad) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
	}
	r = t4_read_reg(sc, A_SGE_CONTROL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
		rc = EINVAL;
	}

	if (is_t5(sc) && buffer_packing) {
		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
		if (t5_fl_pack == 16)
			v = V_INGPACKBOUNDARY(0);
		else
			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
		r = t4_read_reg(sc, A_SGE_CONTROL2);
		if ((r & m) != v) {
			device_printf(sc->dev,
			    "invalid SGE_CONTROL2(0x%x)\n", r);
			rc = EINVAL;
		}
	}
	s->pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	r = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
	if (r != v) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
		rc = EINVAL;
	}

	/* Filter out unusable hw buffer sizes entirely (mark with -2). */
	hwb = &s->hw_buf_info[0];
	for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
		r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
		hwb->size = r;
		hwb->zidx = hwsz_ok(r) ? -1 : -2;
		hwb->next = -1;
	}

	/*
	 * Create a sorted list in decreasing order of hw buffer sizes (and so
	 * increasing order of spare area) for each software zone.
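	 *
	 * Illustrative example (actual values depend on the hw sizes
	 * programmed above and on CL_METADATA_SIZE/MSIZE): the 4KB zone's
	 * list might be 4096 -> 4096 - CL_METADATA_SIZE ->
	 * 4096 - 2 * MSIZE - CL_METADATA_SIZE, i.e. hw sizes with less spare
	 * room come first.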
	 */
	n = 0;	/* no usable buffer size to begin with */
	swz = &s->sw_zone_info[0];
	safe_swz = NULL;
	for (i = 0; i < SW_ZONE_SIZES; i++, swz++) {
		int8_t head = -1, tail = -1;

		swz->size = sw_buf_sizes[i];
		swz->zone = m_getzone(swz->size);
		swz->type = m_gettype(swz->size);

		if (swz->size == safest_rx_cluster)
			safe_swz = swz;

		hwb = &s->hw_buf_info[0];
		for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
			if (hwb->zidx != -1 || hwb->size > swz->size)
				continue;
			hwb->zidx = i;
			if (head == -1)
				head = tail = j;
			else if (hwb->size < s->hw_buf_info[tail].size) {
				s->hw_buf_info[tail].next = j;
				tail = j;
			} else {
				int8_t *cur;
				struct hw_buf_info *t;

				for (cur = &head; *cur != -1; cur = &t->next) {
					t = &s->hw_buf_info[*cur];
					if (hwb->size == t->size) {
						hwb->zidx = -2;
						break;
					}
					if (hwb->size > t->size) {
						hwb->next = *cur;
						*cur = j;
						break;
					}
				}
			}
		}
		swz->head_hwidx = head;
		swz->tail_hwidx = tail;

		if (tail != -1) {
			n++;
			if (swz->size - s->hw_buf_info[tail].size >=
			    CL_METADATA_SIZE)
				sc->flags |= BUF_PACKING_OK;
		}
	}
	if (n == 0) {
		device_printf(sc->dev, "no usable SGE FL buffer size.\n");
		rc = EINVAL;
	}

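	/*
	 * Pick the "safe" fallback hw buffer indices within the safest zone:
	 * safe_hwidx1 is simply the largest hw size that fits, while
	 * safe_hwidx2 must also leave room for a cluster_metadata at the tail
	 * (ideally an inline mbuf too, i.e. spare == CL_METADATA_SIZE + MSIZE).
	 */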
	s->safe_hwidx1 = -1;
	s->safe_hwidx2 = -1;
	if (safe_swz != NULL) {
		s->safe_hwidx1 = safe_swz->head_hwidx;
		for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) {
			int spare;

			hwb = &s->hw_buf_info[i];
			spare = safe_swz->size - hwb->size;
			if (spare < CL_METADATA_SIZE)
				continue;
			if (s->safe_hwidx2 == -1 ||
			    spare == CL_METADATA_SIZE + MSIZE)
				s->safe_hwidx2 = i;
			if (spare >= CL_METADATA_SIZE + MSIZE)
				break;
		}
	}

	r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
	s->counter_val[0] = G_THRESHOLD_0(r);
	s->counter_val[1] = G_THRESHOLD_1(r);
	s->counter_val[2] = G_THRESHOLD_2(r);
	s->counter_val[3] = G_THRESHOLD_3(r);

	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
	s->timer_val[0] = G_TIMERVALUE0(r) / core_ticks_per_usec(sc);
	s->timer_val[1] = G_TIMERVALUE1(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
	s->timer_val[2] = G_TIMERVALUE2(r) / core_ticks_per_usec(sc);
	s->timer_val[3] = G_TIMERVALUE3(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
	s->timer_val[4] = G_TIMERVALUE4(r) / core_ticks_per_usec(sc);
	s->timer_val[5] = G_TIMERVALUE5(r) / core_ticks_per_usec(sc);

	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		r = t4_read_reg(sc, A_TP_PARA_REG3);
		if (r & m) {
			device_printf(sc->dev,
			    "invalid TP_PARA_REG3(0x%x)\n", r);
			rc = EINVAL;
		}
	}

	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
	if (r != v) {
		device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
		rc = EINVAL;
	}

	m = v = F_TDDPTAGTCB;
	r = t4_read_reg(sc, A_ULP_RX_CTL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
		rc = EINVAL;
	}

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	r = t4_read_reg(sc, A_TP_PARA_REG5);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
		rc = EINVAL;
	}

	r = t4_read_reg(sc, A_SGE_CONM_CTRL);
	s->fl_starve_threshold = G_EGRTHRESHOLD(r) * 2 + 1;
	if (is_t4(sc))
		s->fl_starve_threshold2 = s->fl_starve_threshold;
	else
		s->fl_starve_threshold2 = G_EGRTHRESHOLDPACKING(r) * 2 + 1;

	/* egress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_EGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->eq_s_qpp = r & M_QUEUESPERPAGEPF0;

	/* ingress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_INGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->iq_s_qpp = r & M_QUEUESPERPAGEPF0;

	t4_init_tp_params(sc);

	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
	t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);

	return (rc);
}

int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

static inline int
enable_buffer_packing(struct adapter *sc)
{

	if (sc->flags & BUF_PACKING_OK &&
	    ((is_t5(sc) && buffer_packing) ||	/* 1 or -1 both ok for T5 */
	    (is_t4(sc) && buffer_packing == 1)))
		return (1);
	return (0);
}

void
t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
	    CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A",
	    "freelist buffer sizes");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
	    NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
	    NULL, fl_pad, "payload pad boundary (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
	    NULL, spg_len, "status page size (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
	    NULL, cong_drop, "congestion drop setting");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD,
	    NULL, enable_buffer_packing(sc),
	    "pack multiple frames in one fl buffer");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
	    NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)");
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	sysctl_ctx_init(&sc->ctx);
	sc->flags |= ADAP_SYSCTL_CTX;

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

	/*
	 * Management queue.  This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queue */
	if (sc->flags & ADAP_SYSCTL_CTX) {
		sysctl_ctx_free(&sc->ctx);
		sc->flags &= ~ADAP_SYSCTL_CTX;
	}

	free_mgmtq(sc);
	free_fwq(sc);

	return (0);
}

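/*
 * First interrupt vector (MSI-X slot) available to this port's queues: skips
 * the T4_EXTRA_INTR vectors reserved for non-port interrupts and the vectors
 * already claimed by the rx queues of lower-numbered ports.
 */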
static inline int
first_vector(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc = T4_EXTRA_INTR, i;

	if (sc->intr_count == 1)
		return (0);

	for_each_port(sc, i) {
		struct port_info *p = sc->port[i];

		if (i == pi->port_id)
			break;

#ifdef TCP_OFFLOAD
		if (sc->flags & INTR_DIRECT)
			rc += p->nrxq + p->nofldrxq;
		else
			rc += max(p->nrxq, p->nofldrxq);
#else
		/*
		 * Not compiled with offload support and intr_count > 1.  Only
		 * NIC queues exist and they'd better be taking direct
		 * interrupts.
		 */
		KASSERT(sc->flags & INTR_DIRECT,
		    ("%s: intr_count %d, !INTR_DIRECT", __func__,
		    sc->intr_count));

		rc += p->nrxq;
#endif
	}

	return (rc);
}

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
	struct adapter *sc = pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

#ifdef TCP_OFFLOAD
	if (sc->flags & INTR_DIRECT) {
		idx %= pi->nrxq + pi->nofldrxq;

		if (idx >= pi->nrxq) {
			idx -= pi->nrxq;
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
		} else
			iq = &s->rxq[pi->first_rxq + idx].iq;

	} else {
		idx %= max(pi->nrxq, pi->nofldrxq);

		if (pi->nrxq >= pi->nofldrxq)
			iq = &s->rxq[pi->first_rxq + idx].iq;
		else
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
	}
#else
	/*
	 * Not compiled with offload support and intr_count > 1.  Only NIC
	 * queues exist and they'd better be taking direct interrupts.
	 */
	KASSERT(sc->flags & INTR_DIRECT,
	    ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));

	idx %= pi->nrxq;
	iq = &s->rxq[pi->first_rxq + idx].iq;
#endif

	KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__));
	return (iq);
}

/* Maximum payload that can be delivered with a single iq descriptor */
static inline int
mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
{
	int payload;

#ifdef TCP_OFFLOAD
	if (toe) {
		payload = sc->tt.rx_coalesce ?
		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu;
	} else {
#endif
		/* large enough even when hw VLAN extraction is disabled */
		payload = fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
		    mtu;
#ifdef TCP_OFFLOAD
	}
#endif
	payload = roundup2(payload, fl_pad);

	return (payload);
}
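
/*
 * Example (illustrative, assuming the defaults fl_pktshift = 2 and
 * fl_pad = 32): for a 1500 byte MTU the NIC case works out to
 * roundup2(2 + 14 + 4 + 1500, 32) = 1536 bytes.
 */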

int
t4_setup_port_queues(struct port_info *pi)
{
	int rc = 0, i, j, intr_idx, iqid;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
	struct sge_wrq *ctrlq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
	struct sysctl_oid *oid2 = NULL;
#endif
	char name[16];
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;
	struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int maxp, pack, mtu = ifp->if_mtu;

	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
	    NULL, "rx queues");

#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
	}
#endif

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = first_vector(pi);

	/*
	 * First pass over all rx queues (NIC and TOE):
	 * a) initialize iq and fl
	 * b) allocate queue iff it will take direct interrupts.
	 */
	maxp = mtu_to_max_payload(sc, mtu, 0);
	pack = enable_buffer_packing(sc);
	for_each_rxq(pi, i, rxq) {

		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
		    RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);

		if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
		    || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
#endif
		   ) {
			rxq->iq.flags |= IQ_INTR;
			rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}

#ifdef TCP_OFFLOAD
	maxp = mtu_to_max_payload(sc, mtu, 1);
	for_each_ofld_rxq(pi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
		    pi->qsize_rxq, RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);

		if (sc->flags & INTR_DIRECT ||
		    (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
			ofld_rxq->iq.flags |= IQ_INTR;
			rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#endif

	/*
	 * Second pass over all rx queues (NIC and TOE).  The queues forwarding
	 * their interrupts are allocated now.
	 */
	j = 0;
	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Now the tx queues.  Only one pass needed.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
	    NULL, "tx queues");
	j = 0;
	for_each_txq(pi, i, txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
		    name);

		rc = alloc_txq(pi, txq, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
	for_each_ofld_txq(pi, i, ofld_txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
		    iqid, name);

		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "offload tx queue");

		rc = alloc_wrq(sc, pi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Finally, the control queue.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
	    NULL, "ctrl queue");
	ctrlq = &sc->sge.ctrlq[pi->port_id];
	iqid = port_intr_iq(pi, 0)->cntxt_id;
	snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
	init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
	rc = alloc_wrq(sc, pi, ctrlq, oid);

done:
	if (rc)
		t4_teardown_port_queues(pi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_port_queues(struct port_info *pi)
{
	int i;
	struct adapter *sc = pi->adapter;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif

	/* Do this before freeing the queues */
	if (pi->flags & PORT_SYSCTL_CTX) {
		sysctl_ctx_free(&pi->ctx);
		pi->flags &= ~PORT_SYSCTL_CTX;
	}

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

	for_each_txq(pi, i, txq) {
		free_txq(pi, txq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_txq(pi, i, ofld_txq) {
		free_wrq(sc, ofld_txq);
	}
#endif

	/*
	 * Then take down the rx queues that forward their interrupts, as they
	 * reference other rx queues.
	 */

	for_each_rxq(pi, i, rxq) {
		if ((rxq->iq.flags & IQ_INTR) == 0)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	/*
	 * Then take down the rx queues that take direct interrupts.
	 */

	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	return (0);
}

/*
 * Deals with errors and the firmware event queue.  All data rx queues forward
 * their interrupt to the firmware event queue.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	t4_intr_err(arg);
	if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(fwq, 0);
		atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
	}
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;

	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
	t4_slow_intr_handler(sc);
}

void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

/*
 * Deals with anything and everything on the given ingress queue.
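 *
 * A non-zero budget limits how many descriptors may be processed before this
 * returns EINPROGRESS (work left over); with a budget of 0 it runs until the
 * queue is drained and returns 0.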
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */
	struct sge_fl *fl = &rxq->fl;		/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	const struct rss_header *rss;
	int ndescs = 0, limit, fl_bufs_used = 0;
	int rsp_type;
	uint32_t lq;
	struct mbuf *m0;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
#if defined(INET) || defined(INET6)
	const struct timeval lro_timeout = {0, sc->lro_timeout};
#endif

	limit = budget ? budget : iq->qsize / 8;

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while (is_new_response(iq, &ctrl)) {

			rmb();

			m0 = NULL;
			rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
			lq = be32toh(ctrl->pldbuflen_qid);
			rss = (const void *)iq->cdesc;

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:

				KASSERT(iq->flags & IQ_HAS_FL,
				    ("%s: data for an iq (%p) with no freelist",
				    __func__, iq));

				m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
				if (__predict_false(m0 == NULL))
					goto process_iql;
#ifdef T4_PKT_TIMESTAMP
				/*
				 * 60 bit timestamp for the payload is
				 * *(uint64_t *)m0->m_pktdat.  Note that it is
				 * in the leading free-space in the mbuf.  The
				 * kernel can clobber it during a pullup,
				 * m_copymdata, etc.  You need to make sure that
				 * the mbuf reaches you unmolested if you care
				 * about the timestamp.
				 */
				*(uint64_t *)m0->m_pktdat =
				    be64toh(ctrl->u.last_flit) &
				    0xfffffffffffffff;
#endif

				/* fall through */

			case X_RSPD_TYPE_CPL:
				KASSERT(rss->opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    rss->opcode));
				sc->cpl_handler[rss->opcode](iq, rss, m0);
				break;

			case X_RSPD_TYPE_INTR:

				/*
				 * Interrupts should be forwarded only to queues
				 * that are not forwarding their interrupts.
				 * This means service_iq can recurse but only 1
				 * level deep.
				 */
				KASSERT(budget == 0,
				    ("%s: budget %u, rsp_type %u", __func__,
				    budget, rsp_type));

				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (lq >= 1024) {
					sc->an_handler(iq, ctrl);
					break;
				}

				q = sc->sge.iqmap[lq - sc->sge.iq_start];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq(q, q->qsize / 8) == 0) {
						atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			if (fl_bufs_used >= 16) {
				FL_LOCK(fl);
				fl->needed += fl_bufs_used;
				refill_fl(sc, fl, 32);
				FL_UNLOCK(fl);
				fl_bufs_used = 0;
			}

			iq_next(iq);
			if (++ndescs == limit) {
				t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

#if defined(INET) || defined(INET6)
				if (iq->flags & IQ_LRO_ENABLED &&
				    sc->lro_timeout != 0) {
					tcp_lro_flush_inactive(&rxq->lro,
					    &lro_timeout);
				}
#endif

				if (budget)
					return (EINPROGRESS);
			}
		}

process_iql:
		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq(q, q->qsize / 8) == 0)
			atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		struct lro_ctrl *lro = &rxq->lro;
		struct lro_entry *l;

		while (!SLIST_EMPTY(&lro->lro_active)) {
			l = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, l);
		}
	}
#endif

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	if (iq->flags & IQ_HAS_FL) {
		int starved;

		FL_LOCK(fl);
		fl->needed += fl_bufs_used;
		starved = refill_fl(sc, fl, 64);
		FL_UNLOCK(fl);
		if (__predict_false(starved != 0))
			add_fl_to_sfl(sc, fl);
	}

	return (0);
}

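/*
 * A cluster carries metadata (a refcount at its tail) whenever it is shared:
 * either because buffer packing may hand out multiple frames from it, or
 * because region1 reserves room for inline mbufs.
 */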
static inline int
cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll)
{
	int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0;

	if (rc)
		MPASS(cll->region3 >= CL_METADATA_SIZE);

	return (rc);
}

static inline struct cluster_metadata *
cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll,
    caddr_t cl)
{

	if (cl_has_metadata(fl, cll)) {
		struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];

		return ((struct cluster_metadata *)(cl + swz->size) - 1);
	}
	return (NULL);
}

1439255050Snpstatic int
1440255050Snprxb_free(struct mbuf *m, void *arg1, void *arg2)
1441255050Snp{
1442255050Snp	uma_zone_t zone = arg1;
1443255050Snp	caddr_t cl = arg2;
1444255050Snp
1445255050Snp	uma_zfree(zone, cl);
1446255050Snp
1447255050Snp	return (EXT_FREE_OK);
1448255050Snp}
1449255050Snp
/*
 * The mbuf returned by this function could be allocated from zone_mbuf or
 * constructed in spare room in the cluster.
 *
 * The mbuf carries the payload in one of these ways:
 * a) frame copied into the data area of a zone_mbuf mbuf.
 * b) cluster without metadata attached to a zone_mbuf mbuf with m_cljset.
 * c) cluster with metadata attached to an mbuf carved out of the cluster
 *    itself ("inline" mbuf) with m_extaddref.
 * d) cluster with metadata attached to a zone_mbuf mbuf with m_extaddref.
 */
static struct mbuf *
get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
{
	struct mbuf *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct cluster_layout *cll = &sd->cll;
	struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
	struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
	struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
	int len, padded_len;
	caddr_t payload;

	len = min(total, hwb->size - fl->rx_offset);
	padded_len = roundup2(len, fl_pad);
	payload = sd->cl + cll->region1 + fl->rx_offset;

	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {

		/*
		 * Copy payload into a freshly allocated mbuf.
		 */

		m = flags & M_PKTHDR ?
		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (NULL);
		fl->mbuf_allocated++;
#ifdef T4_PKT_TIMESTAMP
		/* Leave room for a timestamp */
		m->m_data += 8;
#endif
		/* copy data to mbuf */
		bcopy(payload, mtod(m, caddr_t), len);

	} else if (sd->nmbuf * MSIZE < cll->region1) {

		/*
		 * There's spare room in the cluster for an mbuf.  Create one
		 * and associate it with the payload that's in the cluster too.
		 */

		MPASS(clm != NULL);
		m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE);
		/* No bzero required */
		if (m_init(m, NULL, 0, M_NOWAIT, MT_DATA, flags | M_NOFREE))
			return (NULL);
		fl->mbuf_inlined++;
		m_extaddref(m, payload, padded_len, &clm->refcount, rxb_free,
		    swz->zone, sd->cl);
		sd->nmbuf++;

	} else {

		/*
		 * Grab an mbuf from zone_mbuf and associate it with the
		 * payload in the cluster.
		 */

		m = flags & M_PKTHDR ?
		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (NULL);
		fl->mbuf_allocated++;
		if (clm != NULL)
			m_extaddref(m, payload, padded_len, &clm->refcount,
			    rxb_free, swz->zone, sd->cl);
		else {
			m_cljset(m, sd->cl, swz->type);
			sd->cl = NULL;	/* consumed, not a recycle candidate */
		}
	}
	if (flags & M_PKTHDR)
		m->m_pkthdr.len = total;
	m->m_len = len;

	if (fl->flags & FL_BUF_PACKING) {
		fl->rx_offset += roundup2(padded_len, sc->sge.pack_boundary);
		MPASS(fl->rx_offset <= hwb->size);
		if (fl->rx_offset < hwb->size)
			return (m);	/* without advancing the cidx */
	}

	if (__predict_false(++fl->cidx == fl->cap))
		fl->cidx = 0;
	fl->rx_offset = 0;

	return (m);
}

static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
    int *fl_bufs_used)
{
	struct mbuf *m0, *m, **pnext;
	u_int nbuf, len;

	/*
	 * No assertion for the fl lock because we don't need it.  This routine
	 * is called only from the rx interrupt handler and it only updates
	 * fl->cidx.  (Contrast that with fl->pidx/fl->needed which could be
	 * updated in the rx interrupt handler or the starvation helper routine.
	 * That's why code that manipulates fl->pidx/fl->needed needs the fl
	 * lock but this routine does not).
	 */

	nbuf = 0;
	len = G_RSPD_LEN(len_newbuf);
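	/*
	 * len_newbuf packs two things: the total frame length (extracted
	 * above) and the F_RSPD_NEWBUF flag that tells us whether the
	 * hardware has moved on to a new hw buffer.
	 */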
	if (__predict_false(fl->m0 != NULL)) {
		M_ASSERTPKTHDR(fl->m0);
		MPASS(len == fl->m0->m_pkthdr.len);
		MPASS(fl->remaining < len);

		m0 = fl->m0;
		pnext = fl->pnext;
		len = fl->remaining;
		fl->m0 = NULL;
		goto get_segment;
	}

	if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
		nbuf++;
		fl->rx_offset = 0;
		if (__predict_false(++fl->cidx == fl->cap))
			fl->cidx = 0;
	}

	/*
	 * Payload starts at rx_offset in the current hw buffer.  Its length is
	 * 'len' and it may span multiple hw buffers.
	 */

	m0 = get_scatter_segment(sc, fl, len, M_PKTHDR);
	if (m0 == NULL)
		goto done;
	len -= m0->m_len;
	pnext = &m0->m_next;
	while (len > 0) {
		nbuf++;
get_segment:
		MPASS(fl->rx_offset == 0);
		m = get_scatter_segment(sc, fl, len, 0);
		if (m == NULL) {
			fl->m0 = m0;
			fl->pnext = pnext;
			fl->remaining = len;
			m0 = NULL;
			goto done;
		}
		*pnext = m;
		pnext = &m->m_next;
		len -= m->m_len;
	}
	*pnext = NULL;
	if (fl->rx_offset == 0)
		nbuf++;
done:
	(*fl_bufs_used) += nbuf;
	return (m0);
}

static int
t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
{
	struct sge_rxq *rxq = iq_to_rxq(iq);
	struct ifnet *ifp = rxq->ifp;
	const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxq->lro;
#endif

	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
	    rss->opcode));

	m0->m_pkthdr.len -= fl_pktshift;
	m0->m_len -= fl_pktshift;
	m0->m_data += fl_pktshift;

	m0->m_pkthdr.rcvif = ifp;
	m0->m_flags |= M_FLOWID;
	m0->m_pkthdr.flowid = be32toh(rss->hash_val);

	if (cpl->csum_calc && !cpl->err_vec) {
		if (ifp->if_capenable & IFCAP_RXCSUM &&
		    cpl->l2info & htobe32(F_RXF_IP)) {
			m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
		    cpl->l2info & htobe32(F_RXF_IP6)) {
			m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
			    CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		}

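		/*
		 * For an IP fragment the hardware cannot verify the pseudo
		 * header, so the raw 16-bit checksum is handed to the stack;
		 * for complete packets csum_data of 0xffff indicates a fully
		 * verified checksum.
		 */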
		if (__predict_false(cpl->ip_frag))
			m0->m_pkthdr.csum_data = be16toh(cpl->csum);
		else
			m0->m_pkthdr.csum_data = 0xffff;
	}

	if (cpl->vlan_ex) {
		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
		m0->m_flags |= M_VLANTAG;
		rxq->vlan_extraction++;
	}

#if defined(INET) || defined(INET6)
	if (cpl->l2info & htobe32(F_RXF_LRO) &&
	    iq->flags & IQ_LRO_ENABLED &&
	    tcp_lro_rx(lro, m0, 0) == 0) {
		/* queued for LRO */
	} else
#endif
	ifp->if_input(ifp, m0);

	return (0);
}

/*
 * Doesn't fail.  Holds on to work requests it can't send right away.
 */
void
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
{
	struct sge_eq *eq = &wrq->eq;
	int can_reclaim;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(wrq);
#ifdef TCP_OFFLOAD
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
	    (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#else
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#endif

	if (__predict_true(wr != NULL))
		STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);

	can_reclaim = reclaimable(eq);
	if (__predict_false(eq->flags & EQ_STALLED)) {
		if (can_reclaim < tx_resume_threshold(eq))
			return;
		eq->flags &= ~EQ_STALLED;
		eq->unstalled++;
	}
	eq->cidx += can_reclaim;
	eq->avail += can_reclaim;
	if (__predict_false(eq->cidx >= eq->cap))
		eq->cidx -= eq->cap;

	while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
		int ndesc;

		if (__predict_false(wr->wr_len < 0 ||
		    wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {

#ifdef INVARIANTS
			panic("%s: work request with length %d", __func__,
			    wr->wr_len);
#endif
#ifdef KDB
			kdb_backtrace();
#endif
			log(LOG_ERR, "%s: %s work request with length %d",
			    device_get_nameunit(sc->dev), __func__, wr->wr_len);
			STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
			free_wrqe(wr);
			continue;
		}

		ndesc = howmany(wr->wr_len, EQ_ESIZE);
		if (eq->avail < ndesc) {
			wrq->no_desc++;
			break;
		}

		dst = (void *)&eq->desc[eq->pidx];
		copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);

		eq->pidx += ndesc;
		eq->avail -= ndesc;
		if (__predict_false(eq->pidx >= eq->cap))
			eq->pidx -= eq->cap;

		eq->pending += ndesc;
		if (eq->pending >= 8)
			ring_eq_db(sc, eq);

		wrq->tx_wrs++;
		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
		free_wrqe(wr);

		if (eq->avail < 8) {
			can_reclaim = reclaimable(eq);
			eq->cidx += can_reclaim;
			eq->avail += can_reclaim;
			if (__predict_false(eq->cidx >= eq->cap))
				eq->cidx -= eq->cap;
		}
	}

	if (eq->pending)
		ring_eq_db(sc, eq);

	if (wr != NULL) {
		eq->flags |= EQ_STALLED;
		if (callout_pending(&eq->tx_callout) == 0)
			callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
	}
}

/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
#define TXPKTS_PKT_HDR ((\
    sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
#define TXPKTS_WR_HDR (\
    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
    TXPKTS_PKT_HDR)

/* Header of a tx WR, before SGL of first packet (in flits) */
#define TXPKT_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8 )

/* Header of a tx LSO WR, before SGL of first packet (in flits) */
#define TXPKT_LSO_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_lso_core) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8 )

int
t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
{
	struct port_info *pi = (void *)ifp->if_softc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;
	struct buf_ring *br = txq->br;
	struct mbuf *next;
	int rc, coalescing, can_reclaim;
	struct txpkts txpkts;
	struct sgl sgl;

	TXQ_LOCK_ASSERT_OWNED(txq);
	KASSERT(m, ("%s: called with nothing to do.", __func__));
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));

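	/*
	 * Overall flow: reclaim completed descriptors, then dequeue frames
	 * from the buf_ring.  A frame is coalesced into a multi-packet txpkts
	 * WR while more frames are waiting; otherwise it goes out in its own
	 * txpkt WR.  The doorbell is rung once at least 8 descriptors are
	 * pending.
	 */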
	prefetch(&eq->desc[eq->pidx]);
	prefetch(&txq->sdesc[eq->pidx]);

	txpkts.npkt = 0;/* indicates there's nothing in txpkts */
	coalescing = 0;

	can_reclaim = reclaimable(eq);
	if (__predict_false(eq->flags & EQ_STALLED)) {
		if (can_reclaim < tx_resume_threshold(eq)) {
			txq->m = m;
			return (0);
		}
		eq->flags &= ~EQ_STALLED;
		eq->unstalled++;
	}

	if (__predict_false(eq->flags & EQ_DOOMED)) {
		m_freem(m);
		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
			m_freem(m);
		return (ENETDOWN);
	}

	if (eq->avail < 8 && can_reclaim)
		reclaim_tx_descs(txq, can_reclaim, 32);

	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {

		if (eq->avail < 8)
			break;

		next = m->m_nextpkt;
		m->m_nextpkt = NULL;

		if (next || buf_ring_peek(br))
			coalescing = 1;

		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
		if (rc != 0) {
			if (rc == ENOMEM) {

				/* Short of resources, suspend tx */

				m->m_nextpkt = next;
				break;
			}

			/*
			 * Unrecoverable error for this packet, throw it away
			 * and move on to the next.  get_pkt_sgl may already
			 * have freed m (it will be NULL in that case and the
			 * m_freem here is still safe).
			 */

			m_freem(m);
			continue;
		}

		if (coalescing &&
		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {

			/* Successfully absorbed into txpkts */

			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
			goto doorbell;
		}

		/*
		 * We weren't coalescing to begin with, or current frame could
		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
		 * given to it can't be coalesced).  Either way there should be
		 * nothing in txpkts.
		 */
		KASSERT(txpkts.npkt == 0,
		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));

		/* We're sending out individual packets now */
		coalescing = 0;

		if (eq->avail < 8)
			reclaim_tx_descs(txq, 0, 8);
		rc = write_txpkt_wr(pi, txq, m, &sgl);
		if (rc != 0) {

			/* Short of hardware descriptors, suspend tx */

			/*
			 * This is an unlikely but expensive failure.  We've
			 * done all the hard work (DMA mappings etc.) and now we
			 * can't send out the packet.  What's worse, we have to
			 * spend even more time freeing up everything in sgl.
			 */
			txq->no_desc++;
			free_pkt_sgl(txq, &sgl);

			m->m_nextpkt = next;
			break;
		}

		ETHER_BPF_MTAP(ifp, m);
		if (sgl.nsegs == 0)
			m_freem(m);
doorbell:
		if (eq->pending >= 8)
			ring_eq_db(sc, eq);

		can_reclaim = reclaimable(eq);
		if (can_reclaim >= 32)
			reclaim_tx_descs(txq, can_reclaim, 64);
	}

	if (txpkts.npkt > 0)
		write_txpkts_wr(txq, &txpkts);

	/*
	 * m not NULL means there was an error but we haven't thrown it away.
	 * This can happen when we're short of tx descriptors (no_desc) or maybe
	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
	 * will get things going again.
	 */
	if (m && !(eq->flags & EQ_CRFLUSHED)) {
		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];

		/*
		 * If EQ_CRFLUSHED is not set then we know we have at least one
		 * available descriptor because any WR that reduces eq->avail to
		 * 0 also sets EQ_CRFLUSHED.
		 */
		KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));

		txsd->desc_used = 1;
		txsd->credits = 0;
		write_eqflush_wr(eq);
	}
	txq->m = m;

	if (eq->pending)
		ring_eq_db(sc, eq);

	reclaim_tx_descs(txq, 0, 128);

	if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
		callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);

	return (0);
}

void
t4_update_fl_bufsize(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct adapter *sc = pi->adapter;
	struct sge_rxq *rxq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
	struct sge_fl *fl;
	int i, maxp, mtu = ifp->if_mtu;

	maxp = mtu_to_max_payload(sc, mtu, 0);
	for_each_rxq(pi, i, rxq) {
		fl = &rxq->fl;

		FL_LOCK(fl);
		find_best_refill_source(sc, fl, maxp);
		FL_UNLOCK(fl);
	}
#ifdef TCP_OFFLOAD
	maxp = mtu_to_max_payload(sc, mtu, 1);
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		fl = &ofld_rxq->fl;

		FL_LOCK(fl);
		find_best_refill_source(sc, fl, maxp);
		FL_UNLOCK(fl);
	}
#endif
}

int
can_resume_tx(struct sge_eq *eq)
{
	return (reclaimable(eq) >= tx_resume_threshold(eq));
}

static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
    int qsize, int esize)
{
	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
	    ("%s: bad pktc_idx %d", __func__, pktc_idx));

	iq->flags = 0;
	iq->adapter = sc;
	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
	iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
	if (pktc_idx >= 0) {
		iq->intr_params |= F_QINTR_CNT_EN;
		iq->intr_pktc_idx = pktc_idx;
	}
	iq->qsize = roundup2(qsize, 16);	/* See FW_IQ_CMD/iqsize */
	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
}

static inline void
init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, int pack,
    char *name)
{

	fl->qsize = qsize;
	strlcpy(fl->lockname, name, sizeof(fl->lockname));
	if (pack)
		fl->flags |= FL_BUF_PACKING;
	find_best_refill_source(sc, fl, maxp);
	find_safe_refill_source(sc, fl);
}

static inline void
init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
    uint16_t iqid, char *name)
{
	KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
	KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));

	eq->flags = eqtype & EQ_TYPEMASK;
	eq->tx_chan = tx_chan;
	eq->iqid = iqid;
	eq->qsize = qsize;
	strlcpy(eq->lockname, name, sizeof(eq->lockname));

	TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
	callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
}

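/*
 * Standard three-step busdma setup for a descriptor ring: create a tag that
 * describes the constraints (512 byte alignment, one physically contiguous
 * segment), allocate coherent zeroed memory, and load the map to obtain the
 * ring's bus address.
 */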
static int
alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_addr_t *pa, void **va)
{
	int rc;

	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
	if (rc != 0) {
		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
		goto done;
	}

	rc = bus_dmamem_alloc(*tag, va,
	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
	if (rc != 0) {
		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
		goto done;
	}

	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
	if (rc != 0) {
		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
		goto done;
	}
done:
	if (rc)
		free_ring(sc, *tag, *map, *pa, *va);

	return (rc);
}

static int
free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
    bus_addr_t pa, void *va)
{
	if (pa)
		bus_dmamap_unload(tag, map);
	if (va)
		bus_dmamem_free(tag, va, map);
	if (tag)
		bus_dma_tag_destroy(tag);

	return (0);
}

/*
 * Allocates the ring for an ingress queue and an optional freelist.  If the
 * freelist is specified it will be allocated and then associated with the
 * ingress queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 *
 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
 * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
 * the abs_id of the ingress queue to which its interrupts should be forwarded.
 */
static int
alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
    int intr_idx, int cong)
{
	int rc, i, cntxt_id;
	size_t len;
	struct fw_iq_cmd c;
	struct adapter *sc = iq->adapter;
	__be32 v = 0;

	len = iq->qsize * iq->esize;
	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
	    (void **)&iq->desc);
	if (rc != 0)
		return (rc);

	bzero(&c, sizeof(c));
	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
	    V_FW_IQ_CMD_VFN(0));

	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
	    FW_LEN16(c));

	/* Special handling for firmware event queue */
	if (iq == &sc->sge.fwq)
		v |= F_FW_IQ_CMD_IQASYNCH;

	if (iq->flags & IQ_INTR) {
		KASSERT(intr_idx < sc->intr_count,
		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
	} else
		v |= F_FW_IQ_CMD_IQANDST;
	v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);

	c.type_to_iqandstindex = htobe32(v |
	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
	    V_FW_IQ_CMD_VIID(pi->viid) |
	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
	    F_FW_IQ_CMD_IQGTSMODE |
	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
	c.iqsize = htobe16(iq->qsize);
	c.iqaddr = htobe64(iq->ba);
	if (cong >= 0)
		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);

	if (fl) {
		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);

		len = fl->qsize * RX_FL_ESIZE;
		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
		    &fl->ba, (void **)&fl->desc);
		if (rc)
			return (rc);

		/* Allocate space for one software descriptor per buffer. */
		fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
		rc = alloc_fl_sdesc(fl);
		if (rc != 0) {
			device_printf(sc->dev,
			    "failed to setup fl software descriptors: %d\n",
			    rc);
			return (rc);
		}
		fl->needed = fl->cap;
		fl->lowat = fl->flags & FL_BUF_PACKING ?
		    roundup2(sc->sge.fl_starve_threshold2, 8) :
		    roundup2(sc->sge.fl_starve_threshold, 8);

		c.iqns_to_fl0congen |=
		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
			(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
			(fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
			    0));
		if (cong >= 0) {
			c.iqns_to_fl0congen |=
				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
				    F_FW_IQ_CMD_FL0CONGCIF |
				    F_FW_IQ_CMD_FL0CONGEN);
		}
		c.fl0dcaen_to_fl0cidxfthresh =
		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
		c.fl0size = htobe16(fl->qsize);
		c.fl0addr = htobe64(fl->ba);
	}

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create ingress queue: %d\n", rc);
		return (rc);
	}

	iq->cdesc = iq->desc;
	iq->cidx = 0;
	iq->gen = 1;
	iq->intr_next = iq->intr_params;
	iq->cntxt_id = be16toh(c.iqid);
	iq->abs_id = be16toh(c.physiqid);
	iq->flags |= IQ_ALLOCATED;

	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
	if (cntxt_id >= sc->sge.niq) {
		panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.niq - 1);
	}
	sc->sge.iqmap[cntxt_id] = iq;

	if (fl) {
		fl->cntxt_id = be16toh(c.fl0id);
		fl->pidx = fl->cidx = 0;

		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
		if (cntxt_id >= sc->sge.neq) {
			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
			    __func__, cntxt_id, sc->sge.neq - 1);
		}
		sc->sge.eqmap[cntxt_id] = (void *)fl;

		FL_LOCK(fl);
		/* Enough to make sure the SGE doesn't think it's starved */
		refill_fl(sc, fl, fl->lowat);
		FL_UNLOCK(fl);

		iq->flags |= IQ_HAS_FL;
	}

	if (is_t5(sc) && cong >= 0) {
		uint32_t param, val;

		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
		    V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
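		/*
		 * The layout of val is not spelled out here.  Going by how it
		 * is assembled: the field starting at bit 19 appears to pick
		 * the congestion response (1 when cong == 0, 2 for a specific
		 * channel set), and each enabled channel i sets the low bit
		 * of its own nibble (bit i * 4) in the bottom 16 bits.
		 */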
		if (cong == 0)
			val = 1 << 19;
		else {
			val = 2 << 19;
			for (i = 0; i < 4; i++) {
				if (cong & (1 << i))
					val |= 1 << (i << 2);
			}
		}

		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
		if (rc != 0) {
			/* report error but carry on */
			device_printf(sc->dev,
			    "failed to set congestion manager context for "
			    "ingress queue %d: %d\n", iq->cntxt_id, rc);
		}
	}

	/* Enable IQ interrupts */
	atomic_store_rel_int(&iq->state, IQS_IDLE);
	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
	    V_INGRESSQID(iq->cntxt_id));

	return (0);
}

static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
	int rc;
	struct adapter *sc = iq->adapter;
	device_t dev;

	if (sc == NULL)
		return (0);	/* nothing to do */

	dev = pi ? pi->dev : sc->dev;

	if (iq->flags & IQ_ALLOCATED) {
		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
		    fl ? fl->cntxt_id : 0xffff, 0xffff);
		if (rc != 0) {
			device_printf(dev,
			    "failed to free queue %p: %d\n", iq, rc);
			return (rc);
		}
		iq->flags &= ~IQ_ALLOCATED;
	}

	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);

	bzero(iq, sizeof(*iq));

	if (fl) {
		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
		    fl->desc);

		if (fl->sdesc)
			free_fl_sdesc(sc, fl);

		if (mtx_initialized(&fl->fl_lock))
			mtx_destroy(&fl->fl_lock);

		bzero(fl, sizeof(*fl));
	}

	return (0);
}

static void
add_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
    struct sge_fl *fl)
{
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
	    "freelist");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
	    CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
	    "SGE context id of the freelist");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
	    0, "consumer index");
	if (fl->flags & FL_BUF_PACKING) {
		SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset",
		    CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset");
	}
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx,
	    0, "producer index");
	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated",
	    CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated");
	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined",
	    CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters");
	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated",
	    CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated");
	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled",
	    CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled");
	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled",
	    CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)");
}

static int
alloc_fwq(struct adapter *sc)
{
	int rc, intr_idx;
	struct sge_iq *fwq = &sc->sge.fwq;
	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
	fwq->flags |= IQ_INTR;	/* always */
	intr_idx = sc->intr_count > 1 ? 1 : 0;
	rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create firmware event queue: %d\n", rc);
		return (rc);
	}

	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
	    NULL, "firmware event queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
	    CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
	    "absolute id of the queue");
	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
	    "SGE context id of the queue");
	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
	    "consumer index");

	return (0);
}

static int
free_fwq(struct adapter *sc)
{
	return free_iq_fl(NULL, &sc->sge.fwq, NULL);
}

static int
alloc_mgmtq(struct adapter *sc)
{
	int rc;
	struct sge_wrq *mgmtq = &sc->sge.mgmtq;
	char name[16];
	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
	    NULL, "management queue");

	snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
	init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
	    sc->sge.fwq.cntxt_id, name);
	rc = alloc_wrq(sc, NULL, mgmtq, oid);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create management queue: %d\n", rc);
		return (rc);
	}

	return (0);
}

static int
free_mgmtq(struct adapter *sc)
{

	return free_wrq(sc, &sc->sge.mgmtq);
}

static inline int
tnl_cong(struct port_info *pi)
{

	if (cong_drop == -1)
		return (-1);
	else if (cong_drop == 1)
		return (0);
	else
		return (pi->rx_chan_map);
}

static int
alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
    struct sysctl_oid *oid)
{
	int rc;
	struct sysctl_oid_list *children;
	char name[16];

	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
	if (rc != 0)
		return (rc);

	FL_LOCK(&rxq->fl);
	refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
	FL_UNLOCK(&rxq->fl);

#if defined(INET) || defined(INET6)
	rc = tcp_lro_init(&rxq->lro);
	if (rc != 0)
		return (rc);
	rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */

	if (pi->ifp->if_capenable & IFCAP_LRO)
		rxq->iq.flags |= IQ_LRO_ENABLED;
#endif
	rxq->ifp = pi->ifp;

	children = SYSCTL_CHILDREN(oid);

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "rx queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
	    "absolute id of the queue");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
	    "SGE context id of the queue");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
	    "consumer index");
#if defined(INET) || defined(INET6)
	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
	    &rxq->lro.lro_queued, 0, NULL);
	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
	    &rxq->lro.lro_flushed, 0, NULL);
#endif
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
	    &rxq->rxcsum, "# of times hardware assisted with checksum");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
	    CTLFLAG_RD, &rxq->vlan_extraction,
	    "# of times hardware extracted 802.1Q tag");

	add_fl_sysctls(&pi->ctx, oid, &rxq->fl);

	return (rc);
}

static int
free_rxq(struct port_info *pi, struct sge_rxq *rxq)
{
	int rc;

#if defined(INET) || defined(INET6)
	if (rxq->lro.ifp) {
		tcp_lro_free(&rxq->lro);
		rxq->lro.ifp = NULL;
	}
#endif

	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
	if (rc == 0)
		bzero(rxq, sizeof(*rxq));

	return (rc);
}

#ifdef TCP_OFFLOAD
static int
alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
    int intr_idx, int idx, struct sysctl_oid *oid)
{
	int rc;
	struct sysctl_oid_list *children;
	char name[16];

	rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
	    pi->rx_chan_map);
	if (rc != 0)
		return (rc);

	children = SYSCTL_CHILDREN(oid);

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "rx queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
	    "I", "absolute id of the queue");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
	    "I", "SGE context id of the queue");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
	    "consumer index");

	add_fl_sysctls(&pi->ctx, oid, &ofld_rxq->fl);

	return (rc);
}

static int
free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
{
	int rc;

	rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
	if (rc == 0)
		bzero(ofld_rxq, sizeof(*ofld_rxq));

	return (rc);
}
#endif

static int
ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_ctrl_cmd c;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
	    V_FW_EQ_CTRL_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
	c.physeqid_pkd = htobe32(0);
	c.fetchszm_to_iqid =
	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
		V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
		F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize =
	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
		V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create control queue %d: %d\n", eq->tx_chan, rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.neq)
	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		cntxt_id, sc->sge.neq - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}

static int
eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_eth_cmd c;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
	    V_FW_EQ_ETH_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
	c.fetchszm_to_iqid =
	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
		V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
		      V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
		      V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(pi->dev,
		    "failed to create Ethernet egress queue: %d\n", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.neq)
	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		cntxt_id, sc->sge.neq - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}

#ifdef TCP_OFFLOAD
static int
ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_ofld_cmd c;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
	    V_FW_EQ_OFLD_CMD_VFN(0));
	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
	c.fetchszm_to_iqid =
		htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
		    V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
		    F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize =
	    htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
		V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
		V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
		V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(pi->dev,
		    "failed to create egress queue for TCP offload: %d\n", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.neq)
	    panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		cntxt_id, sc->sge.neq - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}
#endif

static int
alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc;
	size_t len;

	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);

	len = eq->qsize * EQ_ESIZE;
	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
	    &eq->ba, (void **)&eq->desc);
	if (rc)
		return (rc);

	eq->cap = eq->qsize - spg_len / EQ_ESIZE;
	eq->spg = (void *)&eq->desc[eq->cap];
	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
	eq->pidx = eq->cidx = 0;
	eq->doorbells = sc->doorbells;

	switch (eq->flags & EQ_TYPEMASK) {
	case EQ_CTRL:
		rc = ctrl_eq_alloc(sc, eq);
		break;

	case EQ_ETH:
		rc = eth_eq_alloc(sc, pi, eq);
		break;

#ifdef TCP_OFFLOAD
	case EQ_OFLD:
		rc = ofld_eq_alloc(sc, pi, eq);
		break;
#endif

	default:
		panic("%s: invalid eq type %d.", __func__,
		    eq->flags & EQ_TYPEMASK);
	}
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to allocate egress queue(%d): %d",
		    eq->flags & EQ_TYPEMASK, rc);
	}

	eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;

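	/*
	 * Map this eq's doorbell into the BAR2 user doorbell region.  As a
	 * worked example (the numbers are illustrative assumptions, not
	 * values from this file): with eq_s_qpp = 5 there are 32 queues per
	 * 4KB doorbell page, so cntxt_id 70 lands in page 70 >> 5 = 2 with
	 * qid 70 & 31 = 6, and its 128-byte write-combining segment starts
	 * at 6 << UDBS_SEG_SHIFT within that page.  A qid that doesn't fit
	 * in the page's segments loses the WCWR doorbell option.
	 */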
	if (isset(&eq->doorbells, DOORBELL_UDB) ||
	    isset(&eq->doorbells, DOORBELL_UDBWC) ||
	    isset(&eq->doorbells, DOORBELL_WCWR)) {
		uint32_t s_qpp = sc->sge.eq_s_qpp;
		uint32_t mask = (1 << s_qpp) - 1;
		volatile uint8_t *udb;

		udb = sc->udbs_base + UDBS_DB_OFFSET;
		udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT;	/* pg offset */
		eq->udb_qid = eq->cntxt_id & mask;		/* id in page */
		if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE)
2752249392Snp	    		clrbit(&eq->doorbells, DOORBELL_WCWR);
2753248925Snp		else {
2754248925Snp			udb += eq->udb_qid << UDBS_SEG_SHIFT;	/* seg offset */
2755248925Snp			eq->udb_qid = 0;
2756248925Snp		}
2757248925Snp		eq->udb = (volatile void *)udb;
2758248925Snp	}
2759248925Snp
2760228561Snp	return (rc);
2761228561Snp}
2762228561Snp
2763228561Snpstatic int
2764228561Snpfree_eq(struct adapter *sc, struct sge_eq *eq)
2765228561Snp{
2766228561Snp	int rc;
2767228561Snp
2768228561Snp	if (eq->flags & EQ_ALLOCATED) {
2769228561Snp		switch (eq->flags & EQ_TYPEMASK) {
2770228561Snp		case EQ_CTRL:
2771228561Snp			rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
2772228561Snp			    eq->cntxt_id);
2773228561Snp			break;

		case EQ_ETH:
			rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;

#ifdef TCP_OFFLOAD
		case EQ_OFLD:
			rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;
#endif

		default:
			panic("%s: invalid eq type %d.", __func__,
			    eq->flags & EQ_TYPEMASK);
		}
		if (rc != 0) {
			device_printf(sc->dev,
			    "failed to free egress queue (%d): %d\n",
			    eq->flags & EQ_TYPEMASK, rc);
			return (rc);
		}
		eq->flags &= ~EQ_ALLOCATED;
	}

	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);

	if (mtx_initialized(&eq->eq_lock))
		mtx_destroy(&eq->eq_lock);

	bzero(eq, sizeof(*eq));
	return (0);
}

static int
alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
    struct sysctl_oid *oid)
{
	int rc;
	struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	rc = alloc_eq(sc, pi, &wrq->eq);
	if (rc)
		return (rc);

	wrq->adapter = sc;
	STAILQ_INIT(&wrq->wr_list);

	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
	    &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
	    "consumer index");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
	    "producer index");
	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
	    &wrq->tx_wrs, "# of work requests");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
	    &wrq->no_desc, 0,
	    "# of times queue ran out of hardware descriptors");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
	    &wrq->eq.unstalled, 0, "# of times queue recovered after stall");

	return (rc);
}

static int
free_wrq(struct adapter *sc, struct sge_wrq *wrq)
{
	int rc;

	rc = free_eq(sc, &wrq->eq);
	if (rc)
		return (rc);

	bzero(wrq, sizeof(*wrq));
	return (0);
}

static int
alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
    struct sysctl_oid *oid)
{
	int rc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;
	char name[16];
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	rc = alloc_eq(sc, pi, eq);
	if (rc)
		return (rc);

	txq->ifp = pi->ifp;

	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
	    M_ZERO | M_WAITOK);
	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);

	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create tx DMA tag: %d\n", rc);
		return (rc);
	}

	/*
	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
	 * limit for any WR).  txq->no_dmamap events shouldn't occur if the
	 * maps are sized for the worst case.
	 */
	rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
	    M_WAITOK);
	if (rc != 0) {
		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
		return (rc);
	}

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "tx queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
	    &eq->cntxt_id, 0, "SGE context id of the queue");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
	    "consumer index");
	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
	    CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
	    "producer index");

	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
	    &txq->txcsum, "# of times hardware assisted with checksum");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
	    CTLFLAG_RD, &txq->vlan_insertion,
	    "# of times hardware inserted 802.1Q tag");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
	    &txq->tso_wrs, "# of TSO work requests");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
	    &txq->imm_wrs, "# of work requests with immediate data");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
	    &txq->sgl_wrs, "# of work requests with direct SGL");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");

	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
	    &txq->br->br_drops, "# of drops in the buf_ring for this queue");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
	    &eq->egr_update, 0, "egress update notifications from the SGE");
	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
	    &eq->unstalled, 0, "# of times txq recovered after stall");

	return (rc);
}
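
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the worst-case DMA map count passed to t4_alloc_tx_maps above.  A txpkts
 * WR is limited to 8 descriptors and can carry ~10 frames, so a ring of
 * qsize descriptors can never have more than qsize * 10 / 8 frames (one
 * map each) in flight at once.
 */
static inline int
sketch_worst_case_tx_maps(int qsize)
{

	return (qsize * 10 / 8);
}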

static int
free_txq(struct port_info *pi, struct sge_txq *txq)
{
	int rc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;

	rc = free_eq(sc, eq);
	if (rc)
		return (rc);

	free(txq->sdesc, M_CXGBE);

	if (txq->txmaps.maps)
		t4_free_tx_maps(&txq->txmaps, txq->tx_tag);

	buf_ring_free(txq->br, M_CXGBE);

	if (txq->tx_tag)
		bus_dma_tag_destroy(txq->tx_tag);

	bzero(txq, sizeof(*txq));
	return (0);
}

static void
oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *ba = arg;

	KASSERT(nseg == 1,
	    ("%s meant for single segment mappings only.", __func__));

	*ba = error ? 0 : segs->ds_addr;
}

static inline bool
is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
{
	*ctrl = (void *)((uintptr_t)iq->cdesc +
	    (iq->esize - sizeof(struct rsp_ctrl)));

	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
}

static inline void
iq_next(struct sge_iq *iq)
{
	iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
		iq->cidx = 0;
		iq->gen ^= 1;
		iq->cdesc = iq->desc;
	}
}
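
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the generation-bit scheme behind is_new_response/iq_next.  The queue's
 * gen bit flips on every wrap, so an entry is new only while its gen
 * matches the queue's; the wrap point mirrors iq_next above, which wraps
 * at qsize - 1 rather than qsize.
 */
static inline void
sketch_iq_advance(u_int *cidx, u_int *gen, u_int qsize)
{

	if (++(*cidx) == qsize - 1) {
		*cidx = 0;
		*gen ^= 1;	/* entries written on the new lap use this gen */
	}
}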

#define FL_HW_IDX(x) ((x) >> 3)
static inline void
ring_fl_db(struct adapter *sc, struct sge_fl *fl)
{
	int ndesc = fl->pending / 8;
	uint32_t v;

	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
		ndesc--;	/* hold back one credit */

	if (ndesc <= 0)
		return;		/* nothing to do */

	v = F_DBPRIO | V_QID(fl->cntxt_id) | V_PIDX(ndesc);
	if (is_t5(sc))
		v |= F_DBTYPE;

	wmb();

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);
	fl->pending -= ndesc * 8;
}
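
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the credit arithmetic in ring_fl_db.  Freelist doorbells are counted in
 * units of 8 descriptors, and one credit is held back whenever pidx and
 * cidx fall within the same hardware index, as in the FL_HW_IDX test
 * above.
 */
static inline int
sketch_fl_db_credits(u_int pending, u_int pidx, u_int cidx)
{
	int ndesc = pending / 8;

	if ((pidx >> 3) == (cidx >> 3))
		ndesc--;	/* hold back one credit */
	return (ndesc > 0 ? ndesc : 0);
}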

/*
 * Fill up the freelist by up to nbufs and maybe ring its doorbell.
 *
 * Returns non-zero to indicate that this freelist should be added to the
 * list of starving freelists.
 */
static int
refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
{
	__be64 *d = &fl->desc[fl->pidx];
	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
	uintptr_t pa;
	caddr_t cl;
	struct cluster_layout *cll = &fl->cll_def;	/* default layout */
	struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
	struct cluster_metadata *clm;

	FL_LOCK_ASSERT_OWNED(fl);

	if (nbufs > fl->needed)
		nbufs = fl->needed;
	nbufs -= (fl->pidx + nbufs) % 8;

	while (nbufs--) {

		if (sd->cl != NULL) {

			if (sd->nmbuf == 0) {
				/*
				 * Fast recycle without involving any atomics on
				 * the cluster's metadata (if the cluster has
				 * metadata).  This happens when all frames
				 * received in the cluster were small enough to
				 * fit within a single mbuf each.
				 */
				fl->cl_fast_recycled++;
				goto recycled_fast;
			}

			/*
			 * Cluster is guaranteed to have metadata.  Clusters
			 * without metadata always take the fast recycle path
			 * when they're recycled.
			 */
			clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
			MPASS(clm != NULL);

			if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
				fl->cl_recycled++;
				goto recycled;
			}
			sd->cl = NULL;	/* gave up my reference */
		}
		MPASS(sd->cl == NULL);
alloc:
		cl = uma_zalloc(swz->zone, M_NOWAIT);
		if (__predict_false(cl == NULL)) {
			if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 ||
			    fl->cll_def.zidx == fl->cll_alt.zidx)
				break;

			/* fall back to the safe zone */
			cll = &fl->cll_alt;
			swz = &sc->sge.sw_zone_info[cll->zidx];
			goto alloc;
		}
		fl->cl_allocated++;

		pa = pmap_kextract((vm_offset_t)cl);
		pa += cll->region1;
		sd->cl = cl;
		sd->cll = *cll;
		*d = htobe64(pa | cll->hwidx);
		clm = cl_metadata(sc, fl, cll, cl);
		if (clm != NULL) {
recycled:
#ifdef INVARIANTS
			clm->sd = sd;
#endif
			clm->refcount = 1;
		}
		sd->nmbuf = 0;
recycled_fast:
		fl->pending++;
		fl->needed--;
		d++;
		sd++;
		if (__predict_false(++fl->pidx == fl->cap)) {
			fl->pidx = 0;
			sd = fl->sdesc;
			d = fl->desc;
		}
	}

	if (fl->pending >= 8)
		ring_fl_db(sc, fl);

	return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
}
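
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the recycle decision in refill_fl.  A cluster whose frames were all
 * copied out into regular mbufs (nmbuf == 0) is reused without touching
 * the refcount; otherwise the driver drops its reference and reuses the
 * cluster only if that was the last one.
 */
static inline int
sketch_cluster_reusable(int nmbuf, volatile u_int *refcount)
{

	if (nmbuf == 0)
		return (1);	/* fast recycle, no atomics */
	return (atomic_fetchadd_int(refcount, -1) == 1);
}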

/*
 * Attempt to refill all starving freelists.
 */
static void
refill_sfl(void *arg)
{
	struct adapter *sc = arg;
	struct sge_fl *fl, *fl_temp;

	mtx_lock(&sc->sfl_lock);
	TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
		FL_LOCK(fl);
		refill_fl(sc, fl, 64);
		if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
			TAILQ_REMOVE(&sc->sfl, fl, link);
			fl->flags &= ~FL_STARVING;
		}
		FL_UNLOCK(fl);
	}

	if (!TAILQ_EMPTY(&sc->sfl))
		callout_schedule(&sc->sfl_callout, hz / 5);
	mtx_unlock(&sc->sfl_lock);
}

static int
alloc_fl_sdesc(struct sge_fl *fl)
{

	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
	    M_ZERO | M_WAITOK);

	return (0);
}

static void
free_fl_sdesc(struct adapter *sc, struct sge_fl *fl)
{
	struct fl_sdesc *sd;
	struct cluster_metadata *clm;
	struct cluster_layout *cll;
	int i;

	sd = fl->sdesc;
	for (i = 0; i < fl->cap; i++, sd++) {
		if (sd->cl == NULL)
			continue;

		cll = &sd->cll;
		clm = cl_metadata(sc, fl, cll, sd->cl);
		if (sd->nmbuf == 0 ||
		    (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1)) {
			uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl);
		}
		sd->cl = NULL;
	}

	free(fl->sdesc, M_CXGBE);
	fl->sdesc = NULL;
}

int
t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
    int flags)
{
	struct tx_map *txm;
	int i, rc;

	txmaps->map_total = txmaps->map_avail = count;
	txmaps->map_cidx = txmaps->map_pidx = 0;

	txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
	    M_ZERO | flags);

	txm = txmaps->maps;
	for (i = 0; i < count; i++, txm++) {
		rc = bus_dmamap_create(tx_tag, 0, &txm->map);
		if (rc != 0)
			goto failed;
	}

	return (0);
failed:
	while (--i >= 0) {
		txm--;
		bus_dmamap_destroy(tx_tag, txm->map);
	}
	KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));

	free(txmaps->maps, M_CXGBE);
	txmaps->maps = NULL;

	return (rc);
}

void
t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
{
	struct tx_map *txm;
	int i;

	txm = txmaps->maps;
	for (i = 0; i < txmaps->map_total; i++, txm++) {

		if (txm->m) {
			bus_dmamap_unload(tx_tag, txm->map);
			m_freem(txm->m);
			txm->m = NULL;
		}

		bus_dmamap_destroy(tx_tag, txm->map);
	}

	free(txmaps->maps, M_CXGBE);
	txmaps->maps = NULL;
}

/*
 * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
 * willing to use up to 2 hardware descriptors, which means a maximum of 96
 * bytes of immediate data.
 */
#define IMM_LEN ( \
      2 * EQ_ESIZE \
    - sizeof(struct fw_eth_tx_pkt_wr) \
    - sizeof(struct cpl_tx_pkt_core))
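
/*
 * Editor's note: assuming EQ_ESIZE is 64 (one hardware descriptor) and that
 * the two headers are 16 bytes each -- sizes inferred from the 96-byte
 * figure in the comment above, not restated from the headers -- the
 * arithmetic is 2 * 64 - 16 - 16 = 96.  The guarded assertion below is
 * illustrative only and is not compiled in.
 */
#if 0
CTASSERT(IMM_LEN == 2 * 64 - 16 - 16);
#endif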

/*
 * Returns non-zero on failure; no need to clean up anything in that case.
 *
 * Note 1: We always try to defrag the mbuf if required and return EFBIG only
 * if the resulting chain still won't fit in a tx descriptor.
 *
 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
 * does not have the TCP header in it.
 */
static int
get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
    int sgl_only)
{
	struct mbuf *m = *fp;
	struct tx_maps *txmaps;
	struct tx_map *txm;
	int rc, defragged = 0, n;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (m->m_pkthdr.tso_segsz)
		sgl_only = 1;	/* Do not allow immediate data with LSO */

start:	sgl->nsegs = 0;

	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
		return (0);	/* nsegs = 0 tells caller to use imm. tx */

	txmaps = &txq->txmaps;
	if (txmaps->map_avail == 0) {
		txq->no_dmamap++;
		return (ENOMEM);
	}
	txm = &txmaps->maps[txmaps->map_pidx];

	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
		*fp = m_pullup(m, 50);
		m = *fp;
		if (m == NULL)
			return (ENOBUFS);
	}

	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
	    &sgl->nsegs, BUS_DMA_NOWAIT);
	if (rc == EFBIG && defragged == 0) {
		m = m_defrag(m, M_NOWAIT);
		if (m == NULL)
			return (EFBIG);

		defragged = 1;
		*fp = m;
		goto start;
	}
	if (rc != 0)
		return (rc);

	txm->m = m;
	txmaps->map_avail--;
	if (++txmaps->map_pidx == txmaps->map_total)
		txmaps->map_pidx = 0;

	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));

	/*
	 * Store the # of flits required to hold this frame's SGL in nflits.  An
	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
	 * multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not used
	 * then len1 must be set to 0.
	 */
	n = sgl->nsegs - 1;
	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;

	return (0);
}
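
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the flit count computed at the end of get_pkt_sgl.  The first segment
 * needs 2 flits (ULPTX header + len0, then addr0), each following pair of
 * segments needs 3 (a shared length flit plus two address flits), and an
 * odd trailing segment needs 2 with its unused length written as 0.
 */
static inline int
sketch_sgl_nflits(int nsegs)
{
	int n = nsegs - 1;	/* segments after the first */

	return ((3 * n) / 2 + (n & 1) + 2);
}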


/*
 * Releases all the txq resources used up in the specified sgl.
 */
static int
free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
{
	struct tx_maps *txmaps;
	struct tx_map *txm;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (sgl->nsegs == 0)
		return (0);	/* didn't use any map */

	txmaps = &txq->txmaps;

	/* 1 pkt uses exactly 1 map, back it out */

	txmaps->map_avail++;
	if (txmaps->map_pidx > 0)
		txmaps->map_pidx--;
	else
		txmaps->map_pidx = txmaps->map_total - 1;

	txm = &txmaps->maps[txmaps->map_pidx];
	bus_dmamap_unload(txq->tx_tag, txm->map);
	txm->m = NULL;

	return (0);
}

static int
write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
    struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;	/* used in many unrelated places */
	uint64_t ctrl1;
	int nflits, ndesc, pktlen;
	struct tx_sdesc *txsd;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(txq);

	pktlen = m->m_pkthdr.len;

	/*
	 * Do we have enough flits to send this frame out?
	 */
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (m->m_pkthdr.tso_segsz) {
		nflits = TXPKT_LSO_WR_HDR;
		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
	} else
		nflits = TXPKT_WR_HDR;
	if (sgl->nsegs > 0)
		nflits += sgl->nflits;
	else {
		nflits += howmany(pktlen, 8);
		ctrl += pktlen;
	}
	ndesc = howmany(nflits, 8);
	if (ndesc > eq->avail)
		return (ENOMEM);

	/* Firmware work request header */
	wr = (void *)&eq->desc[eq->pidx];
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
	if (eq->avail == ndesc) {
		if (!(eq->flags & EQ_CRFLUSHED)) {
			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
			eq->flags |= EQ_CRFLUSHED;
		}
		eq->flags |= EQ_STALLED;
	}

	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	if (m->m_pkthdr.tso_segsz) {
		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
		struct ether_header *eh;
		void *l3hdr;
#if defined(INET) || defined(INET6)
		struct tcphdr *tcp;
#endif
		uint16_t eh_type;

		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
		    F_LSO_LAST_SLICE;

		eh = mtod(m, struct ether_header *);
		eh_type = ntohs(eh->ether_type);
		if (eh_type == ETHERTYPE_VLAN) {
			struct ether_vlan_header *evh = (void *)eh;

			ctrl |= V_LSO_ETHHDR_LEN(1);
			l3hdr = evh + 1;
			eh_type = ntohs(evh->evl_proto);
		} else
			l3hdr = eh + 1;

		switch (eh_type) {
#ifdef INET6
		case ETHERTYPE_IPV6:
		{
			struct ip6_hdr *ip6 = l3hdr;

			/*
			 * XXX-BZ For now we do not pretend to support
			 * IPv6 extension headers.
			 */
			KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
			    "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
			tcp = (struct tcphdr *)(ip6 + 1);
			ctrl |= F_LSO_IPV6;
			ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
			    V_LSO_TCPHDR_LEN(tcp->th_off);
			break;
		}
#endif
#ifdef INET
		case ETHERTYPE_IP:
		{
			struct ip *ip = l3hdr;

			tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
			ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
			    V_LSO_TCPHDR_LEN(tcp->th_off);
			break;
		}
#endif
		default:
			panic("%s: CSUM_TSO but no supported IP version "
			    "(0x%04x)", __func__, eh_type);
		}

		lso->lso_ctrl = htobe32(ctrl);
		lso->ipid_ofst = htobe16(0);
		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
		lso->seqno_offset = htobe32(0);
		lso->len = htobe32(pktlen);

		cpl = (void *)(lso + 1);

		txq->tso_wrs++;
	} else
		cpl = (void *)(wr + 1);

	/* Checksum offload */
	ctrl1 = 0;
	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
		ctrl1 |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
	    CSUM_TCP_IPV6 | CSUM_TSO)))
		ctrl1 |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/* CPL header */
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* Software descriptor */
	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	/* SGL */
	dst = (void *)(cpl + 1);
	if (sgl->nsegs > 0) {
		txsd->credits = 1;
		txq->sgl_wrs++;
		write_sgl_to_txd(eq, sgl, &dst);
	} else {
		txsd->credits = 0;
		txq->imm_wrs++;
		for (; m; m = m->m_next) {
			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
#ifdef INVARIANTS
			pktlen -= m->m_len;
#endif
		}
#ifdef INVARIANTS
		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
#endif

	}

	txq->txpkt_wrs++;
	return (0);
}

/*
 * Returns 0 to indicate that m has been accepted into a coalesced tx work
 * request.  It has either been folded into txpkts or txpkts was flushed and m
 * has started a new coalesced work request (as the first frame in a fresh
 * txpkts).
 *
 * Returns non-zero to indicate a failure - the caller is responsible for
 * transmitting m; if there was anything in txpkts it has been flushed.
 */
static int
add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
    struct mbuf *m, struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	int can_coalesce;
	struct tx_sdesc *txsd;
	int flits;

	TXQ_LOCK_ASSERT_OWNED(txq);

	KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));

	if (txpkts->npkt > 0) {
		flits = TXPKTS_PKT_HDR + sgl->nflits;
		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
		    txpkts->nflits + flits <= TX_WR_FLITS &&
		    txpkts->nflits + flits <= eq->avail * 8 &&
		    txpkts->plen + m->m_pkthdr.len < 65536;

		if (can_coalesce) {
			txpkts->npkt++;
			txpkts->nflits += flits;
			txpkts->plen += m->m_pkthdr.len;

			txsd = &txq->sdesc[eq->pidx];
			txsd->credits++;

			return (0);
		}

		/*
		 * Couldn't coalesce m into txpkts.  The first order of business
		 * is to send txpkts on its way.  Then we'll revisit m.
		 */
		write_txpkts_wr(txq, txpkts);
	}

	/*
	 * Check if we can start a new coalesced tx work request with m as
	 * the first packet in it.
	 */

	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));

	flits = TXPKTS_WR_HDR + sgl->nflits;
	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;

	if (can_coalesce == 0)
		return (EINVAL);

	/*
	 * Start a fresh coalesced tx WR with m as the first frame in it.
	 */
	txpkts->npkt = 1;
	txpkts->nflits = flits;
	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
	txpkts->plen = m->m_pkthdr.len;

	txsd = &txq->sdesc[eq->pidx];
	txsd->credits = 1;

	return (0);
}
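
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the four conditions checked by add_to_txpkts before folding a frame into
 * an existing coalesced WR: no TSO, the WR still fits in TX_WR_FLITS, it
 * fits in the descriptors currently available (8 flits each), and the
 * combined payload stays below 64K (wr->plen is a 16-bit field).
 */
static inline int
sketch_can_coalesce(int is_tso, int wr_flits, int max_wr_flits,
    int avail_desc, int combined_plen)
{

	return (!is_tso && wr_flits <= max_wr_flits &&
	    wr_flits <= avail_desc * 8 && combined_plen < 65536);
}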

/*
 * Note that write_txpkts_wr can never run out of hardware descriptors (but
 * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
 * coalescing only if sufficient hardware descriptors are available.
 */
static void
write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkts_wr *wr;
	struct tx_sdesc *txsd;
	uint32_t ctrl;
	int ndesc;

	TXQ_LOCK_ASSERT_OWNED(txq);

	ndesc = howmany(txpkts->nflits, 8);

	wr = (void *)&eq->desc[eq->pidx];
	wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
	if (eq->avail == ndesc) {
		if (!(eq->flags & EQ_CRFLUSHED)) {
			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
			eq->flags |= EQ_CRFLUSHED;
		}
		eq->flags |= EQ_STALLED;
	}
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->plen = htobe16(txpkts->plen);
	wr->npkt = txpkts->npkt;
	wr->r3 = wr->type = 0;

	/* Everything else already written */

	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));

	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	txq->txpkts_pkts += txpkts->npkt;
	txq->txpkts_wrs++;
	txpkts->npkt = 0;	/* emptied */
}

static inline void
write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
{
	struct ulp_txpkt *ulpmc;
	struct ulptx_idata *ulpsc;
	struct cpl_tx_pkt_core *cpl;
	struct sge_eq *eq = &txq->eq;
	uintptr_t flitp, start, end;
	uint64_t ctrl;
	caddr_t dst;

	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));

	start = (uintptr_t)eq->desc;
	end = (uintptr_t)eq->spg;

	/* Checksum offload */
	ctrl = 0;
	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
		ctrl |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
	    CSUM_TCP_IPV6 | CSUM_TSO)))
		ctrl |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/*
	 * The previous packet's SGL must have ended at a 16 byte boundary (this
	 * is required by the firmware/hardware).  It follows that flitp cannot
	 * wrap around between the ULPTX master command and ULPTX subcommand (8
	 * bytes each), and that it cannot wrap around in the middle of the
	 * cpl_tx_pkt_core either.
	 */
	flitp = (uintptr_t)txpkts->flitp;
	KASSERT((flitp & 0xf) == 0,
	    ("%s: last SGL did not end at 16 byte boundary: %p",
	    __func__, txpkts->flitp));

	/* ULP master command */
	ulpmc = (void *)flitp;
	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
	    V_ULP_TXPKT_FID(eq->iqid));
	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
	    sizeof(*cpl) + 8 * sgl->nflits, 16));

	/* ULP subcommand */
	ulpsc = (void *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
	    F_ULP_TX_SC_MORE);
	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));

	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
	if (flitp == end)
		flitp = start;

	/* CPL_TX_PKT */
	cpl = (void *)flitp;
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(m->m_pkthdr.len);
	cpl->ctrl1 = htobe64(ctrl);

	flitp += sizeof(*cpl);
	if (flitp == end)
		flitp = start;

	/* SGL for this frame */
	dst = (caddr_t)flitp;
	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
	txpkts->flitp = (void *)dst;

	KASSERT(((uintptr_t)dst & 0xf) == 0,
	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
}

/*
 * If the SGL ends on an address that is not 16 byte aligned, this function will
 * add a 0 filled flit at the end.  It returns 1 in that case.
 */
static int
write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
{
	__be64 *flitp, *end;
	struct ulptx_sgl *usgl;
	bus_dma_segment_t *seg;
	int i, padded;

	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
	    ("%s: bad SGL - nsegs=%d, nflits=%d",
	    __func__, sgl->nsegs, sgl->nflits));

	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));

	flitp = (__be64 *)(*to);
	end = flitp + sgl->nflits;
	seg = &sgl->seg[0];
	usgl = (void *)flitp;

	/*
	 * We start at a 16 byte boundary somewhere inside the tx descriptor
	 * ring, so we're at least 16 bytes away from the status page.  There is
	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
	 */

	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(sgl->nsegs));
	usgl->len0 = htobe32(seg->ds_len);
	usgl->addr0 = htobe64(seg->ds_addr);
	seg++;

	if ((uintptr_t)end <= (uintptr_t)eq->spg) {

		/* Won't wrap around at all */

		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
		}
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
	} else {

		/* Will wrap somewhere in the rest of the SGL */

		/* 2 flits already written, write the rest flit by flit */
		flitp = (void *)(usgl + 1);
		for (i = 0; i < sgl->nflits - 2; i++) {
			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
				flitp = (void *)eq->desc;
			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
		}
		end = flitp;
	}

	if ((uintptr_t)end & 0xf) {
		*(uint64_t *)end = 0;
		end++;
		padded = 1;
	} else
		padded = 0;

	if ((uintptr_t)end == (uintptr_t)eq->spg)
		*to = (void *)eq->desc;
	else
		*to = (void *)end;

	return (padded);
}
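
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the padding rule implemented by write_sgl_to_txd.  SGLs are written in
 * 8-byte flits but must end on a 16 byte boundary, so starting from an
 * aligned address an SGL with an odd number of flits gets one zero flit
 * appended, which is what the return value reports.
 */
static inline int
sketch_sgl_pad_flits(int nflits)
{

	return (nflits & 1);	/* 1 if a zero flit must be appended */
}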

static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{
	if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
		bcopy(from, *to, len);
		(*to) += len;
	} else {
		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);

		bcopy(from, *to, portion);
		from += portion;
		portion = len - portion;	/* remaining */
		bcopy(from, (void *)eq->desc, portion);
		(*to) = (caddr_t)eq->desc + portion;
	}
}
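
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the split copy performed by copy_to_txd when the destination would run
 * past the status page at the end of the ring.  "base" stands in for
 * eq->desc and "limit" for eq->spg.
 */
static inline void
sketch_ring_copy(caddr_t base, caddr_t limit, caddr_t *to, caddr_t from,
    int len)
{
	int first = limit - *to;	/* room before the status page */

	if (len <= first) {
		bcopy(from, *to, len);
		*to += len;
	} else {
		bcopy(from, *to, first);
		bcopy(from + first, base, len - first);
		*to = base + (len - first);
	}
}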

static inline void
ring_eq_db(struct adapter *sc, struct sge_eq *eq)
{
	u_int db, pending;

	db = eq->doorbells;
	pending = eq->pending;
	if (pending > 1)
		clrbit(&db, DOORBELL_WCWR);
	eq->pending = 0;
	wmb();

	switch (ffs(db) - 1) {
	case DOORBELL_UDB:
		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
		return;

	case DOORBELL_WCWR: {
		volatile uint64_t *dst, *src;
		int i;

		/*
		 * Queues whose 128B doorbell segment fits in the page do not
		 * use relative qid (udb_qid is always 0).  Only queues with
		 * doorbell segments can do WCWR.
		 */
		KASSERT(eq->udb_qid == 0 && pending == 1,
		    ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
		    __func__, eq->doorbells, pending, eq->pidx, eq));

		dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
		    UDBS_DB_OFFSET);
		i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
		src = (void *)&eq->desc[i];
		while (src != (void *)&eq->desc[i + 1])
			*dst++ = *src++;
		wmb();
		return;
	}

	case DOORBELL_UDBWC:
		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
		wmb();
		return;

	case DOORBELL_KDB:
		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
		    V_QID(eq->cntxt_id) | V_PIDX(pending));
		return;
	}
}
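
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the doorbell selection in ring_eq_db.  WCWR writes the whole last
 * descriptor through a write-combined doorbell segment and only works for
 * a single pending descriptor, so it is dropped from the candidate set
 * when more than one is pending; the lowest-numbered doorbell left in the
 * set is then used.
 */
static inline int
sketch_pick_doorbell(u_int doorbells, u_int pending, int wcwr)
{

	if (pending > 1)
		doorbells &= ~(1 << wcwr);
	return (ffs(doorbells) - 1);	/* -1 if the set is empty */
}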

static inline int
reclaimable(struct sge_eq *eq)
{
	unsigned int cidx;

	cidx = eq->spg->cidx;	/* stable snapshot */
	cidx = be16toh(cidx);

	if (cidx >= eq->cidx)
		return (cidx - eq->cidx);
	else
		return (cidx + eq->cap - eq->cidx);
}
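
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the circular distance computed by reclaimable().  The hardware cidx read
 * from the status page may already have wrapped past the software cidx, in
 * which case the ring capacity is added back in.
 */
static inline u_int
sketch_ring_distance(u_int hw_cidx, u_int sw_cidx, u_int cap)
{

	return (hw_cidx >= sw_cidx ? hw_cidx - sw_cidx :
	    hw_cidx + cap - sw_cidx);
}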

/*
 * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
 * many as possible but stop when there are around "n" mbufs to free.
 *
 * The actual number reclaimed is provided as the return value.
 */
static int
reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
{
	struct tx_sdesc *txsd;
	struct tx_maps *txmaps;
	struct tx_map *txm;
	unsigned int reclaimed, maps;
	struct sge_eq *eq = &txq->eq;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (can_reclaim == 0)
		can_reclaim = reclaimable(eq);

	maps = reclaimed = 0;
	while (can_reclaim && maps < n) {
		int ndesc;

		txsd = &txq->sdesc[eq->cidx];
		ndesc = txsd->desc_used;

		/* Firmware doesn't return "partial" credits. */
		KASSERT(can_reclaim >= ndesc,
		    ("%s: unexpected number of credits: %d, %d",
		    __func__, can_reclaim, ndesc));

		maps += txsd->credits;

		reclaimed += ndesc;
		can_reclaim -= ndesc;

		eq->cidx += ndesc;
		if (__predict_false(eq->cidx >= eq->cap))
			eq->cidx -= eq->cap;
	}

	txmaps = &txq->txmaps;
	txm = &txmaps->maps[txmaps->map_cidx];
	if (maps)
		prefetch(txm->m);

	eq->avail += reclaimed;
	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
	    ("%s: too many descriptors available", __func__));

	txmaps->map_avail += maps;
	KASSERT(txmaps->map_avail <= txmaps->map_total,
	    ("%s: too many maps available", __func__));

	while (maps--) {
		struct tx_map *next;

		next = txm + 1;
		if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
			next = txmaps->maps;
		prefetch(next->m);

		bus_dmamap_unload(txq->tx_tag, txm->map);
		m_freem(txm->m);
		txm->m = NULL;

		txm = next;
		if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
			txmaps->map_cidx = 0;
	}

	return (reclaimed);
}

static void
write_eqflush_wr(struct sge_eq *eq)
{
	struct fw_eq_flush_wr *wr;

	EQ_LOCK_ASSERT_OWNED(eq);
	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
	KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));

	wr = (void *)&eq->desc[eq->pidx];
	bzero(wr, sizeof(*wr));
	wr->opcode = FW_EQ_FLUSH_WR;
	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);

	eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
	eq->pending++;
	eq->avail--;
	if (++eq->pidx == eq->cap)
		eq->pidx = 0;
}

static __be64
get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
{
	int i = (idx / 3) * 2;

	switch (idx % 3) {
	case 0: {
		__be64 rc;

		rc = htobe32(sgl[i].ds_len);
		if (i + 1 < nsegs)
			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;

		return (rc);
	}
	case 1:
		return htobe64(sgl[i].ds_addr);
	case 2:
		return htobe64(sgl[i + 1].ds_addr);
	}

	return (0);
}

static void
find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp)
{
	int8_t zidx, hwidx, idx;
	uint16_t region1, region3;
	int spare, spare_needed, n;
	struct sw_zone_info *swz;
	struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0];

	/*
	 * Buffer Packing: Look for a PAGE_SIZE or larger zone with a bufsize
	 * large enough for the max payload and cluster metadata.  Otherwise
	 * settle for the largest bufsize that leaves enough room in the
	 * cluster for metadata.
	 *
	 * Without buffer packing: Look for the smallest zone with a bufsize
	 * large enough for the max payload.  Settle for the largest bufsize
	 * available if there's nothing big enough for the max payload.
	 */
	spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0;
	swz = &sc->sge.sw_zone_info[0];
	hwidx = -1;
	for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) {
		if (swz->size > largest_rx_cluster) {
			if (__predict_true(hwidx != -1))
				break;

			/*
			 * This is a misconfiguration.  largest_rx_cluster is
			 * preventing us from finding a refill source.  See
			 * dev.t5nex.<n>.buffer_sizes to figure out why.
			 */
			device_printf(sc->dev, "largest_rx_cluster=%u leaves no"
			    " refill source for fl %p (dma %u).  Ignored.\n",
			    largest_rx_cluster, fl, maxp);
		}
		for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) {
			hwb = &hwb_list[idx];
			spare = swz->size - hwb->size;
			if (spare < spare_needed)
				continue;

			hwidx = idx;		/* best option so far */
			if (hwb->size >= maxp) {

				if ((fl->flags & FL_BUF_PACKING) == 0)
					goto done; /* stop looking (not packing) */

				if (swz->size >= safest_rx_cluster)
					goto done; /* stop looking (packing) */
			}
			break;		/* keep looking, next zone */
		}
	}
done:
	/* A usable hwidx has been located. */
	MPASS(hwidx != -1);
	hwb = &hwb_list[hwidx];
	zidx = hwb->zidx;
	swz = &sc->sge.sw_zone_info[zidx];
	region1 = 0;
	region3 = swz->size - hwb->size;

	/*
	 * Stay within this zone and see if there is a better match when mbuf
	 * inlining is allowed.  Remember that the hwidx's are sorted in
	 * decreasing order of size (so in increasing order of spare area).
	 */
	for (idx = hwidx; idx != -1; idx = hwb->next) {
		hwb = &hwb_list[idx];
		spare = swz->size - hwb->size;

		if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
			break;
		if (spare < CL_METADATA_SIZE + MSIZE)
			continue;
		n = (spare - CL_METADATA_SIZE) / MSIZE;
		if (n > howmany(hwb->size, maxp))
			break;

		hwidx = idx;
		if (fl->flags & FL_BUF_PACKING) {
			region1 = n * MSIZE;
			region3 = spare - region1;
		} else {
			region1 = MSIZE;
			region3 = spare - region1;
			break;
		}
	}

	KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES,
	    ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp));
	KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES,
	    ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp));
	KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 ==
	    sc->sge.sw_zone_info[zidx].size,
	    ("%s: bad buffer layout for fl %p, maxp %d. "
		"cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
		sc->sge.sw_zone_info[zidx].size, region1,
		sc->sge.hw_buf_info[hwidx].size, region3));
	if (fl->flags & FL_BUF_PACKING || region1 > 0) {
		KASSERT(region3 >= CL_METADATA_SIZE,
		    ("%s: no room for metadata.  fl %p, maxp %d; "
		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
		    sc->sge.sw_zone_info[zidx].size, region1,
		    sc->sge.hw_buf_info[hwidx].size, region3));
		KASSERT(region1 % MSIZE == 0,
		    ("%s: bad mbuf region for fl %p, maxp %d. "
		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
		    sc->sge.sw_zone_info[zidx].size, region1,
		    sc->sge.hw_buf_info[hwidx].size, region3));
	}

	fl->cll_def.zidx = zidx;
	fl->cll_def.hwidx = hwidx;
	fl->cll_def.region1 = region1;
	fl->cll_def.region3 = region3;
}
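
/*
 * Editor's sketch (an addition for illustration, not used by the driver):
 * the cluster layout invariant asserted at the end of
 * find_best_refill_source.  region1 (inline mbufs, a multiple of MSIZE),
 * the hardware payload buffer, and region3 (trailing spare, which holds
 * the metadata when there is any) must account for the entire cluster.
 */
static inline int
sketch_layout_ok(int cl_size, int region1, int payload, int region3)
{

	return (region1 + payload + region3 == cl_size &&
	    region1 % MSIZE == 0);
}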

static void
find_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
{
	struct sge *s = &sc->sge;
	struct hw_buf_info *hwb;
	struct sw_zone_info *swz;
	int spare;
	int8_t hwidx;

	if (fl->flags & FL_BUF_PACKING)
		hwidx = s->safe_hwidx2;	/* with room for metadata */
	else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) {
		hwidx = s->safe_hwidx2;
		hwb = &s->hw_buf_info[hwidx];
		swz = &s->sw_zone_info[hwb->zidx];
		spare = swz->size - hwb->size;

		/* no good if there isn't room for an mbuf as well */
		if (spare < CL_METADATA_SIZE + MSIZE)
			hwidx = s->safe_hwidx1;
	} else
		hwidx = s->safe_hwidx1;

	if (hwidx == -1) {
		/* No fallback source */
		fl->cll_alt.hwidx = -1;
		fl->cll_alt.zidx = -1;

		return;
	}

	hwb = &s->hw_buf_info[hwidx];
	swz = &s->sw_zone_info[hwb->zidx];
	spare = swz->size - hwb->size;
	fl->cll_alt.hwidx = hwidx;
	fl->cll_alt.zidx = hwb->zidx;
	if (allow_mbufs_in_cluster)
		fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
	else
		fl->cll_alt.region1 = 0;
	fl->cll_alt.region3 = spare - fl->cll_alt.region1;
}

static void
add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
{
	mtx_lock(&sc->sfl_lock);
	FL_LOCK(fl);
	if ((fl->flags & FL_DOOMED) == 0) {
		fl->flags |= FL_STARVING;
		TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
	}
	FL_UNLOCK(fl);
	mtx_unlock(&sc->sfl_lock);
}

static int
handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
	struct adapter *sc = iq->adapter;
	struct sge *s = &sc->sge;
	struct sge_eq *eq;

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	eq = s->eqmap[qid - s->eq_start];
	EQ_LOCK(eq);
	KASSERT(eq->flags & EQ_CRFLUSHED,
	    ("%s: unsolicited egress update", __func__));
	eq->flags &= ~EQ_CRFLUSHED;
	eq->egr_update++;

	if (__predict_false(eq->flags & EQ_DOOMED))
		wakeup_one(eq);
	else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
		taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
	EQ_UNLOCK(eq);

	return (0);
}

/* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */
CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
    offsetof(struct cpl_fw6_msg, data));

static int
handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
		const struct rss_header *rss2;

		rss2 = (const struct rss_header *)&cpl->data[0];
		return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
	}

	return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
}

static int
sysctl_uint16(SYSCTL_HANDLER_ARGS)
{
	uint16_t *id = arg1;
	int i = *id;

	return sysctl_handle_int(oidp, &i, 0, req);
}

static int
sysctl_bufsizes(SYSCTL_HANDLER_ARGS)
{
	struct sge *s = arg1;
	struct hw_buf_info *hwb = &s->hw_buf_info[0];
	struct sw_zone_info *swz = &s->sw_zone_info[0];
	int i, rc;
	struct sbuf sb;
	char c;

	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
	for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
		if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster)
			c = '*';
		else
			c = '\0';

		sbuf_printf(&sb, "%u%c ", hwb->size, c);
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (rc);
}