/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/t4_sge.c 267694 2014-06-21 00:30:51Z np $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
static int fl_pktshift = 2;
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 *  0: disable padding.
 *  Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
static int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);

/*
 * Status page length.
 * -1: driver should figure out a good value.
 *  64 or 128 are the only other valid values.
 */
static int spg_len = -1;
TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 *  0: backpressure the channel instead of dropping packets right away.
 *  1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 *  0: disable buffer packing.
 *  1: enable buffer packing.
 */
static int buffer_packing = -1;
TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4:
 * ---
 * if fl_pad != 0
 * 	value specified here will be overridden by fl_pad.
 * else
 * 	power of 2 from 32 to 4096 (both inclusive) is a valid value here.
 * T5:
 * ---
 * 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
static int t4_fl_pack;
static int t5_fl_pack;
TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);

/*
 * Allow the driver to create mbuf(s) in a cluster allocated for rx.
 * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
 * 1: ok to create mbuf(s) within a cluster if there is room.
 */
static int allow_mbufs_in_cluster = 1;
TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster);

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster);

/*
 * Size of cluster allocation that's most likely to succeed.  The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);

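/*
 * Example loader.conf(5) lines for the tunables above (illustrative values
 * only; the defaults are reasonable for most setups):
 *
 *	hw.cxgbe.fl_pktshift="0"
 *	hw.cxgbe.fl_pad="64"
 *	hw.cxgbe.buffer_packing="1"
 *	hw.cxgbe.largest_rx_cluster="4096"
 */
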
/* Used to track a coalesced tx work request */
struct txpkts {
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	int nflits;		/* # of flits needed for the SGL */
	bus_dma_segment_t seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
    int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int);
static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
    char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
    char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct adapter *, struct sge_fl *);
static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
static void find_safe_refill_source(struct adapter *, struct sge_fl *);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);

/*
 * Called on MOD_LOAD.  Validates and calculates the SGE tunables.
 */
void
t4_sge_modload(void)
{
	int pad;

	/* set pad to a reasonable power of 2 between 16 and 4096 (inclusive) */
#if defined(__i386__) || defined(__amd64__)
	pad = max(cpu_clflush_line_size, 16);
#else
	pad = max(CACHE_LINE_SIZE, 16);
#endif
	pad = min(pad, 4096);

	if (fl_pktshift < 0 || fl_pktshift > 7) {
		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
		    " using 2 instead.\n", fl_pktshift);
		fl_pktshift = 2;
	}

	if (fl_pad != 0 &&
	    (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad))) {

		if (fl_pad != -1) {
			printf("Invalid hw.cxgbe.fl_pad value (%d),"
			    " using %d instead.\n", fl_pad, max(pad, 32));
		}
		fl_pad = max(pad, 32);
	}

	/*
	 * T4 has the same pad and pack boundary.  If a pad boundary is set,
	 * pack boundary must be set to the same value.  Otherwise take the
	 * specified value or auto-calculate something reasonable.
	 */
	if (fl_pad)
		t4_fl_pack = fl_pad;
	else if (fl_pack < 32 || fl_pack > 4096 || !powerof2(fl_pack))
		t4_fl_pack = max(pad, 32);
	else
		t4_fl_pack = fl_pack;

	/* T5's pack boundary is independent of the pad boundary. */
	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
	    !powerof2(fl_pack))
		t5_fl_pack = max(pad, CACHE_LINE_SIZE);
	else
		t5_fl_pack = fl_pack;

	if (spg_len != 64 && spg_len != 128) {
		int len;

#if defined(__i386__) || defined(__amd64__)
		len = cpu_clflush_line_size > 64 ? 128 : 64;
#else
		len = 64;
#endif
		if (spg_len != -1) {
			printf("Invalid hw.cxgbe.spg_len value (%d),"
			    " using %d instead.\n", spg_len, len);
		}
		spg_len = len;
	}

	if (cong_drop < -1 || cong_drop > 1) {
		printf("Invalid hw.cxgbe.cong_drop value (%d),"
		    " using 0 instead.\n", cong_drop);
		cong_drop = 0;
	}
}
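
/*
 * Worked example (illustrative): on amd64 with a 64 byte cache line and all
 * tunables left at -1, the code above settles on fl_pad = 64, t4_fl_pack = 64
 * (inherited from fl_pad), t5_fl_pack = 64, and spg_len = 64 (because
 * cpu_clflush_line_size is not > 64).
 */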

void
t4_init_sge_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
}

/*
 * adap->params.vpd.cclk must be set up before this is called.
 */
void
t4_tweak_chip_settings(struct adapter *sc)
{
	int i;
	uint32_t v, m;
	int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
	int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
	int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sge_flbuf_sizes[] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
		MJUMPAGESIZE - CL_METADATA_SIZE,
		MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES,
		MCLBYTES - MSIZE - CL_METADATA_SIZE,
		MJUM9BYTES - CL_METADATA_SIZE,
		MJUM16BYTES - CL_METADATA_SIZE,
	};

	KASSERT(sc->flags & MASTER_PF,
	    ("%s: trying to change chip settings when not master.", __func__));

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	if (is_t4(sc) && (fl_pad || buffer_packing)) {
		/* t4_fl_pack has the correct value even when fl_pad = 0 */
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
	} else if (is_t5(sc) && fl_pad) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
	}
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	if (is_t5(sc) && buffer_packing) {
		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
		if (t5_fl_pack == 16)
			v = V_INGPACKBOUNDARY(0);
		else
			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
		t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
	}

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);

	KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES,
	    ("%s: hw buffer size table too big", __func__));
	for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) {
		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
		    sge_flbuf_sizes[i]);
	}

	v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
	    V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);

	KASSERT(intr_timer[0] <= timer_max,
	    ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
	    timer_max));
	for (i = 1; i < nitems(intr_timer); i++) {
		KASSERT(intr_timer[i] >= intr_timer[i - 1],
		    ("%s: timers not listed in increasing order (%d)",
		    __func__, i));

		while (intr_timer[i] > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}
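
	/*
	 * Illustrative numbers for the clamping loop above: with timer_max =
	 * 80 the defaults {1, 5, 10, 50, 100, 200} become {1, 5, 10, 50, 75,
	 * 80} -- 100 is averaged with 50 to get 75, and the last entry is
	 * simply clamped to timer_max.
	 */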

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		t4_set_reg_field(sc, A_TP_PARA_REG3, m, 0);
	}

	/* 4K, 16K, 64K, 256K DDP "page sizes" */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	m = v = F_TDDPTAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * SGE wants the buffer to be at least 64B and then a multiple of the pad
 * boundary or 16, whichever is greater.
 */
static inline int
hwsz_ok(int hwsz)
{
	int mask = max(fl_pad, 16) - 1;

	return (hwsz >= 64 && (hwsz & mask) == 0);
}
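
/*
 * For example (assuming fl_pad = 32): hwsz_ok(2048) is true because 2048 is
 * a multiple of 32, hwsz_ok(2000) is false because 2000 & 31 == 16, and
 * anything under 64 is rejected outright.
 */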

/*
 * XXX: driver really should be able to deal with unexpected settings.
 */
int
t4_read_chip_settings(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	int i, j, n, rc = 0;
	uint32_t m, v, r;
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sw_buf_sizes[] = {	/* Sorted by size */
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};
	struct sw_zone_info *swz, *safe_swz;
	struct hw_buf_info *hwb;

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	if (is_t4(sc) && (fl_pad || buffer_packing)) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
	} else if (is_t5(sc) && fl_pad) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
	}
	r = t4_read_reg(sc, A_SGE_CONTROL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
		rc = EINVAL;
	}

	if (is_t5(sc) && buffer_packing) {
		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
		if (t5_fl_pack == 16)
			v = V_INGPACKBOUNDARY(0);
		else
			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
		r = t4_read_reg(sc, A_SGE_CONTROL2);
		if ((r & m) != v) {
			device_printf(sc->dev,
			    "invalid SGE_CONTROL2(0x%x)\n", r);
			rc = EINVAL;
		}
	}
	s->pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	r = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
	if (r != v) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
		rc = EINVAL;
	}

	/* Filter out unusable hw buffer sizes entirely (mark with -2). */
	hwb = &s->hw_buf_info[0];
	for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
		r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
		hwb->size = r;
		hwb->zidx = hwsz_ok(r) ? -1 : -2;
		hwb->next = -1;
	}

	/*
	 * Create a sorted list in decreasing order of hw buffer sizes (and so
	 * increasing order of spare area) for each software zone.
	 */
	n = 0;	/* no usable buffer size to begin with */
	swz = &s->sw_zone_info[0];
	safe_swz = NULL;
	for (i = 0; i < SW_ZONE_SIZES; i++, swz++) {
		int8_t head = -1, tail = -1;

		swz->size = sw_buf_sizes[i];
		swz->zone = m_getzone(swz->size);
		swz->type = m_gettype(swz->size);

		if (swz->size == safest_rx_cluster)
			safe_swz = swz;

		hwb = &s->hw_buf_info[0];
		for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
			if (hwb->zidx != -1 || hwb->size > swz->size)
				continue;
			hwb->zidx = i;
			if (head == -1)
				head = tail = j;
			else if (hwb->size < s->hw_buf_info[tail].size) {
				s->hw_buf_info[tail].next = j;
				tail = j;
			} else {
				int8_t *cur;
				struct hw_buf_info *t;

				for (cur = &head; *cur != -1; cur = &t->next) {
					t = &s->hw_buf_info[*cur];
					if (hwb->size == t->size) {
						hwb->zidx = -2;
						break;
					}
					if (hwb->size > t->size) {
						hwb->next = *cur;
						*cur = j;
						break;
					}
				}
			}
		}
		swz->head_hwidx = head;
		swz->tail_hwidx = tail;

		if (tail != -1) {
			n++;
			if (swz->size - s->hw_buf_info[tail].size >=
			    CL_METADATA_SIZE)
				sc->flags |= BUF_PACKING_OK;
		}
	}
	if (n == 0) {
		device_printf(sc->dev, "no usable SGE FL buffer size.\n");
		rc = EINVAL;
	}
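
	/*
	 * Illustrative outcome of the loop above (assuming 4K pages and the
	 * hw buffer sizes programmed in t4_tweak_chip_settings): the
	 * MJUMPAGESIZE zone's list would be 4096 > 4096 - CL_METADATA_SIZE >
	 * 4096 - 2 * MSIZE - CL_METADATA_SIZE, i.e. decreasing hw buffer size
	 * and increasing spare area.
	 */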

	s->safe_hwidx1 = -1;
	s->safe_hwidx2 = -1;
	if (safe_swz != NULL) {
		s->safe_hwidx1 = safe_swz->head_hwidx;
		for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) {
			int spare;

			hwb = &s->hw_buf_info[i];
			spare = safe_swz->size - hwb->size;
			if (spare < CL_METADATA_SIZE)
				continue;
			if (s->safe_hwidx2 == -1 ||
			    spare == CL_METADATA_SIZE + MSIZE)
				s->safe_hwidx2 = i;
			if (spare >= CL_METADATA_SIZE + MSIZE)
				break;
		}
	}

	r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
	s->counter_val[0] = G_THRESHOLD_0(r);
	s->counter_val[1] = G_THRESHOLD_1(r);
	s->counter_val[2] = G_THRESHOLD_2(r);
	s->counter_val[3] = G_THRESHOLD_3(r);

	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
	s->timer_val[0] = G_TIMERVALUE0(r) / core_ticks_per_usec(sc);
	s->timer_val[1] = G_TIMERVALUE1(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
	s->timer_val[2] = G_TIMERVALUE2(r) / core_ticks_per_usec(sc);
	s->timer_val[3] = G_TIMERVALUE3(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
	s->timer_val[4] = G_TIMERVALUE4(r) / core_ticks_per_usec(sc);
	s->timer_val[5] = G_TIMERVALUE5(r) / core_ticks_per_usec(sc);

	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		r = t4_read_reg(sc, A_TP_PARA_REG3);
		if (r & m) {
			device_printf(sc->dev,
			    "invalid TP_PARA_REG3(0x%x)\n", r);
			rc = EINVAL;
		}
	}

	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
	if (r != v) {
		device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
		rc = EINVAL;
	}

	m = v = F_TDDPTAGTCB;
	r = t4_read_reg(sc, A_ULP_RX_CTL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
		rc = EINVAL;
	}

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	r = t4_read_reg(sc, A_TP_PARA_REG5);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
		rc = EINVAL;
	}

	r = t4_read_reg(sc, A_SGE_CONM_CTRL);
	s->fl_starve_threshold = G_EGRTHRESHOLD(r) * 2 + 1;
	if (is_t4(sc))
		s->fl_starve_threshold2 = s->fl_starve_threshold;
	else
		s->fl_starve_threshold2 = G_EGRTHRESHOLDPACKING(r) * 2 + 1;

	/* egress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_EGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->eq_s_qpp = r & M_QUEUESPERPAGEPF0;

	/* ingress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_INGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->iq_s_qpp = r & M_QUEUESPERPAGEPF0;

	t4_init_tp_params(sc);

	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
	t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);

	return (rc);
}

int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

static inline int
enable_buffer_packing(struct adapter *sc)
{

	if (sc->flags & BUF_PACKING_OK &&
	    ((is_t5(sc) && buffer_packing) ||	/* 1 or -1 both ok for T5 */
	    (is_t4(sc) && buffer_packing == 1)))
		return (1);
	return (0);
}
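
/*
 * In short: packing requires BUF_PACKING_OK; given that, T5 packs unless
 * hw.cxgbe.buffer_packing is explicitly 0, while T4 packs only when it is
 * explicitly 1.
 */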

void
t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
	    CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A",
	    "freelist buffer sizes");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
	    NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
	    NULL, fl_pad, "payload pad boundary (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
	    NULL, spg_len, "status page size (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
	    NULL, cong_drop, "congestion drop setting");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD,
	    NULL, enable_buffer_packing(sc),
	    "pack multiple frames in one fl buffer");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
	    NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)");
}
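
/*
 * The effective values can be inspected at runtime under the adapter's
 * sysctl node, e.g. (illustrative; the node name depends on how the nexus
 * attached):
 *
 *	sysctl dev.t4nex.0.fl_pktshift dev.t4nex.0.buffer_sizes
 */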

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	sysctl_ctx_init(&sc->ctx);
	sc->flags |= ADAP_SYSCTL_CTX;

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

	/*
	 * Management queue.  This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queues */
	if (sc->flags & ADAP_SYSCTL_CTX) {
		sysctl_ctx_free(&sc->ctx);
		sc->flags &= ~ADAP_SYSCTL_CTX;
	}

	free_mgmtq(sc);
	free_fwq(sc);

	return (0);
}

static inline int
first_vector(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc = T4_EXTRA_INTR, i;

	if (sc->intr_count == 1)
		return (0);

	for_each_port(sc, i) {
		struct port_info *p = sc->port[i];

		if (i == pi->port_id)
			break;

#ifdef TCP_OFFLOAD
		if (sc->flags & INTR_DIRECT)
			rc += p->nrxq + p->nofldrxq;
		else
			rc += max(p->nrxq, p->nofldrxq);
#else
		/*
		 * Not compiled with offload support and intr_count > 1.  Only
		 * NIC queues exist and they'd better be taking direct
		 * interrupts.
		 */
		KASSERT(sc->flags & INTR_DIRECT,
		    ("%s: intr_count %d, !INTR_DIRECT", __func__,
		    sc->intr_count));

		rc += p->nrxq;
#endif
	}

	return (rc);
}
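
/*
 * Example (hypothetical config): a 2-port adapter with INTR_DIRECT set and
 * nrxq = 4, nofldrxq = 2 on each port.  first_vector() returns T4_EXTRA_INTR
 * for port 0 and T4_EXTRA_INTR + 6 for port 1, because port 0 consumed
 * nrxq + nofldrxq = 6 vectors.
 */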

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
	struct adapter *sc = pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

#ifdef TCP_OFFLOAD
	if (sc->flags & INTR_DIRECT) {
		idx %= pi->nrxq + pi->nofldrxq;

		if (idx >= pi->nrxq) {
			idx -= pi->nrxq;
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
		} else
			iq = &s->rxq[pi->first_rxq + idx].iq;

	} else {
		idx %= max(pi->nrxq, pi->nofldrxq);

		if (pi->nrxq >= pi->nofldrxq)
			iq = &s->rxq[pi->first_rxq + idx].iq;
		else
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
	}
#else
	/*
	 * Not compiled with offload support and intr_count > 1.  Only NIC
	 * queues exist and they'd better be taking direct interrupts.
	 */
	KASSERT(sc->flags & INTR_DIRECT,
	    ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));

	idx %= pi->nrxq;
	iq = &s->rxq[pi->first_rxq + idx].iq;
#endif

	KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__));
	return (iq);
}
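
/*
 * Continuing the hypothetical config above (INTR_DIRECT, nrxq = 4,
 * nofldrxq = 2): port_intr_iq(pi, 5) maps to the port's second offload rx
 * queue (5 - nrxq = 1), and port_intr_iq(pi, 6) wraps around to the port's
 * first NIC rx queue.
 */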

/* Maximum payload that can be delivered with a single iq descriptor */
static inline int
mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
{
	int payload;

#ifdef TCP_OFFLOAD
	if (toe) {
		payload = sc->tt.rx_coalesce ?
		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu;
	} else {
#endif
		/* large enough even when hw VLAN extraction is disabled */
		payload = fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
		    mtu;
#ifdef TCP_OFFLOAD
	}
#endif
	payload = roundup2(payload, fl_pad);

	return (payload);
}
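
/*
 * Worked example for the NIC case (assuming fl_pktshift = 2 and fl_pad = 32):
 * an MTU of 1500 gives 2 + 14 (ETHER_HDR_LEN) + 4 (ETHER_VLAN_ENCAP_LEN) +
 * 1500 = 1520 bytes, which roundup2() pads to 1536.
 */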

int
t4_setup_port_queues(struct port_info *pi)
{
	int rc = 0, i, j, intr_idx, iqid;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
	struct sge_wrq *ctrlq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
	struct sysctl_oid *oid2 = NULL;
#endif
	char name[16];
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;
	struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int maxp, pack, mtu = ifp->if_mtu;

	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
	    NULL, "rx queues");

#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
	}
#endif

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = first_vector(pi);

	/*
	 * First pass over all rx queues (NIC and TOE):
	 * a) initialize iq and fl
	 * b) allocate queue iff it will take direct interrupts.
	 */
	maxp = mtu_to_max_payload(sc, mtu, 0);
	pack = enable_buffer_packing(sc);
	for_each_rxq(pi, i, rxq) {

		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
		    RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);

		if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
		    || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
#endif
		   ) {
			rxq->iq.flags |= IQ_INTR;
			rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}

#ifdef TCP_OFFLOAD
	maxp = mtu_to_max_payload(sc, mtu, 1);
	for_each_ofld_rxq(pi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
		    pi->qsize_rxq, RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);

		if (sc->flags & INTR_DIRECT ||
		    (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
			ofld_rxq->iq.flags |= IQ_INTR;
			rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#endif

	/*
	 * Second pass over all rx queues (NIC and TOE).  The queues forwarding
	 * their interrupts are allocated now.
	 */
	j = 0;
	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Now the tx queues.  Only one pass needed.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
	    NULL, "tx queues");
	j = 0;
	for_each_txq(pi, i, txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
		    name);

		rc = alloc_txq(pi, txq, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
	for_each_ofld_txq(pi, i, ofld_txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
		    iqid, name);

		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "offload tx queue");

		rc = alloc_wrq(sc, pi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Finally, the control queue.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
	    NULL, "ctrl queue");
	ctrlq = &sc->sge.ctrlq[pi->port_id];
	iqid = port_intr_iq(pi, 0)->cntxt_id;
	snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
	init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
	rc = alloc_wrq(sc, pi, ctrlq, oid);

done:
	if (rc)
		t4_teardown_port_queues(pi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_port_queues(struct port_info *pi)
{
	int i;
	struct adapter *sc = pi->adapter;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif

	/* Do this before freeing the queues */
	if (pi->flags & PORT_SYSCTL_CTX) {
		sysctl_ctx_free(&pi->ctx);
		pi->flags &= ~PORT_SYSCTL_CTX;
	}

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

	for_each_txq(pi, i, txq) {
		free_txq(pi, txq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_txq(pi, i, ofld_txq) {
		free_wrq(sc, ofld_txq);
	}
#endif

	/*
	 * Then take down the rx queues that forward their interrupts, as they
	 * reference other rx queues.
	 */

	for_each_rxq(pi, i, rxq) {
		if ((rxq->iq.flags & IQ_INTR) == 0)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	/*
	 * Then take down the rx queues that take direct interrupts.
	 */

	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	return (0);
}

/*
 * Deals with errors and the firmware event queue.  All data rx queues forward
 * their interrupt to the firmware event queue.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	t4_intr_err(arg);
	if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(fwq, 0);
		atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
	}
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;

	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
	t4_slow_intr_handler(sc);
}

void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

/*
 * Deals with anything and everything on the given ingress queue.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */
	struct sge_fl *fl = &rxq->fl;		/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	const struct rss_header *rss;
	int ndescs = 0, limit, fl_bufs_used = 0;
	int rsp_type;
	uint32_t lq;
	struct mbuf *m0;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
#if defined(INET) || defined(INET6)
	const struct timeval lro_timeout = {0, sc->lro_timeout};
#endif

	limit = budget ? budget : iq->qsize / 8;

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while (is_new_response(iq, &ctrl)) {

			rmb();

			m0 = NULL;
			rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
			lq = be32toh(ctrl->pldbuflen_qid);
			rss = (const void *)iq->cdesc;

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:

				KASSERT(iq->flags & IQ_HAS_FL,
				    ("%s: data for an iq (%p) with no freelist",
				    __func__, iq));

				m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
				if (__predict_false(m0 == NULL))
					goto process_iql;
#ifdef T4_PKT_TIMESTAMP
				/*
				 * 60 bit timestamp for the payload is
				 * *(uint64_t *)m0->m_pktdat.  Note that it is
				 * in the leading free-space in the mbuf.  The
				 * kernel can clobber it during a pullup,
				 * m_copymdata, etc.  You need to make sure that
				 * the mbuf reaches you unmolested if you care
				 * about the timestamp.
				 */
				*(uint64_t *)m0->m_pktdat =
				    be64toh(ctrl->u.last_flit) &
				    0xfffffffffffffff;
#endif

				/* fall through */

			case X_RSPD_TYPE_CPL:
				KASSERT(rss->opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    rss->opcode));
				sc->cpl_handler[rss->opcode](iq, rss, m0);
				break;

			case X_RSPD_TYPE_INTR:

				/*
				 * Interrupts should be forwarded only to queues
				 * that are not forwarding their interrupts.
				 * This means service_iq can recurse but only 1
				 * level deep.
				 */
				KASSERT(budget == 0,
				    ("%s: budget %u, rsp_type %u", __func__,
				    budget, rsp_type));

				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (lq >= 1024) {
					sc->an_handler(iq, ctrl);
					break;
				}

				q = sc->sge.iqmap[lq - sc->sge.iq_start];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq(q, q->qsize / 8) == 0) {
						atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			if (fl_bufs_used >= 16) {
				FL_LOCK(fl);
				fl->needed += fl_bufs_used;
				refill_fl(sc, fl, 32);
				FL_UNLOCK(fl);
				fl_bufs_used = 0;
			}

			iq_next(iq);
			if (++ndescs == limit) {
				t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

#if defined(INET) || defined(INET6)
				if (iq->flags & IQ_LRO_ENABLED &&
				    sc->lro_timeout != 0) {
					tcp_lro_flush_inactive(&rxq->lro,
					    &lro_timeout);
				}
#endif

				if (budget) {
					if (fl_bufs_used) {
						FL_LOCK(fl);
						fl->needed += fl_bufs_used;
						refill_fl(sc, fl, 32);
						FL_UNLOCK(fl);
					}
					return (EINPROGRESS);
				}
			}
		}

process_iql:
		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq(q, q->qsize / 8) == 0)
			atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		struct lro_ctrl *lro = &rxq->lro;
		struct lro_entry *l;

		while (!SLIST_EMPTY(&lro->lro_active)) {
			l = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, l);
		}
	}
#endif

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	if (iq->flags & IQ_HAS_FL) {
		int starved;

		FL_LOCK(fl);
		fl->needed += fl_bufs_used;
		starved = refill_fl(sc, fl, 64);
		FL_UNLOCK(fl);
		if (__predict_false(starved != 0))
			add_fl_to_sfl(sc, fl);
	}

	return (0);
}

1422265425Snpstatic inline int
1423265425Snpcl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll)
1424255050Snp{
1425265425Snp	int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0;
1426255050Snp
1427265425Snp	if (rc)
1428265425Snp		MPASS(cll->region3 >= CL_METADATA_SIZE);
1429255050Snp
1430265425Snp	return (rc);
1431255050Snp}
1432255050Snp
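/*
 * Layout of a freelist cluster, as implied by the region accounting in this
 * file (the region sizes themselves are chosen when the fl's refill source
 * is selected):
 *
 *  |<-- region1 -->|<--------- payload region --------->|<-- region3 -->|
 *  | inline mbufs  | frames, consumed at fl->rx_offset  | metadata      |
 *
 * cl_metadata() returns the struct cluster_metadata stored at the very end
 * of the cluster (within region3).
 */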
1433265425Snpstatic inline struct cluster_metadata *
1434265425Snpcl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll,
1435265425Snp    caddr_t cl)
1436255050Snp{
1437255050Snp
1438265425Snp	if (cl_has_metadata(fl, cll)) {
1439265425Snp		struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
1440255050Snp
1441265425Snp		return ((struct cluster_metadata *)(cl + swz->size) - 1);
1442255050Snp	}
1443265425Snp	return (NULL);
1444255050Snp}
1445255050Snp
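/*
 * External-storage free callback handed to m_extaddref(): once the last
 * reference on the cluster drops, return it to its UMA zone.
 */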
1446255050Snpstatic int
1447255050Snprxb_free(struct mbuf *m, void *arg1, void *arg2)
1448255050Snp{
1449255050Snp	uma_zone_t zone = arg1;
1450255050Snp	caddr_t cl = arg2;
1451255050Snp
1452255050Snp	uma_zfree(zone, cl);
1453255050Snp
1454255050Snp	return (EXT_FREE_OK);
1455255050Snp}
1456255050Snp
1457265425Snp/*
1458265425Snp * The mbuf returned by this function could be allocated from zone_mbuf or
1459265425Snp * constructed in spare room in the cluster.
1460265425Snp *
1461265425Snp * The mbuf carries the payload in one of these ways:
1462265425Snp * a) frame copied into the mbuf itself (mbuf from zone_mbuf)
1463265425Snp * b) cluster without metadata, attached with m_cljset to a zone_mbuf mbuf
1464265425Snp * c) cluster with metadata, attached with m_extaddref to an inline mbuf
1465265425Snp * d) cluster with metadata, attached with m_extaddref to a zone_mbuf mbuf
1466265425Snp */
1467255050Snpstatic struct mbuf *
1468265425Snpget_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
1469218792Snp{
1470265425Snp	struct mbuf *m;
1471228561Snp	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1472265425Snp	struct cluster_layout *cll = &sd->cll;
1473265425Snp	struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
1474265425Snp	struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
1475265425Snp	struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
1476265425Snp	int len, padded_len;
1477265425Snp	caddr_t payload;
1478218792Snp
1479265425Snp	len = min(total, hwb->size - fl->rx_offset);
1480265425Snp	padded_len = roundup2(len, fl_pad);
1481265425Snp	payload = sd->cl + cll->region1 + fl->rx_offset;
1482219290Snp
1483265425Snp	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
1484255050Snp
1485265425Snp		/*
1486265425Snp		 * Copy payload into a freshly allocated mbuf.
1487265425Snp		 */
1488255050Snp
1489265425Snp		m = flags & M_PKTHDR ?
1490265425Snp		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
1491265425Snp		if (m == NULL)
1492255050Snp			return (NULL);
1493265425Snp		fl->mbuf_allocated++;
1494255050Snp#ifdef T4_PKT_TIMESTAMP
1495265425Snp		/* Leave room for a timestamp */
1496265425Snp		m->m_data += 8;
1497255050Snp#endif
1498265425Snp		/* copy data to mbuf */
1499265425Snp		bcopy(payload, mtod(m, caddr_t), len);
1500255050Snp
1501267694Snp	} else if (sd->nimbuf * MSIZE < cll->region1) {
1502255050Snp
1503265425Snp		/*
1504265425Snp		 * There's spare room in the cluster for an mbuf.  Create one
1505267694Snp		 * and associate it with the payload that's in the cluster.
1506265425Snp		 */
1507255050Snp
1508265425Snp		MPASS(clm != NULL);
1509267694Snp		m = (struct mbuf *)(sd->cl + sd->nimbuf * MSIZE);
1510265425Snp		/* No bzero required */
1511265425Snp		if (m_init(m, NULL, 0, M_NOWAIT, MT_DATA, flags | M_NOFREE))
1512265425Snp			return (NULL);
1513265425Snp		fl->mbuf_inlined++;
1514265425Snp		m_extaddref(m, payload, padded_len, &clm->refcount, rxb_free,
1515265425Snp		    swz->zone, sd->cl);
1516267694Snp		sd->nimbuf++;
1517255050Snp
1518265425Snp	} else {
1519255050Snp
1520265425Snp		/*
1521265425Snp		 * Grab an mbuf from zone_mbuf and associate it with the
1522265425Snp		 * payload in the cluster.
1523265425Snp		 */
1524255050Snp
1525265425Snp		m = flags & M_PKTHDR ?
1526265425Snp		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
1527265425Snp		if (m == NULL)
1528265425Snp			return (NULL);
1529265425Snp		fl->mbuf_allocated++;
1530267694Snp		if (clm != NULL) {
1531265425Snp			m_extaddref(m, payload, padded_len, &clm->refcount,
1532265425Snp			    rxb_free, swz->zone, sd->cl);
1533267694Snp			sd->nembuf++;
1534267694Snp		} else {
1535265425Snp			m_cljset(m, sd->cl, swz->type);
1536265425Snp			sd->cl = NULL;	/* consumed, not a recycle candidate */
1537255050Snp		}
1538255050Snp	}
1539265425Snp	if (flags & M_PKTHDR)
1540265425Snp		m->m_pkthdr.len = total;
1541265425Snp	m->m_len = len;
1542255050Snp
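	/*
	 * When packing, position the next frame at the pack boundary past
	 * this segment.  E.g. (hypothetical values): with fl_pad = 32 and a
	 * 64-byte pack boundary, a 40-byte segment has padded_len
	 * roundup2(40, 32) = 64 and advances rx_offset by
	 * roundup2(64, 64) = 64.
	 */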
1543265425Snp	if (fl->flags & FL_BUF_PACKING) {
1544265425Snp		fl->rx_offset += roundup2(padded_len, sc->sge.pack_boundary);
1545265425Snp		MPASS(fl->rx_offset <= hwb->size);
1546265425Snp		if (fl->rx_offset < hwb->size)
1547265425Snp			return (m);	/* without advancing the cidx */
1548265425Snp	}
1549255050Snp
1550265425Snp	if (__predict_false(++fl->cidx == fl->cap))
1551265425Snp		fl->cidx = 0;
1552265425Snp	fl->rx_offset = 0;
1553255050Snp
1554265425Snp	return (m);
1555255050Snp}
1556255050Snp
1557255050Snpstatic struct mbuf *
1558265425Snpget_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
1559255050Snp    int *fl_bufs_used)
1560255050Snp{
1561265425Snp	struct mbuf *m0, *m, **pnext;
1562265425Snp	u_int nbuf, len;
1563255050Snp
1564255050Snp	/*
1565255050Snp	 * No assertion for the fl lock because we don't need it.  This routine
1566255050Snp	 * is called only from the rx interrupt handler and it only updates
1567255050Snp	 * fl->cidx.  (Contrast that with fl->pidx/fl->needed which could be
1568255050Snp	 * updated in the rx interrupt handler or the starvation helper routine.
1569255050Snp	 * That's why code that manipulates fl->pidx/fl->needed needs the fl
1570255050Snp	 * lock but this routine does not).
1571255050Snp	 */
1572255050Snp
1573265425Snp	nbuf = 0;
1574228561Snp	len = G_RSPD_LEN(len_newbuf);
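	/*
	 * A previous call may have run out of mbufs or clusters partway
	 * through a frame and stashed the partial assembly in fl->m0,
	 * fl->pnext, and fl->remaining (see the failure path below).  Resume
	 * from there if so.
	 */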
1575265425Snp	if (__predict_false(fl->m0 != NULL)) {
1576266965Snp		M_ASSERTPKTHDR(fl->m0);
1577265425Snp		MPASS(len == fl->m0->m_pkthdr.len);
1578265425Snp		MPASS(fl->remaining < len);
1579218792Snp
1580265425Snp		m0 = fl->m0;
1581265425Snp		pnext = fl->pnext;
1582265425Snp		len = fl->remaining;
1583265425Snp		fl->m0 = NULL;
1584265425Snp		goto get_segment;
1585255050Snp	}
1586255050Snp
1587265425Snp	if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
1588265425Snp		nbuf++;
1589265425Snp		fl->rx_offset = 0;
1590265425Snp		if (__predict_false(++fl->cidx == fl->cap))
1591265425Snp			fl->cidx = 0;
1592228561Snp	}
1593218792Snp
1594265425Snp	/*
1595265425Snp	 * Payload starts at rx_offset in the current hw buffer.  Its length is
1596265425Snp	 * 'len' and it may span multiple hw buffers.
1597265425Snp	 */
1598218792Snp
1599265425Snp	m0 = get_scatter_segment(sc, fl, len, M_PKTHDR);
1600266965Snp	if (m0 == NULL)
1601266965Snp		goto done;
1602265425Snp	len -= m0->m_len;
1603265425Snp	pnext = &m0->m_next;
1604228561Snp	while (len > 0) {
1605265425Snp		nbuf++;
1606265425Snpget_segment:
1607265425Snp		MPASS(fl->rx_offset == 0);
1608265425Snp		m = get_scatter_segment(sc, fl, len, 0);
1609265425Snp		if (m == NULL) {
1610265425Snp			fl->m0 = m0;
1611265425Snp			fl->pnext = pnext;
1612265425Snp			fl->remaining = len;
1613266965Snp			m0 = NULL;
1614266965Snp			goto done;
1615218792Snp		}
1616265425Snp		*pnext = m;
1617265425Snp		pnext = &m->m_next;
1618228561Snp		len -= m->m_len;
1619265425Snp	}
1620265425Snp	*pnext = NULL;
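	/*
	 * A hw buffer counts as consumed only once the cidx has moved past
	 * it; the buffer holding the final segment counts only if it was
	 * finished off (rx_offset back at 0).
	 */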
1621265425Snp	if (fl->rx_offset == 0)
1622228561Snp		nbuf++;
1623266965Snpdone:
1624228561Snp	(*fl_bufs_used) += nbuf;
1625228561Snp	return (m0);
1626228561Snp}
1627218792Snp
1628228561Snpstatic int
1629228561Snpt4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
1630228561Snp{
1631237463Snp	struct sge_rxq *rxq = iq_to_rxq(iq);
1632228561Snp	struct ifnet *ifp = rxq->ifp;
1633228561Snp	const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
1634237819Snp#if defined(INET) || defined(INET6)
1635228561Snp	struct lro_ctrl *lro = &rxq->lro;
1636228561Snp#endif
1637219290Snp
1638228561Snp	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
1639228561Snp	    rss->opcode));
1640219290Snp
1641239258Snp	m0->m_pkthdr.len -= fl_pktshift;
1642239258Snp	m0->m_len -= fl_pktshift;
1643239258Snp	m0->m_data += fl_pktshift;
1644219290Snp
1645228561Snp	m0->m_pkthdr.rcvif = ifp;
1646228561Snp	m0->m_flags |= M_FLOWID;
1647259142Snp	m0->m_pkthdr.flowid = be32toh(rss->hash_val);
1648219290Snp
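	/*
	 * Convention: csum_data of 0xffff with CSUM_PSEUDO_HDR set tells the
	 * stack that the checksum was verified in full.  For IP fragments
	 * the hardware-computed checksum of the piece is passed up instead,
	 * for the stack to combine once all fragments arrive.
	 */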
1649237799Snp	if (cpl->csum_calc && !cpl->err_vec) {
1650237799Snp		if (ifp->if_capenable & IFCAP_RXCSUM &&
1651237799Snp		    cpl->l2info & htobe32(F_RXF_IP)) {
1652237831Snp			m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
1653237799Snp			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1654237799Snp			rxq->rxcsum++;
1655237799Snp		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
1656237799Snp		    cpl->l2info & htobe32(F_RXF_IP6)) {
1657237831Snp			m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
1658237799Snp			    CSUM_PSEUDO_HDR);
1659237799Snp			rxq->rxcsum++;
1660237799Snp		}
1661237799Snp
1662237799Snp		if (__predict_false(cpl->ip_frag))
1663228561Snp			m0->m_pkthdr.csum_data = be16toh(cpl->csum);
1664228561Snp		else
1665228561Snp			m0->m_pkthdr.csum_data = 0xffff;
1666228561Snp	}
1667219290Snp
1668228561Snp	if (cpl->vlan_ex) {
1669228561Snp		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
1670228561Snp		m0->m_flags |= M_VLANTAG;
1671228561Snp		rxq->vlan_extraction++;
1672228561Snp	}
1673219290Snp
1674237819Snp#if defined(INET) || defined(INET6)
1675228561Snp	if (cpl->l2info & htobe32(F_RXF_LRO) &&
1676228561Snp	    iq->flags & IQ_LRO_ENABLED &&
1677228561Snp	    tcp_lro_rx(lro, m0, 0) == 0) {
1678228561Snp		/* queued for LRO */
1679228561Snp	} else
1680218792Snp#endif
1681228561Snp	ifp->if_input(ifp, m0);
1682218792Snp
1683228561Snp	return (0);
1684228561Snp}
1685218792Snp
1686228561Snp/*
1687228561Snp * Doesn't fail.  Holds on to work requests it can't send right away.
1688228561Snp */
1689237263Snpvoid
1690237263Snpt4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
1691228561Snp{
1692228561Snp	struct sge_eq *eq = &wrq->eq;
1693228561Snp	int can_reclaim;
1694228561Snp	caddr_t dst;
1695228561Snp
1696228561Snp	TXQ_LOCK_ASSERT_OWNED(wrq);
1697237263Snp#ifdef TCP_OFFLOAD
1698228561Snp	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
1699228561Snp	    (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1700228561Snp	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1701237263Snp#else
1702237263Snp	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1703237263Snp	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1704237263Snp#endif
1705228561Snp
1706237263Snp	if (__predict_true(wr != NULL))
1707237263Snp		STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
1708218792Snp
1709228561Snp	can_reclaim = reclaimable(eq);
1710228561Snp	if (__predict_false(eq->flags & EQ_STALLED)) {
1711228561Snp		if (can_reclaim < tx_resume_threshold(eq))
1712237263Snp			return;
1713228561Snp		eq->flags &= ~EQ_STALLED;
1714228561Snp		eq->unstalled++;
1715218792Snp	}
1716228561Snp	eq->cidx += can_reclaim;
1717228561Snp	eq->avail += can_reclaim;
1718228561Snp	if (__predict_false(eq->cidx >= eq->cap))
1719228561Snp		eq->cidx -= eq->cap;
1720228561Snp
1721237263Snp	while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
1722228561Snp		int ndesc;
1723228561Snp
1724237263Snp		if (__predict_false(wr->wr_len < 0 ||
1725237263Snp		    wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {
1726228561Snp
1727228561Snp#ifdef INVARIANTS
1728237263Snp			panic("%s: work request with length %d", __func__,
1729237263Snp			    wr->wr_len);
1730237263Snp#endif
1731237263Snp#ifdef KDB
1732237263Snp			kdb_backtrace();
1733237263Snp#endif
1734237263Snp			log(LOG_ERR, "%s: %s work request with length %d\n",
1735237263Snp			    device_get_nameunit(sc->dev), __func__, wr->wr_len);
1736237263Snp			STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1737237263Snp			free_wrqe(wr);
1738228561Snp			continue;
1739228561Snp		}
1740218792Snp
1741237263Snp		ndesc = howmany(wr->wr_len, EQ_ESIZE);
1742228561Snp		if (eq->avail < ndesc) {
1743228561Snp			wrq->no_desc++;
1744228561Snp			break;
1745228561Snp		}
1746218792Snp
1747228561Snp		dst = (void *)&eq->desc[eq->pidx];
1748237263Snp		copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);
1749218792Snp
1750228561Snp		eq->pidx += ndesc;
1751228561Snp		eq->avail -= ndesc;
1752228561Snp		if (__predict_false(eq->pidx >= eq->cap))
1753228561Snp			eq->pidx -= eq->cap;
1754228561Snp
1755228561Snp		eq->pending += ndesc;
1756252715Snp		if (eq->pending >= 8)
1757228561Snp			ring_eq_db(sc, eq);
1758228561Snp
1759228561Snp		wrq->tx_wrs++;
1760237263Snp		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1761237263Snp		free_wrqe(wr);
1762228561Snp
1763228561Snp		if (eq->avail < 8) {
1764228561Snp			can_reclaim = reclaimable(eq);
1765228561Snp			eq->cidx += can_reclaim;
1766228561Snp			eq->avail += can_reclaim;
1767228561Snp			if (__predict_false(eq->cidx >= eq->cap))
1768228561Snp				eq->cidx -= eq->cap;
1769228561Snp		}
1770228561Snp	}
1771228561Snp
1772228561Snp	if (eq->pending)
1773228561Snp		ring_eq_db(sc, eq);
1774228561Snp
1775237263Snp	if (wr != NULL) {
1776228561Snp		eq->flags |= EQ_STALLED;
1777228561Snp		if (callout_pending(&eq->tx_callout) == 0)
1778228561Snp			callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1779228561Snp	}
1780220873Snp}
1781220873Snp
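/*
 * A "flit" is one 64-bit word (8 bytes) of an egress descriptor; the byte
 * counts below are divided by 8 to express these headers in flits.
 */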
1782218792Snp/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
1783218792Snp#define TXPKTS_PKT_HDR ((\
1784218792Snp    sizeof(struct ulp_txpkt) + \
1785218792Snp    sizeof(struct ulptx_idata) + \
1786218792Snp    sizeof(struct cpl_tx_pkt_core) \
1787218792Snp    ) / 8)
1788218792Snp
1789218792Snp/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
1790218792Snp#define TXPKTS_WR_HDR (\
1791218792Snp    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
1792218792Snp    TXPKTS_PKT_HDR)
1793218792Snp
1794218792Snp/* Header of a tx WR, before SGL of first packet (in flits) */
1795218792Snp#define TXPKT_WR_HDR ((\
1796218792Snp    sizeof(struct fw_eth_tx_pkt_wr) + \
1797218792Snp    sizeof(struct cpl_tx_pkt_core) \
1798218792Snp    ) / 8 )
1799218792Snp
1800218792Snp/* Header of a tx LSO WR, before SGL of first packet (in flits) */
1801218792Snp#define TXPKT_LSO_WR_HDR ((\
1802218792Snp    sizeof(struct fw_eth_tx_pkt_wr) + \
1803237436Snp    sizeof(struct cpl_tx_pkt_lso_core) + \
1804218792Snp    sizeof(struct cpl_tx_pkt_core) \
1805218792Snp    ) / 8 )
1806218792Snp
1807218792Snpint
1808218792Snpt4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
1809218792Snp{
1810218792Snp	struct port_info *pi = (void *)ifp->if_softc;
1811218792Snp	struct adapter *sc = pi->adapter;
1812218792Snp	struct sge_eq *eq = &txq->eq;
1813220873Snp	struct buf_ring *br = txq->br;
1814218792Snp	struct mbuf *next;
1815219292Snp	int rc, coalescing, can_reclaim;
1816218792Snp	struct txpkts txpkts;
1817218792Snp	struct sgl sgl;
1818218792Snp
1819218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
1820218792Snp	KASSERT(m, ("%s: called with nothing to do.", __func__));
1821228561Snp	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
1822228561Snp	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1823218792Snp
1824219292Snp	prefetch(&eq->desc[eq->pidx]);
1825220873Snp	prefetch(&txq->sdesc[eq->pidx]);
1826219292Snp
1827218792Snp	txpkts.npkt = 0;/* indicates there's nothing in txpkts */
1828218792Snp	coalescing = 0;
1829218792Snp
1830228561Snp	can_reclaim = reclaimable(eq);
1831228561Snp	if (__predict_false(eq->flags & EQ_STALLED)) {
1832228561Snp		if (can_reclaim < tx_resume_threshold(eq)) {
1833228561Snp			txq->m = m;
1834228561Snp			return (0);
1835228561Snp		}
1836228561Snp		eq->flags &= ~EQ_STALLED;
1837228561Snp		eq->unstalled++;
1838228561Snp	}
1839218792Snp
1840228561Snp	if (__predict_false(eq->flags & EQ_DOOMED)) {
1841228561Snp		m_freem(m);
1842228561Snp		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
1843228561Snp			m_freem(m);
1844228561Snp		return (ENETDOWN);
1845228561Snp	}
1846228561Snp
1847228561Snp	if (eq->avail < 8 && can_reclaim)
1848228561Snp		reclaim_tx_descs(txq, can_reclaim, 32);
1849228561Snp
1850218792Snp	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
1851218792Snp
1852218792Snp		if (eq->avail < 8)
1853218792Snp			break;
1854218792Snp
1855218792Snp		next = m->m_nextpkt;
1856218792Snp		m->m_nextpkt = NULL;
1857218792Snp
1858218792Snp		if (next || buf_ring_peek(br))
1859218792Snp			coalescing = 1;
1860218792Snp
1861218792Snp		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
1862218792Snp		if (rc != 0) {
1863218792Snp			if (rc == ENOMEM) {
1864218792Snp
1865218792Snp				/* Short of resources, suspend tx */
1866218792Snp
1867218792Snp				m->m_nextpkt = next;
1868218792Snp				break;
1869218792Snp			}
1870218792Snp
1871218792Snp			/*
1872218792Snp			 * Unrecoverable error for this packet, throw it away
1873218792Snp			 * and move on to the next.  get_pkt_sgl may already
1874218792Snp			 * have freed m (it will be NULL in that case and the
1875218792Snp			 * m_freem here is still safe).
1876218792Snp			 */
1877218792Snp
1878218792Snp			m_freem(m);
1879218792Snp			continue;
1880218792Snp		}
1881218792Snp
1882218792Snp		if (coalescing &&
1883218792Snp		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
1884218792Snp
1885218792Snp			/* Successfully absorbed into txpkts */
1886218792Snp
1887218792Snp			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
1888218792Snp			goto doorbell;
1889218792Snp		}
1890218792Snp
1891218792Snp		/*
1892218792Snp		 * We weren't coalescing to begin with, or the current frame could
1893218792Snp		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
1894218792Snp		 * given to it can't be coalesced).  Either way there should be
1895218792Snp		 * nothing in txpkts.
1896218792Snp		 */
1897218792Snp		KASSERT(txpkts.npkt == 0,
1898218792Snp		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
1899218792Snp
1900218792Snp		/* We're sending out individual packets now */
1901218792Snp		coalescing = 0;
1902218792Snp
1903218792Snp		if (eq->avail < 8)
1904220873Snp			reclaim_tx_descs(txq, 0, 8);
1905218792Snp		rc = write_txpkt_wr(pi, txq, m, &sgl);
1906218792Snp		if (rc != 0) {
1907218792Snp
1908218792Snp			/* Short of hardware descriptors, suspend tx */
1909218792Snp
1910218792Snp			/*
1911218792Snp			 * This is an unlikely but expensive failure.  We've
1912218792Snp			 * done all the hard work (DMA mappings etc.) and now we
1913218792Snp			 * can't send out the packet.  What's worse, we have to
1914218792Snp			 * spend even more time freeing up everything in sgl.
1915218792Snp			 */
1916218792Snp			txq->no_desc++;
1917218792Snp			free_pkt_sgl(txq, &sgl);
1918218792Snp
1919218792Snp			m->m_nextpkt = next;
1920218792Snp			break;
1921218792Snp		}
1922218792Snp
1923218792Snp		ETHER_BPF_MTAP(ifp, m);
1924218792Snp		if (sgl.nsegs == 0)
1925218792Snp			m_freem(m);
1926218792Snpdoorbell:
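		/*
		 * Ring the doorbell only once at least 8 descriptors are
		 * pending, to amortize the register write; anything still
		 * pending is flushed after the loop.
		 */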
1927252715Snp		if (eq->pending >= 8)
1928252715Snp			ring_eq_db(sc, eq);
1929219292Snp
1930219292Snp		can_reclaim = reclaimable(eq);
1931219292Snp		if (can_reclaim >= 32)
1932228561Snp			reclaim_tx_descs(txq, can_reclaim, 64);
1933218792Snp	}
1934218792Snp
1935218792Snp	if (txpkts.npkt > 0)
1936218792Snp		write_txpkts_wr(txq, &txpkts);
1937218792Snp
1938218792Snp	/*
1939218792Snp	 * m not NULL means there was an error but we haven't thrown it away.
1940218792Snp	 * This can happen when we're short of tx descriptors (no_desc) or maybe
1941218792Snp	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
1942218792Snp	 * will get things going again.
1943218792Snp	 */
1944228561Snp	if (m && !(eq->flags & EQ_CRFLUSHED)) {
1945220873Snp		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
1946220873Snp
1947228561Snp		/*
1948228561Snp		 * If EQ_CRFLUSHED is not set then we know we have at least one
1949228561Snp		 * available descriptor because any WR that reduces eq->avail to
1950228561Snp		 * 0 also sets EQ_CRFLUSHED.
1951228561Snp		 */
1952228561Snp		KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));
1953228561Snp
1954220873Snp		txsd->desc_used = 1;
1955220873Snp		txsd->credits = 0;
1956218792Snp		write_eqflush_wr(eq);
1957220873Snp	}
1958218792Snp	txq->m = m;
1959218792Snp
1960218792Snp	if (eq->pending)
1961220873Snp		ring_eq_db(sc, eq);
1962218792Snp
1963228561Snp	reclaim_tx_descs(txq, 0, 128);
1964218792Snp
1965228561Snp	if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
1966228561Snp		callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1967228561Snp
1968218792Snp	return (0);
1969218792Snp}
1970218792Snp
1971218792Snpvoid
1972218792Snpt4_update_fl_bufsize(struct ifnet *ifp)
1973218792Snp{
1974218792Snp	struct port_info *pi = ifp->if_softc;
1975255050Snp	struct adapter *sc = pi->adapter;
1976218792Snp	struct sge_rxq *rxq;
1977252728Snp#ifdef TCP_OFFLOAD
1978252728Snp	struct sge_ofld_rxq *ofld_rxq;
1979252728Snp#endif
1980218792Snp	struct sge_fl *fl;
1981265425Snp	int i, maxp, mtu = ifp->if_mtu;
1982218792Snp
1983265425Snp	maxp = mtu_to_max_payload(sc, mtu, 0);
1984218792Snp	for_each_rxq(pi, i, rxq) {
1985218792Snp		fl = &rxq->fl;
1986218792Snp
1987218792Snp		FL_LOCK(fl);
1988265425Snp		find_best_refill_source(sc, fl, maxp);
1989218792Snp		FL_UNLOCK(fl);
1990218792Snp	}
1991252728Snp#ifdef TCP_OFFLOAD
1992265425Snp	maxp = mtu_to_max_payload(sc, mtu, 1);
1993252728Snp	for_each_ofld_rxq(pi, i, ofld_rxq) {
1994252728Snp		fl = &ofld_rxq->fl;
1995252728Snp
1996252728Snp		FL_LOCK(fl);
1997265425Snp		find_best_refill_source(sc, fl, maxp);
1998252728Snp		FL_UNLOCK(fl);
1999252728Snp	}
2000252728Snp#endif
2001218792Snp}
2002218792Snp
2003228561Snpint
2004228561Snpcan_resume_tx(struct sge_eq *eq)
2005228561Snp{
2006228561Snp	return (reclaimable(eq) >= tx_resume_threshold(eq));
2007228561Snp}
2008228561Snp
2009218792Snpstatic inline void
2010218792Snpinit_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
2011241397Snp    int qsize, int esize)
2012218792Snp{
2013218792Snp	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
2014218792Snp	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
2015218792Snp	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
2016218792Snp	    ("%s: bad pktc_idx %d", __func__, pktc_idx));
2017218792Snp
2018218792Snp	iq->flags = 0;
2019218792Snp	iq->adapter = sc;
2020234833Snp	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
2021234833Snp	iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
2022234833Snp	if (pktc_idx >= 0) {
2023234833Snp		iq->intr_params |= F_QINTR_CNT_EN;
2024234833Snp		iq->intr_pktc_idx = pktc_idx;
2025234833Snp	}
2026248925Snp	iq->qsize = roundup2(qsize, 16);	/* See FW_IQ_CMD/iqsize */
2027218792Snp	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
2028218792Snp}
2029218792Snp
2030218792Snpstatic inline void
2031265425Snpinit_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, int pack,
2032255050Snp    char *name)
2033218792Snp{
2034255050Snp
2035218792Snp	fl->qsize = qsize;
2036218792Snp	strlcpy(fl->lockname, name, sizeof(fl->lockname));
2037255050Snp	if (pack)
2038255050Snp		fl->flags |= FL_BUF_PACKING;
2039265425Snp	find_best_refill_source(sc, fl, maxp);
2040265425Snp	find_safe_refill_source(sc, fl);
2041218792Snp}
2042218792Snp
2043218792Snpstatic inline void
2044228561Snpinit_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
2045228561Snp    uint16_t iqid, char *name)
2046218792Snp{
2047228561Snp	KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
2048228561Snp	KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));
2049228561Snp
2050228561Snp	eq->flags = eqtype & EQ_TYPEMASK;
2051228561Snp	eq->tx_chan = tx_chan;
2052228561Snp	eq->iqid = iqid;
2053220873Snp	eq->qsize = qsize;
2054220873Snp	strlcpy(eq->lockname, name, sizeof(eq->lockname));
2055228561Snp
2056228561Snp	TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
2057228561Snp	callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
2058218792Snp}
2059218792Snp
2060218792Snpstatic int
2061218792Snpalloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
2062218792Snp    bus_dmamap_t *map, bus_addr_t *pa, void **va)
2063218792Snp{
2064218792Snp	int rc;
2065218792Snp
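	/*
	 * The usual busdma sequence: create a tag describing the ring's
	 * alignment and size constraints, allocate coherent zeroed memory,
	 * then load the map to obtain the bus address for the hardware.
	 */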
2066218792Snp	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
2067218792Snp	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
2068218792Snp	if (rc != 0) {
2069218792Snp		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
2070218792Snp		goto done;
2071218792Snp	}
2072218792Snp
2073218792Snp	rc = bus_dmamem_alloc(*tag, va,
2074218792Snp	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
2075218792Snp	if (rc != 0) {
2076218792Snp		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
2077218792Snp		goto done;
2078218792Snp	}
2079218792Snp
2080218792Snp	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
2081218792Snp	if (rc != 0) {
2082218792Snp		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
2083218792Snp		goto done;
2084218792Snp	}
2085218792Snpdone:
2086218792Snp	if (rc)
2087218792Snp		free_ring(sc, *tag, *map, *pa, *va);
2088218792Snp
2089218792Snp	return (rc);
2090218792Snp}
2091218792Snp
2092218792Snpstatic int
2093218792Snpfree_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
2094218792Snp    bus_addr_t pa, void *va)
2095218792Snp{
2096218792Snp	if (pa)
2097218792Snp		bus_dmamap_unload(tag, map);
2098218792Snp	if (va)
2099218792Snp		bus_dmamem_free(tag, va, map);
2100218792Snp	if (tag)
2101218792Snp		bus_dma_tag_destroy(tag);
2102218792Snp
2103218792Snp	return (0);
2104218792Snp}
2105218792Snp
2106218792Snp/*
2107218792Snp * Allocates the ring for an ingress queue and an optional freelist.  If the
2108218792Snp * freelist is specified it will be allocated and then associated with the
2109218792Snp * ingress queue.
2110218792Snp *
2111218792Snp * Returns errno on failure.  Resources allocated up to that point may still be
2112218792Snp * allocated.  Caller is responsible for cleanup in case this function fails.
2113218792Snp *
2114228561Snp * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
2115218792Snp * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
2116228561Snp * the abs_id of the ingress queue to which its interrupts should be forwarded.
2117218792Snp */
2118218792Snpstatic int
2119218792Snpalloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
2120222085Snp    int intr_idx, int cong)
2121218792Snp{
2122218792Snp	int rc, i, cntxt_id;
2123218792Snp	size_t len;
2124218792Snp	struct fw_iq_cmd c;
2125218792Snp	struct adapter *sc = iq->adapter;
2126218792Snp	__be32 v = 0;
2127218792Snp
2128218792Snp	len = iq->qsize * iq->esize;
2129218792Snp	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
2130218792Snp	    (void **)&iq->desc);
2131218792Snp	if (rc != 0)
2132218792Snp		return (rc);
2133218792Snp
2134218792Snp	bzero(&c, sizeof(c));
2135218792Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
2136218792Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
2137218792Snp	    V_FW_IQ_CMD_VFN(0));
2138218792Snp
2139218792Snp	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
2140218792Snp	    FW_LEN16(c));
2141218792Snp
2142218792Snp	/* Special handling for firmware event queue */
2143218792Snp	if (iq == &sc->sge.fwq)
2144218792Snp		v |= F_FW_IQ_CMD_IQASYNCH;
2145218792Snp
2146228561Snp	if (iq->flags & IQ_INTR) {
2147218792Snp		KASSERT(intr_idx < sc->intr_count,
2148218792Snp		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
2149228561Snp	} else
2150228561Snp		v |= F_FW_IQ_CMD_IQANDST;
2151228561Snp	v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
2152218792Snp
2153218792Snp	c.type_to_iqandstindex = htobe32(v |
2154218792Snp	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
2155218792Snp	    V_FW_IQ_CMD_VIID(pi->viid) |
2156218792Snp	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
2157218792Snp	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
2158218792Snp	    F_FW_IQ_CMD_IQGTSMODE |
2159218792Snp	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
2160218792Snp	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
2161218792Snp	c.iqsize = htobe16(iq->qsize);
2162218792Snp	c.iqaddr = htobe64(iq->ba);
2163222085Snp	if (cong >= 0)
2164222085Snp		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
2165218792Snp
2166218792Snp	if (fl) {
2167218792Snp		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
2168218792Snp
2169218792Snp		len = fl->qsize * RX_FL_ESIZE;
2170218792Snp		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
2171218792Snp		    &fl->ba, (void **)&fl->desc);
2172218792Snp		if (rc)
2173218792Snp			return (rc);
2174218792Snp
2175218792Snp		/* Allocate space for one software descriptor per buffer. */
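		/*
		 * The hardware reads 8 buffer pointers out of each FL
		 * descriptor (hence the * 8), and the status page at the end
		 * of the ring occupies spg_len bytes' worth of descriptors.
		 * E.g. (hypothetical sizes): qsize 1024 with a one-descriptor
		 * status page gives (1024 - 1) * 8 = 8184 buffers.
		 */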
2176237512Snp		fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
2177218792Snp		rc = alloc_fl_sdesc(fl);
2178218792Snp		if (rc != 0) {
2179218792Snp			device_printf(sc->dev,
2180218792Snp			    "failed to setup fl software descriptors: %d\n",
2181218792Snp			    rc);
2182218792Snp			return (rc);
2183218792Snp		}
2184220905Snp		fl->needed = fl->cap;
2185265410Snp		fl->lowat = fl->flags & FL_BUF_PACKING ?
2186265410Snp		    roundup2(sc->sge.fl_starve_threshold2, 8) :
2187265410Snp		    roundup2(sc->sge.fl_starve_threshold, 8);
2188218792Snp
2189228491Snp		c.iqns_to_fl0congen |=
2190222085Snp		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
2191222085Snp			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
2192255050Snp			(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
2193255050Snp			(fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
2194255050Snp			    0));
2195222085Snp		if (cong >= 0) {
2196222085Snp			c.iqns_to_fl0congen |=
2197222085Snp				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
2198222085Snp				    F_FW_IQ_CMD_FL0CONGCIF |
2199222085Snp				    F_FW_IQ_CMD_FL0CONGEN);
2200222085Snp		}
2201218792Snp		c.fl0dcaen_to_fl0cidxfthresh =
2202218792Snp		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
2203218792Snp			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
2204218792Snp		c.fl0size = htobe16(fl->qsize);
2205218792Snp		c.fl0addr = htobe64(fl->ba);
2206218792Snp	}
2207218792Snp
2208218792Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2209218792Snp	if (rc != 0) {
2210218792Snp		device_printf(sc->dev,
2211218792Snp		    "failed to create ingress queue: %d\n", rc);
2212218792Snp		return (rc);
2213218792Snp	}
2214218792Snp
2215218792Snp	iq->cdesc = iq->desc;
2216218792Snp	iq->cidx = 0;
2217218792Snp	iq->gen = 1;
2218218792Snp	iq->intr_next = iq->intr_params;
2219218792Snp	iq->cntxt_id = be16toh(c.iqid);
2220218792Snp	iq->abs_id = be16toh(c.physiqid);
2221228561Snp	iq->flags |= IQ_ALLOCATED;
2222218792Snp
2223218792Snp	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
2224228561Snp	if (cntxt_id >= sc->sge.niq) {
2225228561Snp		panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
2226228561Snp		    cntxt_id, sc->sge.niq - 1);
2227228561Snp	}
2228218792Snp	sc->sge.iqmap[cntxt_id] = iq;
2229218792Snp
2230218792Snp	if (fl) {
2231218792Snp		fl->cntxt_id = be16toh(c.fl0id);
2232218792Snp		fl->pidx = fl->cidx = 0;
2233218792Snp
2234219883Snp		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
2235228561Snp		if (cntxt_id >= sc->sge.neq) {
2236228561Snp			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
2237228561Snp			    __func__, cntxt_id, sc->sge.neq - 1);
2238228561Snp		}
2239218792Snp		sc->sge.eqmap[cntxt_id] = (void *)fl;
2240218792Snp
2241218792Snp		FL_LOCK(fl);
2242228561Snp		/* Enough to make sure the SGE doesn't think it's starved */
2243228561Snp		refill_fl(sc, fl, fl->lowat);
2244218792Snp		FL_UNLOCK(fl);
2245228561Snp
2246228561Snp		iq->flags |= IQ_HAS_FL;
2247218792Snp	}
2248218792Snp
2249253873Snp	if (is_t5(sc) && cong >= 0) {
2250253873Snp		uint32_t param, val;
2251253873Snp
2252253873Snp		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
2253253873Snp		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
2254253873Snp		    V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
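		/*
		 * Encoding used below (inferred from this code, not from a
		 * spec reference): cong == 0 programs the congestion manager
		 * context in one mode (1 << 19) with no channels set;
		 * otherwise mode 2 << 19 is used and bit 0 of nibble i is
		 * set for every channel i present in the cong bitmap.
		 */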
2255253889Snp		if (cong == 0)
2256253889Snp			val = 1 << 19;
2257253889Snp		else {
2258253889Snp			val = 2 << 19;
2259253889Snp			for (i = 0; i < 4; i++) {
2260253889Snp				if (cong & (1 << i))
2261253889Snp					val |= 1 << (i << 2);
2262253889Snp			}
2263253889Snp		}
2264253889Snp
2265253873Snp		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2266253873Snp		if (rc != 0) {
2267253873Snp			/* report error but carry on */
2268253873Snp			device_printf(sc->dev,
2269253873Snp			    "failed to set congestion manager context for "
2270253873Snp			    "ingress queue %d: %d\n", iq->cntxt_id, rc);
2271253873Snp		}
2272253873Snp	}
2273253873Snp
2274218792Snp	/* Enable IQ interrupts */
2275228561Snp	atomic_store_rel_int(&iq->state, IQS_IDLE);
2276218792Snp	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
2277218792Snp	    V_INGRESSQID(iq->cntxt_id));
2278218792Snp
2279218792Snp	return (0);
2280218792Snp}
2281218792Snp
2282218792Snpstatic int
2283218792Snpfree_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
2284218792Snp{
2285265425Snp	int rc;
2286218792Snp	struct adapter *sc = iq->adapter;
2287218792Snp	device_t dev;
2288218792Snp
2289218792Snp	if (sc == NULL)
2290218792Snp		return (0);	/* nothing to do */
2291218792Snp
2292218792Snp	dev = pi ? pi->dev : sc->dev;
2293218792Snp
2294218792Snp	if (iq->flags & IQ_ALLOCATED) {
2295218792Snp		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
2296218792Snp		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
2297218792Snp		    fl ? fl->cntxt_id : 0xffff, 0xffff);
2298218792Snp		if (rc != 0) {
2299218792Snp			device_printf(dev,
2300218792Snp			    "failed to free queue %p: %d\n", iq, rc);
2301218792Snp			return (rc);
2302218792Snp		}
2303218792Snp		iq->flags &= ~IQ_ALLOCATED;
2304218792Snp	}
2305218792Snp
2306218792Snp	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
2307218792Snp
2308218792Snp	bzero(iq, sizeof(*iq));
2309218792Snp
2310218792Snp	if (fl) {
2311218792Snp		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
2312218792Snp		    fl->desc);
2313218792Snp
2314254727Snp		if (fl->sdesc)
2315255050Snp			free_fl_sdesc(sc, fl);
2316218792Snp
2317218792Snp		if (mtx_initialized(&fl->fl_lock))
2318218792Snp			mtx_destroy(&fl->fl_lock);
2319218792Snp
2320218792Snp		bzero(fl, sizeof(*fl));
2321218792Snp	}
2322218792Snp
2323218792Snp	return (0);
2324218792Snp}
2325218792Snp
2326265425Snpstatic void
2327265425Snpadd_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
2328265425Snp    struct sge_fl *fl)
2329265425Snp{
2330265425Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2331265425Snp
2332265425Snp	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
2333265425Snp	    "freelist");
2334265425Snp	children = SYSCTL_CHILDREN(oid);
2335265425Snp
2336265425Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
2337265425Snp	    CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
2338265425Snp	    "SGE context id of the freelist");
2339265425Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
2340265425Snp	    0, "consumer index");
2341265425Snp	if (fl->flags & FL_BUF_PACKING) {
2342265425Snp		SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset",
2343265425Snp		    CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset");
2344265425Snp	}
2345265425Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx,
2346265425Snp	    0, "producer index");
2347265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated",
2348265425Snp	    CTLFLAG_RD, &fl->mbuf_allocated, "# of mbufs allocated");
2349265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined",
2350265425Snp	    CTLFLAG_RD, &fl->mbuf_inlined, "# of mbufs inlined in clusters");
2351265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated",
2352265425Snp	    CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated");
2353265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled",
2354265425Snp	    CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled");
2355265425Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled",
2356265425Snp	    CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)");
2357265425Snp}
2358265425Snp
2359218792Snpstatic int
2360228561Snpalloc_fwq(struct adapter *sc)
2361218792Snp{
2362228561Snp	int rc, intr_idx;
2363228561Snp	struct sge_iq *fwq = &sc->sge.fwq;
2364228561Snp	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
2365228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2366222510Snp
2367241397Snp	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
2368228561Snp	fwq->flags |= IQ_INTR;	/* always */
2369228561Snp	intr_idx = sc->intr_count > 1 ? 1 : 0;
2370228561Snp	rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
2371228561Snp	if (rc != 0) {
2372228561Snp		device_printf(sc->dev,
2373228561Snp		    "failed to create firmware event queue: %d\n", rc);
2374222510Snp		return (rc);
2375228561Snp	}
2376222510Snp
2377228561Snp	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
2378228561Snp	    NULL, "firmware event queue");
2379222510Snp	children = SYSCTL_CHILDREN(oid);
2380222510Snp
2381228561Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
2382228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
2383228561Snp	    "absolute id of the queue");
2384228561Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
2385228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
2386228561Snp	    "SGE context id of the queue");
2387222510Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
2388228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
2389222510Snp	    "consumer index");
2390222510Snp
2391228561Snp	return (0);
2392218792Snp}
2393218792Snp
2394218792Snpstatic int
2395228561Snpfree_fwq(struct adapter *sc)
2396218792Snp{
2397228561Snp	return free_iq_fl(NULL, &sc->sge.fwq, NULL);
2398218792Snp}
2399218792Snp
2400218792Snpstatic int
2401228561Snpalloc_mgmtq(struct adapter *sc)
2402222510Snp{
2403222510Snp	int rc;
2404228561Snp	struct sge_wrq *mgmtq = &sc->sge.mgmtq;
2405228561Snp	char name[16];
2406228561Snp	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
2407228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2408222510Snp
2409228561Snp	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
2410228561Snp	    NULL, "management queue");
2411228561Snp
2412228561Snp	snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
2413228561Snp	init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
2414228561Snp	    sc->sge.fwq.cntxt_id, name);
2415228561Snp	rc = alloc_wrq(sc, NULL, mgmtq, oid);
2416228561Snp	if (rc != 0) {
2417228561Snp		device_printf(sc->dev,
2418228561Snp		    "failed to create management queue: %d\n", rc);
2419222510Snp		return (rc);
2420228561Snp	}
2421222510Snp
2422228561Snp	return (0);
2423222510Snp}
2424222510Snp
2425222510Snpstatic int
2426228561Snpfree_mgmtq(struct adapter *sc)
2427222510Snp{
2428237263Snp
2429228561Snp	return free_wrq(sc, &sc->sge.mgmtq);
2430222510Snp}
2431222510Snp
2432239258Snpstatic inline int
2433239258Snptnl_cong(struct port_info *pi)
2434239258Snp{
2435239258Snp
2436239258Snp	if (cong_drop == -1)
2437239258Snp		return (-1);
2438239258Snp	else if (cong_drop == 1)
2439239258Snp		return (0);
2440239258Snp	else
2441265410Snp		return (pi->rx_chan_map);
2442239258Snp}
2443239258Snp
2444222510Snpstatic int
2445228561Snpalloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
2446228561Snp    struct sysctl_oid *oid)
2447218792Snp{
2448218792Snp	int rc;
2449218792Snp	struct sysctl_oid_list *children;
2450218792Snp	char name[16];
2451218792Snp
2452239258Snp	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
2453218792Snp	if (rc != 0)
2454218792Snp		return (rc);
2455218792Snp
2456222701Snp	FL_LOCK(&rxq->fl);
2457228561Snp	refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
2458222701Snp	FL_UNLOCK(&rxq->fl);
2459222701Snp
2460237819Snp#if defined(INET) || defined(INET6)
2461218792Snp	rc = tcp_lro_init(&rxq->lro);
2462218792Snp	if (rc != 0)
2463218792Snp		return (rc);
2464218792Snp	rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
2465218792Snp
2466218792Snp	if (pi->ifp->if_capenable & IFCAP_LRO)
2467228561Snp		rxq->iq.flags |= IQ_LRO_ENABLED;
2468218792Snp#endif
2469219289Snp	rxq->ifp = pi->ifp;
2470218792Snp
2471228561Snp	children = SYSCTL_CHILDREN(oid);
2472218792Snp
2473218792Snp	snprintf(name, sizeof(name), "%d", idx);
2474218792Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2475218792Snp	    NULL, "rx queue");
2476218792Snp	children = SYSCTL_CHILDREN(oid);
2477218792Snp
2478221911Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2479222510Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
2480221911Snp	    "absolute id of the queue");
2481222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2482222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
2483222973Snp	    "SGE context id of the queue");
2484222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2485222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
2486222973Snp	    "consumer index");
2487237819Snp#if defined(INET) || defined(INET6)
2488218792Snp	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
2489218792Snp	    &rxq->lro.lro_queued, 0, NULL);
2490218792Snp	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
2491218792Snp	    &rxq->lro.lro_flushed, 0, NULL);
2492219290Snp#endif
2493218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
2494218792Snp	    &rxq->rxcsum, "# of times hardware assisted with checksum");
2495218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
2496218792Snp	    CTLFLAG_RD, &rxq->vlan_extraction,
2497218792Snp	    "# of times hardware extracted 802.1Q tag");
2498218792Snp
2499265425Snp	add_fl_sysctls(&pi->ctx, oid, &rxq->fl);
2500222973Snp
2501218792Snp	return (rc);
2502218792Snp}
2503218792Snp
2504218792Snpstatic int
2505218792Snpfree_rxq(struct port_info *pi, struct sge_rxq *rxq)
2506218792Snp{
2507218792Snp	int rc;
2508218792Snp
2509237819Snp#if defined(INET) || defined(INET6)
2510218792Snp	if (rxq->lro.ifp) {
2511218792Snp		tcp_lro_free(&rxq->lro);
2512218792Snp		rxq->lro.ifp = NULL;
2513218792Snp	}
2514218792Snp#endif
2515218792Snp
2516218792Snp	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
2517218792Snp	if (rc == 0)
2518218792Snp		bzero(rxq, sizeof(*rxq));
2519218792Snp
2520218792Snp	return (rc);
2521218792Snp}
2522218792Snp
2523237263Snp#ifdef TCP_OFFLOAD
2524218792Snpstatic int
2525228561Snpalloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
2526228561Snp    int intr_idx, int idx, struct sysctl_oid *oid)
2527220873Snp{
2528228561Snp	int rc;
2529228561Snp	struct sysctl_oid_list *children;
2530220873Snp	char name[16];
2531220873Snp
2532228561Snp	rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
2533265410Snp	    pi->rx_chan_map);
2534228561Snp	if (rc != 0)
2535220873Snp		return (rc);
2536220873Snp
2537228561Snp	children = SYSCTL_CHILDREN(oid);
2538220873Snp
2539228561Snp	snprintf(name, sizeof(name), "%d", idx);
2540228561Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2541228561Snp	    NULL, "rx queue");
2542228561Snp	children = SYSCTL_CHILDREN(oid);
2543228561Snp
2544228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2545228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
2546228561Snp	    "I", "absolute id of the queue");
2547228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2548228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
2549228561Snp	    "I", "SGE context id of the queue");
2550228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2551228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
2552228561Snp	    "consumer index");
2553228561Snp
2554265425Snp	add_fl_sysctls(&pi->ctx, oid, &ofld_rxq->fl);
2555228561Snp
2556228561Snp	return (rc);
2557228561Snp}
2558228561Snp
2559228561Snpstatic int
2560228561Snpfree_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
2561228561Snp{
2562228561Snp	int rc;
2563228561Snp
2564228561Snp	rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
2565228561Snp	if (rc == 0)
2566228561Snp		bzero(ofld_rxq, sizeof(*ofld_rxq));
2567228561Snp
2568228561Snp	return (rc);
2569228561Snp}
2570228561Snp#endif
2571228561Snp
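/*
 * The *_eq_alloc routines below all follow the same pattern: build the
 * appropriate FW_EQ_*_CMD, fire it through the mailbox, then record the
 * returned context id in sc->sge.eqmap so interrupts can find the queue.
 */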
2572228561Snpstatic int
2573228561Snpctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
2574228561Snp{
2575228561Snp	int rc, cntxt_id;
2576228561Snp	struct fw_eq_ctrl_cmd c;
2577228561Snp
2578220873Snp	bzero(&c, sizeof(c));
2579220873Snp
2580220873Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
2581220873Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
2582220873Snp	    V_FW_EQ_CTRL_CMD_VFN(0));
2583220873Snp	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
2584220873Snp	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
2585220873Snp	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
2586220873Snp	c.physeqid_pkd = htobe32(0);
2587220873Snp	c.fetchszm_to_iqid =
2588220873Snp	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2589228561Snp		V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
2590222510Snp		F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
2591220873Snp	c.dcaen_to_eqsize =
2592220873Snp	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2593220873Snp		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2594220873Snp		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2595220873Snp		V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
2596220873Snp	c.eqaddr = htobe64(eq->ba);
2597220873Snp
2598220873Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2599220873Snp	if (rc != 0) {
2600220873Snp		device_printf(sc->dev,
2601228561Snp		    "failed to create control queue %d: %d\n", eq->tx_chan, rc);
2602220873Snp		return (rc);
2603220873Snp	}
2604228561Snp	eq->flags |= EQ_ALLOCATED;
2605220873Snp
2606220873Snp	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
2607228561Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2608228561Snp	if (cntxt_id >= sc->sge.neq)
2609228561Snp		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2610228561Snp		    cntxt_id, sc->sge.neq - 1);
2611228561Snp	sc->sge.eqmap[cntxt_id] = eq;
2612220873Snp
2613228561Snp	return (rc);
2614228561Snp}
2615228561Snp
2616228561Snpstatic int
2617228561Snpeth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2618228561Snp{
2619228561Snp	int rc, cntxt_id;
2620228561Snp	struct fw_eq_eth_cmd c;
2621228561Snp
2622228561Snp	bzero(&c, sizeof(c));
2623228561Snp
2624228561Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
2625228561Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
2626228561Snp	    V_FW_EQ_ETH_CMD_VFN(0));
2627228561Snp	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
2628228561Snp	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
2629228561Snp	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
2630228561Snp	c.fetchszm_to_iqid =
2631228561Snp	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2632228561Snp		V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
2633228561Snp		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
2634228561Snp	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2635228561Snp		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2636228561Snp		      V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2637228561Snp		      V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
2638228561Snp	c.eqaddr = htobe64(eq->ba);
2639228561Snp
2640228561Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2641228561Snp	if (rc != 0) {
2642228561Snp		device_printf(pi->dev,
2643228561Snp		    "failed to create Ethernet egress queue: %d\n", rc);
2644228561Snp		return (rc);
2645228561Snp	}
2646228561Snp	eq->flags |= EQ_ALLOCATED;
2647228561Snp
2648228561Snp	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
2649220873Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2650228561Snp		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2651228561Snp		    cntxt_id, sc->sge.neq - 1);
2652228561Snp		cntxt_id, sc->sge.neq - 1);
2653220873Snp	sc->sge.eqmap[cntxt_id] = eq;
2654220873Snp
2655228561Snp	return (rc);
2656228561Snp}
2657220873Snp
2658237263Snp#ifdef TCP_OFFLOAD
2659228561Snpstatic int
2660228561Snpofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2661228561Snp{
2662228561Snp	int rc, cntxt_id;
2663228561Snp	struct fw_eq_ofld_cmd c;
2664220873Snp
2665228561Snp	bzero(&c, sizeof(c));
2666220873Snp
2667228561Snp	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
2668228561Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
2669228561Snp	    V_FW_EQ_OFLD_CMD_VFN(0));
2670228561Snp	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
2671228561Snp	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
2672228561Snp	c.fetchszm_to_iqid =
2673228561Snp		htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2674228561Snp		    V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
2675228561Snp		    F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
2676228561Snp	c.dcaen_to_eqsize =
2677228561Snp	    htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2678228561Snp		V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2679228561Snp		V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2680228561Snp		V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
2681228561Snp	c.eqaddr = htobe64(eq->ba);
2682228561Snp
2683228561Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2684228561Snp	if (rc != 0) {
2685228561Snp		device_printf(pi->dev,
2686228561Snp		    "failed to create egress queue for TCP offload: %d\n", rc);
2687228561Snp		return (rc);
2688228561Snp	}
2689228561Snp	eq->flags |= EQ_ALLOCATED;
2690228561Snp
2691228561Snp	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
2692228561Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2693228561Snp		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2694228561Snp		    cntxt_id, sc->sge.neq - 1);
2695228561Snp		cntxt_id, sc->sge.neq - 1);
2696228561Snp	sc->sge.eqmap[cntxt_id] = eq;
2697228561Snp
2698220873Snp	return (rc);
2699220873Snp}
2700228561Snp#endif
2701220873Snp
2702220873Snpstatic int
2703228561Snpalloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2704220873Snp{
2705220873Snp	int rc;
2706228561Snp	size_t len;
2707220873Snp
2708228561Snp	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
2709228561Snp
2710228561Snp	len = eq->qsize * EQ_ESIZE;
2711228561Snp	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
2712228561Snp	    &eq->ba, (void **)&eq->desc);
2713228561Snp	if (rc)
2714228561Snp		return (rc);
2715228561Snp
2716237512Snp	eq->cap = eq->qsize - spg_len / EQ_ESIZE;
2717228561Snp	eq->spg = (void *)&eq->desc[eq->cap];
2718228561Snp	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
2719228561Snp	eq->pidx = eq->cidx = 0;
2720248925Snp	eq->doorbells = sc->doorbells;
2721228561Snp
2722228561Snp	switch (eq->flags & EQ_TYPEMASK) {
2723228561Snp	case EQ_CTRL:
2724228561Snp		rc = ctrl_eq_alloc(sc, eq);
2725228561Snp		break;
2726228561Snp
2727228561Snp	case EQ_ETH:
2728228561Snp		rc = eth_eq_alloc(sc, pi, eq);
2729228561Snp		break;
2730228561Snp
2731237263Snp#ifdef TCP_OFFLOAD
2732228561Snp	case EQ_OFLD:
2733228561Snp		rc = ofld_eq_alloc(sc, pi, eq);
2734228561Snp		break;
2735228561Snp#endif
2736228561Snp
2737228561Snp	default:
2738228561Snp		panic("%s: invalid eq type %d.", __func__,
2739228561Snp		    eq->flags & EQ_TYPEMASK);
2740228561Snp	}
2741228561Snp	if (rc != 0) {
2742228561Snp		device_printf(sc->dev,
2743228561Snp		    "failed to allocate egress queue (%d): %d\n",
2744228561Snp		    eq->flags & EQ_TYPEMASK, rc);
2745228561Snp	}
2746228561Snp
2747228561Snp	eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
2748228561Snp
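	/*
	 * User doorbell setup (a sketch of the arithmetic below): s_qpp is
	 * log2(EQs per UDB page), so (cntxt_id >> s_qpp) selects the page
	 * and (cntxt_id & mask) is the queue's slot within it.  A
	 * write-combined doorbell write (WCWR) needs the queue's own
	 * UDBS_SEG_SIZE-sized segment, and only PAGE_SIZE / UDBS_SEG_SIZE of
	 * those fit in a page (32, assuming 4KB pages and 128-byte
	 * segments), so later queues in a page lose the WCWR option.
	 */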
2749248925Snp	if (isset(&eq->doorbells, DOORBELL_UDB) ||
2750248925Snp	    isset(&eq->doorbells, DOORBELL_UDBWC) ||
2751249392Snp	    isset(&eq->doorbells, DOORBELL_WCWR)) {
2752256794Snp		uint32_t s_qpp = sc->sge.eq_s_qpp;
2753248925Snp		uint32_t mask = (1 << s_qpp) - 1;
2754248925Snp		volatile uint8_t *udb;
2755248925Snp
2756248925Snp		udb = sc->udbs_base + UDBS_DB_OFFSET;
2757248925Snp		udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT;	/* pg offset */
2758248925Snp		eq->udb_qid = eq->cntxt_id & mask;		/* id in page */
2759248925Snp		if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE)
2760249392Snp			clrbit(&eq->doorbells, DOORBELL_WCWR);
2761248925Snp		else {
2762248925Snp			udb += eq->udb_qid << UDBS_SEG_SHIFT;	/* seg offset */
2763248925Snp			eq->udb_qid = 0;
2764248925Snp		}
2765248925Snp		eq->udb = (volatile void *)udb;
2766248925Snp	}
2767248925Snp
2768228561Snp	return (rc);
2769228561Snp}
2770228561Snp
2771228561Snpstatic int
2772228561Snpfree_eq(struct adapter *sc, struct sge_eq *eq)
2773228561Snp{
2774228561Snp	int rc;
2775228561Snp
2776228561Snp	if (eq->flags & EQ_ALLOCATED) {
2777228561Snp		switch (eq->flags & EQ_TYPEMASK) {
2778228561Snp		case EQ_CTRL:
2779228561Snp			rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
2780228561Snp			    eq->cntxt_id);
2781228561Snp			break;
2782228561Snp
2783228561Snp		case EQ_ETH:
2784228561Snp			rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
2785228561Snp			    eq->cntxt_id);
2786228561Snp			break;
2787228561Snp
2788237263Snp#ifdef TCP_OFFLOAD
2789228561Snp		case EQ_OFLD:
2790228561Snp			rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
2791228561Snp			    eq->cntxt_id);
2792228561Snp			break;
2793228561Snp#endif
2794228561Snp
2795228561Snp		default:
2796228561Snp			panic("%s: invalid eq type %d.", __func__,
2797228561Snp			    eq->flags & EQ_TYPEMASK);
2798228561Snp		}
2799220873Snp		if (rc != 0) {
2800220873Snp			device_printf(sc->dev,
2801228561Snp			    "failed to free egress queue (%d): %d\n",
2802228561Snp			    eq->flags & EQ_TYPEMASK, rc);
2803220873Snp			return (rc);
2804220873Snp		}
2805228561Snp		eq->flags &= ~EQ_ALLOCATED;
2806220873Snp	}
2807220873Snp
2808220873Snp	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
2809220873Snp
2810220873Snp	if (mtx_initialized(&eq->eq_lock))
2811220873Snp		mtx_destroy(&eq->eq_lock);
2812220873Snp
2813228561Snp	bzero(eq, sizeof(*eq));
2814220873Snp	return (0);
2815220873Snp}
2816220873Snp
2817220873Snpstatic int
2818228561Snpalloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
2819228561Snp    struct sysctl_oid *oid)
2820218792Snp{
2821228561Snp	int rc;
2822228561Snp	struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
2823228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2824228561Snp
2825228561Snp	rc = alloc_eq(sc, pi, &wrq->eq);
2826228561Snp	if (rc)
2827228561Snp		return (rc);
2828228561Snp
2829228561Snp	wrq->adapter = sc;
2830237263Snp	STAILQ_INIT(&wrq->wr_list);
2831228561Snp
2832228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2833228561Snp	    &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
2834228561Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
2835228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
2836228561Snp	    "consumer index");
2837228561Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
2838228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
2839228561Snp	    "producer index");
2840228561Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
2841228561Snp	    &wrq->tx_wrs, "# of work requests");
2842228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2843228561Snp	    &wrq->no_desc, 0,
2844228561Snp	    "# of times queue ran out of hardware descriptors");
2845228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2846228561Snp	    &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
2847228561Snp
2848228561Snp	return (rc);
2849228561Snp}
2850228561Snp
2851228561Snpstatic int
2852228561Snpfree_wrq(struct adapter *sc, struct sge_wrq *wrq)
2853228561Snp{
2854228561Snp	int rc;
2855228561Snp
2856228561Snp	rc = free_eq(sc, &wrq->eq);
2857228561Snp	if (rc)
2858228561Snp		return (rc);
2859228561Snp
2860228561Snp	bzero(wrq, sizeof(*wrq));
2861228561Snp	return (0);
2862228561Snp}
2863228561Snp
2864228561Snpstatic int
2865228561Snpalloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
2866228561Snp    struct sysctl_oid *oid)
2867228561Snp{
2868228561Snp	int rc;
2869218792Snp	struct adapter *sc = pi->adapter;
2870218792Snp	struct sge_eq *eq = &txq->eq;
2871218792Snp	char name[16];
2872228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2873218792Snp
2874228561Snp	rc = alloc_eq(sc, pi, eq);
2875218792Snp	if (rc)
2876218792Snp		return (rc);
2877218792Snp
2878228561Snp	txq->ifp = pi->ifp;
2879228561Snp
2880220873Snp	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
2881218792Snp	    M_ZERO | M_WAITOK);
2882220873Snp	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
2883218792Snp
2884218792Snp	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
2885218792Snp	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
2886220873Snp	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
2887218792Snp	if (rc != 0) {
2888218792Snp		device_printf(sc->dev,
2889218792Snp		    "failed to create tx DMA tag: %d\n", rc);
2890218792Snp		return (rc);
2891218792Snp	}
2892218792Snp
2893228561Snp	/*
2894228561Snp	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
2895228561Snp * limit for any WR).  txq->no_dmamap events shouldn't occur if the maps
2896228561Snp * are sized for the worst case.
2897228561Snp	 */
2898228561Snp	rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
2899228561Snp	    M_WAITOK);
2900218792Snp	if (rc != 0) {
2901218792Snp		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
2902218792Snp		return (rc);
2903218792Snp	}
2904218792Snp
2905218792Snp	snprintf(name, sizeof(name), "%d", idx);
2906218792Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2907218792Snp	    NULL, "tx queue");
2908218792Snp	children = SYSCTL_CHILDREN(oid);
2909218792Snp
2910222973Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2911222973Snp	    &eq->cntxt_id, 0, "SGE context id of the queue");
2912222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2913222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
2914222973Snp	    "consumer index");
2915222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
2916222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
2917222973Snp	    "producer index");
2918222973Snp
2919218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
2920218792Snp	    &txq->txcsum, "# of times hardware assisted with checksum");
2921218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
2922218792Snp	    CTLFLAG_RD, &txq->vlan_insertion,
2923218792Snp	    "# of times hardware inserted 802.1Q tag");
2924218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
2925237819Snp	    &txq->tso_wrs, "# of TSO work requests");
2926218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
2927218792Snp	    &txq->imm_wrs, "# of work requests with immediate data");
2928218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
2929218792Snp	    &txq->sgl_wrs, "# of work requests with direct SGL");
2930218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
2931218792Snp	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
2932218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
2933218792Snp	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
2934218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
2935218792Snp	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
2936218792Snp
2937246093Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
2938246093Snp	    &txq->br->br_drops, "# of drops in the buf_ring for this queue");
2939218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
2940218792Snp	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
2941218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2942218792Snp	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
2943218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
2944228561Snp	    &eq->egr_update, 0, "egress update notifications from the SGE");
2945228561Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2946228561Snp	    &eq->unstalled, 0, "# of times txq recovered after stall");
2947218792Snp
2948218792Snp	return (rc);
2949218792Snp}
2950218792Snp
2951218792Snpstatic int
2952218792Snpfree_txq(struct port_info *pi, struct sge_txq *txq)
2953218792Snp{
2954218792Snp	int rc;
2955218792Snp	struct adapter *sc = pi->adapter;
2956218792Snp	struct sge_eq *eq = &txq->eq;
2957218792Snp
2958228561Snp	rc = free_eq(sc, eq);
2959228561Snp	if (rc)
2960228561Snp		return (rc);
2961220649Snp
2962220873Snp	free(txq->sdesc, M_CXGBE);
2963218792Snp
2964228561Snp	if (txq->txmaps.maps)
2965228561Snp		t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
2966218792Snp
2967220873Snp	buf_ring_free(txq->br, M_CXGBE);
2968218792Snp
2969220873Snp	if (txq->tx_tag)
2970220873Snp		bus_dma_tag_destroy(txq->tx_tag);
2971218792Snp
2972218792Snp	bzero(txq, sizeof(*txq));
2973218792Snp	return (0);
2974218792Snp}
2975218792Snp
2976218792Snpstatic void
2977218792Snponeseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2978218792Snp{
2979218792Snp	bus_addr_t *ba = arg;
2980218792Snp
2981218792Snp	KASSERT(nseg == 1,
2982218792Snp	    ("%s meant for single segment mappings only.", __func__));
2983218792Snp
2984218792Snp	*ba = error ? 0 : segs->ds_addr;
2985218792Snp}
2986218792Snp
2987218792Snpstatic inline bool
2988218792Snpis_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
2989218792Snp{
2990218792Snp	*ctrl = (void *)((uintptr_t)iq->cdesc +
2991218792Snp	    (iq->esize - sizeof(struct rsp_ctrl)));
2992218792Snp
2993218792Snp	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
2994218792Snp}
2995218792Snp
2996218792Snpstatic inline void
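/*
 * Advance to the next response descriptor.  The iq is a ring with a
 * generation bit: iq->gen flips each time cidx wraps, and is_new_response()
 * above treats a descriptor as valid only while its RSPD_GEN bit matches
 * iq->gen.  Note that the ring wraps one entry early (at qsize - 1), so the
 * last slot is never consumed here.
 */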
2997218792Snpiq_next(struct sge_iq *iq)
2998218792Snp{
2999218792Snp	iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
3000218792Snp	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
3001218792Snp		iq->cidx = 0;
3002218792Snp		iq->gen ^= 1;
3003218792Snp		iq->cdesc = iq->desc;
3004218792Snp	}
3005218792Snp}
3006218792Snp
3007220905Snp#define FL_HW_IDX(x) ((x) >> 3)
3008218792Snpstatic inline void
3009218792Snpring_fl_db(struct adapter *sc, struct sge_fl *fl)
3010218792Snp{
3011218792Snp	int ndesc = fl->pending / 8;
3012248925Snp	uint32_t v;
3013218792Snp
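	/*
	 * The hardware's view of the freelist is in units of 8 descriptors
	 * (see FL_HW_IDX above), and the doorbell's PIDX field is in those
	 * units as well.  One credit is withheld whenever pidx and cidx fall
	 * in the same group of 8, presumably so that a completely full ring
	 * never becomes indistinguishable from an empty one.
	 */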
3014220905Snp	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
3015220905Snp		ndesc--;	/* hold back one credit */
3016218792Snp
3017220905Snp	if (ndesc <= 0)
3018220905Snp		return;		/* nothing to do */
3019220905Snp
3020248925Snp	v = F_DBPRIO | V_QID(fl->cntxt_id) | V_PIDX(ndesc);
3021248925Snp	if (is_t5(sc))
3022248925Snp		v |= F_DBTYPE;
3023248925Snp
3024218792Snp	wmb();
3025218792Snp
3026248925Snp	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);
3027220905Snp	fl->pending -= ndesc * 8;
3028218792Snp}
3029218792Snp
3030220905Snp/*
3031228561Snp * Fill up the freelist with up to nbufs buffers and maybe ring its doorbell.
3032228561Snp *
3033228561Snp * Returns non-zero to indicate that it should be added to the list of starving
3034228561Snp * freelists.
3035220905Snp */
3036228561Snpstatic int
3037228561Snprefill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
3038218792Snp{
3039218792Snp	__be64 *d = &fl->desc[fl->pidx];
3040218792Snp	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
3041265425Snp	uintptr_t pa;
3042218792Snp	caddr_t cl;
3043265425Snp	struct cluster_layout *cll = &fl->cll_def;	/* default layout */
3044265425Snp	struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
3045265425Snp	struct cluster_metadata *clm;
3046218792Snp
3047218792Snp	FL_LOCK_ASSERT_OWNED(fl);
3048218792Snp
3049228561Snp	if (nbufs > fl->needed)
3050218792Snp		nbufs = fl->needed;
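	/*
	 * Trim nbufs so that the fill stops on an 8-descriptor boundary, to
	 * match the doorbell granularity.  Illustrative example: pidx = 5 and
	 * nbufs = 64 gives (5 + 64) % 8 = 5, so nbufs becomes 59 and the
	 * final pidx, 64, is a multiple of 8.
	 */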
3051265425Snp	nbufs -= (fl->pidx + nbufs) % 8;
3052218792Snp
3053218792Snp	while (nbufs--) {
3054218792Snp
3055218792Snp		if (sd->cl != NULL) {
3056218792Snp
3057267694Snp			if (sd->nimbuf + sd->nembuf == 0) {
3058255050Snp				/*
3059265425Snp				 * Fast recycle without involving any atomics on
3060265425Snp				 * the cluster's metadata (if the cluster has
3061265425Snp				 * metadata).  This happens when all frames
3062265425Snp				 * received in the cluster were small enough to
3063265425Snp				 * fit within a single mbuf each.
3064255050Snp				 */
3065265425Snp				fl->cl_fast_recycled++;
3066267694Snp#ifdef INVARIANTS
3067267694Snp				clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
3068267694Snp				if (clm != NULL)
3069267694Snp					MPASS(clm->refcount == 1);
3070267694Snp#endif
3071265425Snp				goto recycled_fast;
3072255050Snp			}
3073218792Snp
3074218792Snp			/*
3075265425Snp			 * Cluster is guaranteed to have metadata.  Clusters
3076265425Snp			 * without metadata always take the fast recycle path
3077265425Snp			 * when they're recycled.
3078218792Snp			 */
3079265425Snp			clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
3080265425Snp			MPASS(clm != NULL);
3081265425Snp
3082265425Snp			if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
3083265425Snp				fl->cl_recycled++;
3084265425Snp				goto recycled;
3085218792Snp			}
3086265425Snp			sd->cl = NULL;	/* gave up my reference */
3087218792Snp		}
3088265425Snp		MPASS(sd->cl == NULL);
3089265425Snpalloc:
3090265425Snp		cl = uma_zalloc(swz->zone, M_NOWAIT);
3091265425Snp		if (__predict_false(cl == NULL)) {
3092265425Snp			if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 ||
3093265425Snp			    fl->cll_def.zidx == fl->cll_alt.zidx)
3094265425Snp				break;
3095218792Snp
3096265425Snp			/* fall back to the safe zone */
3097265425Snp			cll = &fl->cll_alt;
3098265425Snp			swz = &sc->sge.sw_zone_info[cll->zidx];
3099265425Snp			goto alloc;
3100255050Snp		}
3101265425Snp		fl->cl_allocated++;
3102218792Snp
3103265425Snp		pa = pmap_kextract((vm_offset_t)cl);
3104265425Snp		pa += cll->region1;
3105218792Snp		sd->cl = cl;
3106265425Snp		sd->cll = *cll;
3107265425Snp		*d = htobe64(pa | cll->hwidx);
3108265425Snp		clm = cl_metadata(sc, fl, cll, cl);
3109265425Snp		if (clm != NULL) {
3110265425Snprecycled:
3111218792Snp#ifdef INVARIANTS
3112265425Snp			clm->sd = sd;
3113218792Snp#endif
3114265425Snp			clm->refcount = 1;
3115265425Snp		}
3116267694Snp		sd->nimbuf = 0;
3117267694Snp		sd->nembuf = 0;
3118265425Snprecycled_fast:
3119219290Snp		fl->pending++;
3120218792Snp		fl->needed--;
3121265425Snp		d++;
3122218792Snp		sd++;
3123265425Snp		if (__predict_false(++fl->pidx == fl->cap)) {
3124218792Snp			fl->pidx = 0;
3125218792Snp			sd = fl->sdesc;
3126218792Snp			d = fl->desc;
3127218792Snp		}
3128218792Snp	}
3129220905Snp
3130228561Snp	if (fl->pending >= 8)
3131220905Snp		ring_fl_db(sc, fl);
3132228561Snp
3133228561Snp	return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
3134218792Snp}
3135218792Snp
3136228561Snp/*
3137228561Snp * Attempt to refill all starving freelists.
3138228561Snp */
3139228561Snpstatic void
3140228561Snprefill_sfl(void *arg)
3141228561Snp{
3142228561Snp	struct adapter *sc = arg;
3143228561Snp	struct sge_fl *fl, *fl_temp;
3144228561Snp
3145228561Snp	mtx_lock(&sc->sfl_lock);
3146228561Snp	TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
3147228561Snp		FL_LOCK(fl);
3148228561Snp		refill_fl(sc, fl, 64);
3149228561Snp		if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
3150228561Snp			TAILQ_REMOVE(&sc->sfl, fl, link);
3151228561Snp			fl->flags &= ~FL_STARVING;
3152228561Snp		}
3153228561Snp		FL_UNLOCK(fl);
3154228561Snp	}
3155228561Snp
3156228561Snp	if (!TAILQ_EMPTY(&sc->sfl))
3157228561Snp		callout_schedule(&sc->sfl_callout, hz / 5);
3158228561Snp	mtx_unlock(&sc->sfl_lock);
3159228561Snp}
3160228561Snp
3161218792Snpstatic int
3162218792Snpalloc_fl_sdesc(struct sge_fl *fl)
3163218792Snp{
3164218792Snp
3165218792Snp	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
3166218792Snp	    M_ZERO | M_WAITOK);
3167218792Snp
3168218792Snp	return (0);
3169218792Snp}
3170218792Snp
3171218792Snpstatic void
3172255050Snpfree_fl_sdesc(struct adapter *sc, struct sge_fl *fl)
3173218792Snp{
3174218792Snp	struct fl_sdesc *sd;
3175265425Snp	struct cluster_metadata *clm;
3176265425Snp	struct cluster_layout *cll;
3177218792Snp	int i;
3178218792Snp
3179218792Snp	sd = fl->sdesc;
3180218792Snp	for (i = 0; i < fl->cap; i++, sd++) {
3181265425Snp		if (sd->cl == NULL)
3182265425Snp			continue;
3183218792Snp
3184265425Snp		cll = &sd->cll;
3185265425Snp		clm = cl_metadata(sc, fl, cll, sd->cl);
3186267694Snp		if (sd->nimbuf + sd->nembuf == 0 ||
3187265425Snp		    (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1)) {
3188265425Snp			uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl);
3189218792Snp		}
3190265425Snp		sd->cl = NULL;
3191218792Snp	}
3192218792Snp
3193218792Snp	free(fl->sdesc, M_CXGBE);
3194218792Snp	fl->sdesc = NULL;
3195218792Snp}
3196218792Snp
3197228561Snpint
3198228561Snpt4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
3199228561Snp    int flags)
3200218792Snp{
3201218792Snp	struct tx_map *txm;
3202228561Snp	int i, rc;
3203218792Snp
3204228561Snp	txmaps->map_total = txmaps->map_avail = count;
3205228561Snp	txmaps->map_cidx = txmaps->map_pidx = 0;
3206218792Snp
3207228561Snp	txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
3208228561Snp	    M_ZERO | flags);
3209218792Snp
3210228561Snp	txm = txmaps->maps;
3211218792Snp	for (i = 0; i < count; i++, txm++) {
3212228561Snp		rc = bus_dmamap_create(tx_tag, 0, &txm->map);
3213218792Snp		if (rc != 0)
3214218792Snp			goto failed;
3215218792Snp	}
3216218792Snp
3217218792Snp	return (0);
3218218792Snpfailed:
3219218792Snp	while (--i >= 0) {
3220218792Snp		txm--;
3221228561Snp		bus_dmamap_destroy(tx_tag, txm->map);
3222218792Snp	}
3223228561Snp	KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
3224218792Snp
3225228561Snp	free(txmaps->maps, M_CXGBE);
3226228561Snp	txmaps->maps = NULL;
3227218792Snp
3228218792Snp	return (rc);
3229218792Snp}
3230218792Snp
3231228561Snpvoid
3232228561Snpt4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
3233218792Snp{
3234218792Snp	struct tx_map *txm;
3235218792Snp	int i;
3236218792Snp
3237228561Snp	txm = txmaps->maps;
3238228561Snp	for (i = 0; i < txmaps->map_total; i++, txm++) {
3239218792Snp
3240218792Snp		if (txm->m) {
3241228561Snp			bus_dmamap_unload(tx_tag, txm->map);
3242218792Snp			m_freem(txm->m);
3243218792Snp			txm->m = NULL;
3244218792Snp		}
3245218792Snp
3246228561Snp		bus_dmamap_destroy(tx_tag, txm->map);
3247218792Snp	}
3248218792Snp
3249228561Snp	free(txmaps->maps, M_CXGBE);
3250228561Snp	txmaps->maps = NULL;
3251218792Snp}
3252218792Snp
3253218792Snp/*
3254218792Snp * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
3255218792Snp * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
3256218792Snp * of immediate data.
3257218792Snp */
3258218792Snp#define IMM_LEN ( \
3259228561Snp      2 * EQ_ESIZE \
3260218792Snp    - sizeof(struct fw_eth_tx_pkt_wr) \
3261218792Snp    - sizeof(struct cpl_tx_pkt_core))
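/*
 * With EQ_ESIZE = 64 and 16 bytes each for the fw_eth_tx_pkt_wr and
 * cpl_tx_pkt_core headers (sizes taken from the shared firmware interface
 * definitions, not restated in this file), IMM_LEN works out to
 * 2 * 64 - 16 - 16 = 96, the figure quoted in the comment above.
 */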
3262218792Snp
3263218792Snp/*
3264218792Snp * Returns non-zero on failure, no need to cleanup anything in that case.
3265218792Snp *
3266218792Snp * Note 1: We always try to defrag the mbuf if required and return EFBIG only
3267218792Snp * if the resulting chain still won't fit in a tx descriptor.
3268218792Snp *
3269218792Snp * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
3270218792Snp * does not have the TCP header in it.
3271218792Snp */
3272218792Snpstatic int
3273218792Snpget_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
3274218792Snp    int sgl_only)
3275218792Snp{
3276218792Snp	struct mbuf *m = *fp;
3277228561Snp	struct tx_maps *txmaps;
3278218792Snp	struct tx_map *txm;
3279218792Snp	int rc, defragged = 0, n;
3280218792Snp
3281218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3282218792Snp
3283218792Snp	if (m->m_pkthdr.tso_segsz)
3284218792Snp		sgl_only = 1;	/* Do not allow immediate data with LSO */
3285218792Snp
3286218792Snpstart:	sgl->nsegs = 0;
3287218792Snp
3288218792Snp	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
3289218792Snp		return (0);	/* nsegs = 0 tells caller to use imm. tx */
3290218792Snp
3291228561Snp	txmaps = &txq->txmaps;
3292228561Snp	if (txmaps->map_avail == 0) {
3293218792Snp		txq->no_dmamap++;
3294218792Snp		return (ENOMEM);
3295218792Snp	}
3296228561Snp	txm = &txmaps->maps[txmaps->map_pidx];
3297218792Snp
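	/*
	 * 50 contiguous bytes is presumably enough to cover the Ethernet,
	 * IP/IPv6, and TCP header fields that write_txpkt_wr() reads when it
	 * builds the LSO CPL; the exact figure is a driver constant and is
	 * not derived here.
	 */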
3298218792Snp	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
3299218792Snp		*fp = m_pullup(m, 50);
3300218792Snp		m = *fp;
3301218792Snp		if (m == NULL)
3302218792Snp			return (ENOBUFS);
3303218792Snp	}
3304218792Snp
3305220873Snp	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
3306218792Snp	    &sgl->nsegs, BUS_DMA_NOWAIT);
3307218792Snp	if (rc == EFBIG && defragged == 0) {
3308243857Sglebius		m = m_defrag(m, M_NOWAIT);
3309218792Snp		if (m == NULL)
3310218792Snp			return (EFBIG);
3311218792Snp
3312218792Snp		defragged = 1;
3313218792Snp		*fp = m;
3314218792Snp		goto start;
3315218792Snp	}
3316218792Snp	if (rc != 0)
3317218792Snp		return (rc);
3318218792Snp
3319218792Snp	txm->m = m;
3320228561Snp	txmaps->map_avail--;
3321228561Snp	if (++txmaps->map_pidx == txmaps->map_total)
3322228561Snp		txmaps->map_pidx = 0;
3323218792Snp
3324218792Snp	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
3325218792Snp	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
3326218792Snp
3327218792Snp	/*
3328218792Snp	 * Store the # of flits required to hold this frame's SGL in nflits.  An
3329218792Snp	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
3330218792Snp	 * multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not used
3331218792Snp	 * then len1 must be set to 0.
3332218792Snp	 */
3333218792Snp	n = sgl->nsegs - 1;
3334218792Snp	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
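	/*
	 * Worked example: nsegs = 3 gives n = 2 and nflits = 3 + 0 + 2 = 5,
	 * i.e. two flits for the ULPTX header + (len0, addr0) pair and three
	 * for the (len1 + len2, addr1, addr2) tuple.  nsegs = 2 gives n = 1
	 * and nflits = 1 + 1 + 2 = 4, with the unused second length set to 0
	 * as described above.
	 */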
3335218792Snp
3336218792Snp	return (0);
3337218792Snp}
3338218792Snp
3339218792Snp
3340218792Snp/*
3341218792Snp * Releases all the txq resources used up in the specified sgl.
3342218792Snp */
3343218792Snpstatic int
3344218792Snpfree_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
3345218792Snp{
3346228561Snp	struct tx_maps *txmaps;
3347218792Snp	struct tx_map *txm;
3348218792Snp
3349218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3350218792Snp
3351218792Snp	if (sgl->nsegs == 0)
3352218792Snp		return (0);	/* didn't use any map */
3353218792Snp
3354228561Snp	txmaps = &txq->txmaps;
3355228561Snp
3356218792Snp	/* 1 pkt uses exactly 1 map, back it out */
3357218792Snp
3358228561Snp	txmaps->map_avail++;
3359228561Snp	if (txmaps->map_pidx > 0)
3360228561Snp		txmaps->map_pidx--;
3361218792Snp	else
3362228561Snp		txmaps->map_pidx = txmaps->map_total - 1;
3363218792Snp
3364228561Snp	txm = &txmaps->maps[txmaps->map_pidx];
3365220873Snp	bus_dmamap_unload(txq->tx_tag, txm->map);
3366218792Snp	txm->m = NULL;
3367218792Snp
3368218792Snp	return (0);
3369218792Snp}
3370218792Snp
3371218792Snpstatic int
3372218792Snpwrite_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
3373218792Snp    struct sgl *sgl)
3374218792Snp{
3375218792Snp	struct sge_eq *eq = &txq->eq;
3376218792Snp	struct fw_eth_tx_pkt_wr *wr;
3377218792Snp	struct cpl_tx_pkt_core *cpl;
3378218792Snp	uint32_t ctrl;	/* used in many unrelated places */
3379218792Snp	uint64_t ctrl1;
3380219286Snp	int nflits, ndesc, pktlen;
3381218792Snp	struct tx_sdesc *txsd;
3382218792Snp	caddr_t dst;
3383218792Snp
3384218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3385218792Snp
3386219286Snp	pktlen = m->m_pkthdr.len;
3387219286Snp
3388218792Snp	/*
3389218792Snp	 * Do we have enough flits to send this frame out?
3390218792Snp	 */
3391218792Snp	ctrl = sizeof(struct cpl_tx_pkt_core);
3392218792Snp	if (m->m_pkthdr.tso_segsz) {
3393218792Snp		nflits = TXPKT_LSO_WR_HDR;
3394237436Snp		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
3395218792Snp	} else
3396218792Snp		nflits = TXPKT_WR_HDR;
3397218792Snp	if (sgl->nsegs > 0)
3398218792Snp		nflits += sgl->nflits;
3399218792Snp	else {
3400219286Snp		nflits += howmany(pktlen, 8);
3401219286Snp		ctrl += pktlen;
3402218792Snp	}
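	/*
	 * A flit is 8 bytes and a hardware descriptor holds 8 flits, hence
	 * the conversions by 8 here; the WR's length is expressed in 16-byte
	 * units, hence the howmany(nflits, 2) below.
	 */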
3403218792Snp	ndesc = howmany(nflits, 8);
3404218792Snp	if (ndesc > eq->avail)
3405218792Snp		return (ENOMEM);
3406218792Snp
3407218792Snp	/* Firmware work request header */
3408218792Snp	wr = (void *)&eq->desc[eq->pidx];
3409218792Snp	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
3410228561Snp	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
3411218792Snp	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
3412228561Snp	if (eq->avail == ndesc) {
3413228561Snp		if (!(eq->flags & EQ_CRFLUSHED)) {
3414228561Snp			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3415228561Snp			eq->flags |= EQ_CRFLUSHED;
3416228561Snp		}
3417228561Snp		eq->flags |= EQ_STALLED;
3418220643Snp	}
3419220643Snp
3420218792Snp	wr->equiq_to_len16 = htobe32(ctrl);
3421218792Snp	wr->r3 = 0;
3422218792Snp
3423218792Snp	if (m->m_pkthdr.tso_segsz) {
3424237436Snp		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
3425218792Snp		struct ether_header *eh;
3426237819Snp		void *l3hdr;
3427237819Snp#if defined(INET) || defined(INET6)
3428218792Snp		struct tcphdr *tcp;
3429237819Snp#endif
3430237819Snp		uint16_t eh_type;
3431218792Snp
3432218792Snp		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
3433218792Snp		    F_LSO_LAST_SLICE;
3434218792Snp
3435218792Snp		eh = mtod(m, struct ether_header *);
3436237819Snp		eh_type = ntohs(eh->ether_type);
3437237819Snp		if (eh_type == ETHERTYPE_VLAN) {
3438237819Snp			struct ether_vlan_header *evh = (void *)eh;
3439237819Snp
3440218792Snp			ctrl |= V_LSO_ETHHDR_LEN(1);
3441237819Snp			l3hdr = evh + 1;
3442237819Snp			eh_type = ntohs(evh->evl_proto);
3443218792Snp		} else
3444237819Snp			l3hdr = eh + 1;
3445218792Snp
3446237819Snp		switch (eh_type) {
3447237819Snp#ifdef INET6
3448237819Snp		case ETHERTYPE_IPV6:
3449237819Snp		{
3450237819Snp			struct ip6_hdr *ip6 = l3hdr;
3451218792Snp
3452237819Snp			/*
3453237819Snp			 * XXX-BZ For now we do not pretend to support
3454237819Snp			 * IPv6 extension headers.
3455237819Snp			 */
3456237819Snp			KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
3457237819Snp			    "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
3458237819Snp			tcp = (struct tcphdr *)(ip6 + 1);
3459237819Snp			ctrl |= F_LSO_IPV6;
3460237819Snp			ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
3461237819Snp			    V_LSO_TCPHDR_LEN(tcp->th_off);
3462237819Snp			break;
3463237819Snp		}
3464237819Snp#endif
3465237819Snp#ifdef INET
3466237819Snp		case ETHERTYPE_IP:
3467237819Snp		{
3468237819Snp			struct ip *ip = l3hdr;
3469237819Snp
3470237819Snp			tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
3471237819Snp			ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
3472237819Snp			    V_LSO_TCPHDR_LEN(tcp->th_off);
3473237819Snp			break;
3474237819Snp		}
3475237819Snp#endif
3476237819Snp		default:
3477237819Snp			panic("%s: CSUM_TSO but no supported IP version "
3478237819Snp			    "(0x%04x)", __func__, eh_type);
3479237819Snp		}
3480237819Snp
3481218792Snp		lso->lso_ctrl = htobe32(ctrl);
3482218792Snp		lso->ipid_ofst = htobe16(0);
3483218792Snp		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
3484218792Snp		lso->seqno_offset = htobe32(0);
3485219286Snp		lso->len = htobe32(pktlen);
3486218792Snp
3487218792Snp		cpl = (void *)(lso + 1);
3488218792Snp
3489218792Snp		txq->tso_wrs++;
3490218792Snp	} else
3491218792Snp		cpl = (void *)(wr + 1);
3492218792Snp
3493218792Snp	/* Checksum offload */
3494218792Snp	ctrl1 = 0;
3495247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3496218792Snp		ctrl1 |= F_TXPKT_IPCSUM_DIS;
3497237799Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3498247062Snp	    CSUM_TCP_IPV6 | CSUM_TSO)))
3499218792Snp		ctrl1 |= F_TXPKT_L4CSUM_DIS;
3500237799Snp	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3501247062Snp	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3502218792Snp		txq->txcsum++;	/* some hardware assistance provided */
3503218792Snp
3504218792Snp	/* VLAN tag insertion */
3505218792Snp	if (m->m_flags & M_VLANTAG) {
3506218792Snp		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3507218792Snp		txq->vlan_insertion++;
3508218792Snp	}
3509218792Snp
3510218792Snp	/* CPL header */
3511218792Snp	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3512218792Snp	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3513218792Snp	cpl->pack = 0;
3514219286Snp	cpl->len = htobe16(pktlen);
3515218792Snp	cpl->ctrl1 = htobe64(ctrl1);
3516218792Snp
3517218792Snp	/* Software descriptor */
3518220873Snp	txsd = &txq->sdesc[eq->pidx];
3519218792Snp	txsd->desc_used = ndesc;
3520218792Snp
3521218792Snp	eq->pending += ndesc;
3522218792Snp	eq->avail -= ndesc;
3523218792Snp	eq->pidx += ndesc;
3524218792Snp	if (eq->pidx >= eq->cap)
3525218792Snp		eq->pidx -= eq->cap;
3526218792Snp
3527218792Snp	/* SGL */
3528218792Snp	dst = (void *)(cpl + 1);
3529218792Snp	if (sgl->nsegs > 0) {
3530220873Snp		txsd->credits = 1;
3531218792Snp		txq->sgl_wrs++;
3532218792Snp		write_sgl_to_txd(eq, sgl, &dst);
3533218792Snp	} else {
3534220873Snp		txsd->credits = 0;
3535218792Snp		txq->imm_wrs++;
3536218792Snp		for (; m; m = m->m_next) {
3537218792Snp			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
3538219286Snp#ifdef INVARIANTS
3539219286Snp			pktlen -= m->m_len;
3540219286Snp#endif
3541218792Snp		}
3542219286Snp#ifdef INVARIANTS
3543219286Snp		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
3544219286Snp#endif
3545219286Snp
3546218792Snp	}
3547218792Snp
3548218792Snp	txq->txpkt_wrs++;
3549218792Snp	return (0);
3550218792Snp}
3551218792Snp
3552218792Snp/*
3553218792Snp * Returns 0 to indicate that m has been accepted into a coalesced tx work
3554218792Snp * request.  It has either been folded into txpkts or txpkts was flushed and m
3555218792Snp * has started a new coalesced work request (as the first frame in a fresh
3556218792Snp * txpkts).
3557218792Snp *
3558218792Snp * Returns non-zero to indicate a failure; the caller is responsible for
3559218792Snp * transmitting m.  If there was anything in txpkts it has already been flushed.
3560218792Snp */
3561218792Snpstatic int
3562218792Snpadd_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
3563218792Snp    struct mbuf *m, struct sgl *sgl)
3564218792Snp{
3565218792Snp	struct sge_eq *eq = &txq->eq;
3566218792Snp	int can_coalesce;
3567218792Snp	struct tx_sdesc *txsd;
3568218792Snp	int flits;
3569218792Snp
3570218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3571218792Snp
3572228561Snp	KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
3573228561Snp
3574218792Snp	if (txpkts->npkt > 0) {
3575218792Snp		flits = TXPKTS_PKT_HDR + sgl->nflits;
3576218792Snp		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3577218792Snp		    txpkts->nflits + flits <= TX_WR_FLITS &&
3578218792Snp		    txpkts->nflits + flits <= eq->avail * 8 &&
3579218792Snp		    txpkts->plen + m->m_pkthdr.len < 65536;
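		/*
		 * To coalesce: no LSO, the WR must fit within TX_WR_FLITS and
		 * within the descriptors still available (8 flits each), and
		 * the total payload must stay under 64KB so that it fits in
		 * the WR's 16-bit plen field (see write_txpkts_wr below).
		 */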
3580218792Snp
3581218792Snp		if (can_coalesce) {
3582218792Snp			txpkts->npkt++;
3583218792Snp			txpkts->nflits += flits;
3584218792Snp			txpkts->plen += m->m_pkthdr.len;
3585218792Snp
3586220873Snp			txsd = &txq->sdesc[eq->pidx];
3587220873Snp			txsd->credits++;
3588218792Snp
3589218792Snp			return (0);
3590218792Snp		}
3591218792Snp
3592218792Snp		/*
3593218792Snp		 * Couldn't coalesce m into txpkts.  The first order of business
3594218792Snp		 * is to send txpkts on its way.  Then we'll revisit m.
3595218792Snp		 */
3596218792Snp		write_txpkts_wr(txq, txpkts);
3597218792Snp	}
3598218792Snp
3599218792Snp	/*
3600218792Snp	 * Check if we can start a new coalesced tx work request with m as
3601218792Snp	 * the first packet in it.
3602218792Snp	 */
3603218792Snp
3604218792Snp	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
3605218792Snp
3606218792Snp	flits = TXPKTS_WR_HDR + sgl->nflits;
3607218792Snp	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3608218792Snp	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
3609218792Snp
3610218792Snp	if (can_coalesce == 0)
3611218792Snp		return (EINVAL);
3612218792Snp
3613218792Snp	/*
3614218792Snp	 * Start a fresh coalesced tx WR with m as the first frame in it.
3615218792Snp	 */
3616218792Snp	txpkts->npkt = 1;
3617218792Snp	txpkts->nflits = flits;
3618218792Snp	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
3619218792Snp	txpkts->plen = m->m_pkthdr.len;
3620218792Snp
3621220873Snp	txsd = &txq->sdesc[eq->pidx];
3622220873Snp	txsd->credits = 1;
3623218792Snp
3624218792Snp	return (0);
3625218792Snp}
3626218792Snp
3627218792Snp/*
3628218792Snp * Note that write_txpkts_wr can never run out of hardware descriptors (but
3629218792Snp * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
3630218792Snp * coalescing only if sufficient hardware descriptors are available.
3631218792Snp */
3632218792Snpstatic void
3633218792Snpwrite_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
3634218792Snp{
3635218792Snp	struct sge_eq *eq = &txq->eq;
3636218792Snp	struct fw_eth_tx_pkts_wr *wr;
3637218792Snp	struct tx_sdesc *txsd;
3638218792Snp	uint32_t ctrl;
3639218792Snp	int ndesc;
3640218792Snp
3641218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3642218792Snp
3643218792Snp	ndesc = howmany(txpkts->nflits, 8);
3644218792Snp
3645218792Snp	wr = (void *)&eq->desc[eq->pidx];
3646228561Snp	wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
3647218792Snp	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
3648228561Snp	if (eq->avail == ndesc) {
3649228561Snp		if (!(eq->flags & EQ_CRFLUSHED)) {
3650228561Snp			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3651228561Snp			eq->flags |= EQ_CRFLUSHED;
3652228561Snp		}
3653228561Snp		eq->flags |= EQ_STALLED;
3654220643Snp	}
3655218792Snp	wr->equiq_to_len16 = htobe32(ctrl);
3656218792Snp	wr->plen = htobe16(txpkts->plen);
3657218792Snp	wr->npkt = txpkts->npkt;
3658222513Snp	wr->r3 = wr->type = 0;
3659218792Snp
3660218792Snp	/* Everything else already written */
3661218792Snp
3662220873Snp	txsd = &txq->sdesc[eq->pidx];
3663218792Snp	txsd->desc_used = ndesc;
3664218792Snp
3665220643Snp	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
3666218792Snp
3667218792Snp	eq->pending += ndesc;
3668218792Snp	eq->avail -= ndesc;
3669218792Snp	eq->pidx += ndesc;
3670218792Snp	if (eq->pidx >= eq->cap)
3671218792Snp		eq->pidx -= eq->cap;
3672218792Snp
3673218792Snp	txq->txpkts_pkts += txpkts->npkt;
3674218792Snp	txq->txpkts_wrs++;
3675218792Snp	txpkts->npkt = 0;	/* emptied */
3676218792Snp}
3677218792Snp
3678218792Snpstatic inline void
3679218792Snpwrite_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
3680218792Snp    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
3681218792Snp{
3682218792Snp	struct ulp_txpkt *ulpmc;
3683218792Snp	struct ulptx_idata *ulpsc;
3684218792Snp	struct cpl_tx_pkt_core *cpl;
3685218792Snp	struct sge_eq *eq = &txq->eq;
3686218792Snp	uintptr_t flitp, start, end;
3687218792Snp	uint64_t ctrl;
3688218792Snp	caddr_t dst;
3689218792Snp
3690218792Snp	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
3691218792Snp
3692218792Snp	start = (uintptr_t)eq->desc;
3693218792Snp	end = (uintptr_t)eq->spg;
3694218792Snp
3695218792Snp	/* Checksum offload */
3696218792Snp	ctrl = 0;
3697247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3698218792Snp		ctrl |= F_TXPKT_IPCSUM_DIS;
3699247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3700247062Snp	    CSUM_TCP_IPV6 | CSUM_TSO)))
3701218792Snp		ctrl |= F_TXPKT_L4CSUM_DIS;
3702247062Snp	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3703247062Snp	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3704218792Snp		txq->txcsum++;	/* some hardware assistance provided */
3705218792Snp
3706218792Snp	/* VLAN tag insertion */
3707218792Snp	if (m->m_flags & M_VLANTAG) {
3708218792Snp		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3709218792Snp		txq->vlan_insertion++;
3710218792Snp	}
3711218792Snp
3712218792Snp	/*
3713218792Snp	 * The previous packet's SGL must have ended at a 16 byte boundary (this
3714218792Snp	 * is required by the firmware/hardware).  It follows that flitp cannot
3715218792Snp	 * wrap around between the ULPTX master command and ULPTX subcommand (8
3716218792Snp	 * bytes each), and that it cannot wrap around in the middle of the
3717218792Snp	 * cpl_tx_pkt_core either.
3718218792Snp	 */
3719218792Snp	flitp = (uintptr_t)txpkts->flitp;
3720218792Snp	KASSERT((flitp & 0xf) == 0,
3721218792Snp	    ("%s: last SGL did not end at 16 byte boundary: %p",
3722218792Snp	    __func__, txpkts->flitp));
3723218792Snp
3724218792Snp	/* ULP master command */
3725218792Snp	ulpmc = (void *)flitp;
3726219288Snp	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
3727219288Snp	    V_ULP_TXPKT_FID(eq->iqid));
3728218792Snp	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
3729218792Snp	    sizeof(*cpl) + 8 * sgl->nflits, 16));
3730218792Snp
3731218792Snp	/* ULP subcommand */
3732218792Snp	ulpsc = (void *)(ulpmc + 1);
3733218792Snp	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
3734218792Snp	    F_ULP_TX_SC_MORE);
3735218792Snp	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
3736218792Snp
3737218792Snp	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
3738218792Snp	if (flitp == end)
3739218792Snp		flitp = start;
3740218792Snp
3741218792Snp	/* CPL_TX_PKT */
3742218792Snp	cpl = (void *)flitp;
3743218792Snp	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3744218792Snp	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3745218792Snp	cpl->pack = 0;
3746218792Snp	cpl->len = htobe16(m->m_pkthdr.len);
3747218792Snp	cpl->ctrl1 = htobe64(ctrl);
3748218792Snp
3749218792Snp	flitp += sizeof(*cpl);
3750218792Snp	if (flitp == end)
3751218792Snp		flitp = start;
3752218792Snp
3753218792Snp	/* SGL for this frame */
3754218792Snp	dst = (caddr_t)flitp;
3755218792Snp	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
3756218792Snp	txpkts->flitp = (void *)dst;
3757218792Snp
3758218792Snp	KASSERT(((uintptr_t)dst & 0xf) == 0,
3759218792Snp	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
3760218792Snp}
3761218792Snp
3762218792Snp/*
3763218792Snp * If the SGL ends on an address that is not 16 byte aligned, this function will
3764218792Snp * add a 0 filled flit at the end.  It returns 1 in that case.
3765218792Snp */
3766218792Snpstatic int
3767218792Snpwrite_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
3768218792Snp{
3769218792Snp	__be64 *flitp, *end;
3770218792Snp	struct ulptx_sgl *usgl;
3771218792Snp	bus_dma_segment_t *seg;
3772218792Snp	int i, padded;
3773218792Snp
3774218792Snp	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
3775218792Snp	    ("%s: bad SGL - nsegs=%d, nflits=%d",
3776218792Snp	    __func__, sgl->nsegs, sgl->nflits));
3777218792Snp
3778218792Snp	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
3779218792Snp	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
3780218792Snp
3781218792Snp	flitp = (__be64 *)(*to);
3782218792Snp	end = flitp + sgl->nflits;
3783218792Snp	seg = &sgl->seg[0];
3784218792Snp	usgl = (void *)flitp;
3785218792Snp
3786218792Snp	/*
3787218792Snp	 * We start at a 16 byte boundary somewhere inside the tx descriptor
3788218792Snp	 * ring, so we're at least 16 bytes away from the status page.  There is
3789218792Snp	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
3790218792Snp	 */
3791218792Snp
3792218792Snp	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
3793218792Snp	    V_ULPTX_NSGE(sgl->nsegs));
3794218792Snp	usgl->len0 = htobe32(seg->ds_len);
3795218792Snp	usgl->addr0 = htobe64(seg->ds_addr);
3796218792Snp	seg++;
3797218792Snp
3798218792Snp	if ((uintptr_t)end <= (uintptr_t)eq->spg) {
3799218792Snp
3800218792Snp		/* Won't wrap around at all */
3801218792Snp
3802218792Snp		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
3803218792Snp			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
3804218792Snp			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
3805218792Snp		}
3806218792Snp		if (i & 1)
3807218792Snp			usgl->sge[i / 2].len[1] = htobe32(0);
3808218792Snp	} else {
3809218792Snp
3810218792Snp		/* Will wrap somewhere in the rest of the SGL */
3811218792Snp
3812218792Snp		/* 2 flits already written, write the rest flit by flit */
3813218792Snp		flitp = (void *)(usgl + 1);
3814218792Snp		for (i = 0; i < sgl->nflits - 2; i++) {
3815218792Snp			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
3816218792Snp				flitp = (void *)eq->desc;
3817218792Snp			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
3818218792Snp		}
3819218792Snp		end = flitp;
3820218792Snp	}
3821218792Snp
3822218792Snp	if ((uintptr_t)end & 0xf) {
3823218792Snp		*(uint64_t *)end = 0;
3824218792Snp		end++;
3825218792Snp		padded = 1;
3826218792Snp	} else
3827218792Snp		padded = 0;
3828218792Snp
3829218792Snp	if ((uintptr_t)end == (uintptr_t)eq->spg)
3830218792Snp		*to = (void *)eq->desc;
3831218792Snp	else
3832218792Snp		*to = (void *)end;
3833218792Snp
3834218792Snp	return (padded);
3835218792Snp}
3836218792Snp
3837218792Snpstatic inline void
3838218792Snpcopy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
3839218792Snp{
3840237263Snp	if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
3841218792Snp		bcopy(from, *to, len);
3842218792Snp		(*to) += len;
3843218792Snp	} else {
3844218792Snp		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
3845218792Snp
3846218792Snp		bcopy(from, *to, portion);
3847218792Snp		from += portion;
3848218792Snp		portion = len - portion;	/* remaining */
3849218792Snp		bcopy(from, (void *)eq->desc, portion);
3850218792Snp		(*to) = (caddr_t)eq->desc + portion;
3851218792Snp	}
3852218792Snp}
3853218792Snp
3854218792Snpstatic inline void
3855220873Snpring_eq_db(struct adapter *sc, struct sge_eq *eq)
3856218792Snp{
3857248925Snp	u_int db, pending;
3858248925Snp
3859248925Snp	db = eq->doorbells;
3860248925Snp	pending = eq->pending;
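	/*
	 * A WCWR doorbell pushes exactly one descriptor through the
	 * write-combined window (see the copy loop below), so rule it out
	 * when more than one descriptor is pending.
	 */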
3861248925Snp	if (pending > 1)
3862249392Snp		clrbit(&db, DOORBELL_WCWR);
3863248925Snp	eq->pending = 0;
3864218792Snp	wmb();
3865248925Snp
3866248925Snp	switch (ffs(db) - 1) {
3867248925Snp	case DOORBELL_UDB:
3868248925Snp		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
3869248925Snp		return;
3870248925Snp
3871249392Snp	case DOORBELL_WCWR: {
3872248925Snp		volatile uint64_t *dst, *src;
3873248925Snp		int i;
3874248925Snp
3875248925Snp		/*
3876248925Snp		 * Queues whose 128B doorbell segment fits in the page do not
3877248925Snp		 * use relative qid (udb_qid is always 0).  Only such queues,
3878249392Snp		 * with a doorbell segment of their own, can do WCWR.
3879248925Snp		 */
3880248925Snp		KASSERT(eq->udb_qid == 0 && pending == 1,
3881248925Snp		    ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
3882248925Snp		    __func__, eq->doorbells, pending, eq->pidx, eq));
3883248925Snp
3884248925Snp		dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
3885248925Snp		    UDBS_DB_OFFSET);
3886248925Snp		i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
3887248925Snp		src = (void *)&eq->desc[i];
3888248925Snp		while (src != (void *)&eq->desc[i + 1])
3889248925Snp			*dst++ = *src++;
3890248925Snp		wmb();
3891248925Snp		return;
3892248925Snp	}
3893248925Snp
3894248925Snp	case DOORBELL_UDBWC:
3895248925Snp		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
3896248925Snp		wmb();
3897248925Snp		return;
3898248925Snp
3899248925Snp	case DOORBELL_KDB:
3900248925Snp		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
3901248925Snp		    V_QID(eq->cntxt_id) | V_PIDX(pending));
3902248925Snp		return;
3903248925Snp	}
3904218792Snp}
3905218792Snp
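/*
 * Number of tx descriptors that the hardware has finished with, derived from
 * the consumer index the SGE writes back to the queue's status page; the
 * second branch accounts for a wrap of the ring.
 */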
3906219292Snpstatic inline int
3907219292Snpreclaimable(struct sge_eq *eq)
3908218792Snp{
3909219292Snp	unsigned int cidx;
3910218792Snp
3911218792Snp	cidx = eq->spg->cidx;	/* stable snapshot */
3912228561Snp	cidx = be16toh(cidx);
3913218792Snp
3914218792Snp	if (cidx >= eq->cidx)
3915219292Snp		return (cidx - eq->cidx);
3916218792Snp	else
3917219292Snp		return (cidx + eq->cap - eq->cidx);
3918219292Snp}
3919218792Snp
3920219292Snp/*
3921219292Snp * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
3922219292Snp * many as possible but stop when there are around "n" mbufs to free.
3923219292Snp *
3924219292Snp * The actual number reclaimed is provided as the return value.
3925219292Snp */
3926219292Snpstatic int
3927220873Snpreclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
3928219292Snp{
3929219292Snp	struct tx_sdesc *txsd;
3930228561Snp	struct tx_maps *txmaps;
3931219292Snp	struct tx_map *txm;
3932219292Snp	unsigned int reclaimed, maps;
3933220873Snp	struct sge_eq *eq = &txq->eq;
3934218792Snp
3935228561Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3936218792Snp
3937219292Snp	if (can_reclaim == 0)
3938219292Snp		can_reclaim = reclaimable(eq);
3939219292Snp
3940218792Snp	maps = reclaimed = 0;
3941219292Snp	while (can_reclaim && maps < n) {
3942218792Snp		int ndesc;
3943218792Snp
3944220873Snp		txsd = &txq->sdesc[eq->cidx];
3945218792Snp		ndesc = txsd->desc_used;
3946218792Snp
3947218792Snp		/* Firmware doesn't return "partial" credits. */
3948218792Snp		KASSERT(can_reclaim >= ndesc,
3949218792Snp		    ("%s: unexpected number of credits: %d, %d",
3950218792Snp		    __func__, can_reclaim, ndesc));
3951218792Snp
3952220873Snp		maps += txsd->credits;
3953219292Snp
3954218792Snp		reclaimed += ndesc;
3955219292Snp		can_reclaim -= ndesc;
3956218792Snp
3957218792Snp		eq->cidx += ndesc;
3958219292Snp		if (__predict_false(eq->cidx >= eq->cap))
3959218792Snp			eq->cidx -= eq->cap;
3960219292Snp	}
3961218792Snp
3962228561Snp	txmaps = &txq->txmaps;
3963228561Snp	txm = &txmaps->maps[txmaps->map_cidx];
3964219292Snp	if (maps)
3965219292Snp		prefetch(txm->m);
3966218792Snp
3967218792Snp	eq->avail += reclaimed;
3968218792Snp	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
3969218792Snp	    ("%s: too many descriptors available", __func__));
3970218792Snp
3971228561Snp	txmaps->map_avail += maps;
3972228561Snp	KASSERT(txmaps->map_avail <= txmaps->map_total,
3973218792Snp	    ("%s: too many maps available", __func__));
3974218792Snp
3975218792Snp	while (maps--) {
3976219292Snp		struct tx_map *next;
3977218792Snp
3978219292Snp		next = txm + 1;
3979228561Snp		if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
3980228561Snp			next = txmaps->maps;
3981219292Snp		prefetch(next->m);
3982219292Snp
3983220873Snp		bus_dmamap_unload(txq->tx_tag, txm->map);
3984218792Snp		m_freem(txm->m);
3985218792Snp		txm->m = NULL;
3986218792Snp
3987219292Snp		txm = next;
3988228561Snp		if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
3989228561Snp			txmaps->map_cidx = 0;
3990218792Snp	}
3991218792Snp
3992218792Snp	return (reclaimed);
3993218792Snp}
3994218792Snp
3995218792Snpstatic void
3996218792Snpwrite_eqflush_wr(struct sge_eq *eq)
3997218792Snp{
3998218792Snp	struct fw_eq_flush_wr *wr;
3999218792Snp
4000218792Snp	EQ_LOCK_ASSERT_OWNED(eq);
4001218792Snp	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
4002228561Snp	KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
4003218792Snp
4004218792Snp	wr = (void *)&eq->desc[eq->pidx];
4005218792Snp	bzero(wr, sizeof(*wr));
4006218792Snp	wr->opcode = FW_EQ_FLUSH_WR;
4007218792Snp	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
4008218792Snp	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
4009218792Snp
4010228561Snp	eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
4011218792Snp	eq->pending++;
4012218792Snp	eq->avail--;
4013218792Snp	if (++eq->pidx == eq->cap)
4014265425Snp		eq->pidx = 0;
4015218792Snp}
4016218792Snp
4017218792Snpstatic __be64
4018218792Snpget_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
4019218792Snp{
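	/*
	 * Flits after the leading (header + len0, addr0) pair come in groups
	 * of three covering two segments each: (len_a + len_b, addr_a,
	 * addr_b).  idx / 3 selects the group and the * 2 turns that into a
	 * segment index.
	 */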
4020218792Snp	int i = (idx / 3) * 2;
4021218792Snp
4022218792Snp	switch (idx % 3) {
4023218792Snp	case 0: {
4024218792Snp		__be64 rc;
4025218792Snp
4026218792Snp		rc = htobe32(sgl[i].ds_len);
4027218792Snp		if (i + 1 < nsegs)
4028218792Snp			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
4029218792Snp
4030218792Snp		return (rc);
4031218792Snp	}
4032218792Snp	case 1:
4033218792Snp		return htobe64(sgl[i].ds_addr);
4034218792Snp	case 2:
4035218792Snp		return htobe64(sgl[i + 1].ds_addr);
4036218792Snp	}
4037218792Snp
4038218792Snp	return (0);
4039218792Snp}
4040218792Snp
4041218792Snpstatic void
4042265425Snpfind_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp)
4043218792Snp{
4044265425Snp	int8_t zidx, hwidx, idx;
4045265425Snp	uint16_t region1, region3;
4046265425Snp	int spare, spare_needed, n;
4047265425Snp	struct sw_zone_info *swz;
4048265425Snp	struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0];
4049218792Snp
4050265425Snp	/*
4051265425Snp	 * Buffer Packing: Look for a PAGE_SIZE or larger zone which has a bufsize
4052265425Snp	 * large enough for the max payload and cluster metadata.  Otherwise
4053265425Snp	 * settle for the largest bufsize that leaves enough room in the cluster
4054265425Snp	 * for metadata.
4055265425Snp	 *
4056265425Snp	 * Without buffer packing: Look for the smallest zone which has a
4057265425Snp	 * bufsize large enough for the max payload.  Settle for the largest
4058265425Snp	 * bufsize available if there's nothing big enough for max payload.
4059265425Snp	 */
4060265425Snp	spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0;
4061265425Snp	swz = &sc->sge.sw_zone_info[0];
4062265425Snp	hwidx = -1;
4063265425Snp	for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) {
4064265425Snp		if (swz->size > largest_rx_cluster) {
4065265425Snp			if (__predict_true(hwidx != -1))
4066265425Snp				break;
4067218792Snp
4068265425Snp			/*
4069265425Snp			 * This is a misconfiguration.  largest_rx_cluster is
4070265425Snp			 * preventing us from finding a refill source.  See
4071265425Snp			 * dev.t5nex.<n>.buffer_sizes to figure out why.
4072265425Snp			 */
4073265425Snp			device_printf(sc->dev, "largest_rx_cluster=%u leaves no"
4074265425Snp			    " refill source for fl %p (dma %u).  Ignored.\n",
4075265425Snp			    largest_rx_cluster, fl, maxp);
4076265425Snp		}
4077265425Snp		for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) {
4078265425Snp			hwb = &hwb_list[idx];
4079265425Snp			spare = swz->size - hwb->size;
4080265425Snp			if (spare < spare_needed)
4081265425Snp				continue;
4082265425Snp
4083265425Snp			hwidx = idx;		/* best option so far */
4084265425Snp			if (hwb->size >= maxp) {
4085265425Snp
4086265425Snp				if ((fl->flags & FL_BUF_PACKING) == 0)
4087265425Snp					goto done; /* stop looking (not packing) */
4088265425Snp
4089265425Snp				if (swz->size >= safest_rx_cluster)
4090265425Snp					goto done; /* stop looking (packing) */
4091265425Snp			}
4092265425Snp			break;		/* keep looking, next zone */
4093265425Snp		}
4094255050Snp	}
4095265425Snpdone:
4096265425Snp	/* A usable hwidx has been located. */
4097265425Snp	MPASS(hwidx != -1);
4098265425Snp	hwb = &hwb_list[hwidx];
4099265425Snp	zidx = hwb->zidx;
4100265425Snp	swz = &sc->sge.sw_zone_info[zidx];
4101265425Snp	region1 = 0;
4102265425Snp	region3 = swz->size - hwb->size;
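	/*
	 * Cluster layout, as implied by the KASSERTs below: region1 (room
	 * for inline mbufs, a multiple of MSIZE) at the start of the cluster,
	 * then the buffer that is actually posted to the hardware freelist,
	 * then region3 (spare space that also holds the cluster metadata) at
	 * the end.
	 */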
4103255050Snp
4104265425Snp	/*
4105265425Snp	 * Stay within this zone and see if there is a better match when mbuf
4106265425Snp	 * inlining is allowed.  Remember that the hwidx's are sorted in
4107265425Snp	 * decreasing order of size (so in increasing order of spare area).
4108265425Snp	 */
4109265425Snp	for (idx = hwidx; idx != -1; idx = hwb->next) {
4110265425Snp		hwb = &hwb_list[idx];
4111265425Snp		spare = swz->size - hwb->size;
4112255050Snp
4113265425Snp		if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
4114265425Snp			break;
4115265425Snp		if (spare < CL_METADATA_SIZE + MSIZE)
4116265425Snp			continue;
4117265425Snp		n = (spare - CL_METADATA_SIZE) / MSIZE;
4118265425Snp		if (n > howmany(hwb->size, maxp))
4119265425Snp			break;
4120255050Snp
4121265425Snp		hwidx = idx;
4122265425Snp		if (fl->flags & FL_BUF_PACKING) {
4123265425Snp			region1 = n * MSIZE;
4124265425Snp			region3 = spare - region1;
4125265425Snp		} else {
4126265425Snp			region1 = MSIZE;
4127265425Snp			region3 = spare - region1;
4128265425Snp			break;
4129255050Snp		}
4130255050Snp	}
4131255050Snp
4132265425Snp	KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES,
4133265425Snp	    ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp));
4134265425Snp	KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES,
4135265425Snp	    ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp));
4136265425Snp	KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 ==
4137265425Snp	    sc->sge.sw_zone_info[zidx].size,
4138265425Snp	    ("%s: bad buffer layout for fl %p, maxp %d. "
4139265425Snp		"cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
4140265425Snp		sc->sge.sw_zone_info[zidx].size, region1,
4141265425Snp		sc->sge.hw_buf_info[hwidx].size, region3));
4142265425Snp	if (fl->flags & FL_BUF_PACKING || region1 > 0) {
4143265425Snp		KASSERT(region3 >= CL_METADATA_SIZE,
4144265425Snp		    ("%s: no room for metadata.  fl %p, maxp %d; "
4145265425Snp		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
4146265425Snp		    sc->sge.sw_zone_info[zidx].size, region1,
4147265425Snp		    sc->sge.hw_buf_info[hwidx].size, region3));
4148265425Snp		KASSERT(region1 % MSIZE == 0,
4149265425Snp		    ("%s: bad mbuf region for fl %p, maxp %d. "
4150265425Snp		    "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
4151265425Snp		    sc->sge.sw_zone_info[zidx].size, region1,
4152265425Snp		    sc->sge.hw_buf_info[hwidx].size, region3));
4153265425Snp	}
4154265425Snp
4155265425Snp	fl->cll_def.zidx = zidx;
4156265425Snp	fl->cll_def.hwidx = hwidx;
4157265425Snp	fl->cll_def.region1 = region1;
4158265425Snp	fl->cll_def.region3 = region3;
4159265425Snp}
4160265425Snp
4161265425Snpstatic void
4162265425Snpfind_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
4163265425Snp{
4164265425Snp	struct sge *s = &sc->sge;
4165265425Snp	struct hw_buf_info *hwb;
4166265425Snp	struct sw_zone_info *swz;
4167265425Snp	int spare;
4168265425Snp	int8_t hwidx;
4169265425Snp
4170265425Snp	if (fl->flags & FL_BUF_PACKING)
4171265425Snp		hwidx = s->safe_hwidx2;	/* with room for metadata */
4172265425Snp	else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) {
4173265425Snp		hwidx = s->safe_hwidx2;
4174265425Snp		hwb = &s->hw_buf_info[hwidx];
4175265425Snp		swz = &s->sw_zone_info[hwb->zidx];
4176265425Snp		spare = swz->size - hwb->size;
4177265425Snp
4178265425Snp		/* no good if there isn't room for an mbuf as well */
4179265425Snp		if (spare < CL_METADATA_SIZE + MSIZE)
4180265425Snp			hwidx = s->safe_hwidx1;
4181265425Snp	} else
4182265425Snp		hwidx = s->safe_hwidx1;
4183265425Snp
4184265425Snp	if (hwidx == -1) {
4185265425Snp		/* No fallback source */
4186265425Snp		fl->cll_alt.hwidx = -1;
4187265425Snp		fl->cll_alt.zidx = -1;
4188265425Snp
4189265425Snp		return;
4190265425Snp	}
4191265425Snp
4192265425Snp	hwb = &s->hw_buf_info[hwidx];
4193265425Snp	swz = &s->sw_zone_info[hwb->zidx];
4194265425Snp	spare = swz->size - hwb->size;
4195265425Snp	fl->cll_alt.hwidx = hwidx;
4196265425Snp	fl->cll_alt.zidx = hwb->zidx;
4197265425Snp	if (allow_mbufs_in_cluster)
4198265425Snp		fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
4199255050Snp	else
4200265425Snp		fl->cll_alt.region1 = 0;
4201265425Snp	fl->cll_alt.region3 = spare - fl->cll_alt.region1;
4202218792Snp}
4203219286Snp
4204222510Snpstatic void
4205228561Snpadd_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
4206222510Snp{
4207228561Snp	mtx_lock(&sc->sfl_lock);
4208228561Snp	FL_LOCK(fl);
4209228561Snp	if ((fl->flags & FL_DOOMED) == 0) {
4210228561Snp		fl->flags |= FL_STARVING;
4211228561Snp		TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
4212228561Snp		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
4213222510Snp	}
4214228561Snp	FL_UNLOCK(fl);
4215228561Snp	mtx_unlock(&sc->sfl_lock);
4216222510Snp}
4217222510Snp
4218220873Snpstatic int
4219228561Snphandle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
4220228561Snp    struct mbuf *m)
4221220873Snp{
4222228561Snp	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
4223228561Snp	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
4224228561Snp	struct adapter *sc = iq->adapter;
4225228561Snp	struct sge *s = &sc->sge;
4226228561Snp	struct sge_eq *eq;
4227220873Snp
4228228561Snp	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
4229228561Snp	    rss->opcode));
4230220873Snp
4231228561Snp	eq = s->eqmap[qid - s->eq_start];
4232220873Snp	EQ_LOCK(eq);
4233228561Snp	KASSERT(eq->flags & EQ_CRFLUSHED,
4234228561Snp	    ("%s: unsolicited egress update", __func__));
4235228561Snp	eq->flags &= ~EQ_CRFLUSHED;
4236228561Snp	eq->egr_update++;
4237220873Snp
4238228561Snp	if (__predict_false(eq->flags & EQ_DOOMED))
4239228561Snp		wakeup_one(eq);
4240228561Snp	else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
4241228561Snp		taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
4242228561Snp	EQ_UNLOCK(eq);
4243220873Snp
4244228561Snp	return (0);
4245228561Snp}
4246220873Snp
4247247291Snp/* handle_fw_msg works for both fw4_msg and fw6_msg because this assertion holds. */
4248247291SnpCTASSERT(offsetof(struct cpl_fw4_msg, data) == \
4249247291Snp    offsetof(struct cpl_fw6_msg, data));
4250247291Snp
4251228561Snpstatic int
4252239336Snphandle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
4253228561Snp{
4254239336Snp	struct adapter *sc = iq->adapter;
4255228561Snp	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
4256220873Snp
4257228561Snp	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
4258228561Snp	    rss->opcode));
4259220873Snp
4260247291Snp	if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
4261247291Snp		const struct rss_header *rss2;
4262247291Snp
4263247291Snp		rss2 = (const struct rss_header *)&cpl->data[0];
4264247291Snp		return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
4265247291Snp	}
4266247291Snp
4267239336Snp	return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
4268220873Snp}
4269221911Snp
4270221911Snpstatic int
4271222510Snpsysctl_uint16(SYSCTL_HANDLER_ARGS)
4272221911Snp{
4273221911Snp	uint16_t *id = arg1;
4274221911Snp	int i = *id;
4275221911Snp
4276221911Snp	return sysctl_handle_int(oidp, &i, 0, req);
4277221911Snp}
4278265425Snp
4279265425Snpstatic int
4280265425Snpsysctl_bufsizes(SYSCTL_HANDLER_ARGS)
4281265425Snp{
4282265425Snp	struct sge *s = arg1;
4283265425Snp	struct hw_buf_info *hwb = &s->hw_buf_info[0];
4284265425Snp	struct sw_zone_info *swz = &s->sw_zone_info[0];
4285265425Snp	int i, rc;
4286265425Snp	struct sbuf sb;
4287265425Snp	char c;
4288265425Snp
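	/*
	 * List every hardware-supported buffer size; a '*' suffix marks the
	 * sizes whose backing zone is usable under the current
	 * largest_rx_cluster setting.
	 */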
4289265425Snp	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
4290265425Snp	for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
4291265425Snp		if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster)
4292265425Snp			c = '*';
4293265425Snp		else
4294265425Snp			c = '\0';
4295265425Snp
4296265425Snp		sbuf_printf(&sb, "%u%c ", hwb->size, c);
4297265425Snp	}
4298265425Snp	sbuf_trim(&sb);
4299265425Snp	sbuf_finish(&sb);
4300265425Snp	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
4301265425Snp	sbuf_delete(&sb);
4302265425Snp	return (rc);
4303265425Snp}