/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 247291 2013-02-26 00:27:27Z np $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

struct fl_buf_info {
	int size;
	int type;
	uma_zone_t zone;
};

/* Filled up by t4_sge_modload */
static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];

#define FL_BUF_SIZE(x)	(fl_buf_info[x].size)
#define FL_BUF_TYPE(x)	(fl_buf_info[x].type)
#define FL_BUF_ZONE(x)	(fl_buf_info[x].zone)
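/*
 * After t4_sge_modload the entries correspond to the standard mbuf cluster
 * zones; on a typical amd64 kernel that means 2048 (MCLBYTES), 4096
 * (MJUMPAGESIZE), 9216 (MJUM9BYTES), and 16384 (MJUM16BYTES) byte buffers.
 */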

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
static int fl_pktshift = 2;
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 *  Any power of 2, from 32 to 4096 (both inclusive) is a valid value.
 */
static int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);

/*
 * Status page length.
 * -1: driver should figure out a good value.
 *  64 or 128 are the only other valid values.
 */
static int spg_len = -1;
TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 *  0: backpressure the channel instead of dropping packets right away.
 *  1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);

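/*
 * All of the above are boot-time tunables (TUNABLE_INT), so they can be set
 * from the loader environment, e.g. in /boot/loader.conf:
 *
 *	hw.cxgbe.fl_pad="64"
 */
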
/* Used to track a coalesced tx work request */
struct txpkts {
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	int nflits;		/* # of flits needed for the SGL */
	bus_dma_segment_t seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
    int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int);
static inline void init_fl(struct sge_fl *, int, int, char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
    char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct sge_fl *);
static void set_fl_tag_idx(struct sge_fl *, int);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);

#if defined(__i386__) || defined(__amd64__)
extern u_int cpu_clflush_line_size;
#endif

/*
 * Called on MOD_LOAD.  Fills up fl_buf_info[] and validates/calculates the SGE
 * tunables.
 */
void
t4_sge_modload(void)
{
	int i;
	int bufsize[FL_BUF_SIZES] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};

	for (i = 0; i < FL_BUF_SIZES; i++) {
		FL_BUF_SIZE(i) = bufsize[i];
		FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
		FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
	}

	if (fl_pktshift < 0 || fl_pktshift > 7) {
		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
		    " using 2 instead.\n", fl_pktshift);
		fl_pktshift = 2;
	}

	if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
		int pad;

#if defined(__i386__) || defined(__amd64__)
		pad = max(cpu_clflush_line_size, 32);
#else
		pad = max(CACHE_LINE_SIZE, 32);
#endif
		pad = min(pad, 4096);

		if (fl_pad != -1) {
			printf("Invalid hw.cxgbe.fl_pad value (%d),"
			    " using %d instead.\n", fl_pad, pad);
		}
		fl_pad = pad;
	}

	if (spg_len != 64 && spg_len != 128) {
		int len;

#if defined(__i386__) || defined(__amd64__)
		len = cpu_clflush_line_size > 64 ? 128 : 64;
#else
		len = 64;
#endif
		if (spg_len != -1) {
			printf("Invalid hw.cxgbe.spg_len value (%d),"
			    " using %d instead.\n", spg_len, len);
		}
		spg_len = len;
	}

	if (cong_drop < -1 || cong_drop > 1) {
		printf("Invalid hw.cxgbe.cong_drop value (%d),"
		    " using 0 instead.\n", cong_drop);
		cong_drop = 0;
	}
}
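
/*
 * Example: on amd64 with a 64 byte cache line (cpu_clflush_line_size == 64,
 * the common case) the auto-selected defaults above work out to fl_pad = 64
 * and spg_len = 64.
 */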

/**
 *	t4_sge_init - initialize SGE
 *	@sc: the adapter
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queues here, instead the driver
 *	top-level must request them individually.
 */
int
t4_sge_init(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	int i, rc = 0;
	uint32_t ctrl_mask, ctrl_val, hpsize, v;

	ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
	    V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
	    F_EGRSTATUSPAGESIZE;
	ctrl_val = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
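	/*
	 * Note: the INGPADBOUNDARY field is a power-of-2 encoding relative to
	 * 32 bytes, so ilog2(fl_pad) - 5 maps fl_pad = 32 to 0, 64 to 1, and
	 * so on up to 4096 -> 7.
	 */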

	hpsize = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);

	if (sc->flags & MASTER_PF) {
		int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
		int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */

		t4_set_reg_field(sc, A_SGE_CONTROL, ctrl_mask, ctrl_val);
		t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, hpsize);
		for (i = 0; i < FL_BUF_SIZES; i++) {
			t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
			    FL_BUF_SIZE(i));
		}

		t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
		    V_THRESHOLD_0(intr_pktcount[0]) |
		    V_THRESHOLD_1(intr_pktcount[1]) |
		    V_THRESHOLD_2(intr_pktcount[2]) |
		    V_THRESHOLD_3(intr_pktcount[3]));

		t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
		    V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
		    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])));
		t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
		    V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
		    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])));
		t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
		    V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
		    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])));

		if (cong_drop == 0) {
			t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 |
			    F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
			    F_TUNNELCNGDROP3, 0);
		}
	}

	v = t4_read_reg(sc, A_SGE_CONTROL);
	if ((v & ctrl_mask) != ctrl_val) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", v);
		rc = EINVAL;
	}

	v = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
	if (v != hpsize) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", v);
		rc = EINVAL;
	}

	for (i = 0; i < FL_BUF_SIZES; i++) {
		v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
		if (v != FL_BUF_SIZE(i)) {
			device_printf(sc->dev,
			    "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v);
			rc = EINVAL;
		}
	}

	v = t4_read_reg(sc, A_SGE_CONM_CTRL);
	s->fl_starve_threshold = G_EGRTHRESHOLD(v) * 2 + 1;

	v = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
	sc->sge.counter_val[0] = G_THRESHOLD_0(v);
	sc->sge.counter_val[1] = G_THRESHOLD_1(v);
	sc->sge.counter_val[2] = G_THRESHOLD_2(v);
	sc->sge.counter_val[3] = G_THRESHOLD_3(v);

	v = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
	sc->sge.timer_val[0] = G_TIMERVALUE0(v) / core_ticks_per_usec(sc);
	sc->sge.timer_val[1] = G_TIMERVALUE1(v) / core_ticks_per_usec(sc);
	v = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
	sc->sge.timer_val[2] = G_TIMERVALUE2(v) / core_ticks_per_usec(sc);
	sc->sge.timer_val[3] = G_TIMERVALUE3(v) / core_ticks_per_usec(sc);
	v = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
	sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc);
	sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc);

	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);

	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);

	return (rc);
}

int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	sysctl_ctx_init(&sc->ctx);
	sc->flags |= ADAP_SYSCTL_CTX;

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

	/*
	 * Management queue.  This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queue */
	if (sc->flags & ADAP_SYSCTL_CTX) {
		sysctl_ctx_free(&sc->ctx);
		sc->flags &= ~ADAP_SYSCTL_CTX;
	}

	free_mgmtq(sc);
	free_fwq(sc);

	return (0);
}

static inline int
first_vector(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc = T4_EXTRA_INTR, i;

	if (sc->intr_count == 1)
		return (0);

	for_each_port(sc, i) {
		struct port_info *p = sc->port[i];

		if (i == pi->port_id)
			break;

#ifdef TCP_OFFLOAD
		if (sc->flags & INTR_DIRECT)
			rc += p->nrxq + p->nofldrxq;
		else
			rc += max(p->nrxq, p->nofldrxq);
#else
		/*
		 * Not compiled with offload support and intr_count > 1.  Only
		 * NIC queues exist and they'd better be taking direct
		 * interrupts.
		 */
		KASSERT(sc->flags & INTR_DIRECT,
		    ("%s: intr_count %d, !INTR_DIRECT", __func__,
		    sc->intr_count));

		rc += p->nrxq;
#endif
	}

	return (rc);
}
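
/*
 * Example: if T4_EXTRA_INTR were 2 and port 0 had 4 rx queues taking direct
 * interrupts (no offload queues), port 0's vectors would start at 2 and
 * port 1's at 6.
 */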

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
	struct adapter *sc = pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

#ifdef TCP_OFFLOAD
	if (sc->flags & INTR_DIRECT) {
		idx %= pi->nrxq + pi->nofldrxq;

		if (idx >= pi->nrxq) {
			idx -= pi->nrxq;
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
		} else
			iq = &s->rxq[pi->first_rxq + idx].iq;

	} else {
		idx %= max(pi->nrxq, pi->nofldrxq);

		if (pi->nrxq >= pi->nofldrxq)
			iq = &s->rxq[pi->first_rxq + idx].iq;
		else
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
	}
#else
	/*
	 * Not compiled with offload support and intr_count > 1.  Only NIC
	 * queues exist and they'd better be taking direct interrupts.
	 */
	KASSERT(sc->flags & INTR_DIRECT,
	    ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));

	idx %= pi->nrxq;
	iq = &s->rxq[pi->first_rxq + idx].iq;
#endif

	KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__));
	return (iq);
}
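
/*
 * The modulo arithmetic above spreads consumers round-robin across the port's
 * interrupt-capable queues: with nrxq = 4, idx values 0..5 pick rx queues
 * 0, 1, 2, 3, 0, 1.
 */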

static inline int
mtu_to_bufsize(int mtu)
{
	int bufsize;

	/* large enough for a frame even when VLAN extraction is disabled */
	bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu;
	bufsize = roundup(bufsize + fl_pktshift, fl_pad);

	return (bufsize);
}
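
/*
 * Worked example with the default fl_pktshift (2) and a fl_pad of 64: an MTU
 * of 1500 gives 14 + 4 + 1500 + 2 = 1520 bytes, rounded up to 1536.
 */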

int
t4_setup_port_queues(struct port_info *pi)
{
	int rc = 0, i, j, intr_idx, iqid;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
	struct sge_wrq *ctrlq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
	struct sysctl_oid *oid2 = NULL;
#endif
	char name[16];
	struct adapter *sc = pi->adapter;
	struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int bufsize = mtu_to_bufsize(pi->ifp->if_mtu);

	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
	    NULL, "rx queues");

#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
	}
#endif

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = first_vector(pi);

	/*
	 * First pass over all rx queues (NIC and TOE):
	 * a) initialize iq and fl
	 * b) allocate queue iff it will take direct interrupts.
	 */
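	/*
	 * (With INTR_DIRECT every NIC and TOE rx queue gets its own vector
	 * and is allocated in this pass; otherwise only the larger of the two
	 * groups takes direct interrupts here and the rest are allocated in
	 * the second pass below, forwarding their interrupts.)
	 */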
	for_each_rxq(pi, i, rxq) {

		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
		    RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(&rxq->fl, pi->qsize_rxq / 8, bufsize, name);

		if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
		    || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
#endif
		   ) {
			rxq->iq.flags |= IQ_INTR;
			rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
		    pi->qsize_rxq, RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, OFLD_BUF_SIZE, name);

		if (sc->flags & INTR_DIRECT ||
		    (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
			ofld_rxq->iq.flags |= IQ_INTR;
			rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#endif

	/*
	 * Second pass over all rx queues (NIC and TOE).  The queues forwarding
	 * their interrupts are allocated now.
	 */
	j = 0;
	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Now the tx queues.  Only one pass needed.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
	    NULL, "tx queues");
	j = 0;
	for_each_txq(pi, i, txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
		    name);

		rc = alloc_txq(pi, txq, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
	for_each_ofld_txq(pi, i, ofld_txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
		    iqid, name);

		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "offload tx queue");

		rc = alloc_wrq(sc, pi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Finally, the control queue.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
	    NULL, "ctrl queue");
	ctrlq = &sc->sge.ctrlq[pi->port_id];
	iqid = port_intr_iq(pi, 0)->cntxt_id;
	snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
	init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
	rc = alloc_wrq(sc, pi, ctrlq, oid);

done:
	if (rc)
		t4_teardown_port_queues(pi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_port_queues(struct port_info *pi)
{
	int i;
	struct adapter *sc = pi->adapter;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif

	/* Do this before freeing the queues */
	if (pi->flags & PORT_SYSCTL_CTX) {
		sysctl_ctx_free(&pi->ctx);
		pi->flags &= ~PORT_SYSCTL_CTX;
	}

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

	for_each_txq(pi, i, txq) {
		free_txq(pi, txq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_txq(pi, i, ofld_txq) {
		free_wrq(sc, ofld_txq);
	}
#endif

	/*
	 * Then take down the rx queues that forward their interrupts, as they
	 * reference other rx queues.
	 */

	for_each_rxq(pi, i, rxq) {
		if ((rxq->iq.flags & IQ_INTR) == 0)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	/*
	 * Then take down the rx queues that take direct interrupts.
	 */

	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	return (0);
}

/*
 * Deals with errors and the firmware event queue.  All data rx queues forward
 * their interrupt to the firmware event queue.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	t4_intr_err(arg);
	if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(fwq, 0);
		atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
	}
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;

	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
	t4_slow_intr_handler(sc);
}

void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

/*
 * Deals with anything and everything on the given ingress queue.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */
	struct sge_fl *fl = &rxq->fl;		/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	const struct rss_header *rss;
	int ndescs = 0, limit, fl_bufs_used = 0;
	int rsp_type;
	uint32_t lq;
	struct mbuf *m0;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);

	limit = budget ? budget : iq->qsize / 8;
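	/*
	 * A non-zero budget caps the number of responses processed in this
	 * call.  If it runs out the function returns EINPROGRESS and the
	 * caller is expected to come back for the rest; a budget of 0 means
	 * run to completion.
	 */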

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while (is_new_response(iq, &ctrl)) {

			rmb();

			m0 = NULL;
			rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
			lq = be32toh(ctrl->pldbuflen_qid);
			rss = (const void *)iq->cdesc;

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:

				KASSERT(iq->flags & IQ_HAS_FL,
				    ("%s: data for an iq (%p) with no freelist",
				    __func__, iq));

				m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
#ifdef T4_PKT_TIMESTAMP
				/*
				 * 60 bit timestamp for the payload is
				 * *(uint64_t *)m0->m_pktdat.  Note that it is
				 * in the leading free-space in the mbuf.  The
				 * kernel can clobber it during a pullup,
				 * m_copymdata, etc.  You need to make sure that
				 * the mbuf reaches you unmolested if you care
				 * about the timestamp.
				 */
				*(uint64_t *)m0->m_pktdat =
				    be64toh(ctrl->u.last_flit) &
				    0xfffffffffffffff;
#endif

				/* fall through */

			case X_RSPD_TYPE_CPL:
				KASSERT(rss->opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    rss->opcode));
				sc->cpl_handler[rss->opcode](iq, rss, m0);
				break;

			case X_RSPD_TYPE_INTR:

				/*
				 * Interrupts should be forwarded only to queues
				 * that are not forwarding their interrupts.
				 * This means service_iq can recurse but only 1
				 * level deep.
				 */
				KASSERT(budget == 0,
				    ("%s: budget %u, rsp_type %u", __func__,
				    budget, rsp_type));

				q = sc->sge.iqmap[lq - sc->sge.iq_start];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq(q, q->qsize / 8) == 0) {
						atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				sc->an_handler(iq, ctrl);
				break;
			}

			iq_next(iq);
			if (++ndescs == limit) {
				t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

				if (fl_bufs_used > 0) {
					FL_LOCK(fl);
					fl->needed += fl_bufs_used;
					refill_fl(sc, fl, fl->cap / 8);
					FL_UNLOCK(fl);
					fl_bufs_used = 0;
				}

				if (budget)
					return (EINPROGRESS);
			}
		}

		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq(q, q->qsize / 8) == 0)
			atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		struct lro_ctrl *lro = &rxq->lro;
		struct lro_entry *l;

		while (!SLIST_EMPTY(&lro->lro_active)) {
			l = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, l);
		}
	}
#endif

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	if (iq->flags & IQ_HAS_FL) {
		int starved;

		FL_LOCK(fl);
		fl->needed += fl_bufs_used;
		starved = refill_fl(sc, fl, fl->cap / 4);
		FL_UNLOCK(fl);
		if (__predict_false(starved != 0))
			add_fl_to_sfl(sc, fl);
	}

	return (0);
}

static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
    int *fl_bufs_used)
{
	struct mbuf *m0, *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	unsigned int nbuf, len;

	/*
	 * No assertion for the fl lock because we don't need it.  This routine
	 * is called only from the rx interrupt handler and it only updates
	 * fl->cidx.  (Contrast that with fl->pidx/fl->needed which could be
	 * updated in the rx interrupt handler or the starvation helper routine.
	 * That's why code that manipulates fl->pidx/fl->needed needs the fl
	 * lock but this routine does not).
	 */

	if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
		panic("%s: cannot handle packed frames", __func__);
	len = G_RSPD_LEN(len_newbuf);

	m0 = sd->m;
	sd->m = NULL;	/* consumed */

	bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
	m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
#ifdef T4_PKT_TIMESTAMP
	/* Leave room for a timestamp */
	m0->m_data += 8;
#endif

	if (len < RX_COPY_THRESHOLD) {
		/* copy data to mbuf, buffer will be recycled */
		bcopy(sd->cl, mtod(m0, caddr_t), len);
		m0->m_len = len;
	} else {
		bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
		m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
		sd->cl = NULL;	/* consumed */
		m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
	}
	m0->m_pkthdr.len = len;

	sd++;
	if (__predict_false(++fl->cidx == fl->cap)) {
		sd = fl->sdesc;
		fl->cidx = 0;
	}

	m = m0;
	len -= m->m_len;
	nbuf = 1;	/* # of fl buffers used */

	while (len > 0) {
		m->m_next = sd->m;
		sd->m = NULL;	/* consumed */
		m = m->m_next;

		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
		    BUS_DMASYNC_POSTREAD);

		m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
		if (len <= MLEN) {
			bcopy(sd->cl, mtod(m, caddr_t), len);
			m->m_len = len;
		} else {
			bus_dmamap_unload(fl->tag[sd->tag_idx],
			    sd->map);
			m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
			sd->cl = NULL;	/* consumed */
			m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
		}

		sd++;
		if (__predict_false(++fl->cidx == fl->cap)) {
			sd = fl->sdesc;
			fl->cidx = 0;
		}

		len -= m->m_len;
		nbuf++;
	}

	(*fl_bufs_used) += nbuf;

	return (m0);
}

static int
t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
{
	struct sge_rxq *rxq = iq_to_rxq(iq);
	struct ifnet *ifp = rxq->ifp;
	const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxq->lro;
#endif

	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
	    rss->opcode));

	m0->m_pkthdr.len -= fl_pktshift;
	m0->m_len -= fl_pktshift;
	m0->m_data += fl_pktshift;

	m0->m_pkthdr.rcvif = ifp;
	m0->m_flags |= M_FLOWID;
	m0->m_pkthdr.flowid = rss->hash_val;

	if (cpl->csum_calc && !cpl->err_vec) {
		if (ifp->if_capenable & IFCAP_RXCSUM &&
		    cpl->l2info & htobe32(F_RXF_IP)) {
			m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
		    cpl->l2info & htobe32(F_RXF_IP6)) {
			m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
			    CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		}

		if (__predict_false(cpl->ip_frag))
			m0->m_pkthdr.csum_data = be16toh(cpl->csum);
		else
			m0->m_pkthdr.csum_data = 0xffff;
	}

	if (cpl->vlan_ex) {
		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
		m0->m_flags |= M_VLANTAG;
		rxq->vlan_extraction++;
	}

#if defined(INET) || defined(INET6)
	if (cpl->l2info & htobe32(F_RXF_LRO) &&
	    iq->flags & IQ_LRO_ENABLED &&
	    tcp_lro_rx(lro, m0, 0) == 0) {
		/* queued for LRO */
	} else
#endif
	ifp->if_input(ifp, m0);

	return (0);
}

/*
 * Doesn't fail.  Holds on to work requests it can't send right away.
 */
void
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
{
	struct sge_eq *eq = &wrq->eq;
	int can_reclaim;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(wrq);
#ifdef TCP_OFFLOAD
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
	    (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#else
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#endif

	if (__predict_true(wr != NULL))
		STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);

	can_reclaim = reclaimable(eq);
	if (__predict_false(eq->flags & EQ_STALLED)) {
		if (can_reclaim < tx_resume_threshold(eq))
			return;
		eq->flags &= ~EQ_STALLED;
		eq->unstalled++;
	}
	eq->cidx += can_reclaim;
	eq->avail += can_reclaim;
	if (__predict_false(eq->cidx >= eq->cap))
		eq->cidx -= eq->cap;

	while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
		int ndesc;

		if (__predict_false(wr->wr_len < 0 ||
		    wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {

#ifdef INVARIANTS
			panic("%s: work request with length %d", __func__,
			    wr->wr_len);
#endif
#ifdef KDB
			kdb_backtrace();
#endif
			log(LOG_ERR, "%s: %s work request with length %d",
			    device_get_nameunit(sc->dev), __func__, wr->wr_len);
			STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
			free_wrqe(wr);
			continue;
		}

		ndesc = howmany(wr->wr_len, EQ_ESIZE);
		if (eq->avail < ndesc) {
			wrq->no_desc++;
			break;
		}

		dst = (void *)&eq->desc[eq->pidx];
		copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);

		eq->pidx += ndesc;
		eq->avail -= ndesc;
		if (__predict_false(eq->pidx >= eq->cap))
			eq->pidx -= eq->cap;

		eq->pending += ndesc;
		if (eq->pending > 16)
			ring_eq_db(sc, eq);

		wrq->tx_wrs++;
		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
		free_wrqe(wr);

		if (eq->avail < 8) {
			can_reclaim = reclaimable(eq);
			eq->cidx += can_reclaim;
			eq->avail += can_reclaim;
			if (__predict_false(eq->cidx >= eq->cap))
				eq->cidx -= eq->cap;
		}
	}

	if (eq->pending)
		ring_eq_db(sc, eq);

	if (wr != NULL) {
		eq->flags |= EQ_STALLED;
		if (callout_pending(&eq->tx_callout) == 0)
			callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
	}
}

/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
#define TXPKTS_PKT_HDR ((\
    sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
#define TXPKTS_WR_HDR (\
    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
    TXPKTS_PKT_HDR)

/* Header of a tx WR, before SGL of first packet (in flits) */
#define TXPKT_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8 )

/* Header of a tx LSO WR, before SGL of first packet (in flits) */
#define TXPKT_LSO_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_lso_core) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8 )
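
/*
 * (A "flit" is the SGE's 8 byte / 64 bit unit of work; the divisions by 8
 * above convert the header sizes from bytes to flits.)
 */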

int
t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
{
	struct port_info *pi = (void *)ifp->if_softc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;
	struct buf_ring *br = txq->br;
	struct mbuf *next;
	int rc, coalescing, can_reclaim;
	struct txpkts txpkts;
	struct sgl sgl;

	TXQ_LOCK_ASSERT_OWNED(txq);
	KASSERT(m, ("%s: called with nothing to do.", __func__));
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));

	prefetch(&eq->desc[eq->pidx]);
	prefetch(&txq->sdesc[eq->pidx]);

	txpkts.npkt = 0;/* indicates there's nothing in txpkts */
	coalescing = 0;

	can_reclaim = reclaimable(eq);
	if (__predict_false(eq->flags & EQ_STALLED)) {
		if (can_reclaim < tx_resume_threshold(eq)) {
			txq->m = m;
			return (0);
		}
		eq->flags &= ~EQ_STALLED;
		eq->unstalled++;
	}

	if (__predict_false(eq->flags & EQ_DOOMED)) {
		m_freem(m);
		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
			m_freem(m);
		return (ENETDOWN);
	}

	if (eq->avail < 8 && can_reclaim)
		reclaim_tx_descs(txq, can_reclaim, 32);

	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {

		if (eq->avail < 8)
			break;

		next = m->m_nextpkt;
		m->m_nextpkt = NULL;

		if (next || buf_ring_peek(br))
			coalescing = 1;

		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
		if (rc != 0) {
			if (rc == ENOMEM) {

				/* Short of resources, suspend tx */

				m->m_nextpkt = next;
				break;
			}

			/*
			 * Unrecoverable error for this packet, throw it away
			 * and move on to the next.  get_pkt_sgl may already
			 * have freed m (it will be NULL in that case and the
			 * m_freem here is still safe).
			 */

			m_freem(m);
			continue;
		}

		if (coalescing &&
		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {

			/* Successfully absorbed into txpkts */

			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
			goto doorbell;
		}

		/*
		 * We weren't coalescing to begin with, or current frame could
		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
		 * given to it can't be coalesced).  Either way there should be
		 * nothing in txpkts.
		 */
		KASSERT(txpkts.npkt == 0,
		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));

		/* We're sending out individual packets now */
		coalescing = 0;

		if (eq->avail < 8)
			reclaim_tx_descs(txq, 0, 8);
		rc = write_txpkt_wr(pi, txq, m, &sgl);
		if (rc != 0) {

			/* Short of hardware descriptors, suspend tx */

			/*
			 * This is an unlikely but expensive failure.  We've
			 * done all the hard work (DMA mappings etc.) and now we
			 * can't send out the packet.  What's worse, we have to
			 * spend even more time freeing up everything in sgl.
			 */
			txq->no_desc++;
			free_pkt_sgl(txq, &sgl);

			m->m_nextpkt = next;
			break;
		}

		ETHER_BPF_MTAP(ifp, m);
		if (sgl.nsegs == 0)
			m_freem(m);
doorbell:
		if (eq->pending >= 64)
			ring_eq_db(sc, eq);

		can_reclaim = reclaimable(eq);
		if (can_reclaim >= 32)
			reclaim_tx_descs(txq, can_reclaim, 64);
	}

	if (txpkts.npkt > 0)
		write_txpkts_wr(txq, &txpkts);

	/*
	 * m not NULL means there was an error but we haven't thrown it away.
	 * This can happen when we're short of tx descriptors (no_desc) or maybe
	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
	 * will get things going again.
	 */
	if (m && !(eq->flags & EQ_CRFLUSHED)) {
		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];

		/*
		 * If EQ_CRFLUSHED is not set then we know we have at least one
		 * available descriptor because any WR that reduces eq->avail to
		 * 0 also sets EQ_CRFLUSHED.
		 */
		KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));

		txsd->desc_used = 1;
		txsd->credits = 0;
		write_eqflush_wr(eq);
	}
	txq->m = m;

	if (eq->pending)
		ring_eq_db(sc, eq);

	reclaim_tx_descs(txq, 0, 128);

	if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
		callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);

	return (0);
}

void
t4_update_fl_bufsize(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_rxq *rxq;
	struct sge_fl *fl;
	int i, bufsize = mtu_to_bufsize(ifp->if_mtu);

	for_each_rxq(pi, i, rxq) {
		fl = &rxq->fl;

		FL_LOCK(fl);
		set_fl_tag_idx(fl, bufsize);
		FL_UNLOCK(fl);
	}
}

int
can_resume_tx(struct sge_eq *eq)
{
	return (reclaimable(eq) >= tx_resume_threshold(eq));
}

static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
    int qsize, int esize)
{
	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
	    ("%s: bad pktc_idx %d", __func__, pktc_idx));

	iq->flags = 0;
	iq->adapter = sc;
	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
	iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
	if (pktc_idx >= 0) {
		iq->intr_params |= F_QINTR_CNT_EN;
		iq->intr_pktc_idx = pktc_idx;
	}
	iq->qsize = roundup(qsize, 16);		/* See FW_IQ_CMD/iqsize */
	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
}

static inline void
init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name)
{
	fl->qsize = qsize;
	strlcpy(fl->lockname, name, sizeof(fl->lockname));
	set_fl_tag_idx(fl, bufsize);
}

static inline void
init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
    uint16_t iqid, char *name)
{
	KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
	KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));

	eq->flags = eqtype & EQ_TYPEMASK;
	eq->tx_chan = tx_chan;
	eq->iqid = iqid;
	eq->qsize = qsize;
	strlcpy(eq->lockname, name, sizeof(eq->lockname));

	TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
	callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
}
1518218792Snp
1519218792Snpstatic int
1520218792Snpalloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
1521218792Snp    bus_dmamap_t *map, bus_addr_t *pa, void **va)
1522218792Snp{
1523218792Snp	int rc;
1524218792Snp
1525218792Snp	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
1526218792Snp	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
1527218792Snp	if (rc != 0) {
1528218792Snp		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
1529218792Snp		goto done;
1530218792Snp	}
1531218792Snp
1532218792Snp	rc = bus_dmamem_alloc(*tag, va,
1533218792Snp	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
1534218792Snp	if (rc != 0) {
1535218792Snp		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
1536218792Snp		goto done;
1537218792Snp	}
1538218792Snp
1539218792Snp	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
1540218792Snp	if (rc != 0) {
1541218792Snp		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
1542218792Snp		goto done;
1543218792Snp	}
1544218792Snpdone:
1545218792Snp	if (rc)
1546218792Snp		free_ring(sc, *tag, *map, *pa, *va);
1547218792Snp
1548218792Snp	return (rc);
1549218792Snp}
1550218792Snp
1551218792Snpstatic int
1552218792Snpfree_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
1553218792Snp    bus_addr_t pa, void *va)
1554218792Snp{
1555218792Snp	if (pa)
1556218792Snp		bus_dmamap_unload(tag, map);
1557218792Snp	if (va)
1558218792Snp		bus_dmamem_free(tag, va, map);
1559218792Snp	if (tag)
1560218792Snp		bus_dma_tag_destroy(tag);
1561218792Snp
1562218792Snp	return (0);
1563218792Snp}
1564218792Snp
1565218792Snp/*
1566218792Snp * Allocates the ring for an ingress queue and an optional freelist.  If the
1567218792Snp * freelist is specified it will be allocated and then associated with the
1568218792Snp * ingress queue.
1569218792Snp *
1570218792Snp * Returns errno on failure.  Resources allocated up to that point may still be
1571218792Snp * allocated.  Caller is responsible for cleanup in case this function fails.
1572218792Snp *
1573228561Snp * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
1574218792Snp * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
1575228561Snp * the abs_id of the ingress queue to which its interrupts should be forwarded.
1576218792Snp */
1577218792Snpstatic int
1578218792Snpalloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1579222085Snp    int intr_idx, int cong)
1580218792Snp{
1581218792Snp	int rc, i, cntxt_id;
1582218792Snp	size_t len;
1583218792Snp	struct fw_iq_cmd c;
1584218792Snp	struct adapter *sc = iq->adapter;
1585218792Snp	__be32 v = 0;
1586218792Snp
1587218792Snp	len = iq->qsize * iq->esize;
1588218792Snp	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
1589218792Snp	    (void **)&iq->desc);
1590218792Snp	if (rc != 0)
1591218792Snp		return (rc);
1592218792Snp
1593218792Snp	bzero(&c, sizeof(c));
1594218792Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1595218792Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1596218792Snp	    V_FW_IQ_CMD_VFN(0));
1597218792Snp
1598218792Snp	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1599218792Snp	    FW_LEN16(c));
1600218792Snp
1601218792Snp	/* Special handling for firmware event queue */
1602218792Snp	if (iq == &sc->sge.fwq)
1603218792Snp		v |= F_FW_IQ_CMD_IQASYNCH;
1604218792Snp
1605228561Snp	if (iq->flags & IQ_INTR) {
1606218792Snp		KASSERT(intr_idx < sc->intr_count,
1607218792Snp		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
1608228561Snp	} else
1609228561Snp		v |= F_FW_IQ_CMD_IQANDST;
1610228561Snp	v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1611218792Snp
1612218792Snp	c.type_to_iqandstindex = htobe32(v |
1613218792Snp	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1614218792Snp	    V_FW_IQ_CMD_VIID(pi->viid) |
1615218792Snp	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1616218792Snp	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1617218792Snp	    F_FW_IQ_CMD_IQGTSMODE |
1618218792Snp	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
1619218792Snp	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1620218792Snp	c.iqsize = htobe16(iq->qsize);
1621218792Snp	c.iqaddr = htobe64(iq->ba);
1622222085Snp	if (cong >= 0)
1623222085Snp		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
1624218792Snp
1625218792Snp	if (fl) {
1626218792Snp		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
1627218792Snp
1628218792Snp		for (i = 0; i < FL_BUF_SIZES; i++) {
1629218792Snp
1630218792Snp			/*
1631218792Snp			 * A freelist buffer must be 16 byte aligned as the SGE
1632218792Snp			 * uses the low 4 bits of the bus addr to figure out the
1633218792Snp			 * buffer size.
1634218792Snp			 */
1635218792Snp			rc = bus_dma_tag_create(sc->dmat, 16, 0,
1636218792Snp			    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1637218792Snp			    FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
1638218792Snp			    NULL, NULL, &fl->tag[i]);
1639218792Snp			if (rc != 0) {
1640218792Snp				device_printf(sc->dev,
1641218792Snp				    "failed to create fl DMA tag[%d]: %d\n",
1642218792Snp				    i, rc);
1643218792Snp				return (rc);
1644218792Snp			}
1645218792Snp		}
1646218792Snp		len = fl->qsize * RX_FL_ESIZE;
1647218792Snp		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
1648218792Snp		    &fl->ba, (void **)&fl->desc);
1649218792Snp		if (rc)
1650218792Snp			return (rc);
1651218792Snp
1652218792Snp		/* Allocate space for one software descriptor per buffer. */
1653237512Snp		fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
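		/*
		 * The ring's last spg_len bytes hold the SGE status page;
		 * each remaining hardware descriptor supplies 8 buffer slots.
		 * E.g., assuming 64-byte descriptors (RX_FL_ESIZE) and a
		 * 64-byte status page, qsize = 1024 gives
		 * cap = (1024 - 1) * 8 = 8184 buffers.
		 */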
1654218792Snp		FL_LOCK(fl);
1655218792Snp		rc = alloc_fl_sdesc(fl);
1656218792Snp		FL_UNLOCK(fl);
1657218792Snp		if (rc != 0) {
1658218792Snp			device_printf(sc->dev,
1659218792Snp			    "failed to setup fl software descriptors: %d\n",
1660218792Snp			    rc);
1661218792Snp			return (rc);
1662218792Snp		}
1663220905Snp		fl->needed = fl->cap;
1664228561Snp		fl->lowat = roundup(sc->sge.fl_starve_threshold, 8);
1665218792Snp
1666228491Snp		c.iqns_to_fl0congen |=
1667222085Snp		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
1668222085Snp			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
1669222085Snp			F_FW_IQ_CMD_FL0PADEN);
1670222085Snp		if (cong >= 0) {
1671222085Snp			c.iqns_to_fl0congen |=
1672222085Snp				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
1673222085Snp				    F_FW_IQ_CMD_FL0CONGCIF |
1674222085Snp				    F_FW_IQ_CMD_FL0CONGEN);
1675222085Snp		}
1676218792Snp		c.fl0dcaen_to_fl0cidxfthresh =
1677218792Snp		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
1678218792Snp			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
1679218792Snp		c.fl0size = htobe16(fl->qsize);
1680218792Snp		c.fl0addr = htobe64(fl->ba);
1681218792Snp	}
1682218792Snp
1683218792Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1684218792Snp	if (rc != 0) {
1685218792Snp		device_printf(sc->dev,
1686218792Snp		    "failed to create ingress queue: %d\n", rc);
1687218792Snp		return (rc);
1688218792Snp	}
1689218792Snp
1690218792Snp	iq->cdesc = iq->desc;
1691218792Snp	iq->cidx = 0;
1692218792Snp	iq->gen = 1;
1693218792Snp	iq->intr_next = iq->intr_params;
1694218792Snp	iq->cntxt_id = be16toh(c.iqid);
1695218792Snp	iq->abs_id = be16toh(c.physiqid);
1696228561Snp	iq->flags |= IQ_ALLOCATED;
1697218792Snp
1698218792Snp	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
1699228561Snp	if (cntxt_id >= sc->sge.niq) {
1700228561Snp		panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
1701228561Snp		    cntxt_id, sc->sge.niq - 1);
1702228561Snp	}
1703218792Snp	sc->sge.iqmap[cntxt_id] = iq;
1704218792Snp
1705218792Snp	if (fl) {
1706218792Snp		fl->cntxt_id = be16toh(c.fl0id);
1707218792Snp		fl->pidx = fl->cidx = 0;
1708218792Snp
1709219883Snp		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
1710228561Snp		if (cntxt_id >= sc->sge.neq) {
1711228561Snp			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
1712228561Snp			    __func__, cntxt_id, sc->sge.neq - 1);
1713228561Snp		}
1714218792Snp		sc->sge.eqmap[cntxt_id] = (void *)fl;
1715218792Snp
1716218792Snp		FL_LOCK(fl);
1717228561Snp		/* Enough to make sure the SGE doesn't think it's starved */
1718228561Snp		refill_fl(sc, fl, fl->lowat);
1719218792Snp		FL_UNLOCK(fl);
1720228561Snp
1721228561Snp		iq->flags |= IQ_HAS_FL;
1722218792Snp	}
1723218792Snp
1724218792Snp	/* Enable IQ interrupts */
1725228561Snp	atomic_store_rel_int(&iq->state, IQS_IDLE);
1726218792Snp	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
1727218792Snp	    V_INGRESSQID(iq->cntxt_id));
1728218792Snp
1729218792Snp	return (0);
1730218792Snp}
1731218792Snp
1732218792Snpstatic int
1733218792Snpfree_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1734218792Snp{
1735218792Snp	int i, rc;
1736218792Snp	struct adapter *sc = iq->adapter;
1737218792Snp	device_t dev;
1738218792Snp
1739218792Snp	if (sc == NULL)
1740218792Snp		return (0);	/* nothing to do */
1741218792Snp
1742218792Snp	dev = pi ? pi->dev : sc->dev;
1743218792Snp
1744218792Snp	if (iq->flags & IQ_ALLOCATED) {
1745218792Snp		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1746218792Snp		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1747218792Snp		    fl ? fl->cntxt_id : 0xffff, 0xffff);
1748218792Snp		if (rc != 0) {
1749218792Snp			device_printf(dev,
1750218792Snp			    "failed to free queue %p: %d\n", iq, rc);
1751218792Snp			return (rc);
1752218792Snp		}
1753218792Snp		iq->flags &= ~IQ_ALLOCATED;
1754218792Snp	}
1755218792Snp
1756218792Snp	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
1757218792Snp
1758218792Snp	bzero(iq, sizeof(*iq));
1759218792Snp
1760218792Snp	if (fl) {
1761218792Snp		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
1762218792Snp		    fl->desc);
1763218792Snp
1764218792Snp		if (fl->sdesc) {
1765218792Snp			FL_LOCK(fl);
1766218792Snp			free_fl_sdesc(fl);
1767218792Snp			FL_UNLOCK(fl);
1768218792Snp		}
1769218792Snp
1770218792Snp		if (mtx_initialized(&fl->fl_lock))
1771218792Snp			mtx_destroy(&fl->fl_lock);
1772218792Snp
1773218792Snp		for (i = 0; i < FL_BUF_SIZES; i++) {
1774218792Snp			if (fl->tag[i])
1775218792Snp				bus_dma_tag_destroy(fl->tag[i]);
1776218792Snp		}
1777218792Snp
1778218792Snp		bzero(fl, sizeof(*fl));
1779218792Snp	}
1780218792Snp
1781218792Snp	return (0);
1782218792Snp}
1783218792Snp
1784218792Snpstatic int
1785228561Snpalloc_fwq(struct adapter *sc)
1786218792Snp{
1787228561Snp	int rc, intr_idx;
1788228561Snp	struct sge_iq *fwq = &sc->sge.fwq;
1789228561Snp	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
1790228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
1791222510Snp
1792241397Snp	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
1793228561Snp	fwq->flags |= IQ_INTR;	/* always */
1794228561Snp	intr_idx = sc->intr_count > 1 ? 1 : 0;
1795228561Snp	rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
1796228561Snp	if (rc != 0) {
1797228561Snp		device_printf(sc->dev,
1798228561Snp		    "failed to create firmware event queue: %d\n", rc);
1799222510Snp		return (rc);
1800228561Snp	}
1801222510Snp
1802228561Snp	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
1803228561Snp	    NULL, "firmware event queue");
1804222510Snp	children = SYSCTL_CHILDREN(oid);
1805222510Snp
1806228561Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
1807228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
1808228561Snp	    "absolute id of the queue");
1809228561Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
1810228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
1811228561Snp	    "SGE context id of the queue");
1812222510Snp	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
1813228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
1814222510Snp	    "consumer index");
1815222510Snp
1816228561Snp	return (0);
1817218792Snp}
1818218792Snp
1819218792Snpstatic int
1820228561Snpfree_fwq(struct adapter *sc)
1821218792Snp{
1822228561Snp	return free_iq_fl(NULL, &sc->sge.fwq, NULL);
1823218792Snp}
1824218792Snp
1825218792Snpstatic int
1826228561Snpalloc_mgmtq(struct adapter *sc)
1827222510Snp{
1828222510Snp	int rc;
1829228561Snp	struct sge_wrq *mgmtq = &sc->sge.mgmtq;
1830228561Snp	char name[16];
1831228561Snp	struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
1832228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
1833222510Snp
1834228561Snp	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
1835228561Snp	    NULL, "management queue");
1836228561Snp
1837228561Snp	snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
1838228561Snp	init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
1839228561Snp	    sc->sge.fwq.cntxt_id, name);
1840228561Snp	rc = alloc_wrq(sc, NULL, mgmtq, oid);
1841228561Snp	if (rc != 0) {
1842228561Snp		device_printf(sc->dev,
1843228561Snp		    "failed to create management queue: %d\n", rc);
1844222510Snp		return (rc);
1845228561Snp	}
1846222510Snp
1847228561Snp	return (0);
1848222510Snp}
1849222510Snp
1850222510Snpstatic int
1851228561Snpfree_mgmtq(struct adapter *sc)
1852222510Snp{
1853237263Snp
1854228561Snp	return free_wrq(sc, &sc->sge.mgmtq);
1855222510Snp}
1856222510Snp
1857239258Snpstatic inline int
1858239258Snptnl_cong(struct port_info *pi)
1859239258Snp{
1860239258Snp
1861239258Snp	if (cong_drop == -1)
1862239258Snp		return (-1);
1863239258Snp	else if (cong_drop == 1)
1864239258Snp		return (0);
1865239258Snp	else
1866239258Snp		return (1 << pi->tx_chan);
1867239258Snp}
1868239258Snp
1869222510Snpstatic int
1870228561Snpalloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
1871228561Snp    struct sysctl_oid *oid)
1872218792Snp{
1873218792Snp	int rc;
1874218792Snp	struct sysctl_oid_list *children;
1875218792Snp	char name[16];
1876218792Snp
1877239258Snp	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
1878218792Snp	if (rc != 0)
1879218792Snp		return (rc);
1880218792Snp
1881222701Snp	FL_LOCK(&rxq->fl);
1882228561Snp	refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
1883222701Snp	FL_UNLOCK(&rxq->fl);
1884222701Snp
1885237819Snp#if defined(INET) || defined(INET6)
1886218792Snp	rc = tcp_lro_init(&rxq->lro);
1887218792Snp	if (rc != 0)
1888218792Snp		return (rc);
1889218792Snp	rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
1890218792Snp
1891218792Snp	if (pi->ifp->if_capenable & IFCAP_LRO)
1892228561Snp		rxq->iq.flags |= IQ_LRO_ENABLED;
1893218792Snp#endif
1894219289Snp	rxq->ifp = pi->ifp;
1895218792Snp
1896228561Snp	children = SYSCTL_CHILDREN(oid);
1897218792Snp
1898218792Snp	snprintf(name, sizeof(name), "%d", idx);
1899218792Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1900218792Snp	    NULL, "rx queue");
1901218792Snp	children = SYSCTL_CHILDREN(oid);
1902218792Snp
1903221911Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
1904222510Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
1905221911Snp	    "absolute id of the queue");
1906222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
1907222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
1908222973Snp	    "SGE context id of the queue");
1909222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
1910222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
1911222973Snp	    "consumer index");
1912237819Snp#if defined(INET) || defined(INET6)
1913218792Snp	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
1914218792Snp	    &rxq->lro.lro_queued, 0, NULL);
1915218792Snp	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
1916218792Snp	    &rxq->lro.lro_flushed, 0, NULL);
1917219290Snp#endif
1918218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
1919218792Snp	    &rxq->rxcsum, "# of times hardware assisted with checksum");
1920218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
1921218792Snp	    CTLFLAG_RD, &rxq->vlan_extraction,
1922218792Snp	    "# of times hardware extracted 802.1Q tag");
1923218792Snp
1924222973Snp	children = SYSCTL_CHILDREN(oid);
1925222973Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
1926222973Snp	    NULL, "freelist");
1927222973Snp	children = SYSCTL_CHILDREN(oid);
1928222973Snp
1929222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
1930222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I",
1931222973Snp	    "SGE context id of the queue");
1932222973Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
1933222973Snp	    &rxq->fl.cidx, 0, "consumer index");
1934222973Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
1935222973Snp	    &rxq->fl.pidx, 0, "producer index");
1936222973Snp
1937218792Snp	return (rc);
1938218792Snp}
1939218792Snp
1940218792Snpstatic int
1941218792Snpfree_rxq(struct port_info *pi, struct sge_rxq *rxq)
1942218792Snp{
1943218792Snp	int rc;
1944218792Snp
1945237819Snp#if defined(INET) || defined(INET6)
1946218792Snp	if (rxq->lro.ifp) {
1947218792Snp		tcp_lro_free(&rxq->lro);
1948218792Snp		rxq->lro.ifp = NULL;
1949218792Snp	}
1950218792Snp#endif
1951218792Snp
1952218792Snp	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
1953218792Snp	if (rc == 0)
1954218792Snp		bzero(rxq, sizeof(*rxq));
1955218792Snp
1956218792Snp	return (rc);
1957218792Snp}
1958218792Snp
1959237263Snp#ifdef TCP_OFFLOAD
1960218792Snpstatic int
1961228561Snpalloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
1962228561Snp    int intr_idx, int idx, struct sysctl_oid *oid)
1963220873Snp{
1964228561Snp	int rc;
1965228561Snp	struct sysctl_oid_list *children;
1966220873Snp	char name[16];
1967220873Snp
1968228561Snp	rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
1969228561Snp	    1 << pi->tx_chan);
1970228561Snp	if (rc != 0)
1971220873Snp		return (rc);
1972220873Snp
1973228561Snp	children = SYSCTL_CHILDREN(oid);
1974220873Snp
1975228561Snp	snprintf(name, sizeof(name), "%d", idx);
1976228561Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1977228561Snp	    NULL, "rx queue");
1978228561Snp	children = SYSCTL_CHILDREN(oid);
1979228561Snp
1980228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
1981228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
1982228561Snp	    "I", "absolute id of the queue");
1983228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
1984228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
1985228561Snp	    "I", "SGE context id of the queue");
1986228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
1987228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
1988228561Snp	    "consumer index");
1989228561Snp
1990228561Snp	children = SYSCTL_CHILDREN(oid);
1991228561Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
1992228561Snp	    NULL, "freelist");
1993228561Snp	children = SYSCTL_CHILDREN(oid);
1994228561Snp
1995228561Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
1996228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16,
1997228561Snp	    "I", "SGE context id of the queue");
1998228561Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
1999228561Snp	    &ofld_rxq->fl.cidx, 0, "consumer index");
2000228561Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
2001228561Snp	    &ofld_rxq->fl.pidx, 0, "producer index");
2002228561Snp
2003228561Snp	return (rc);
2004228561Snp}
2005228561Snp
2006228561Snpstatic int
2007228561Snpfree_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
2008228561Snp{
2009228561Snp	int rc;
2010228561Snp
2011228561Snp	rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
2012228561Snp	if (rc == 0)
2013228561Snp		bzero(ofld_rxq, sizeof(*ofld_rxq));
2014228561Snp
2015228561Snp	return (rc);
2016228561Snp}
2017228561Snp#endif
2018228561Snp
2019228561Snpstatic int
2020228561Snpctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
2021228561Snp{
2022228561Snp	int rc, cntxt_id;
2023228561Snp	struct fw_eq_ctrl_cmd c;
2024228561Snp
2025220873Snp	bzero(&c, sizeof(c));
2026220873Snp
2027220873Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
2028220873Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
2029220873Snp	    V_FW_EQ_CTRL_CMD_VFN(0));
2030220873Snp	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
2031220873Snp	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
2032220873Snp	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
2033220873Snp	c.physeqid_pkd = htobe32(0);
2034220873Snp	c.fetchszm_to_iqid =
2035220873Snp	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2036228561Snp		V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
2037222510Snp		F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
2038220873Snp	c.dcaen_to_eqsize =
2039220873Snp	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2040220873Snp		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2041220873Snp		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2042220873Snp		V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
2043220873Snp	c.eqaddr = htobe64(eq->ba);
2044220873Snp
2045220873Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2046220873Snp	if (rc != 0) {
2047220873Snp		device_printf(sc->dev,
2048228561Snp		    "failed to create control queue %d: %d\n", eq->tx_chan, rc);
2049220873Snp		return (rc);
2050220873Snp	}
2051228561Snp	eq->flags |= EQ_ALLOCATED;
2052220873Snp
2053220873Snp	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
2054228561Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2055228561Snp	if (cntxt_id >= sc->sge.neq)
2056228561Snp		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2057228561Snp		    cntxt_id, sc->sge.neq - 1);
2058228561Snp	sc->sge.eqmap[cntxt_id] = eq;
2059220873Snp
2060228561Snp	return (rc);
2061228561Snp}
2062228561Snp
2063228561Snpstatic int
2064228561Snpeth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2065228561Snp{
2066228561Snp	int rc, cntxt_id;
2067228561Snp	struct fw_eq_eth_cmd c;
2068228561Snp
2069228561Snp	bzero(&c, sizeof(c));
2070228561Snp
2071228561Snp	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
2072228561Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
2073228561Snp	    V_FW_EQ_ETH_CMD_VFN(0));
2074228561Snp	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
2075228561Snp	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
2076228561Snp	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
2077228561Snp	c.fetchszm_to_iqid =
2078228561Snp	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2079228561Snp		V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
2080228561Snp		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
2081228561Snp	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2082228561Snp		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2083228561Snp		      V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2084228561Snp		      V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
2085228561Snp	c.eqaddr = htobe64(eq->ba);
2086228561Snp
2087228561Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2088228561Snp	if (rc != 0) {
2089228561Snp		device_printf(pi->dev,
2090228561Snp		    "failed to create Ethernet egress queue: %d\n", rc);
2091228561Snp		return (rc);
2092228561Snp	}
2093228561Snp	eq->flags |= EQ_ALLOCATED;
2094228561Snp
2095228561Snp	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
2096220873Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2097228561Snp	if (cntxt_id >= sc->sge.neq)
2098228561Snp		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2099228561Snp		    cntxt_id, sc->sge.neq - 1);
2100220873Snp	sc->sge.eqmap[cntxt_id] = eq;
2101220873Snp
2102228561Snp	return (rc);
2103228561Snp}
2104220873Snp
2105237263Snp#ifdef TCP_OFFLOAD
2106228561Snpstatic int
2107228561Snpofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2108228561Snp{
2109228561Snp	int rc, cntxt_id;
2110228561Snp	struct fw_eq_ofld_cmd c;
2111220873Snp
2112228561Snp	bzero(&c, sizeof(c));
2113220873Snp
2114228561Snp	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
2115228561Snp	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
2116228561Snp	    V_FW_EQ_OFLD_CMD_VFN(0));
2117228561Snp	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
2118228561Snp	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
2119228561Snp	c.fetchszm_to_iqid =
2120228561Snp		htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2121228561Snp		    V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
2122228561Snp		    F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
2123228561Snp	c.dcaen_to_eqsize =
2124228561Snp	    htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2125228561Snp		V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2126228561Snp		V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2127228561Snp		V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
2128228561Snp	c.eqaddr = htobe64(eq->ba);
2129228561Snp
2130228561Snp	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2131228561Snp	if (rc != 0) {
2132228561Snp		device_printf(pi->dev,
2133228561Snp		    "failed to create egress queue for TCP offload: %d\n", rc);
2134228561Snp		return (rc);
2135228561Snp	}
2136228561Snp	eq->flags |= EQ_ALLOCATED;
2137228561Snp
2138228561Snp	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
2139228561Snp	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2140228561Snp	if (cntxt_id >= sc->sge.neq)
2141228561Snp		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2142228561Snp		    cntxt_id, sc->sge.neq - 1);
2143228561Snp	sc->sge.eqmap[cntxt_id] = eq;
2144228561Snp
2145220873Snp	return (rc);
2146220873Snp}
2147228561Snp#endif
2148220873Snp
2149220873Snpstatic int
2150228561Snpalloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2151220873Snp{
2152220873Snp	int rc;
2153228561Snp	size_t len;
2154220873Snp
2155228561Snp	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
2156228561Snp
2157228561Snp	len = eq->qsize * EQ_ESIZE;
2158228561Snp	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
2159228561Snp	    &eq->ba, (void **)&eq->desc);
2160228561Snp	if (rc)
2161228561Snp		return (rc);
2162228561Snp
2163237512Snp	eq->cap = eq->qsize - spg_len / EQ_ESIZE;
2164228561Snp	eq->spg = (void *)&eq->desc[eq->cap];
2165228561Snp	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
2166228561Snp	eq->pidx = eq->cidx = 0;
2167228561Snp
2168228561Snp	switch (eq->flags & EQ_TYPEMASK) {
2169228561Snp	case EQ_CTRL:
2170228561Snp		rc = ctrl_eq_alloc(sc, eq);
2171228561Snp		break;
2172228561Snp
2173228561Snp	case EQ_ETH:
2174228561Snp		rc = eth_eq_alloc(sc, pi, eq);
2175228561Snp		break;
2176228561Snp
2177237263Snp#ifdef TCP_OFFLOAD
2178228561Snp	case EQ_OFLD:
2179228561Snp		rc = ofld_eq_alloc(sc, pi, eq);
2180228561Snp		break;
2181228561Snp#endif
2182228561Snp
2183228561Snp	default:
2184228561Snp		panic("%s: invalid eq type %d.", __func__,
2185228561Snp		    eq->flags & EQ_TYPEMASK);
2186228561Snp	}
2187228561Snp	if (rc != 0) {
2188228561Snp		device_printf(sc->dev,
2189228561Snp		    "failed to allocate egress queue (%d): %d\n",
2190228561Snp		    eq->flags & EQ_TYPEMASK, rc);
2191228561Snp	}
2192228561Snp
2193228561Snp	eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
2194228561Snp
2195228561Snp	return (rc);
2196228561Snp}
2197228561Snp
2198228561Snpstatic int
2199228561Snpfree_eq(struct adapter *sc, struct sge_eq *eq)
2200228561Snp{
2201228561Snp	int rc;
2202228561Snp
2203228561Snp	if (eq->flags & EQ_ALLOCATED) {
2204228561Snp		switch (eq->flags & EQ_TYPEMASK) {
2205228561Snp		case EQ_CTRL:
2206228561Snp			rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
2207228561Snp			    eq->cntxt_id);
2208228561Snp			break;
2209228561Snp
2210228561Snp		case EQ_ETH:
2211228561Snp			rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
2212228561Snp			    eq->cntxt_id);
2213228561Snp			break;
2214228561Snp
2215237263Snp#ifdef TCP_OFFLOAD
2216228561Snp		case EQ_OFLD:
2217228561Snp			rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
2218228561Snp			    eq->cntxt_id);
2219228561Snp			break;
2220228561Snp#endif
2221228561Snp
2222228561Snp		default:
2223228561Snp			panic("%s: invalid eq type %d.", __func__,
2224228561Snp			    eq->flags & EQ_TYPEMASK);
2225228561Snp		}
2226220873Snp		if (rc != 0) {
2227220873Snp			device_printf(sc->dev,
2228228561Snp			    "failed to free egress queue (%d): %d\n",
2229228561Snp			    eq->flags & EQ_TYPEMASK, rc);
2230220873Snp			return (rc);
2231220873Snp		}
2232228561Snp		eq->flags &= ~EQ_ALLOCATED;
2233220873Snp	}
2234220873Snp
2235220873Snp	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
2236220873Snp
2237220873Snp	if (mtx_initialized(&eq->eq_lock))
2238220873Snp		mtx_destroy(&eq->eq_lock);
2239220873Snp
2240228561Snp	bzero(eq, sizeof(*eq));
2241220873Snp	return (0);
2242220873Snp}
2243220873Snp
2244220873Snpstatic int
2245228561Snpalloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
2246228561Snp    struct sysctl_oid *oid)
2247218792Snp{
2248228561Snp	int rc;
2249228561Snp	struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
2250228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2251228561Snp
2252228561Snp	rc = alloc_eq(sc, pi, &wrq->eq);
2253228561Snp	if (rc)
2254228561Snp		return (rc);
2255228561Snp
2256228561Snp	wrq->adapter = sc;
2257237263Snp	STAILQ_INIT(&wrq->wr_list);
2258228561Snp
2259228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2260228561Snp	    &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
2261228561Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
2262228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
2263228561Snp	    "consumer index");
2264228561Snp	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
2265228561Snp	    CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
2266228561Snp	    "producer index");
2267228561Snp	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
2268228561Snp	    &wrq->tx_wrs, "# of work requests");
2269228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2270228561Snp	    &wrq->no_desc, 0,
2271228561Snp	    "# of times queue ran out of hardware descriptors");
2272228561Snp	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2273228561Snp	    &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
2274228561Snp
2276228561Snp	return (rc);
2277228561Snp}
2278228561Snp
2279228561Snpstatic int
2280228561Snpfree_wrq(struct adapter *sc, struct sge_wrq *wrq)
2281228561Snp{
2282228561Snp	int rc;
2283228561Snp
2284228561Snp	rc = free_eq(sc, &wrq->eq);
2285228561Snp	if (rc)
2286228561Snp		return (rc);
2287228561Snp
2288228561Snp	bzero(wrq, sizeof(*wrq));
2289228561Snp	return (0);
2290228561Snp}
2291228561Snp
2292228561Snpstatic int
2293228561Snpalloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
2294228561Snp    struct sysctl_oid *oid)
2295228561Snp{
2296228561Snp	int rc;
2297218792Snp	struct adapter *sc = pi->adapter;
2298218792Snp	struct sge_eq *eq = &txq->eq;
2299218792Snp	char name[16];
2300228561Snp	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2301218792Snp
2302228561Snp	rc = alloc_eq(sc, pi, eq);
2303218792Snp	if (rc)
2304218792Snp		return (rc);
2305218792Snp
2306228561Snp	txq->ifp = pi->ifp;
2307228561Snp
2308220873Snp	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
2309218792Snp	    M_ZERO | M_WAITOK);
2310220873Snp	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
2311218792Snp
2312218792Snp	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
2313218792Snp	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
2314220873Snp	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
2315218792Snp	if (rc != 0) {
2316218792Snp		device_printf(sc->dev,
2317218792Snp		    "failed to create tx DMA tag: %d\n", rc);
2318218792Snp		return (rc);
2319218792Snp	}
2320218792Snp
2321228561Snp	/*
2322228561Snp	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
2323228561Snp	 * limit for any WR).  txq->no_dmamap events shouldn't occur if maps is
2324228561Snp	 * sized for the worst case.
2325228561Snp	 */
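	/* E.g., eq->qsize = 1024 sizes the collection at 1280 maps. */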
2326228561Snp	rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
2327228561Snp	    M_WAITOK);
2328218792Snp	if (rc != 0) {
2329218792Snp		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
2330218792Snp		return (rc);
2331218792Snp	}
2332218792Snp
2333218792Snp	snprintf(name, sizeof(name), "%d", idx);
2334218792Snp	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2335218792Snp	    NULL, "tx queue");
2336218792Snp	children = SYSCTL_CHILDREN(oid);
2337218792Snp
2338222973Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2339222973Snp	    &eq->cntxt_id, 0, "SGE context id of the queue");
2340222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2341222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
2342222973Snp	    "consumer index");
2343222973Snp	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
2344222973Snp	    CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
2345222973Snp	    "producer index");
2346222973Snp
2347218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
2348218792Snp	    &txq->txcsum, "# of times hardware assisted with checksum");
2349218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
2350218792Snp	    CTLFLAG_RD, &txq->vlan_insertion,
2351218792Snp	    "# of times hardware inserted 802.1Q tag");
2352218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
2353237819Snp	    &txq->tso_wrs, "# of TSO work requests");
2354218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
2355218792Snp	    &txq->imm_wrs, "# of work requests with immediate data");
2356218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
2357218792Snp	    &txq->sgl_wrs, "# of work requests with direct SGL");
2358218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
2359218792Snp	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
2360218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
2361218792Snp	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
2362218792Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
2363218792Snp	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
2364218792Snp
2365246093Snp	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
2366246093Snp	    &txq->br->br_drops, "# of drops in the buf_ring for this queue");
2367218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
2368218792Snp	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
2369218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2370218792Snp	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
2371218792Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
2372228561Snp	    &eq->egr_update, 0, "egress update notifications from the SGE");
2373228561Snp	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2374228561Snp	    &eq->unstalled, 0, "# of times txq recovered after stall");
2375218792Snp
2376218792Snp	return (rc);
2377218792Snp}
2378218792Snp
2379218792Snpstatic int
2380218792Snpfree_txq(struct port_info *pi, struct sge_txq *txq)
2381218792Snp{
2382218792Snp	int rc;
2383218792Snp	struct adapter *sc = pi->adapter;
2384218792Snp	struct sge_eq *eq = &txq->eq;
2385218792Snp
2386228561Snp	rc = free_eq(sc, eq);
2387228561Snp	if (rc)
2388228561Snp		return (rc);
2389220649Snp
2390220873Snp	free(txq->sdesc, M_CXGBE);
2391218792Snp
2392228561Snp	if (txq->txmaps.maps)
2393228561Snp		t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
2394218792Snp
2395220873Snp	buf_ring_free(txq->br, M_CXGBE);
2396218792Snp
2397220873Snp	if (txq->tx_tag)
2398220873Snp		bus_dma_tag_destroy(txq->tx_tag);
2399218792Snp
2400218792Snp	bzero(txq, sizeof(*txq));
2401218792Snp	return (0);
2402218792Snp}
2403218792Snp
2404218792Snpstatic void
2405218792Snponeseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2406218792Snp{
2407218792Snp	bus_addr_t *ba = arg;
2408218792Snp
2409218792Snp	KASSERT(nseg == 1,
2410218792Snp	    ("%s meant for single segment mappings only.", __func__));
2411218792Snp
2412218792Snp	*ba = error ? 0 : segs->ds_addr;
2413218792Snp}
2414218792Snp
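/*
 * An ingress queue entry is "new" only if the generation bit in its trailing
 * rsp_ctrl matches the queue's current generation.  iq_next flips iq->gen on
 * every wrap, so entries written during the previous pass over the ring stop
 * looking new.
 */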
2415218792Snpstatic inline bool
2416218792Snpis_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
2417218792Snp{
2418218792Snp	*ctrl = (void *)((uintptr_t)iq->cdesc +
2419218792Snp	    (iq->esize - sizeof(struct rsp_ctrl)));
2420218792Snp
2421218792Snp	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
2422218792Snp}
2423218792Snp
2424218792Snpstatic inline void
2425218792Snpiq_next(struct sge_iq *iq)
2426218792Snp{
2427218792Snp	iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
2428218792Snp	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
2429218792Snp		iq->cidx = 0;
2430218792Snp		iq->gen ^= 1;
2431218792Snp		iq->cdesc = iq->desc;
2432218792Snp	}
2433218792Snp}
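
/* Each 64-byte hardware freelist descriptor covers 8 software buffer slots. */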
2434218792Snp
2435220905Snp#define FL_HW_IDX(x) ((x) >> 3)
2436218792Snpstatic inline void
2437218792Snpring_fl_db(struct adapter *sc, struct sge_fl *fl)
2438218792Snp{
2439218792Snp	int ndesc = fl->pending / 8;
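	/*
	 * E.g., pending = 20 buffers yields up to ndesc = 2 hardware
	 * descriptors (16 buffers) to publish; the leftover 4 stay pending
	 * until a later doorbell.
	 */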
2440218792Snp
2441220905Snp	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
2442220905Snp		ndesc--;	/* hold back one credit */
2443218792Snp
2444220905Snp	if (ndesc <= 0)
2445220905Snp		return;		/* nothing to do */
2446220905Snp
2447218792Snp	wmb();
2448218792Snp
2449218792Snp	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
2450218792Snp	    V_QID(fl->cntxt_id) | V_PIDX(ndesc));
2451220905Snp	fl->pending -= ndesc * 8;
2452218792Snp}
2453218792Snp
2454220905Snp/*
2455228561Snp * Fill up the freelist with up to nbufs buffers and maybe ring its doorbell.
2456228561Snp *
2457228561Snp * Returns non-zero to indicate that the freelist should be added to the list
2458228561Snp * of starving freelists.
2459220905Snp */
2460228561Snpstatic int
2461228561Snprefill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
2462218792Snp{
2463218792Snp	__be64 *d = &fl->desc[fl->pidx];
2464218792Snp	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
2465218792Snp	bus_dma_tag_t tag;
2466218792Snp	bus_addr_t pa;
2467218792Snp	caddr_t cl;
2468218792Snp	int rc;
2469218792Snp
2470218792Snp	FL_LOCK_ASSERT_OWNED(fl);
2471218792Snp
2472228561Snp	if (nbufs > fl->needed)
2473218792Snp		nbufs = fl->needed;
2474218792Snp
2475218792Snp	while (nbufs--) {
2476218792Snp
2477218792Snp		if (sd->cl != NULL) {
2478218792Snp
2479218792Snp			/*
2480218792Snp			 * This happens when a frame small enough to fit
2481218792Snp			 * entirely in an mbuf was received in cl last time.
2482218792Snp			 * We'd held on to cl and can reuse it now.  Note that
2483218792Snp			 * we reuse a cluster of the old size if fl->tag_idx is
2484218792Snp			 * no longer the same as sd->tag_idx.
2485218792Snp			 */
2486218792Snp
2487218792Snp			KASSERT(*d == sd->ba_tag,
2488218792Snp			    ("%s: recycling problem at pidx %d",
2489218792Snp			    __func__, fl->pidx));
2490218792Snp
2491218792Snp			d++;
2492218792Snp			goto recycled;
2493218792Snp		}
2494218792Snp
2496218792Snp		if (fl->tag_idx != sd->tag_idx) {
2497218792Snp			bus_dmamap_t map;
2498218792Snp			bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
2499218792Snp			bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
2500218792Snp
2501218792Snp			/*
2502218792Snp			 * An MTU change can get us here.  Discard the old map
2503218792Snp			 * which was created with the old tag, but only if
2504218792Snp			 * we're able to get a new one.
2505218792Snp			 */
2506218792Snp			rc = bus_dmamap_create(newtag, 0, &map);
2507218792Snp			if (rc == 0) {
2508218792Snp				bus_dmamap_destroy(oldtag, sd->map);
2509218792Snp				sd->map = map;
2510218792Snp				sd->tag_idx = fl->tag_idx;
2511218792Snp			}
2512218792Snp		}
2513218792Snp
2514218792Snp		tag = fl->tag[sd->tag_idx];
2515218792Snp
2516218792Snp		cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
2517218792Snp		if (cl == NULL)
2518218792Snp			break;
2519218792Snp
2520219290Snp		rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
2521219290Snp		    oneseg_dma_callback, &pa, 0);
2522218792Snp		if (rc != 0 || pa == 0) {
2523218792Snp			fl->dmamap_failed++;
2524218792Snp			uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
2525218792Snp			break;
2526218792Snp		}
2527218792Snp
2528218792Snp		sd->cl = cl;
2529218792Snp		*d++ = htobe64(pa | sd->tag_idx);
2530218792Snp
2531218792Snp#ifdef INVARIANTS
2532218792Snp		sd->ba_tag = htobe64(pa | sd->tag_idx);
2533218792Snp#endif
2534218792Snp
2535219290Snprecycled:
2536219290Snp		/* sd->m is never recycled, should always be NULL */
2537219290Snp		KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));
2538219290Snp
2539219290Snp		sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
2540219290Snp		if (sd->m == NULL)
2541219290Snp			break;
2542219290Snp
2543219290Snp		fl->pending++;
2544218792Snp		fl->needed--;
2545218792Snp		sd++;
2546218792Snp		if (++fl->pidx == fl->cap) {
2547218792Snp			fl->pidx = 0;
2548218792Snp			sd = fl->sdesc;
2549218792Snp			d = fl->desc;
2550218792Snp		}
2551218792Snp	}
2552220905Snp
2553228561Snp	if (fl->pending >= 8)
2554220905Snp		ring_fl_db(sc, fl);
2555228561Snp
2556228561Snp	return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
2557218792Snp}
2558218792Snp
2559228561Snp/*
2560228561Snp * Attempt to refill all starving freelists.
2561228561Snp */
2562228561Snpstatic void
2563228561Snprefill_sfl(void *arg)
2564228561Snp{
2565228561Snp	struct adapter *sc = arg;
2566228561Snp	struct sge_fl *fl, *fl_temp;
2567228561Snp
2568228561Snp	mtx_lock(&sc->sfl_lock);
2569228561Snp	TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
2570228561Snp		FL_LOCK(fl);
2571228561Snp		refill_fl(sc, fl, 64);
2572228561Snp		if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
2573228561Snp			TAILQ_REMOVE(&sc->sfl, fl, link);
2574228561Snp			fl->flags &= ~FL_STARVING;
2575228561Snp		}
2576228561Snp		FL_UNLOCK(fl);
2577228561Snp	}
2578228561Snp
2579228561Snp	if (!TAILQ_EMPTY(&sc->sfl))
2580228561Snp		callout_schedule(&sc->sfl_callout, hz / 5);
2581228561Snp	mtx_unlock(&sc->sfl_lock);
2582228561Snp}
2583228561Snp
2584218792Snpstatic int
2585218792Snpalloc_fl_sdesc(struct sge_fl *fl)
2586218792Snp{
2587218792Snp	struct fl_sdesc *sd;
2588218792Snp	bus_dma_tag_t tag;
2589218792Snp	int i, rc;
2590218792Snp
2591218792Snp	FL_LOCK_ASSERT_OWNED(fl);
2592218792Snp
2593218792Snp	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
2594218792Snp	    M_ZERO | M_WAITOK);
2595218792Snp
2596218792Snp	tag = fl->tag[fl->tag_idx];
2597218792Snp	sd = fl->sdesc;
2598218792Snp	for (i = 0; i < fl->cap; i++, sd++) {
2599218792Snp
2600218792Snp		sd->tag_idx = fl->tag_idx;
2601218792Snp		rc = bus_dmamap_create(tag, 0, &sd->map);
2602218792Snp		if (rc != 0)
2603218792Snp			goto failed;
2604218792Snp	}
2605218792Snp
2606218792Snp	return (0);
2607218792Snpfailed:
2608218792Snp	while (--i >= 0) {
2609218792Snp		sd--;
2610218792Snp		bus_dmamap_destroy(tag, sd->map);
2611218792Snp		if (sd->m) {
2612219392Snp			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
2613218792Snp			m_free(sd->m);
2614218792Snp			sd->m = NULL;
2615218792Snp		}
2616218792Snp	}
2617218792Snp	KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
2618218792Snp
2619218792Snp	free(fl->sdesc, M_CXGBE);
2620218792Snp	fl->sdesc = NULL;
2621218792Snp
2622218792Snp	return (rc);
2623218792Snp}
2624218792Snp
2625218792Snpstatic void
2626218792Snpfree_fl_sdesc(struct sge_fl *fl)
2627218792Snp{
2628218792Snp	struct fl_sdesc *sd;
2629218792Snp	int i;
2630218792Snp
2631218792Snp	FL_LOCK_ASSERT_OWNED(fl);
2632218792Snp
2633218792Snp	sd = fl->sdesc;
2634218792Snp	for (i = 0; i < fl->cap; i++, sd++) {
2635218792Snp
2636218792Snp		if (sd->m) {
2637219392Snp			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
2638218792Snp			m_free(sd->m);
2639218792Snp			sd->m = NULL;
2640218792Snp		}
2641218792Snp
2642218792Snp		if (sd->cl) {
2643218792Snp			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
2644218792Snp			uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
2645218792Snp			sd->cl = NULL;
2646218792Snp		}
2647218792Snp
2648218792Snp		bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
2649218792Snp	}
2650218792Snp
2651218792Snp	free(fl->sdesc, M_CXGBE);
2652218792Snp	fl->sdesc = NULL;
2653218792Snp}
2654218792Snp
2655228561Snpint
2656228561Snpt4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
2657228561Snp    int flags)
2658218792Snp{
2659218792Snp	struct tx_map *txm;
2660228561Snp	int i, rc;
2661218792Snp
2662228561Snp	txmaps->map_total = txmaps->map_avail = count;
2663228561Snp	txmaps->map_cidx = txmaps->map_pidx = 0;
2664218792Snp
2665228561Snp	txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
2666228561Snp	    M_ZERO | flags);
2667218792Snp
2668228561Snp	txm = txmaps->maps;
2669218792Snp	for (i = 0; i < count; i++, txm++) {
2670228561Snp		rc = bus_dmamap_create(tx_tag, 0, &txm->map);
2671218792Snp		if (rc != 0)
2672218792Snp			goto failed;
2673218792Snp	}
2674218792Snp
2675218792Snp	return (0);
2676218792Snpfailed:
2677218792Snp	while (--i >= 0) {
2678218792Snp		txm--;
2679228561Snp		bus_dmamap_destroy(tx_tag, txm->map);
2680218792Snp	}
2681228561Snp	KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
2682218792Snp
2683228561Snp	free(txmaps->maps, M_CXGBE);
2684228561Snp	txmaps->maps = NULL;
2685218792Snp
2686218792Snp	return (rc);
2687218792Snp}
2688218792Snp
2689228561Snpvoid
2690228561Snpt4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
2691218792Snp{
2692218792Snp	struct tx_map *txm;
2693218792Snp	int i;
2694218792Snp
2695228561Snp	txm = txmaps->maps;
2696228561Snp	for (i = 0; i < txmaps->map_total; i++, txm++) {
2697218792Snp
2698218792Snp		if (txm->m) {
2699228561Snp			bus_dmamap_unload(tx_tag, txm->map);
2700218792Snp			m_freem(txm->m);
2701218792Snp			txm->m = NULL;
2702218792Snp		}
2703218792Snp
2704228561Snp		bus_dmamap_destroy(tx_tag, txm->map);
2705218792Snp	}
2706218792Snp
2707228561Snp	free(txmaps->maps, M_CXGBE);
2708228561Snp	txmaps->maps = NULL;
2709218792Snp}
2710218792Snp
2711218792Snp/*
2712218792Snp * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
2713218792Snp * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
2714218792Snp * of immediate data.
2715218792Snp */
2716218792Snp#define IMM_LEN ( \
2717228561Snp      2 * EQ_ESIZE \
2718218792Snp    - sizeof(struct fw_eth_tx_pkt_wr) \
2719218792Snp    - sizeof(struct cpl_tx_pkt_core))
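
/*
 * E.g., assuming 64-byte descriptors (EQ_ESIZE) and the 16-byte
 * fw_eth_tx_pkt_wr and cpl_tx_pkt_core headers, IMM_LEN works out to
 * 2 * 64 - 16 - 16 = 96 bytes, matching the comment above.
 */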
2720218792Snp
2721218792Snp/*
2722218792Snp * Returns non-zero on failure; no need to clean up anything in that case.
2723218792Snp *
2724218792Snp * Note 1: We always try to defrag the mbuf if required and return EFBIG only
2725218792Snp * if the resulting chain still won't fit in a tx descriptor.
2726218792Snp *
2727218792Snp * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
2728218792Snp * does not have the TCP header in it.
2729218792Snp */
2730218792Snpstatic int
2731218792Snpget_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
2732218792Snp    int sgl_only)
2733218792Snp{
2734218792Snp	struct mbuf *m = *fp;
2735228561Snp	struct tx_maps *txmaps;
2736218792Snp	struct tx_map *txm;
2737218792Snp	int rc, defragged = 0, n;
2738218792Snp
2739218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
2740218792Snp
2741218792Snp	if (m->m_pkthdr.tso_segsz)
2742218792Snp		sgl_only = 1;	/* Do not allow immediate data with LSO */
2743218792Snp
2744218792Snpstart:	sgl->nsegs = 0;
2745218792Snp
2746218792Snp	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
2747218792Snp		return (0);	/* nsegs = 0 tells caller to use imm. tx */
2748218792Snp
2749228561Snp	txmaps = &txq->txmaps;
2750228561Snp	if (txmaps->map_avail == 0) {
2751218792Snp		txq->no_dmamap++;
2752218792Snp		return (ENOMEM);
2753218792Snp	}
2754228561Snp	txm = &txmaps->maps[txmaps->map_pidx];
2755218792Snp
2756218792Snp	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
2757218792Snp		*fp = m_pullup(m, 50);
2758218792Snp		m = *fp;
2759218792Snp		if (m == NULL)
2760218792Snp			return (ENOBUFS);
2761218792Snp	}
2762218792Snp
2763220873Snp	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
2764218792Snp	    &sgl->nsegs, BUS_DMA_NOWAIT);
2765218792Snp	if (rc == EFBIG && defragged == 0) {
2766243857Sglebius		m = m_defrag(m, M_NOWAIT);
2767218792Snp		if (m == NULL)
2768218792Snp			return (EFBIG);
2769218792Snp
2770218792Snp		defragged = 1;
2771218792Snp		*fp = m;
2772218792Snp		goto start;
2773218792Snp	}
2774218792Snp	if (rc != 0)
2775218792Snp		return (rc);
2776218792Snp
2777218792Snp	txm->m = m;
2778228561Snp	txmaps->map_avail--;
2779228561Snp	if (++txmaps->map_pidx == txmaps->map_total)
2780228561Snp		txmaps->map_pidx = 0;
2781218792Snp
2782218792Snp	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
2783218792Snp	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
2784218792Snp
2785218792Snp	/*
2786218792Snp	 * Store the # of flits required to hold this frame's SGL in nflits.  An
2787218792Snp	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
2788218792Snp	 * multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not used
2789218792Snp	 * then len1 must be set to 0.
2790218792Snp	 */
2791218792Snp	n = sgl->nsegs - 1;
2792218792Snp	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
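
	/*
	 * E.g., nsegs = 1: n = 0, nflits = 2 (ULPTX header + len0, addr0).
	 * nsegs = 3: n = 2, nflits = 5, i.e. those two flits plus one
	 * (len1 | len2, addr1, addr2) triple.
	 */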
2793218792Snp
2794218792Snp	return (0);
2795218792Snp}
2796218792Snp
2798218792Snp/*
2799218792Snp * Releases all the txq resources used up in the specified sgl.
2800218792Snp */
2801218792Snpstatic int
2802218792Snpfree_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
2803218792Snp{
2804228561Snp	struct tx_maps *txmaps;
2805218792Snp	struct tx_map *txm;
2806218792Snp
2807218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
2808218792Snp
2809218792Snp	if (sgl->nsegs == 0)
2810218792Snp		return (0);	/* didn't use any map */
2811218792Snp
2812228561Snp	txmaps = &txq->txmaps;
2813228561Snp
2814218792Snp	/* 1 pkt uses exactly 1 map, back it out */
2815218792Snp
2816228561Snp	txmaps->map_avail++;
2817228561Snp	if (txmaps->map_pidx > 0)
2818228561Snp		txmaps->map_pidx--;
2819218792Snp	else
2820228561Snp		txmaps->map_pidx = txmaps->map_total - 1;
2821218792Snp
2822228561Snp	txm = &txmaps->maps[txmaps->map_pidx];
2823220873Snp	bus_dmamap_unload(txq->tx_tag, txm->map);
2824218792Snp	txm->m = NULL;
2825218792Snp
2826218792Snp	return (0);
2827218792Snp}
2828218792Snp
2829218792Snpstatic int
2830218792Snpwrite_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
2831218792Snp    struct sgl *sgl)
2832218792Snp{
2833218792Snp	struct sge_eq *eq = &txq->eq;
2834218792Snp	struct fw_eth_tx_pkt_wr *wr;
2835218792Snp	struct cpl_tx_pkt_core *cpl;
2836218792Snp	uint32_t ctrl;	/* used in many unrelated places */
2837218792Snp	uint64_t ctrl1;
2838219286Snp	int nflits, ndesc, pktlen;
2839218792Snp	struct tx_sdesc *txsd;
2840218792Snp	caddr_t dst;
2841218792Snp
2842218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
2843218792Snp
2844219286Snp	pktlen = m->m_pkthdr.len;
2845219286Snp
2846218792Snp	/*
2847218792Snp	 * Do we have enough flits to send this frame out?
2848218792Snp	 */
2849218792Snp	ctrl = sizeof(struct cpl_tx_pkt_core);
2850218792Snp	if (m->m_pkthdr.tso_segsz) {
2851218792Snp		nflits = TXPKT_LSO_WR_HDR;
2852237436Snp		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
2853218792Snp	} else
2854218792Snp		nflits = TXPKT_WR_HDR;
2855218792Snp	if (sgl->nsegs > 0)
2856218792Snp		nflits += sgl->nflits;
2857218792Snp	else {
2858219286Snp		nflits += howmany(pktlen, 8);
2859219286Snp		ctrl += pktlen;
2860218792Snp	}
2861218792Snp	ndesc = howmany(nflits, 8);
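	/*
	 * Each hardware descriptor holds 8 flits (64 bytes); e.g., assuming
	 * the 4-flit txpkt header (16-byte WR + 16-byte CPL) plus a 5-flit
	 * SGL, nflits = 9 and hence ndesc = 2.
	 */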
2862218792Snp	if (ndesc > eq->avail)
2863218792Snp		return (ENOMEM);
2864218792Snp
2865218792Snp	/* Firmware work request header */
2866218792Snp	wr = (void *)&eq->desc[eq->pidx];
2867218792Snp	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
2868228561Snp	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
2869218792Snp	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
2870228561Snp	if (eq->avail == ndesc) {
2871228561Snp		if (!(eq->flags & EQ_CRFLUSHED)) {
2872228561Snp			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2873228561Snp			eq->flags |= EQ_CRFLUSHED;
2874228561Snp		}
2875228561Snp		eq->flags |= EQ_STALLED;
2876220643Snp	}
2877220643Snp
2878218792Snp	wr->equiq_to_len16 = htobe32(ctrl);
2879218792Snp	wr->r3 = 0;
2880218792Snp
2881218792Snp	if (m->m_pkthdr.tso_segsz) {
2882237436Snp		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
2883218792Snp		struct ether_header *eh;
2884237819Snp		void *l3hdr;
2885237819Snp#if defined(INET) || defined(INET6)
2886218792Snp		struct tcphdr *tcp;
2887237819Snp#endif
2888237819Snp		uint16_t eh_type;
2889218792Snp
2890218792Snp		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
2891218792Snp		    F_LSO_LAST_SLICE;
2892218792Snp
2893218792Snp		eh = mtod(m, struct ether_header *);
2894237819Snp		eh_type = ntohs(eh->ether_type);
2895237819Snp		if (eh_type == ETHERTYPE_VLAN) {
2896237819Snp			struct ether_vlan_header *evh = (void *)eh;
2897237819Snp
2898218792Snp			ctrl |= V_LSO_ETHHDR_LEN(1);
2899237819Snp			l3hdr = evh + 1;
2900237819Snp			eh_type = ntohs(evh->evl_proto);
2901218792Snp		} else
2902237819Snp			l3hdr = eh + 1;
2903218792Snp
2904237819Snp		switch (eh_type) {
2905237819Snp#ifdef INET6
2906237819Snp		case ETHERTYPE_IPV6:
2907237819Snp		{
2908237819Snp			struct ip6_hdr *ip6 = l3hdr;
2909218792Snp
2910237819Snp			/*
2911237819Snp			 * XXX-BZ For now we do not pretend to support
2912237819Snp			 * IPv6 extension headers.
2913237819Snp			 */
2914237819Snp			KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
2915237819Snp			    "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
2916237819Snp			tcp = (struct tcphdr *)(ip6 + 1);
2917237819Snp			ctrl |= F_LSO_IPV6;
2918237819Snp			ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
2919237819Snp			    V_LSO_TCPHDR_LEN(tcp->th_off);
2920237819Snp			break;
2921237819Snp		}
2922237819Snp#endif
2923237819Snp#ifdef INET
2924237819Snp		case ETHERTYPE_IP:
2925237819Snp		{
2926237819Snp			struct ip *ip = l3hdr;
2927237819Snp
2928237819Snp			tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
2929237819Snp			ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
2930237819Snp			    V_LSO_TCPHDR_LEN(tcp->th_off);
2931237819Snp			break;
2932237819Snp		}
2933237819Snp#endif
2934237819Snp		default:
2935237819Snp			panic("%s: CSUM_TSO but no supported IP version "
2936237819Snp			    "(0x%04x)", __func__, eh_type);
2937237819Snp		}
2938237819Snp
2939218792Snp		lso->lso_ctrl = htobe32(ctrl);
2940218792Snp		lso->ipid_ofst = htobe16(0);
2941218792Snp		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
2942218792Snp		lso->seqno_offset = htobe32(0);
2943219286Snp		lso->len = htobe32(pktlen);
2944218792Snp
2945218792Snp		cpl = (void *)(lso + 1);
2946218792Snp
2947218792Snp		txq->tso_wrs++;
2948218792Snp	} else
2949218792Snp		cpl = (void *)(wr + 1);
2950218792Snp
2951218792Snp	/* Checksum offload */
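	/*
	 * A zero ctrl1 asks the hardware to insert both checksums; the DIS
	 * bits set below turn off whatever the stack did not ask for.  TSO
	 * implies both IP and L4 checksum insertion.
	 */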
2952218792Snp	ctrl1 = 0;
2953247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
2954218792Snp		ctrl1 |= F_TXPKT_IPCSUM_DIS;
2955237799Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
2956247062Snp	    CSUM_TCP_IPV6 | CSUM_TSO)))
2957218792Snp		ctrl1 |= F_TXPKT_L4CSUM_DIS;
2958237799Snp	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
2959247062Snp	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
2960218792Snp		txq->txcsum++;	/* some hardware assistance provided */
2961218792Snp
2962218792Snp	/* VLAN tag insertion */
2963218792Snp	if (m->m_flags & M_VLANTAG) {
2964218792Snp		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
2965218792Snp		txq->vlan_insertion++;
2966218792Snp	}
2967218792Snp
2968218792Snp	/* CPL header */
2969218792Snp	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2970218792Snp	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2971218792Snp	cpl->pack = 0;
2972219286Snp	cpl->len = htobe16(pktlen);
2973218792Snp	cpl->ctrl1 = htobe64(ctrl1);
2974218792Snp
2975218792Snp	/* Software descriptor */
2976220873Snp	txsd = &txq->sdesc[eq->pidx];
2977218792Snp	txsd->desc_used = ndesc;
2978218792Snp
2979218792Snp	eq->pending += ndesc;
2980218792Snp	eq->avail -= ndesc;
2981218792Snp	eq->pidx += ndesc;
2982218792Snp	if (eq->pidx >= eq->cap)
2983218792Snp		eq->pidx -= eq->cap;
2984218792Snp
2985218792Snp	/* SGL */
2986218792Snp	dst = (void *)(cpl + 1);
2987218792Snp	if (sgl->nsegs > 0) {
2988220873Snp		txsd->credits = 1;
2989218792Snp		txq->sgl_wrs++;
2990218792Snp		write_sgl_to_txd(eq, sgl, &dst);
2991218792Snp	} else {
2992220873Snp		txsd->credits = 0;
2993218792Snp		txq->imm_wrs++;
2994218792Snp		for (; m; m = m->m_next) {
2995218792Snp			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
2996219286Snp#ifdef INVARIANTS
2997219286Snp			pktlen -= m->m_len;
2998219286Snp#endif
2999218792Snp		}
3000219286Snp#ifdef INVARIANTS
3001219286Snp		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
3002219286Snp#endif
3004218792Snp	}
3005218792Snp
3006218792Snp	txq->txpkt_wrs++;
3007218792Snp	return (0);
3008218792Snp}
3009218792Snp
3010218792Snp/*
3011218792Snp * Returns 0 to indicate that m has been accepted into a coalesced tx work
3012218792Snp * request.  It has either been folded into txpkts or txpkts was flushed and m
3013218792Snp * has started a new coalesced work request (as the first frame in a fresh
3014218792Snp * txpkts).
3015218792Snp *
3016218792Snp * Returns non-zero to indicate a failure - the caller is responsible for
3017218792Snp * transmitting m.  If there was anything in txpkts, it has been flushed.
3018218792Snp */
3019218792Snpstatic int
3020218792Snpadd_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
3021218792Snp    struct mbuf *m, struct sgl *sgl)
3022218792Snp{
3023218792Snp	struct sge_eq *eq = &txq->eq;
3024218792Snp	int can_coalesce;
3025218792Snp	struct tx_sdesc *txsd;
3026218792Snp	int flits;
3027218792Snp
3028218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3029218792Snp
3030228561Snp	KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
3031228561Snp
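	/*
	 * Coalescing criteria (checked below): no TSO, the frame's SGL fits
	 * in what is left of one work request and of the available hardware
	 * descriptors, and the accumulated payload stays under 64K.
	 */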
3032218792Snp	if (txpkts->npkt > 0) {
3033218792Snp		flits = TXPKTS_PKT_HDR + sgl->nflits;
3034218792Snp		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3035218792Snp		    txpkts->nflits + flits <= TX_WR_FLITS &&
3036218792Snp		    txpkts->nflits + flits <= eq->avail * 8 &&
3037218792Snp		    txpkts->plen + m->m_pkthdr.len < 65536;
3038218792Snp
3039218792Snp		if (can_coalesce) {
3040218792Snp			txpkts->npkt++;
3041218792Snp			txpkts->nflits += flits;
3042218792Snp			txpkts->plen += m->m_pkthdr.len;
3043218792Snp
3044220873Snp			txsd = &txq->sdesc[eq->pidx];
3045220873Snp			txsd->credits++;
3046218792Snp
3047218792Snp			return (0);
3048218792Snp		}
3049218792Snp
3050218792Snp		/*
3051218792Snp		 * Couldn't coalesce m into txpkts.  The first order of business
3052218792Snp		 * is to send txpkts on its way.  Then we'll revisit m.
3053218792Snp		 */
3054218792Snp		write_txpkts_wr(txq, txpkts);
3055218792Snp	}
3056218792Snp
3057218792Snp	/*
3058218792Snp	 * Check if we can start a new coalesced tx work request with m as
3059218792Snp	 * the first packet in it.
3060218792Snp	 */
3061218792Snp
3062218792Snp	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
3063218792Snp
3064218792Snp	flits = TXPKTS_WR_HDR + sgl->nflits;
3065218792Snp	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3066218792Snp	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
3067218792Snp
3068218792Snp	if (can_coalesce == 0)
3069218792Snp		return (EINVAL);
3070218792Snp
3071218792Snp	/*
3072218792Snp	 * Start a fresh coalesced tx WR with m as the first frame in it.
3073218792Snp	 */
3074218792Snp	txpkts->npkt = 1;
3075218792Snp	txpkts->nflits = flits;
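	/*
	 * Frame data starts at flit 2; the first two flits of the descriptor
	 * are left for the fw_eth_tx_pkts_wr header that write_txpkts_wr
	 * fills in when this coalesced WR is flushed.
	 */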
3076218792Snp	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
3077218792Snp	txpkts->plen = m->m_pkthdr.len;
3078218792Snp
3079220873Snp	txsd = &txq->sdesc[eq->pidx];
3080220873Snp	txsd->credits = 1;
3081218792Snp
3082218792Snp	return (0);
3083218792Snp}
3084218792Snp
3085218792Snp/*
3086218792Snp * Note that write_txpkts_wr can never run out of hardware descriptors (but
3087218792Snp * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
3088218792Snp * coalescing only if sufficient hardware descriptors are available.
3089218792Snp */
3090218792Snpstatic void
3091218792Snpwrite_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
3092218792Snp{
3093218792Snp	struct sge_eq *eq = &txq->eq;
3094218792Snp	struct fw_eth_tx_pkts_wr *wr;
3095218792Snp	struct tx_sdesc *txsd;
3096218792Snp	uint32_t ctrl;
3097218792Snp	int ndesc;
3098218792Snp
3099218792Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3100218792Snp
3101218792Snp	ndesc = howmany(txpkts->nflits, 8);
3102218792Snp
3103218792Snp	wr = (void *)&eq->desc[eq->pidx];
3104228561Snp	wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
3105218792Snp	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
3106228561Snp	if (eq->avail == ndesc) {
3107228561Snp		if (!(eq->flags & EQ_CRFLUSHED)) {
3108228561Snp			ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3109228561Snp			eq->flags |= EQ_CRFLUSHED;
3110228561Snp		}
3111228561Snp		eq->flags |= EQ_STALLED;
3112220643Snp	}
3113218792Snp	wr->equiq_to_len16 = htobe32(ctrl);
3114218792Snp	wr->plen = htobe16(txpkts->plen);
3115218792Snp	wr->npkt = txpkts->npkt;
3116222513Snp	wr->r3 = wr->type = 0;
3117218792Snp
3118218792Snp	/* Everything else already written */
3119218792Snp
3120220873Snp	txsd = &txq->sdesc[eq->pidx];
3121218792Snp	txsd->desc_used = ndesc;
3122218792Snp
3123220643Snp	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
3124218792Snp
3125218792Snp	eq->pending += ndesc;
3126218792Snp	eq->avail -= ndesc;
3127218792Snp	eq->pidx += ndesc;
3128218792Snp	if (eq->pidx >= eq->cap)
3129218792Snp		eq->pidx -= eq->cap;
3130218792Snp
3131218792Snp	txq->txpkts_pkts += txpkts->npkt;
3132218792Snp	txq->txpkts_wrs++;
3133218792Snp	txpkts->npkt = 0;	/* emptied */
3134218792Snp}
3135218792Snp
3136218792Snpstatic inline void
3137218792Snpwrite_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
3138218792Snp    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
3139218792Snp{
3140218792Snp	struct ulp_txpkt *ulpmc;
3141218792Snp	struct ulptx_idata *ulpsc;
3142218792Snp	struct cpl_tx_pkt_core *cpl;
3143218792Snp	struct sge_eq *eq = &txq->eq;
3144218792Snp	uintptr_t flitp, start, end;
3145218792Snp	uint64_t ctrl;
3146218792Snp	caddr_t dst;
3147218792Snp
3148218792Snp	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
3149218792Snp
3150218792Snp	start = (uintptr_t)eq->desc;
3151218792Snp	end = (uintptr_t)eq->spg;
3152218792Snp
3153218792Snp	/* Checksum offload */
3154218792Snp	ctrl = 0;
3155247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3156218792Snp		ctrl |= F_TXPKT_IPCSUM_DIS;
3157247062Snp	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3158247062Snp	    CSUM_TCP_IPV6 | CSUM_TSO)))
3159218792Snp		ctrl |= F_TXPKT_L4CSUM_DIS;
3160247062Snp	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3161247062Snp	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3162218792Snp		txq->txcsum++;	/* some hardware assistance provided */
3163218792Snp
3164218792Snp	/* VLAN tag insertion */
3165218792Snp	if (m->m_flags & M_VLANTAG) {
3166218792Snp		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3167218792Snp		txq->vlan_insertion++;
3168218792Snp	}
3169218792Snp
3170218792Snp	/*
3171218792Snp	 * The previous packet's SGL must have ended at a 16 byte boundary (this
3172218792Snp	 * is required by the firmware/hardware).  It follows that flitp cannot
3173218792Snp	 * wrap around between the ULPTX master command and ULPTX subcommand (8
3174218792Snp	 * bytes each), and that it cannot wrap around in the middle of the
3175218792Snp	 * cpl_tx_pkt_core either.
3176218792Snp	 */
3177218792Snp	flitp = (uintptr_t)txpkts->flitp;
3178218792Snp	KASSERT((flitp & 0xf) == 0,
3179218792Snp	    ("%s: last SGL did not end at 16 byte boundary: %p",
3180218792Snp	    __func__, txpkts->flitp));
3181218792Snp
3182218792Snp	/* ULP master command */
3183218792Snp	ulpmc = (void *)flitp;
3184219288Snp	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
3185219288Snp	    V_ULP_TXPKT_FID(eq->iqid));
3186218792Snp	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
3187218792Snp	    sizeof(*cpl) + 8 * sgl->nflits, 16));
3188218792Snp
3189218792Snp	/* ULP subcommand */
3190218792Snp	ulpsc = (void *)(ulpmc + 1);
3191218792Snp	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
3192218792Snp	    F_ULP_TX_SC_MORE);
3193218792Snp	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
3194218792Snp
3195218792Snp	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
3196218792Snp	if (flitp == end)
3197218792Snp		flitp = start;
3198218792Snp
3199218792Snp	/* CPL_TX_PKT */
3200218792Snp	cpl = (void *)flitp;
3201218792Snp	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3202218792Snp	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3203218792Snp	cpl->pack = 0;
3204218792Snp	cpl->len = htobe16(m->m_pkthdr.len);
3205218792Snp	cpl->ctrl1 = htobe64(ctrl);
3206218792Snp
3207218792Snp	flitp += sizeof(*cpl);
3208218792Snp	if (flitp == end)
3209218792Snp		flitp = start;
3210218792Snp
3211218792Snp	/* SGL for this frame */
3212218792Snp	dst = (caddr_t)flitp;
3213218792Snp	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
3214218792Snp	txpkts->flitp = (void *)dst;
3215218792Snp
3216218792Snp	KASSERT(((uintptr_t)dst & 0xf) == 0,
3217218792Snp	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
3218218792Snp}
3219218792Snp
3220218792Snp/*
3221218792Snp * If the SGL ends on an address that is not 16-byte aligned, this function
3222218792Snp * appends a zero-filled pad flit and returns 1; otherwise it returns 0.
3223218792Snp */
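/*
 * Layout, as a rough sketch of the structures used below: the ulptx_sgl
 * header (cmd_nsge, len0, addr0) takes 2 flits and every subsequent pair of
 * segments takes 3 more (two lengths packed into one flit, one flit per
 * address).  E.g. nsegs = 3 works out to 5 flits, which is not a multiple of
 * 16 bytes, so the pad flit added here brings it to 6.
 */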
3224218792Snpstatic int
3225218792Snpwrite_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
3226218792Snp{
3227218792Snp	__be64 *flitp, *end;
3228218792Snp	struct ulptx_sgl *usgl;
3229218792Snp	bus_dma_segment_t *seg;
3230218792Snp	int i, padded;
3231218792Snp
3232218792Snp	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
3233218792Snp	    ("%s: bad SGL - nsegs=%d, nflits=%d",
3234218792Snp	    __func__, sgl->nsegs, sgl->nflits));
3235218792Snp
3236218792Snp	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
3237218792Snp	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
3238218792Snp
3239218792Snp	flitp = (__be64 *)(*to);
3240218792Snp	end = flitp + sgl->nflits;
3241218792Snp	seg = &sgl->seg[0];
3242218792Snp	usgl = (void *)flitp;
3243218792Snp
3244218792Snp	/*
3245218792Snp	 * We start at a 16 byte boundary somewhere inside the tx descriptor
3246218792Snp	 * ring, so we're at least 16 bytes away from the status page.  There is
3247218792Snp	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
3248218792Snp	 */
3249218792Snp
3250218792Snp	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
3251218792Snp	    V_ULPTX_NSGE(sgl->nsegs));
3252218792Snp	usgl->len0 = htobe32(seg->ds_len);
3253218792Snp	usgl->addr0 = htobe64(seg->ds_addr);
3254218792Snp	seg++;
3255218792Snp
3256218792Snp	if ((uintptr_t)end <= (uintptr_t)eq->spg) {
3257218792Snp
3258218792Snp		/* Won't wrap around at all */
3259218792Snp
3260218792Snp		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
3261218792Snp			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
3262218792Snp			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
3263218792Snp		}
3264218792Snp		if (i & 1)
3265218792Snp			usgl->sge[i / 2].len[1] = htobe32(0);
3266218792Snp	} else {
3267218792Snp
3268218792Snp		/* Will wrap somewhere in the rest of the SGL */
3269218792Snp
3270218792Snp		/* 2 flits already written, write the rest flit by flit */
3271218792Snp		flitp = (void *)(usgl + 1);
3272218792Snp		for (i = 0; i < sgl->nflits - 2; i++) {
3273218792Snp			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
3274218792Snp				flitp = (void *)eq->desc;
3275218792Snp			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
3276218792Snp		}
3277218792Snp		end = flitp;
3278218792Snp	}
3279218792Snp
3280218792Snp	if ((uintptr_t)end & 0xf) {
3281218792Snp		*(uint64_t *)end = 0;
3282218792Snp		end++;
3283218792Snp		padded = 1;
3284218792Snp	} else
3285218792Snp		padded = 0;
3286218792Snp
3287218792Snp	if ((uintptr_t)end == (uintptr_t)eq->spg)
3288218792Snp		*to = (void *)eq->desc;
3289218792Snp	else
3290218792Snp		*to = (void *)end;
3291218792Snp
3292218792Snp	return (padded);
3293218792Snp}
3294218792Snp
3295218792Snpstatic inline void
3296218792Snpcopy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
3297218792Snp{
3298237263Snp	if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
3299218792Snp		bcopy(from, *to, len);
3300218792Snp		(*to) += len;
3301218792Snp	} else {
3302218792Snp		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
3303218792Snp
3304218792Snp		bcopy(from, *to, portion);
3305218792Snp		from += portion;
3306218792Snp		portion = len - portion;	/* remaining */
3307218792Snp		bcopy(from, (void *)eq->desc, portion);
3308218792Snp		(*to) = (caddr_t)eq->desc + portion;
3309218792Snp	}
3310218792Snp}
3311218792Snp
3312218792Snpstatic inline void
3313220873Snpring_eq_db(struct adapter *sc, struct sge_eq *eq)
3314218792Snp{
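	/*
	 * Make sure the descriptor writes have reached memory before the
	 * doorbell tells the chip that pidx has advanced.
	 */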
3315218792Snp	wmb();
3316218792Snp	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
3317218792Snp	    V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
3318218792Snp	eq->pending = 0;
3319218792Snp}
3320218792Snp
3321219292Snpstatic inline int
3322219292Snpreclaimable(struct sge_eq *eq)
3323218792Snp{
3324219292Snp	unsigned int cidx;
3325218792Snp
3326218792Snp	cidx = eq->spg->cidx;	/* stable snapshot */
3327228561Snp	cidx = be16toh(cidx);
3328218792Snp
3329218792Snp	if (cidx >= eq->cidx)
3330219292Snp		return (cidx - eq->cidx);
3331218792Snp	else
3332219292Snp		return (cidx + eq->cap - eq->cidx);
3333219292Snp}
3334218792Snp
3335219292Snp/*
3336219292Snp * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
3337219292Snp * many as possible but stop when there are around "n" mbufs to free.
3338219292Snp *
3339219292Snp * The actual number reclaimed is provided as the return value.
3340219292Snp */
3341219292Snpstatic int
3342220873Snpreclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
3343219292Snp{
3344219292Snp	struct tx_sdesc *txsd;
3345228561Snp	struct tx_maps *txmaps;
3346219292Snp	struct tx_map *txm;
3347219292Snp	unsigned int reclaimed, maps;
3348220873Snp	struct sge_eq *eq = &txq->eq;
3349218792Snp
3350228561Snp	TXQ_LOCK_ASSERT_OWNED(txq);
3351218792Snp
3352219292Snp	if (can_reclaim == 0)
3353219292Snp		can_reclaim = reclaimable(eq);
3354219292Snp
3355218792Snp	maps = reclaimed = 0;
3356219292Snp	while (can_reclaim && maps < n) {
3357218792Snp		int ndesc;
3358218792Snp
3359220873Snp		txsd = &txq->sdesc[eq->cidx];
3360218792Snp		ndesc = txsd->desc_used;
3361218792Snp
3362218792Snp		/* Firmware doesn't return "partial" credits. */
3363218792Snp		KASSERT(can_reclaim >= ndesc,
3364218792Snp		    ("%s: unexpected number of credits: %d, %d",
3365218792Snp		    __func__, can_reclaim, ndesc));
3366218792Snp
3367220873Snp		maps += txsd->credits;
3368219292Snp
3369218792Snp		reclaimed += ndesc;
3370219292Snp		can_reclaim -= ndesc;
3371218792Snp
3372218792Snp		eq->cidx += ndesc;
3373219292Snp		if (__predict_false(eq->cidx >= eq->cap))
3374218792Snp			eq->cidx -= eq->cap;
3375219292Snp	}
3376218792Snp
3377228561Snp	txmaps = &txq->txmaps;
3378228561Snp	txm = &txmaps->maps[txmaps->map_cidx];
3379219292Snp	if (maps)
3380219292Snp		prefetch(txm->m);
3381218792Snp
3382218792Snp	eq->avail += reclaimed;
3383218792Snp	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
3384218792Snp	    ("%s: too many descriptors available", __func__));
3385218792Snp
3386228561Snp	txmaps->map_avail += maps;
3387228561Snp	KASSERT(txmaps->map_avail <= txmaps->map_total,
3388218792Snp	    ("%s: too many maps available", __func__));
3389218792Snp
3390218792Snp	while (maps--) {
3391219292Snp		struct tx_map *next;
3392218792Snp
3393219292Snp		next = txm + 1;
3394228561Snp		if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
3395228561Snp			next = txmaps->maps;
3396219292Snp		prefetch(next->m);
3397219292Snp
3398220873Snp		bus_dmamap_unload(txq->tx_tag, txm->map);
3399218792Snp		m_freem(txm->m);
3400218792Snp		txm->m = NULL;
3401218792Snp
3402219292Snp		txm = next;
3403228561Snp		if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
3404228561Snp			txmaps->map_cidx = 0;
3405218792Snp	}
3406218792Snp
3407218792Snp	return (reclaimed);
3408218792Snp}
3409218792Snp
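/*
 * Post a flush work request to the eq.  It carries no data; its only purpose
 * is to get the firmware to send back an egress update, which in turn lets a
 * stalled queue find out when its credits have been returned.
 */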
3410218792Snpstatic void
3411218792Snpwrite_eqflush_wr(struct sge_eq *eq)
3412218792Snp{
3413218792Snp	struct fw_eq_flush_wr *wr;
3414218792Snp
3415218792Snp	EQ_LOCK_ASSERT_OWNED(eq);
3416218792Snp	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
3417228561Snp	KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
3418218792Snp
3419218792Snp	wr = (void *)&eq->desc[eq->pidx];
3420218792Snp	bzero(wr, sizeof(*wr));
3421218792Snp	wr->opcode = FW_EQ_FLUSH_WR;
3422218792Snp	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
3423218792Snp	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
3424218792Snp
3425228561Snp	eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
3426218792Snp	eq->pending++;
3427218792Snp	eq->avail--;
3428218792Snp	if (++eq->pidx == eq->cap)
3429218792Snp		eq->pidx = 0;
3430218792Snp}
3431218792Snp
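/*
 * Returns the idx'th flit of the SGL tail, i.e. everything after len0/addr0.
 * Segments are packed three flits per pair: one flit holding two lengths,
 * then one flit per address, hence the idx % 3 cases below.
 */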
3432218792Snpstatic __be64
3433218792Snpget_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
3434218792Snp{
3435218792Snp	int i = (idx / 3) * 2;
3436218792Snp
3437218792Snp	switch (idx % 3) {
3438218792Snp	case 0: {
3439218792Snp		__be64 rc;
3440218792Snp
3441218792Snp		rc = htobe32(sgl[i].ds_len);
3442218792Snp		if (i + 1 < nsegs)
3443218792Snp			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
3444218792Snp
3445218792Snp		return (rc);
3446218792Snp	}
3447218792Snp	case 1:
3448218792Snp		return (htobe64(sgl[i].ds_addr));
3449218792Snp	case 2:
3450218792Snp		return (htobe64(sgl[i + 1].ds_addr));
3451218792Snp	}
3452218792Snp
3453218792Snp	return (0);
3454218792Snp}
3455218792Snp
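/*
 * Pick the smallest freelist buffer size that can hold bufsize.  If nothing
 * is large enough the loop runs off the end and the largest size is used.
 */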
3456218792Snpstatic void
3457228561Snpset_fl_tag_idx(struct sge_fl *fl, int bufsize)
3458218792Snp{
3459218792Snp	int i;
3460218792Snp
3461218792Snp	for (i = 0; i < FL_BUF_SIZES - 1; i++) {
3462228561Snp		if (FL_BUF_SIZE(i) >= bufsize)
3463218792Snp			break;
3464218792Snp	}
3465218792Snp
3466218792Snp	fl->tag_idx = i;
3467218792Snp}
3468219286Snp
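/*
 * Mark fl as starving and add it to the adapter's list of starving freelists
 * so that the refill_sfl callout will attempt to replenish it.  Freelists
 * that are being torn down (FL_DOOMED) are left alone.
 */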
3469222510Snpstatic void
3470228561Snpadd_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
3471222510Snp{
3472228561Snp	mtx_lock(&sc->sfl_lock);
3473228561Snp	FL_LOCK(fl);
3474228561Snp	if ((fl->flags & FL_DOOMED) == 0) {
3475228561Snp		fl->flags |= FL_STARVING;
3476228561Snp		TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
3477228561Snp		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
3478222510Snp	}
3479228561Snp	FL_UNLOCK(fl);
3480228561Snp	mtx_unlock(&sc->sfl_lock);
3481222510Snp}
3482222510Snp
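/*
 * An egress update is the firmware's response to a credit flush request.
 * Clear EQ_CRFLUSHED and, if the queue was stalled and can make progress
 * again, reschedule its tx task.
 */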
3483220873Snpstatic int
3484228561Snphandle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
3485228561Snp    struct mbuf *m)
3486220873Snp{
3487228561Snp	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
3488228561Snp	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
3489228561Snp	struct adapter *sc = iq->adapter;
3490228561Snp	struct sge *s = &sc->sge;
3491228561Snp	struct sge_eq *eq;
3492220873Snp
3493228561Snp	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
3494228561Snp	    rss->opcode));
3495220873Snp
3496228561Snp	eq = s->eqmap[qid - s->eq_start];
3497220873Snp	EQ_LOCK(eq);
3498228561Snp	KASSERT(eq->flags & EQ_CRFLUSHED,
3499228561Snp	    ("%s: unsolicited egress update", __func__));
3500228561Snp	eq->flags &= ~EQ_CRFLUSHED;
3501228561Snp	eq->egr_update++;
3502220873Snp
3503228561Snp	if (__predict_false(eq->flags & EQ_DOOMED))
3504228561Snp		wakeup_one(eq);
3505228561Snp	else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
3506228561Snp		taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
3507228561Snp	EQ_UNLOCK(eq);
3508220873Snp
3509228561Snp	return (0);
3510228561Snp}
3511220873Snp
3512247291Snp/* handle_fw_msg works for both fw4_msg and fw6_msg: data is at the same offset in both (CTASSERT below). */
3513247291SnpCTASSERT(offsetof(struct cpl_fw4_msg, data) ==
3514247291Snp    offsetof(struct cpl_fw6_msg, data));
3515247291Snp
3516228561Snpstatic int
3517239336Snphandle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
3518228561Snp{
3519239336Snp	struct adapter *sc = iq->adapter;
3520228561Snp	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
3521220873Snp
3522228561Snp	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
3523228561Snp	    rss->opcode));
3524220873Snp
3525247291Snp	if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
3526247291Snp		const struct rss_header *rss2;
3527247291Snp
3528247291Snp		rss2 = (const struct rss_header *)&cpl->data[0];
3529247291Snp		return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
3530247291Snp	}
3531247291Snp
3532239336Snp	return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
3533220873Snp}
3534221911Snp
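/*
 * Presents a uint16_t through the int-based sysctl interface.  The value is
 * copied into a local int and never written back, so stores from userland
 * are silently discarded (the sysctl is effectively read-only).
 */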
3535221911Snpstatic int
3536222510Snpsysctl_uint16(SYSCTL_HANDLER_ARGS)
3537221911Snp{
3538221911Snp	uint16_t *id = arg1;
3539221911Snp	int i = *id;
3540221911Snp
3541221911Snp	return (sysctl_handle_int(oidp, &i, 0, req));
3542221911Snp}
3543