Diff of t4_sge.c: r254727 (deleted) vs. r255005 (added)
1/*-
2 * Copyright (c) 2011 Chelsio Communications, Inc.
3 * All rights reserved.
4 * Written by: Navdeep Parhar <np@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 254727 2013-08-23 18:03:18Z np $");
29__FBSDID("$FreeBSD: head/sys/dev/cxgbe/t4_sge.c 255005 2013-08-28 20:45:45Z np $");
30
31#include "opt_inet.h"
32#include "opt_inet6.h"
33
34#include <sys/types.h>
35#include <sys/mbuf.h>
36#include <sys/socket.h>
37#include <sys/kernel.h>
38#include <sys/kdb.h>
39#include <sys/malloc.h>
40#include <sys/queue.h>
41#include <sys/taskqueue.h>
42#include <sys/sysctl.h>
43#include <sys/smp.h>
44#include <net/bpf.h>
45#include <net/ethernet.h>
46#include <net/if.h>
47#include <net/if_vlan_var.h>
48#include <netinet/in.h>
49#include <netinet/ip.h>
50#include <netinet/ip6.h>
51#include <netinet/tcp.h>
52
53#include "common/common.h"
54#include "common/t4_regs.h"
55#include "common/t4_regs_values.h"
56#include "common/t4_msg.h"
57
58struct fl_buf_info {
59 int size;
60 int type;
61 uma_zone_t zone;
62};
63
64/* Filled up by t4_sge_modload */
65static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];
66
67#define FL_BUF_SIZE(x) (fl_buf_info[x].size)
68#define FL_BUF_TYPE(x) (fl_buf_info[x].type)
69#define FL_BUF_ZONE(x) (fl_buf_info[x].zone)
70
71#ifdef T4_PKT_TIMESTAMP
72#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
73#else
74#define RX_COPY_THRESHOLD MINCLSIZE
75#endif
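/*
 * Presumably the 8 bytes subtracted above reserve room for the 60-bit
 * payload timestamp that the T4_PKT_TIMESTAMP build stashes in the mbuf's
 * leading free space (see the X_RSPD_TYPE_FLBUF handling in service_iq()).
 */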
76
77/*
78 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
79 * 0-7 are valid values.
80 */
81static int fl_pktshift = 2;
82TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);
83
84/*
85 * Pad ethernet payload up to this boundary.
86 * -1: driver should figure out a good value.
87 * Any power of 2, from 32 to 4096 (both inclusive) is a valid value.
88 */
89static int fl_pad = -1;
90TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);
91
92/*
93 * Status page length.
94 * -1: driver should figure out a good value.
95 * 64 or 128 are the only other valid values.
96 */
97static int spg_len = -1;
98TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);
99
100/*
101 * Congestion drops.
102 * -1: no congestion feedback (not recommended).
103 * 0: backpressure the channel instead of dropping packets right away.
104 * 1: no backpressure, drop packets for the congested queue immediately.
105 */
106static int cong_drop = 0;
107TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);
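/*
 * Illustrative example (not part of this revision): the tunables above are
 * normally set from /boot/loader.conf before the module loads, e.g.
 *
 *   hw.cxgbe.fl_pktshift="2"
 *   hw.cxgbe.fl_pad="32"
 *   hw.cxgbe.spg_len="64"
 *   hw.cxgbe.cong_drop="0"
 *
 * Out-of-range values are corrected in t4_sge_modload() below.
 */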
108
109/* Used to track coalesced tx work request */
110struct txpkts {
111 uint64_t *flitp; /* ptr to flit where next pkt should start */
112 uint8_t npkt; /* # of packets in this work request */
113 uint8_t nflits; /* # of flits used by this work request */
114 uint16_t plen; /* total payload (sum of all packets) */
115};
116
117/* A packet's SGL. This + m_pkthdr has all info needed for tx */
118struct sgl {
119 int nsegs; /* # of segments in the SGL, 0 means imm. tx */
120 int nflits; /* # of flits needed for the SGL */
121 bus_dma_segment_t seg[TX_SGL_SEGS];
122};
123
124static int service_iq(struct sge_iq *, int);
125static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
126 int *);
127static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
128static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
129 int);
130static inline void init_fl(struct sge_fl *, int, int, char *);
131static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
132 char *);
133static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
134 bus_addr_t *, void **);
135static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
136 void *);
137static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
138 int, int);
139static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
140static int alloc_fwq(struct adapter *);
141static int free_fwq(struct adapter *);
142static int alloc_mgmtq(struct adapter *);
143static int free_mgmtq(struct adapter *);
144static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
145 struct sysctl_oid *);
146static int free_rxq(struct port_info *, struct sge_rxq *);
147#ifdef TCP_OFFLOAD
148static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
149 struct sysctl_oid *);
150static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
151#endif
152static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
153static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
154#ifdef TCP_OFFLOAD
155static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
156#endif
157static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
158static int free_eq(struct adapter *, struct sge_eq *);
159static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
160 struct sysctl_oid *);
161static int free_wrq(struct adapter *, struct sge_wrq *);
162static int alloc_txq(struct port_info *, struct sge_txq *, int,
163 struct sysctl_oid *);
164static int free_txq(struct port_info *, struct sge_txq *);
165static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
166static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
167static inline void iq_next(struct sge_iq *);
168static inline void ring_fl_db(struct adapter *, struct sge_fl *);
169static int refill_fl(struct adapter *, struct sge_fl *, int);
170static void refill_sfl(void *);
171static int alloc_fl_sdesc(struct sge_fl *);
172static void free_fl_sdesc(struct sge_fl *);
173static void set_fl_tag_idx(struct sge_fl *, int);
174static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
175
176static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
177static int free_pkt_sgl(struct sge_txq *, struct sgl *);
178static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
179 struct sgl *);
180static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
181 struct mbuf *, struct sgl *);
182static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
183static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
184 struct txpkts *, struct mbuf *, struct sgl *);
185static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
186static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
187static inline void ring_eq_db(struct adapter *, struct sge_eq *);
188static inline int reclaimable(struct sge_eq *);
189static int reclaim_tx_descs(struct sge_txq *, int, int);
190static void write_eqflush_wr(struct sge_eq *);
191static __be64 get_flit(bus_dma_segment_t *, int, int);
192static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
193 struct mbuf *);
194static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
195 struct mbuf *);
196
197static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
198
199#if defined(__i386__) || defined(__amd64__)
200extern u_int cpu_clflush_line_size;
201#endif
202
203/*
204 * Called on MOD_LOAD. Fills up fl_buf_info[] and validates/calculates the SGE
205 * tunables.
206 */
207void
208t4_sge_modload(void)
209{
210 int i;
211 int bufsize[FL_BUF_SIZES] = {
212 MCLBYTES,
213#if MJUMPAGESIZE != MCLBYTES
214 MJUMPAGESIZE,
215#endif
216 MJUM9BYTES,
217 MJUM16BYTES
218 };
219
220 for (i = 0; i < FL_BUF_SIZES; i++) {
221 FL_BUF_SIZE(i) = bufsize[i];
222 FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
223 FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
224 }
225
226 if (fl_pktshift < 0 || fl_pktshift > 7) {
227 printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
228 " using 2 instead.\n", fl_pktshift);
229 fl_pktshift = 2;
230 }
231
232 if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
233 int pad;
234
235#if defined(__i386__) || defined(__amd64__)
236 pad = max(cpu_clflush_line_size, 32);
237#else
238 pad = max(CACHE_LINE_SIZE, 32);
239#endif
240 pad = min(pad, 4096);
241
242 if (fl_pad != -1) {
243 printf("Invalid hw.cxgbe.fl_pad value (%d),"
244 " using %d instead.\n", fl_pad, pad);
245 }
246 fl_pad = pad;
247 }
248
249 if (spg_len != 64 && spg_len != 128) {
250 int len;
251
252#if defined(__i386__) || defined(__amd64__)
253 len = cpu_clflush_line_size > 64 ? 128 : 64;
254#else
255 len = 64;
256#endif
257 if (spg_len != -1) {
258 printf("Invalid hw.cxgbe.spg_len value (%d),"
259 " using %d instead.\n", spg_len, len);
260 }
261 spg_len = len;
262 }
263
264 if (cong_drop < -1 || cong_drop > 1) {
265 printf("Invalid hw.cxgbe.cong_drop value (%d),"
266 " using 0 instead.\n", cong_drop);
267 cong_drop = 0;
268 }
269}
270
271void
272t4_init_sge_cpl_handlers(struct adapter *sc)
273{
274
275 t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
276 t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
277 t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
278 t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
279 t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
280}
281
282/*
283 * adap->params.vpd.cclk must be set up before this is called.
284 */
285void
286t4_tweak_chip_settings(struct adapter *sc)
287{
288 int i;
289 uint32_t v, m;
290 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
291 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
292 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
293 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
294
295 KASSERT(sc->flags & MASTER_PF,
296 ("%s: trying to change chip settings when not master.", __func__));
297
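	/*
	 * SGE_CONTROL encoding notes: the ingress pad boundary is programmed
	 * as log2(fl_pad) - 5 (so the minimum fl_pad of 32 maps to 0), and
	 * EGRSTATUSPAGESIZE is set only when the 128-byte status page was
	 * selected.  Both fl_pad and spg_len were validated in
	 * t4_sge_modload().
	 */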
298 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
299 V_INGPADBOUNDARY(M_INGPADBOUNDARY) | F_EGRSTATUSPAGESIZE;
300 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
301 V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
302 V_EGRSTATUSPAGESIZE(spg_len == 128);
303 t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
304
305 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
306 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
307 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
308 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
309 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
310 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
311 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
312 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
313 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);
314
315 for (i = 0; i < FL_BUF_SIZES; i++) {
316 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
317 FL_BUF_SIZE(i));
318 }
319
320 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
321 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
322 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);
323
324 KASSERT(intr_timer[0] <= timer_max,
325 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
326 timer_max));
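	/*
	 * Holdoff timers larger than the hardware can express (timer_max, in
	 * microseconds) are pulled down by repeatedly averaging with the
	 * previous, already-valid timer; the last timer is simply clamped.
	 */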
327 for (i = 1; i < nitems(intr_timer); i++) {
328 KASSERT(intr_timer[i] >= intr_timer[i - 1],
329 ("%s: timers not listed in increasing order (%d)",
330 __func__, i));
331
332 while (intr_timer[i] > timer_max) {
333 if (i == nitems(intr_timer) - 1) {
334 intr_timer[i] = timer_max;
335 break;
336 }
337 intr_timer[i] += intr_timer[i - 1];
338 intr_timer[i] /= 2;
339 }
340 }
341
342 v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
343 V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
344 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
345 v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
346 V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
347 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
348 v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
349 V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
350 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);
351
352 if (cong_drop == 0) {
353 m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
354 F_TUNNELCNGDROP3;
355 t4_set_reg_field(sc, A_TP_PARA_REG3, m, 0);
356 }
357
358 /* 4K, 16K, 64K, 256K DDP "page sizes" */
359 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
360 t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);
361
362 m = v = F_TDDPTAGTCB;
363 t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);
364
365 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
366 F_RESETDDPOFFSET;
367 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
368 t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
369}
370
371/*
372 * XXX: driver really should be able to deal with unexpected settings.
373 */
374int
375t4_read_chip_settings(struct adapter *sc)
376{
377 struct sge *s = &sc->sge;
378 int i, rc = 0;
379 uint32_t m, v, r;
380 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
381
382 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
383 V_INGPADBOUNDARY(M_INGPADBOUNDARY) | F_EGRSTATUSPAGESIZE;
384 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
385 V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
386 V_EGRSTATUSPAGESIZE(spg_len == 128);
387 r = t4_read_reg(sc, A_SGE_CONTROL);
388 if ((r & m) != v) {
389 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
390 rc = EINVAL;
391 }
392
393 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
394 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
395 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
396 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
397 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
398 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
399 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
400 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
401 r = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
402 if (r != v) {
403 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
404 rc = EINVAL;
405 }
406
407 for (i = 0; i < FL_BUF_SIZES; i++) {
408 v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
409 if (v != FL_BUF_SIZE(i)) {
410 device_printf(sc->dev,
411 "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v);
412 rc = EINVAL;
413 }
414 }
415
416 r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
417 s->counter_val[0] = G_THRESHOLD_0(r);
418 s->counter_val[1] = G_THRESHOLD_1(r);
419 s->counter_val[2] = G_THRESHOLD_2(r);
420 s->counter_val[3] = G_THRESHOLD_3(r);
421
422 r = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
423 s->timer_val[0] = G_TIMERVALUE0(r) / core_ticks_per_usec(sc);
424 s->timer_val[1] = G_TIMERVALUE1(r) / core_ticks_per_usec(sc);
425 r = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
426 s->timer_val[2] = G_TIMERVALUE2(r) / core_ticks_per_usec(sc);
427 s->timer_val[3] = G_TIMERVALUE3(r) / core_ticks_per_usec(sc);
428 r = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
429 s->timer_val[4] = G_TIMERVALUE4(r) / core_ticks_per_usec(sc);
430 s->timer_val[5] = G_TIMERVALUE5(r) / core_ticks_per_usec(sc);
431
432 if (cong_drop == 0) {
433 m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
434 F_TUNNELCNGDROP3;
435 r = t4_read_reg(sc, A_TP_PARA_REG3);
436 if (r & m) {
437 device_printf(sc->dev,
438 "invalid TP_PARA_REG3(0x%x)\n", r);
439 rc = EINVAL;
440 }
441 }
442
443 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
444 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
445 if (r != v) {
446 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
447 rc = EINVAL;
448 }
449
450 m = v = F_TDDPTAGTCB;
451 r = t4_read_reg(sc, A_ULP_RX_CTL);
452 if ((r & m) != v) {
453 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
454 rc = EINVAL;
455 }
456
457 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
458 F_RESETDDPOFFSET;
459 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
460 r = t4_read_reg(sc, A_TP_PARA_REG5);
461 if ((r & m) != v) {
462 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
463 rc = EINVAL;
464 }
465
466 r = t4_read_reg(sc, A_SGE_CONM_CTRL);
467 s->fl_starve_threshold = G_EGRTHRESHOLD(r) * 2 + 1;
468
469 if (is_t5(sc)) {
470 r = t4_read_reg(sc, A_SGE_EGRESS_QUEUES_PER_PAGE_PF);
471 r >>= S_QUEUESPERPAGEPF0 +
472 (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
473 s->s_qpp = r & M_QUEUESPERPAGEPF0;
474 }
475
476 t4_init_tp_params(sc);
477
478 t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
479 t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);
480
481 return (rc);
482}
483
484int
485t4_create_dma_tag(struct adapter *sc)
486{
487 int rc;
488
489 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
490 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
491 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
492 NULL, &sc->dmat);
493 if (rc != 0) {
494 device_printf(sc->dev,
495 "failed to create main DMA tag: %d\n", rc);
496 }
497
498 return (rc);
499}
500
501void
502t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
503 struct sysctl_oid_list *children)
504{
505
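	/*
	 * Read-only nodes reporting the final (validated) tunable values.
	 * With the usual cxgbe device naming these would typically show up
	 * under dev.t4nex.<unit> or dev.t5nex.<unit>; path given for
	 * illustration only.
	 */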
506 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
507 NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");
508
509 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
510 NULL, fl_pad, "payload pad boundary (bytes)");
511
512 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
513 NULL, spg_len, "status page size (bytes)");
514
515 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
516 NULL, cong_drop, "congestion drop setting");
517}
518
519int
520t4_destroy_dma_tag(struct adapter *sc)
521{
522 if (sc->dmat)
523 bus_dma_tag_destroy(sc->dmat);
524
525 return (0);
526}
527
528/*
529 * Allocate and initialize the firmware event queue and the management queue.
530 *
531 * Returns errno on failure. Resources allocated up to that point may still be
532 * allocated. Caller is responsible for cleanup in case this function fails.
533 */
534int
535t4_setup_adapter_queues(struct adapter *sc)
536{
537 int rc;
538
539 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
540
541 sysctl_ctx_init(&sc->ctx);
542 sc->flags |= ADAP_SYSCTL_CTX;
543
544 /*
545 * Firmware event queue
546 */
547 rc = alloc_fwq(sc);
548 if (rc != 0)
549 return (rc);
550
551 /*
552 * Management queue. This is just a control queue that uses the fwq as
553 * its associated iq.
554 */
555 rc = alloc_mgmtq(sc);
556
557 return (rc);
558}
559
560/*
561 * Idempotent
562 */
563int
564t4_teardown_adapter_queues(struct adapter *sc)
565{
566
567 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
568
569 /* Do this before freeing the queue */
570 if (sc->flags & ADAP_SYSCTL_CTX) {
571 sysctl_ctx_free(&sc->ctx);
572 sc->flags &= ~ADAP_SYSCTL_CTX;
573 }
574
575 free_mgmtq(sc);
576 free_fwq(sc);
577
578 return (0);
579}
580
581static inline int
582first_vector(struct port_info *pi)
583{
584 struct adapter *sc = pi->adapter;
585 int rc = T4_EXTRA_INTR, i;
586
587 if (sc->intr_count == 1)
588 return (0);
589
590 for_each_port(sc, i) {
591 struct port_info *p = sc->port[i];
592
593 if (i == pi->port_id)
594 break;
595
596#ifdef TCP_OFFLOAD
597 if (sc->flags & INTR_DIRECT)
598 rc += p->nrxq + p->nofldrxq;
599 else
600 rc += max(p->nrxq, p->nofldrxq);
601#else
602 /*
603 * Not compiled with offload support and intr_count > 1. Only
604 * NIC queues exist and they'd better be taking direct
605 * interrupts.
606 */
607 KASSERT(sc->flags & INTR_DIRECT,
608 ("%s: intr_count %d, !INTR_DIRECT", __func__,
609 sc->intr_count));
610
611 rc += p->nrxq;
612#endif
613 }
614
615 return (rc);
616}
617
618/*
619 * Given an arbitrary "index," come up with an iq that can be used by other
620 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
621 * The iq returned is guaranteed to be something that takes direct interrupts.
622 */
623static struct sge_iq *
624port_intr_iq(struct port_info *pi, int idx)
625{
626 struct adapter *sc = pi->adapter;
627 struct sge *s = &sc->sge;
628 struct sge_iq *iq = NULL;
629
630 if (sc->intr_count == 1)
631 return (&sc->sge.fwq);
632
633#ifdef TCP_OFFLOAD
634 if (sc->flags & INTR_DIRECT) {
635 idx %= pi->nrxq + pi->nofldrxq;
636
637 if (idx >= pi->nrxq) {
638 idx -= pi->nrxq;
639 iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
640 } else
641 iq = &s->rxq[pi->first_rxq + idx].iq;
642
643 } else {
644 idx %= max(pi->nrxq, pi->nofldrxq);
645
646 if (pi->nrxq >= pi->nofldrxq)
647 iq = &s->rxq[pi->first_rxq + idx].iq;
648 else
649 iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
650 }
651#else
652 /*
653 * Not compiled with offload support and intr_count > 1. Only NIC
654 * queues exist and they'd better be taking direct interrupts.
655 */
656 KASSERT(sc->flags & INTR_DIRECT,
657 ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));
658
659 idx %= pi->nrxq;
660 iq = &s->rxq[pi->first_rxq + idx].iq;
661#endif
662
663 KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__));
664 return (iq);
665}
666
667static inline int
668mtu_to_bufsize(int mtu)
669{
670 int bufsize;
671
672 /* large enough for a frame even when VLAN extraction is disabled */
673 bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu;
674 bufsize = roundup2(bufsize + fl_pktshift, fl_pad);
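	/*
	 * fl_pktshift bytes of DMA offset precede the frame in the buffer,
	 * and ingress data is padded to the fl_pad boundary, so the buffer
	 * is sized to cover both.
	 */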
675
676 return (bufsize);
677}
678
679#ifdef TCP_OFFLOAD
680static inline int
681mtu_to_bufsize_toe(struct adapter *sc, int mtu)
682{
683
684 if (sc->tt.rx_coalesce)
685 return (G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)));
686
687 return (mtu);
688}
689#endif
690
691int
692t4_setup_port_queues(struct port_info *pi)
693{
694 int rc = 0, i, j, intr_idx, iqid;
695 struct sge_rxq *rxq;
696 struct sge_txq *txq;
697 struct sge_wrq *ctrlq;
698#ifdef TCP_OFFLOAD
699 struct sge_ofld_rxq *ofld_rxq;
700 struct sge_wrq *ofld_txq;
701 struct sysctl_oid *oid2 = NULL;
702#endif
703 char name[16];
704 struct adapter *sc = pi->adapter;
705 struct ifnet *ifp = pi->ifp;
706 struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
707 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
708 int bufsize;
709
710 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
711 NULL, "rx queues");
712
713#ifdef TCP_OFFLOAD
714 if (is_offload(sc)) {
715 oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
716 CTLFLAG_RD, NULL,
717 "rx queues for offloaded TCP connections");
718 }
719#endif
720
721 /* Interrupt vector to start from (when using multiple vectors) */
722 intr_idx = first_vector(pi);
723
724 /*
725 * First pass over all rx queues (NIC and TOE):
726 * a) initialize iq and fl
727 * b) allocate queue iff it will take direct interrupts.
728 */
729 bufsize = mtu_to_bufsize(ifp->if_mtu);
730 for_each_rxq(pi, i, rxq) {
731
732 init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
733 RX_IQ_ESIZE);
734
735 snprintf(name, sizeof(name), "%s rxq%d-fl",
736 device_get_nameunit(pi->dev), i);
737 init_fl(&rxq->fl, pi->qsize_rxq / 8, bufsize, name);
738
739 if (sc->flags & INTR_DIRECT
740#ifdef TCP_OFFLOAD
741 || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
742#endif
743 ) {
744 rxq->iq.flags |= IQ_INTR;
745 rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
746 if (rc != 0)
747 goto done;
748 intr_idx++;
749 }
750 }
751
752#ifdef TCP_OFFLOAD
753 bufsize = mtu_to_bufsize_toe(sc, ifp->if_mtu);
754 for_each_ofld_rxq(pi, i, ofld_rxq) {
755
756 init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
757 pi->qsize_rxq, RX_IQ_ESIZE);
758
759 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
760 device_get_nameunit(pi->dev), i);
761 init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, bufsize, name);
762
763 if (sc->flags & INTR_DIRECT ||
764 (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
765 ofld_rxq->iq.flags |= IQ_INTR;
766 rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
767 if (rc != 0)
768 goto done;
769 intr_idx++;
770 }
771 }
772#endif
773
774 /*
775 * Second pass over all rx queues (NIC and TOE). The queues forwarding
776 * their interrupts are allocated now.
777 */
778 j = 0;
779 for_each_rxq(pi, i, rxq) {
780 if (rxq->iq.flags & IQ_INTR)
781 continue;
782
783 intr_idx = port_intr_iq(pi, j)->abs_id;
784
785 rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
786 if (rc != 0)
787 goto done;
788 j++;
789 }
790
791#ifdef TCP_OFFLOAD
792 for_each_ofld_rxq(pi, i, ofld_rxq) {
793 if (ofld_rxq->iq.flags & IQ_INTR)
794 continue;
795
796 intr_idx = port_intr_iq(pi, j)->abs_id;
797
798 rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
799 if (rc != 0)
800 goto done;
801 j++;
802 }
803#endif
804
805 /*
806 * Now the tx queues. Only one pass needed.
807 */
808 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
809 NULL, "tx queues");
810 j = 0;
811 for_each_txq(pi, i, txq) {
812 uint16_t iqid;
813
814 iqid = port_intr_iq(pi, j)->cntxt_id;
815
816 snprintf(name, sizeof(name), "%s txq%d",
817 device_get_nameunit(pi->dev), i);
818 init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
819 name);
820
821 rc = alloc_txq(pi, txq, i, oid);
822 if (rc != 0)
823 goto done;
824 j++;
825 }
826
827#ifdef TCP_OFFLOAD
828 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
829 CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
830 for_each_ofld_txq(pi, i, ofld_txq) {
831 uint16_t iqid;
832
833 iqid = port_intr_iq(pi, j)->cntxt_id;
834
835 snprintf(name, sizeof(name), "%s ofld_txq%d",
836 device_get_nameunit(pi->dev), i);
837 init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
838 iqid, name);
839
840 snprintf(name, sizeof(name), "%d", i);
841 oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
842 name, CTLFLAG_RD, NULL, "offload tx queue");
843
844 rc = alloc_wrq(sc, pi, ofld_txq, oid2);
845 if (rc != 0)
846 goto done;
847 j++;
848 }
849#endif
850
851 /*
852 * Finally, the control queue.
853 */
854 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
855 NULL, "ctrl queue");
856 ctrlq = &sc->sge.ctrlq[pi->port_id];
857 iqid = port_intr_iq(pi, 0)->cntxt_id;
858 snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
859 init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
860 rc = alloc_wrq(sc, pi, ctrlq, oid);
861
862done:
863 if (rc)
864 t4_teardown_port_queues(pi);
865
866 return (rc);
867}
868
869/*
870 * Idempotent
871 */
872int
873t4_teardown_port_queues(struct port_info *pi)
874{
875 int i;
876 struct adapter *sc = pi->adapter;
877 struct sge_rxq *rxq;
878 struct sge_txq *txq;
879#ifdef TCP_OFFLOAD
880 struct sge_ofld_rxq *ofld_rxq;
881 struct sge_wrq *ofld_txq;
882#endif
883
884 /* Do this before freeing the queues */
885 if (pi->flags & PORT_SYSCTL_CTX) {
886 sysctl_ctx_free(&pi->ctx);
887 pi->flags &= ~PORT_SYSCTL_CTX;
888 }
889
890 /*
891 * Take down all the tx queues first, as they reference the rx queues
892 * (for egress updates, etc.).
893 */
894
895 free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
896
897 for_each_txq(pi, i, txq) {
898 free_txq(pi, txq);
899 }
900
901#ifdef TCP_OFFLOAD
902 for_each_ofld_txq(pi, i, ofld_txq) {
903 free_wrq(sc, ofld_txq);
904 }
905#endif
906
907 /*
908 * Then take down the rx queues that forward their interrupts, as they
909 * reference other rx queues.
910 */
911
912 for_each_rxq(pi, i, rxq) {
913 if ((rxq->iq.flags & IQ_INTR) == 0)
914 free_rxq(pi, rxq);
915 }
916
917#ifdef TCP_OFFLOAD
918 for_each_ofld_rxq(pi, i, ofld_rxq) {
919 if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
920 free_ofld_rxq(pi, ofld_rxq);
921 }
922#endif
923
924 /*
925 * Then take down the rx queues that take direct interrupts.
926 */
927
928 for_each_rxq(pi, i, rxq) {
929 if (rxq->iq.flags & IQ_INTR)
930 free_rxq(pi, rxq);
931 }
932
933#ifdef TCP_OFFLOAD
934 for_each_ofld_rxq(pi, i, ofld_rxq) {
935 if (ofld_rxq->iq.flags & IQ_INTR)
936 free_ofld_rxq(pi, ofld_rxq);
937 }
938#endif
939
940 return (0);
941}
942
943/*
944 * Deals with errors and the firmware event queue. All data rx queues forward
945 * their interrupt to the firmware event queue.
946 */
947void
948t4_intr_all(void *arg)
949{
950 struct adapter *sc = arg;
951 struct sge_iq *fwq = &sc->sge.fwq;
952
953 t4_intr_err(arg);
954 if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
955 service_iq(fwq, 0);
956 atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
957 }
958}
959
960/* Deals with error interrupts */
961void
962t4_intr_err(void *arg)
963{
964 struct adapter *sc = arg;
965
966 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
967 t4_slow_intr_handler(sc);
968}
969
970void
971t4_intr_evt(void *arg)
972{
973 struct sge_iq *iq = arg;
974
975 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
976 service_iq(iq, 0);
977 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
978 }
979}
980
981void
982t4_intr(void *arg)
983{
984 struct sge_iq *iq = arg;
985
986 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
987 service_iq(iq, 0);
988 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
989 }
990}
991
992/*
993 * Deals with anything and everything on the given ingress queue.
994 */
995static int
996service_iq(struct sge_iq *iq, int budget)
997{
998 struct sge_iq *q;
999 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
1000 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
1001 struct adapter *sc = iq->adapter;
1002 struct rsp_ctrl *ctrl;
1003 const struct rss_header *rss;
1004 int ndescs = 0, limit, fl_bufs_used = 0;
1005 int rsp_type;
1006 uint32_t lq;
1007 struct mbuf *m0;
1008 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
1009
1010 limit = budget ? budget : iq->qsize / 8;
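	/*
	 * A non-zero budget presumably caps the work done in this call;
	 * callers on the interrupt path pass 0 and get a default limit of
	 * one-eighth of the queue size.
	 */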
1011
1012 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
1013
1014 /*
1015 * We always come back and check the descriptor ring for new indirect
1016 * interrupts and other responses after running a single handler.
1017 */
1018 for (;;) {
1019 while (is_new_response(iq, &ctrl)) {
1020
1021 rmb();
1022
1023 m0 = NULL;
1024 rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
1025 lq = be32toh(ctrl->pldbuflen_qid);
1026 rss = (const void *)iq->cdesc;
1027
1028 switch (rsp_type) {
1029 case X_RSPD_TYPE_FLBUF:
1030
1031 KASSERT(iq->flags & IQ_HAS_FL,
1032 ("%s: data for an iq (%p) with no freelist",
1033 __func__, iq));
1034
1035 m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
1036#ifdef T4_PKT_TIMESTAMP
1037 /*
1038 * 60 bit timestamp for the payload is
1039 * *(uint64_t *)m0->m_pktdat. Note that it is
1040 * in the leading free-space in the mbuf. The
1041 * kernel can clobber it during a pullup,
1042 * m_copymdata, etc. You need to make sure that
1043 * the mbuf reaches you unmolested if you care
1044 * about the timestamp.
1045 */
1046 *(uint64_t *)m0->m_pktdat =
1047 be64toh(ctrl->u.last_flit) &
1048 0xfffffffffffffff;
1049#endif
1050
1051 /* fall through */
1052
1053 case X_RSPD_TYPE_CPL:
1054 KASSERT(rss->opcode < NUM_CPL_CMDS,
1055 ("%s: bad opcode %02x.", __func__,
1056 rss->opcode));
1057 sc->cpl_handler[rss->opcode](iq, rss, m0);
1058 break;
1059
1060 case X_RSPD_TYPE_INTR:
1061
1062 /*
1063 * Interrupts should be forwarded only to queues
1064 * that are not forwarding their interrupts.
1065 * This means service_iq can recurse but only 1
1066 * level deep.
1067 */
1068 KASSERT(budget == 0,
1069 ("%s: budget %u, rsp_type %u", __func__,
1070 budget, rsp_type));
1071
957 }
958}
959
960/* Deals with error interrupts */
961void
962t4_intr_err(void *arg)
963{
964 struct adapter *sc = arg;
965
966 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
967 t4_slow_intr_handler(sc);
968}
969
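/*
 * t4_intr_evt and t4_intr are identical here: the queue is claimed with an
 * IDLE -> BUSY transition, serviced to completion, and marked idle again.
 * t4_intr_evt is intended for event queues and t4_intr for the rest.
 */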
970void
971t4_intr_evt(void *arg)
972{
973 struct sge_iq *iq = arg;
974
975 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
976 service_iq(iq, 0);
977 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
978 }
979}
980
981void
982t4_intr(void *arg)
983{
984 struct sge_iq *iq = arg;
985
986 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
987 service_iq(iq, 0);
988 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
989 }
990}
991
992/*
993 * Deals with anything and everything on the given ingress queue.
994 */
995static int
996service_iq(struct sge_iq *iq, int budget)
997{
998 struct sge_iq *q;
999 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */
1000 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
1001 struct adapter *sc = iq->adapter;
1002 struct rsp_ctrl *ctrl;
1003 const struct rss_header *rss;
1004 int ndescs = 0, limit, fl_bufs_used = 0;
1005 int rsp_type;
1006 uint32_t lq;
1007 struct mbuf *m0;
1008 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
1009
1010 limit = budget ? budget : iq->qsize / 8;
1011
1012 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
1013
1014 /*
1015 * We always come back and check the descriptor ring for new indirect
1016 * interrupts and other responses after running a single handler.
1017 */
1018 for (;;) {
1019 while (is_new_response(iq, &ctrl)) {
1020
1021 rmb();
1022
1023 m0 = NULL;
1024 rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
1025 lq = be32toh(ctrl->pldbuflen_qid);
1026 rss = (const void *)iq->cdesc;
1027
1028 switch (rsp_type) {
1029 case X_RSPD_TYPE_FLBUF:
1030
1031 KASSERT(iq->flags & IQ_HAS_FL,
1032 ("%s: data for an iq (%p) with no freelist",
1033 __func__, iq));
1034
1035 m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
1036#ifdef T4_PKT_TIMESTAMP
1037 /*
1038 * 60 bit timestamp for the payload is
1039 * *(uint64_t *)m0->m_pktdat. Note that it is
1040 * in the leading free-space in the mbuf. The
1041 * kernel can clobber it during a pullup,
1042 * m_copymdata, etc. You need to make sure that
1043 * the mbuf reaches you unmolested if you care
1044 * about the timestamp.
1045 */
1046 *(uint64_t *)m0->m_pktdat =
1047 be64toh(ctrl->u.last_flit) &
1048 0xfffffffffffffff;
1049#endif
1050
1051 /* fall through */
1052
1053 case X_RSPD_TYPE_CPL:
1054 KASSERT(rss->opcode < NUM_CPL_CMDS,
1055 ("%s: bad opcode %02x.", __func__,
1056 rss->opcode));
1057 sc->cpl_handler[rss->opcode](iq, rss, m0);
1058 break;
1059
1060 case X_RSPD_TYPE_INTR:
1061
1062 /*
1063 * Interrupts should be forwarded only to queues
1064 * that are not forwarding their interrupts.
1065 * This means service_iq can recurse but only 1
1066 * level deep.
1067 */
1068 KASSERT(budget == 0,
1069 ("%s: budget %u, rsp_type %u", __func__,
1070 budget, rsp_type));
1071
1072 /*
1073 * There are 1K interrupt-capable queues (qids 0
1074 * through 1023). A response type indicating a
1075 * forwarded interrupt with a qid >= 1K is an
1076 * iWARP async notification.
1077 */
1078 if (lq >= 1024) {
1079 sc->an_handler(iq, ctrl);
1080 break;
1081 }
1082
1083			q = sc->sge.iqmap[lq - sc->sge.iq_start];
1084			if (atomic_cmpset_int(&q->state, IQS_IDLE,
1085			    IQS_BUSY)) {
1086				if (service_iq(q, q->qsize / 8) == 0) {
1087					atomic_cmpset_int(&q->state,
1088					    IQS_BUSY, IQS_IDLE);
1089				} else {
1090					STAILQ_INSERT_TAIL(&iql, q,
1091					    link);
1092				}
1093			}
1094				break;
1095
1096			default:
1097 KASSERT(0,
1098 ("%s: illegal response type %d on iq %p",
1099 __func__, rsp_type, iq));
1100 log(LOG_ERR,
1101 "%s: illegal response type %d on iq %p",
1102 device_get_nameunit(sc->dev), rsp_type, iq);
1087 break;
1088 }
1089
1090 iq_next(iq);
1091 if (++ndescs == limit) {
1092 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
1093 V_CIDXINC(ndescs) |
1094 V_INGRESSQID(iq->cntxt_id) |
1095 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
1096 ndescs = 0;
1097
1098 if (fl_bufs_used > 0) {
1099 FL_LOCK(fl);
1100 fl->needed += fl_bufs_used;
1101 refill_fl(sc, fl, fl->cap / 8);
1102 FL_UNLOCK(fl);
1103 fl_bufs_used = 0;
1104 }
1105
1106 if (budget)
1107 return (EINPROGRESS);
1108 }
1109 }
1110
1111 if (STAILQ_EMPTY(&iql))
1112 break;
1113
1114 /*
1115 * Process the head only, and send it to the back of the list if
1116 * it's still not done.
1117 */
1118 q = STAILQ_FIRST(&iql);
1119 STAILQ_REMOVE_HEAD(&iql, link);
1120 if (service_iq(q, q->qsize / 8) == 0)
1121 atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
1122 else
1123 STAILQ_INSERT_TAIL(&iql, q, link);
1124 }
1125
1126#if defined(INET) || defined(INET6)
1127 if (iq->flags & IQ_LRO_ENABLED) {
1128 struct lro_ctrl *lro = &rxq->lro;
1129 struct lro_entry *l;
1130
1131 while (!SLIST_EMPTY(&lro->lro_active)) {
1132 l = SLIST_FIRST(&lro->lro_active);
1133 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1134 tcp_lro_flush(lro, l);
1135 }
1136 }
1137#endif
1138
1139 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
1140 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
1141
1142 if (iq->flags & IQ_HAS_FL) {
1143 int starved;
1144
1145 FL_LOCK(fl);
1146 fl->needed += fl_bufs_used;
1147 starved = refill_fl(sc, fl, fl->cap / 4);
1148 FL_UNLOCK(fl);
1149 if (__predict_false(starved != 0))
1150 add_fl_to_sfl(sc, fl);
1151 }
1152
1153 return (0);
1154}
1155
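/*
 * Assembles the payload described by len_newbuf into an mbuf chain.  Data
 * short enough to fit in an mbuf (RX_COPY_THRESHOLD for the first buffer,
 * MLEN for trailing ones) is copied so that the cluster can be recycled;
 * larger buffers are attached to the mbufs as external storage.
 * *fl_bufs_used is incremented by the number of fl buffers consumed.
 */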
1156static struct mbuf *
1157get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
1158 int *fl_bufs_used)
1159{
1160 struct mbuf *m0, *m;
1161 struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1162 unsigned int nbuf, len;
1163
1164 /*
1165 * No assertion for the fl lock because we don't need it. This routine
1166 * is called only from the rx interrupt handler and it only updates
1167 * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be
1168 * updated in the rx interrupt handler or the starvation helper routine.
1169 * That's why code that manipulates fl->pidx/fl->needed needs the fl
1170 * lock but this routine does not).
1171 */
1172
1173 if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
1174 panic("%s: cannot handle packed frames", __func__);
1175 len = G_RSPD_LEN(len_newbuf);
1176
1177 m0 = sd->m;
1178 sd->m = NULL; /* consumed */
1179
1180 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
1181 m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
1182#ifdef T4_PKT_TIMESTAMP
1183 /* Leave room for a timestamp */
1184 m0->m_data += 8;
1185#endif
1186
1187 if (len < RX_COPY_THRESHOLD) {
1188 /* copy data to mbuf, buffer will be recycled */
1189 bcopy(sd->cl, mtod(m0, caddr_t), len);
1190 m0->m_len = len;
1191 } else {
1192 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
1193 m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
1194 sd->cl = NULL; /* consumed */
1195 m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
1196 }
1197 m0->m_pkthdr.len = len;
1198
1199 sd++;
1200 if (__predict_false(++fl->cidx == fl->cap)) {
1201 sd = fl->sdesc;
1202 fl->cidx = 0;
1203 }
1204
1205 m = m0;
1206 len -= m->m_len;
1207 nbuf = 1; /* # of fl buffers used */
1208
1209 while (len > 0) {
1210 m->m_next = sd->m;
1211 sd->m = NULL; /* consumed */
1212 m = m->m_next;
1213
1214 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
1215 BUS_DMASYNC_POSTREAD);
1216
1217 m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1218 if (len <= MLEN) {
1219 bcopy(sd->cl, mtod(m, caddr_t), len);
1220 m->m_len = len;
1221 } else {
1222 bus_dmamap_unload(fl->tag[sd->tag_idx],
1223 sd->map);
1224 m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
1225 sd->cl = NULL; /* consumed */
1226 m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
1227 }
1228
1229 sd++;
1230 if (__predict_false(++fl->cidx == fl->cap)) {
1231 sd = fl->sdesc;
1232 fl->cidx = 0;
1233 }
1234
1235 len -= m->m_len;
1236 nbuf++;
1237 }
1238
1239 (*fl_bufs_used) += nbuf;
1240
1241 return (m0);
1242}
1243
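/*
 * Handler for CPL_RX_PKT: strips the fl_pktshift padding, fills in the rx
 * checksum, flowid, and VLAN metadata, and hands the frame to LRO when
 * eligible or straight to the ifnet's input routine otherwise.
 */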
1244static int
1245t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
1246{
1247 struct sge_rxq *rxq = iq_to_rxq(iq);
1248 struct ifnet *ifp = rxq->ifp;
1249 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
1250#if defined(INET) || defined(INET6)
1251 struct lro_ctrl *lro = &rxq->lro;
1252#endif
1253
1254 KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
1255 rss->opcode));
1256
1257 m0->m_pkthdr.len -= fl_pktshift;
1258 m0->m_len -= fl_pktshift;
1259 m0->m_data += fl_pktshift;
1260
1261 m0->m_pkthdr.rcvif = ifp;
1262 m0->m_flags |= M_FLOWID;
1263 m0->m_pkthdr.flowid = rss->hash_val;
1264
1265 if (cpl->csum_calc && !cpl->err_vec) {
1266 if (ifp->if_capenable & IFCAP_RXCSUM &&
1267 cpl->l2info & htobe32(F_RXF_IP)) {
1268 m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
1269 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1270 rxq->rxcsum++;
1271 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
1272 cpl->l2info & htobe32(F_RXF_IP6)) {
1273 m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
1274 CSUM_PSEUDO_HDR);
1275 rxq->rxcsum++;
1276 }
1277
1278 if (__predict_false(cpl->ip_frag))
1279 m0->m_pkthdr.csum_data = be16toh(cpl->csum);
1280 else
1281 m0->m_pkthdr.csum_data = 0xffff;
1282 }
1283
1284 if (cpl->vlan_ex) {
1285 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
1286 m0->m_flags |= M_VLANTAG;
1287 rxq->vlan_extraction++;
1288 }
1289
1290#if defined(INET) || defined(INET6)
1291 if (cpl->l2info & htobe32(F_RXF_LRO) &&
1292 iq->flags & IQ_LRO_ENABLED &&
1293 tcp_lro_rx(lro, m0, 0) == 0) {
1294 /* queued for LRO */
1295 } else
1296#endif
1297 ifp->if_input(ifp, m0);
1298
1299 return (0);
1300}
1301
1302/*
1303 * Doesn't fail. Holds on to work requests it can't send right away.
1304 */
1305void
1306t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
1307{
1308 struct sge_eq *eq = &wrq->eq;
1309 int can_reclaim;
1310 caddr_t dst;
1311
1312 TXQ_LOCK_ASSERT_OWNED(wrq);
1313#ifdef TCP_OFFLOAD
1314 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
1315 (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1316 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1317#else
1318 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1319 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1320#endif
1321
1322 if (__predict_true(wr != NULL))
1323 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
1324
1325 can_reclaim = reclaimable(eq);
1326 if (__predict_false(eq->flags & EQ_STALLED)) {
1327 if (can_reclaim < tx_resume_threshold(eq))
1328 return;
1329 eq->flags &= ~EQ_STALLED;
1330 eq->unstalled++;
1331 }
1332 eq->cidx += can_reclaim;
1333 eq->avail += can_reclaim;
1334 if (__predict_false(eq->cidx >= eq->cap))
1335 eq->cidx -= eq->cap;
1336
1337 while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
1338 int ndesc;
1339
1340 if (__predict_false(wr->wr_len < 0 ||
1341 wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {
1342
1343#ifdef INVARIANTS
1344 panic("%s: work request with length %d", __func__,
1345 wr->wr_len);
1346#endif
1347#ifdef KDB
1348 kdb_backtrace();
1349#endif
1350 log(LOG_ERR, "%s: %s work request with length %d",
1351 device_get_nameunit(sc->dev), __func__, wr->wr_len);
1352 STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1353 free_wrqe(wr);
1354 continue;
1355 }
1356
1357 ndesc = howmany(wr->wr_len, EQ_ESIZE);
1358 if (eq->avail < ndesc) {
1359 wrq->no_desc++;
1360 break;
1361 }
1362
1363 dst = (void *)&eq->desc[eq->pidx];
1364 copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);
1365
1366 eq->pidx += ndesc;
1367 eq->avail -= ndesc;
1368 if (__predict_false(eq->pidx >= eq->cap))
1369 eq->pidx -= eq->cap;
1370
1371 eq->pending += ndesc;
1372 if (eq->pending >= 8)
1373 ring_eq_db(sc, eq);
1374
1375 wrq->tx_wrs++;
1376 STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1377 free_wrqe(wr);
1378
1379 if (eq->avail < 8) {
1380 can_reclaim = reclaimable(eq);
1381 eq->cidx += can_reclaim;
1382 eq->avail += can_reclaim;
1383 if (__predict_false(eq->cidx >= eq->cap))
1384 eq->cidx -= eq->cap;
1385 }
1386 }
1387
1388 if (eq->pending)
1389 ring_eq_db(sc, eq);
1390
1391 if (wr != NULL) {
1392 eq->flags |= EQ_STALLED;
1393 if (callout_pending(&eq->tx_callout) == 0)
1394 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1395 }
1396}
1397
1398/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
1399#define TXPKTS_PKT_HDR ((\
1400 sizeof(struct ulp_txpkt) + \
1401 sizeof(struct ulptx_idata) + \
1402 sizeof(struct cpl_tx_pkt_core) \
1403 ) / 8)
1404
1405/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
1406#define TXPKTS_WR_HDR (\
1407 sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
1408 TXPKTS_PKT_HDR)
1409
1410/* Header of a tx WR, before SGL of first packet (in flits) */
1411#define TXPKT_WR_HDR ((\
1412 sizeof(struct fw_eth_tx_pkt_wr) + \
1413 sizeof(struct cpl_tx_pkt_core) \
1414 ) / 8 )
1415
1416/* Header of a tx LSO WR, before SGL of first packet (in flits) */
1417#define TXPKT_LSO_WR_HDR ((\
1418 sizeof(struct fw_eth_tx_pkt_wr) + \
1419 sizeof(struct cpl_tx_pkt_lso_core) + \
1420 sizeof(struct cpl_tx_pkt_core) \
1421 ) / 8 )
1422
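/*
 * Transmits the given frame and then as much of the tx buf_ring as it can,
 * coalescing frames into txpkts work requests whenever possible.  Anything
 * that can't be sent right away is held in txq->m or left on the buf_ring,
 * and the tx callout is scheduled if the queue stalled.  Returns ENETDOWN if
 * the queue is being torn down, 0 otherwise.
 */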
1423int
1424t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
1425{
1426 struct port_info *pi = (void *)ifp->if_softc;
1427 struct adapter *sc = pi->adapter;
1428 struct sge_eq *eq = &txq->eq;
1429 struct buf_ring *br = txq->br;
1430 struct mbuf *next;
1431 int rc, coalescing, can_reclaim;
1432 struct txpkts txpkts;
1433 struct sgl sgl;
1434
1435 TXQ_LOCK_ASSERT_OWNED(txq);
1436 KASSERT(m, ("%s: called with nothing to do.", __func__));
1437 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
1438 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1439
1440 prefetch(&eq->desc[eq->pidx]);
1441 prefetch(&txq->sdesc[eq->pidx]);
1442
1443 txpkts.npkt = 0;/* indicates there's nothing in txpkts */
1444 coalescing = 0;
1445
1446 can_reclaim = reclaimable(eq);
1447 if (__predict_false(eq->flags & EQ_STALLED)) {
1448 if (can_reclaim < tx_resume_threshold(eq)) {
1449 txq->m = m;
1450 return (0);
1451 }
1452 eq->flags &= ~EQ_STALLED;
1453 eq->unstalled++;
1454 }
1455
1456 if (__predict_false(eq->flags & EQ_DOOMED)) {
1457 m_freem(m);
1458 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
1459 m_freem(m);
1460 return (ENETDOWN);
1461 }
1462
1463 if (eq->avail < 8 && can_reclaim)
1464 reclaim_tx_descs(txq, can_reclaim, 32);
1465
1466 for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
1467
1468 if (eq->avail < 8)
1469 break;
1470
1471 next = m->m_nextpkt;
1472 m->m_nextpkt = NULL;
1473
1474 if (next || buf_ring_peek(br))
1475 coalescing = 1;
1476
1477 rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
1478 if (rc != 0) {
1479 if (rc == ENOMEM) {
1480
1481 /* Short of resources, suspend tx */
1482
1483 m->m_nextpkt = next;
1484 break;
1485 }
1486
1487 /*
1488 * Unrecoverable error for this packet, throw it away
1489 * and move on to the next. get_pkt_sgl may already
1490 * have freed m (it will be NULL in that case and the
1491 * m_freem here is still safe).
1492 */
1493
1494 m_freem(m);
1495 continue;
1496 }
1497
1498 if (coalescing &&
1499 add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
1500
1501 /* Successfully absorbed into txpkts */
1502
1503 write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
1504 goto doorbell;
1505 }
1506
1507 /*
1508 * We weren't coalescing to begin with, or current frame could
1509 * not be coalesced (add_to_txpkts flushes txpkts if a frame
1510 * given to it can't be coalesced). Either way there should be
1511 * nothing in txpkts.
1512 */
1513 KASSERT(txpkts.npkt == 0,
1514 ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
1515
1516 /* We're sending out individual packets now */
1517 coalescing = 0;
1518
1519 if (eq->avail < 8)
1520 reclaim_tx_descs(txq, 0, 8);
1521 rc = write_txpkt_wr(pi, txq, m, &sgl);
1522 if (rc != 0) {
1523
1524 /* Short of hardware descriptors, suspend tx */
1525
1526 /*
1527 * This is an unlikely but expensive failure. We've
1528 * done all the hard work (DMA mappings etc.) and now we
1529 * can't send out the packet. What's worse, we have to
1530 * spend even more time freeing up everything in sgl.
1531 */
1532 txq->no_desc++;
1533 free_pkt_sgl(txq, &sgl);
1534
1535 m->m_nextpkt = next;
1536 break;
1537 }
1538
1539 ETHER_BPF_MTAP(ifp, m);
1540 if (sgl.nsegs == 0)
1541 m_freem(m);
1542doorbell:
1543 if (eq->pending >= 8)
1544 ring_eq_db(sc, eq);
1545
1546 can_reclaim = reclaimable(eq);
1547 if (can_reclaim >= 32)
1548 reclaim_tx_descs(txq, can_reclaim, 64);
1549 }
1550
1551 if (txpkts.npkt > 0)
1552 write_txpkts_wr(txq, &txpkts);
1553
1554 /*
1555 * m not NULL means there was an error but we haven't thrown it away.
1556 * This can happen when we're short of tx descriptors (no_desc) or maybe
1557 * even DMA maps (no_dmamap). Either way, a credit flush and reclaim
1558 * will get things going again.
1559 */
1560 if (m && !(eq->flags & EQ_CRFLUSHED)) {
1561 struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
1562
1563 /*
1564 * If EQ_CRFLUSHED is not set then we know we have at least one
1565 * available descriptor because any WR that reduces eq->avail to
1566 * 0 also sets EQ_CRFLUSHED.
1567 */
1568 KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));
1569
1570 txsd->desc_used = 1;
1571 txsd->credits = 0;
1572 write_eqflush_wr(eq);
1573 }
1574 txq->m = m;
1575
1576 if (eq->pending)
1577 ring_eq_db(sc, eq);
1578
1579 reclaim_tx_descs(txq, 0, 128);
1580
1581 if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
1582 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1583
1584 return (0);
1585}
1586
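/*
 * Recalculates the fl buffer size for every rx queue of the interface (NIC
 * and TOE) after an MTU change and updates each free list's tag index to
 * match.
 */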
1587void
1588t4_update_fl_bufsize(struct ifnet *ifp)
1589{
1590 struct port_info *pi = ifp->if_softc;
1591 struct sge_rxq *rxq;
1592#ifdef TCP_OFFLOAD
1593 struct sge_ofld_rxq *ofld_rxq;
1594#endif
1595 struct sge_fl *fl;
1596 int i, bufsize;
1597
1598 bufsize = mtu_to_bufsize(ifp->if_mtu);
1599 for_each_rxq(pi, i, rxq) {
1600 fl = &rxq->fl;
1601
1602 FL_LOCK(fl);
1603 set_fl_tag_idx(fl, bufsize);
1604 FL_UNLOCK(fl);
1605 }
1606#ifdef TCP_OFFLOAD
1607 bufsize = mtu_to_bufsize_toe(pi->adapter, ifp->if_mtu);
1608 for_each_ofld_rxq(pi, i, ofld_rxq) {
1609 fl = &ofld_rxq->fl;
1610
1611 FL_LOCK(fl);
1612 set_fl_tag_idx(fl, bufsize);
1613 FL_UNLOCK(fl);
1614 }
1615#endif
1616}
1617
1618int
1619can_resume_tx(struct sge_eq *eq)
1620{
1621 return (reclaimable(eq) >= tx_resume_threshold(eq));
1622}
1623
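/*
 * init_iq, init_fl, and init_eq fill in only the software state of a queue;
 * the hardware resources are allocated later (see alloc_iq_fl and alloc_eq).
 */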
1624static inline void
1625init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
1626 int qsize, int esize)
1627{
1628 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
1629 ("%s: bad tmr_idx %d", __func__, tmr_idx));
1630 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */
1631 ("%s: bad pktc_idx %d", __func__, pktc_idx));
1632
1633 iq->flags = 0;
1634 iq->adapter = sc;
1635 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
1636 iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
1637 if (pktc_idx >= 0) {
1638 iq->intr_params |= F_QINTR_CNT_EN;
1639 iq->intr_pktc_idx = pktc_idx;
1640 }
1641 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */
1642 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */
1643}
1644
1645static inline void
1646init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name)
1647{
1648 fl->qsize = qsize;
1649 strlcpy(fl->lockname, name, sizeof(fl->lockname));
1650 set_fl_tag_idx(fl, bufsize);
1651}
1652
1653static inline void
1654init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
1655 uint16_t iqid, char *name)
1656{
1657 KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
1658 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));
1659
1660 eq->flags = eqtype & EQ_TYPEMASK;
1661 eq->tx_chan = tx_chan;
1662 eq->iqid = iqid;
1663 eq->qsize = qsize;
1664 strlcpy(eq->lockname, name, sizeof(eq->lockname));
1665
1666 TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
1667 callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
1668}
1669
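/*
 * Allocates a DMA-able descriptor ring: creates a tag for a single
 * contiguous segment of len bytes, allocates zeroed coherent memory for it,
 * and loads the map to obtain the ring's bus address.  Everything is undone
 * on failure.
 */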
1670static int
1671alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
1672 bus_dmamap_t *map, bus_addr_t *pa, void **va)
1673{
1674 int rc;
1675
1676 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
1677 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
1678 if (rc != 0) {
1679 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
1680 goto done;
1681 }
1682
1683 rc = bus_dmamem_alloc(*tag, va,
1684 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
1685 if (rc != 0) {
1686 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
1687 goto done;
1688 }
1689
1690 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
1691 if (rc != 0) {
1692 device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
1693 goto done;
1694 }
1695done:
1696 if (rc)
1697 free_ring(sc, *tag, *map, *pa, *va);
1698
1699 return (rc);
1700}
1701
1702static int
1703free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
1704 bus_addr_t pa, void *va)
1705{
1706 if (pa)
1707 bus_dmamap_unload(tag, map);
1708 if (va)
1709 bus_dmamem_free(tag, va, map);
1710 if (tag)
1711 bus_dma_tag_destroy(tag);
1712
1713 return (0);
1714}
1715
1716/*
1717 * Allocates the ring for an ingress queue and an optional freelist. If the
1718 * freelist is specified it will be allocated and then associated with the
1719 * ingress queue.
1720 *
1721 * Returns errno on failure. Resources allocated up to that point may still be
1722 * allocated. Caller is responsible for cleanup in case this function fails.
1723 *
1724 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
1725 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies
1726 * the abs_id of the ingress queue to which its interrupts should be forwarded.
1727 */
1728static int
1729alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1730 int intr_idx, int cong)
1731{
1732 int rc, i, cntxt_id;
1733 size_t len;
1734 struct fw_iq_cmd c;
1735 struct adapter *sc = iq->adapter;
1736 __be32 v = 0;
1737
1738 len = iq->qsize * iq->esize;
1739 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
1740 (void **)&iq->desc);
1741 if (rc != 0)
1742 return (rc);
1743
1744 bzero(&c, sizeof(c));
1745 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1746 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1747 V_FW_IQ_CMD_VFN(0));
1748
1749 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1750 FW_LEN16(c));
1751
1752 /* Special handling for firmware event queue */
1753 if (iq == &sc->sge.fwq)
1754 v |= F_FW_IQ_CMD_IQASYNCH;
1755
1756 if (iq->flags & IQ_INTR) {
1757 KASSERT(intr_idx < sc->intr_count,
1758 ("%s: invalid direct intr_idx %d", __func__, intr_idx));
1759 } else
1760 v |= F_FW_IQ_CMD_IQANDST;
1761 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1762
1763 c.type_to_iqandstindex = htobe32(v |
1764 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1765 V_FW_IQ_CMD_VIID(pi->viid) |
1766 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1767 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1768 F_FW_IQ_CMD_IQGTSMODE |
1769 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
1770 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1771 c.iqsize = htobe16(iq->qsize);
1772 c.iqaddr = htobe64(iq->ba);
1773 if (cong >= 0)
1774 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
1775
1776 if (fl) {
1777 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
1778
1779 for (i = 0; i < FL_BUF_SIZES; i++) {
1780
1781 /*
1782 * A freelist buffer must be 16 byte aligned as the SGE
1783 * uses the low 4 bits of the bus addr to figure out the
1784 * buffer size.
1785 */
1786 rc = bus_dma_tag_create(sc->dmat, 16, 0,
1787 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1788 FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
1789 NULL, NULL, &fl->tag[i]);
1790 if (rc != 0) {
1791 device_printf(sc->dev,
1792 "failed to create fl DMA tag[%d]: %d\n",
1793 i, rc);
1794 return (rc);
1795 }
1796 }
1797 len = fl->qsize * RX_FL_ESIZE;
1798 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
1799 &fl->ba, (void **)&fl->desc);
1800 if (rc)
1801 return (rc);
1802
1803 /* Allocate space for one software descriptor per buffer. */
1804 fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
1805 rc = alloc_fl_sdesc(fl);
1806 if (rc != 0) {
1807 device_printf(sc->dev,
1808 "failed to setup fl software descriptors: %d\n",
1809 rc);
1810 return (rc);
1811 }
1812 fl->needed = fl->cap;
1813 fl->lowat = roundup2(sc->sge.fl_starve_threshold, 8);
1814
1815 c.iqns_to_fl0congen |=
1816 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
1817 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
1818 F_FW_IQ_CMD_FL0PADEN);
1819 if (cong >= 0) {
1820 c.iqns_to_fl0congen |=
1821 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
1822 F_FW_IQ_CMD_FL0CONGCIF |
1823 F_FW_IQ_CMD_FL0CONGEN);
1824 }
1825 c.fl0dcaen_to_fl0cidxfthresh =
1826 htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
1827 V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
1828 c.fl0size = htobe16(fl->qsize);
1829 c.fl0addr = htobe64(fl->ba);
1830 }
1831
1832 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1833 if (rc != 0) {
1834 device_printf(sc->dev,
1835 "failed to create ingress queue: %d\n", rc);
1836 return (rc);
1837 }
1838
1839 iq->cdesc = iq->desc;
1840 iq->cidx = 0;
1841 iq->gen = 1;
1842 iq->intr_next = iq->intr_params;
1843 iq->cntxt_id = be16toh(c.iqid);
1844 iq->abs_id = be16toh(c.physiqid);
1845 iq->flags |= IQ_ALLOCATED;
1846
1847 cntxt_id = iq->cntxt_id - sc->sge.iq_start;
1848 if (cntxt_id >= sc->sge.niq) {
1849 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
1850 cntxt_id, sc->sge.niq - 1);
1851 }
1852 sc->sge.iqmap[cntxt_id] = iq;
1853
1854 if (fl) {
1855 fl->cntxt_id = be16toh(c.fl0id);
1856 fl->pidx = fl->cidx = 0;
1857
1858 cntxt_id = fl->cntxt_id - sc->sge.eq_start;
1859 if (cntxt_id >= sc->sge.neq) {
1860 panic("%s: fl->cntxt_id (%d) more than the max (%d)",
1861 __func__, cntxt_id, sc->sge.neq - 1);
1862 }
1863 sc->sge.eqmap[cntxt_id] = (void *)fl;
1864
1865 FL_LOCK(fl);
1866 /* Enough to make sure the SGE doesn't think it's starved */
1867 refill_fl(sc, fl, fl->lowat);
1868 FL_UNLOCK(fl);
1869
1870 iq->flags |= IQ_HAS_FL;
1871 }
1872
1873 if (is_t5(sc) && cong >= 0) {
1874 uint32_t param, val;
1875
1876 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1877 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
1878 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
1879 if (cong == 0)
1880 val = 1 << 19;
1881 else {
1882 val = 2 << 19;
1883 for (i = 0; i < 4; i++) {
1884 if (cong & (1 << i))
1885 val |= 1 << (i << 2);
1886 }
1887 }
1888
1889 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
1890 if (rc != 0) {
1891 /* report error but carry on */
1892 device_printf(sc->dev,
1893 "failed to set congestion manager context for "
1894 "ingress queue %d: %d\n", iq->cntxt_id, rc);
1895 }
1896 }
1897
1898 /* Enable IQ interrupts */
1899 atomic_store_rel_int(&iq->state, IQS_IDLE);
1900 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
1901 V_INGRESSQID(iq->cntxt_id));
1902
1903 return (0);
1904}
1905
1906static int
1907free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1908{
1909 int i, rc;
1910 struct adapter *sc = iq->adapter;
1911 device_t dev;
1912
1913 if (sc == NULL)
1914 return (0); /* nothing to do */
1915
1916 dev = pi ? pi->dev : sc->dev;
1917
1918 if (iq->flags & IQ_ALLOCATED) {
1919 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1920 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1921 fl ? fl->cntxt_id : 0xffff, 0xffff);
1922 if (rc != 0) {
1923 device_printf(dev,
1924 "failed to free queue %p: %d\n", iq, rc);
1925 return (rc);
1926 }
1927 iq->flags &= ~IQ_ALLOCATED;
1928 }
1929
1930 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
1931
1932 bzero(iq, sizeof(*iq));
1933
1934 if (fl) {
1935 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
1936 fl->desc);
1937
1938 if (fl->sdesc)
1939 free_fl_sdesc(fl);
1940
1941 if (mtx_initialized(&fl->fl_lock))
1942 mtx_destroy(&fl->fl_lock);
1943
1944 for (i = 0; i < FL_BUF_SIZES; i++) {
1945 if (fl->tag[i])
1946 bus_dma_tag_destroy(fl->tag[i]);
1947 }
1948
1949 bzero(fl, sizeof(*fl));
1950 }
1951
1952 return (0);
1953}
1954
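/*
 * Allocates the firmware event queue.  It always takes its interrupts
 * directly, on vector 1 if the adapter has multiple vectors and on vector 0
 * otherwise.
 */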
1955static int
1956alloc_fwq(struct adapter *sc)
1957{
1958 int rc, intr_idx;
1959 struct sge_iq *fwq = &sc->sge.fwq;
1960 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
1961 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
1962
1963 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
1964 fwq->flags |= IQ_INTR; /* always */
1965 intr_idx = sc->intr_count > 1 ? 1 : 0;
1966 rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
1967 if (rc != 0) {
1968 device_printf(sc->dev,
1969 "failed to create firmware event queue: %d\n", rc);
1970 return (rc);
1971 }
1972
1973 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
1974 NULL, "firmware event queue");
1975 children = SYSCTL_CHILDREN(oid);
1976
1977 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
1978 CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
1979 "absolute id of the queue");
1980 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
1981 CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
1982 "SGE context id of the queue");
1983 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
1984 CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
1985 "consumer index");
1986
1987 return (0);
1988}
1989
1990static int
1991free_fwq(struct adapter *sc)
1992{
1993 return free_iq_fl(NULL, &sc->sge.fwq, NULL);
1994}
1995
1996static int
1997alloc_mgmtq(struct adapter *sc)
1998{
1999 int rc;
2000 struct sge_wrq *mgmtq = &sc->sge.mgmtq;
2001 char name[16];
2002 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
2003 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2004
2005 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
2006 NULL, "management queue");
2007
2008 snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
2009 init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
2010 sc->sge.fwq.cntxt_id, name);
2011 rc = alloc_wrq(sc, NULL, mgmtq, oid);
2012 if (rc != 0) {
2013 device_printf(sc->dev,
2014 "failed to create management queue: %d\n", rc);
2015 return (rc);
2016 }
2017
2018 return (0);
2019}
2020
2021static int
2022free_mgmtq(struct adapter *sc)
2023{
2024
2025 return free_wrq(sc, &sc->sge.mgmtq);
2026}
2027
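/*
 * Congestion management setting handed to alloc_iq_fl for a NIC rx queue,
 * derived from the cong_drop tunable.  -1 skips the congestion related setup
 * entirely; the other values select between dropping and mapping the queue
 * to its port's tx channel (see the congestion manager handling in
 * alloc_iq_fl).
 */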
2028static inline int
2029tnl_cong(struct port_info *pi)
2030{
2031
2032 if (cong_drop == -1)
2033 return (-1);
2034 else if (cong_drop == 1)
2035 return (0);
2036 else
2037 return (1 << pi->tx_chan);
2038}
2039
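/*
 * Allocates a NIC rx queue (iq + fl), primes the free list, sets up LRO if
 * the kernel supports it, and attaches the per-queue sysctl nodes.
 */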
2040static int
2041alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
2042 struct sysctl_oid *oid)
2043{
2044 int rc;
2045 struct sysctl_oid_list *children;
2046 char name[16];
2047
2048 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
2049 if (rc != 0)
2050 return (rc);
2051
2052 FL_LOCK(&rxq->fl);
2053 refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
2054 FL_UNLOCK(&rxq->fl);
2055
2056#if defined(INET) || defined(INET6)
2057 rc = tcp_lro_init(&rxq->lro);
2058 if (rc != 0)
2059 return (rc);
2060 rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
2061
2062 if (pi->ifp->if_capenable & IFCAP_LRO)
2063 rxq->iq.flags |= IQ_LRO_ENABLED;
2064#endif
2065 rxq->ifp = pi->ifp;
2066
2067 children = SYSCTL_CHILDREN(oid);
2068
2069 snprintf(name, sizeof(name), "%d", idx);
2070 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2071 NULL, "rx queue");
2072 children = SYSCTL_CHILDREN(oid);
2073
2074 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2075 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
2076 "absolute id of the queue");
2077 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2078 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
2079 "SGE context id of the queue");
2080 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2081 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
2082 "consumer index");
2083#if defined(INET) || defined(INET6)
2084 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
2085 &rxq->lro.lro_queued, 0, NULL);
2086 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
2087 &rxq->lro.lro_flushed, 0, NULL);
2088#endif
2089 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
2090 &rxq->rxcsum, "# of times hardware assisted with checksum");
2091 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
2092 CTLFLAG_RD, &rxq->vlan_extraction,
2093 "# of times hardware extracted 802.1Q tag");
2094
2095 children = SYSCTL_CHILDREN(oid);
2096 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
2097 NULL, "freelist");
2098 children = SYSCTL_CHILDREN(oid);
2099
2100 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2101 CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I",
2102 "SGE context id of the queue");
2103 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
2104 &rxq->fl.cidx, 0, "consumer index");
2105 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
2106 &rxq->fl.pidx, 0, "producer index");
2107
2108 return (rc);
2109}
2110
2111static int
2112free_rxq(struct port_info *pi, struct sge_rxq *rxq)
2113{
2114 int rc;
2115
2116#if defined(INET) || defined(INET6)
2117 if (rxq->lro.ifp) {
2118 tcp_lro_free(&rxq->lro);
2119 rxq->lro.ifp = NULL;
2120 }
2121#endif
2122
2123 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
2124 if (rc == 0)
2125 bzero(rxq, sizeof(*rxq));
2126
2127 return (rc);
2128}
2129
2130#ifdef TCP_OFFLOAD
2131static int
2132alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
2133 int intr_idx, int idx, struct sysctl_oid *oid)
2134{
2135 int rc;
2136 struct sysctl_oid_list *children;
2137 char name[16];
2138
2139 rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
2140 1 << pi->tx_chan);
2141 if (rc != 0)
2142 return (rc);
2143
2144 children = SYSCTL_CHILDREN(oid);
2145
2146 snprintf(name, sizeof(name), "%d", idx);
2147 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2148 NULL, "rx queue");
2149 children = SYSCTL_CHILDREN(oid);
2150
2151 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2152 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
2153 "I", "absolute id of the queue");
2154 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2155 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
2156 "I", "SGE context id of the queue");
2157 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2158 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
2159 "consumer index");
2160
2161 children = SYSCTL_CHILDREN(oid);
2162 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
2163 NULL, "freelist");
2164 children = SYSCTL_CHILDREN(oid);
2165
2166 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2167 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16,
2168 "I", "SGE context id of the queue");
2169 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
2170 &ofld_rxq->fl.cidx, 0, "consumer index");
2171 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
2172 &ofld_rxq->fl.pidx, 0, "producer index");
2173
2174 return (rc);
2175}
2176
2177static int
2178free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
2179{
2180 int rc;
2181
2182 rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
2183 if (rc == 0)
2184 bzero(ofld_rxq, sizeof(*ofld_rxq));
2185
2186 return (rc);
2187}
2188#endif
2189
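/*
 * ctrl_eq_alloc, eth_eq_alloc, and ofld_eq_alloc issue the firmware command
 * that creates the corresponding kind of egress queue.  The common software
 * setup is in alloc_eq, which dispatches to one of these based on the eq
 * type.
 */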
2190static int
2191ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
2192{
2193 int rc, cntxt_id;
2194 struct fw_eq_ctrl_cmd c;
2195
2196 bzero(&c, sizeof(c));
2197
2198 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
2199 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
2200 V_FW_EQ_CTRL_CMD_VFN(0));
2201 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
2202 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
2203 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
2204 c.physeqid_pkd = htobe32(0);
2205 c.fetchszm_to_iqid =
2206 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2207 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
2208 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
2209 c.dcaen_to_eqsize =
2210 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2211 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2212 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2213 V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
2214 c.eqaddr = htobe64(eq->ba);
2215
2216 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2217 if (rc != 0) {
2218 device_printf(sc->dev,
2219 "failed to create control queue %d: %d\n", eq->tx_chan, rc);
2220 return (rc);
2221 }
2222 eq->flags |= EQ_ALLOCATED;
2223
2224 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
2225 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2226 if (cntxt_id >= sc->sge.neq)
2227 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2228 cntxt_id, sc->sge.neq - 1);
2229 sc->sge.eqmap[cntxt_id] = eq;
2230
2231 return (rc);
2232}
2233
2234static int
2235eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2236{
2237 int rc, cntxt_id;
2238 struct fw_eq_eth_cmd c;
2239
2240 bzero(&c, sizeof(c));
2241
2242 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
2243 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
2244 V_FW_EQ_ETH_CMD_VFN(0));
2245 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
2246 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
2247 c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
2248 c.fetchszm_to_iqid =
2249 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2250 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
2251 V_FW_EQ_ETH_CMD_IQID(eq->iqid));
2252 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2253 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2254 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2255 V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
2256 c.eqaddr = htobe64(eq->ba);
2257
2258 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2259 if (rc != 0) {
2260 device_printf(pi->dev,
2261 "failed to create Ethernet egress queue: %d\n", rc);
2262 return (rc);
2263 }
2264 eq->flags |= EQ_ALLOCATED;
2265
2266 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
2267 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2268 if (cntxt_id >= sc->sge.neq)
2269 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2270 cntxt_id, sc->sge.neq - 1);
2271 sc->sge.eqmap[cntxt_id] = eq;
2272
2273 return (rc);
2274}
2275
2276#ifdef TCP_OFFLOAD
2277static int
2278ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2279{
2280 int rc, cntxt_id;
2281 struct fw_eq_ofld_cmd c;
2282
2283 bzero(&c, sizeof(c));
2284
2285 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
2286 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
2287 V_FW_EQ_OFLD_CMD_VFN(0));
2288 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
2289 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
2290 c.fetchszm_to_iqid =
2291 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2292 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
2293 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
2294 c.dcaen_to_eqsize =
2295 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2296 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2297 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2298 V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
2299 c.eqaddr = htobe64(eq->ba);
2300
2301 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2302 if (rc != 0) {
2303 device_printf(pi->dev,
2304 "failed to create egress queue for TCP offload: %d\n", rc);
2305 return (rc);
2306 }
2307 eq->flags |= EQ_ALLOCATED;
2308
2309 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
2310 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2311 if (cntxt_id >= sc->sge.neq)
2312 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2313 cntxt_id, sc->sge.neq - 1);
2314 sc->sge.eqmap[cntxt_id] = eq;
2315
2316 return (rc);
2317}
2318#endif
2319
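/*
 * Common egress queue setup: allocates the descriptor ring, creates the
 * queue in the hardware via the type-specific helper above, and works out
 * the queue's user doorbell address (and whether write-combined doorbell
 * writes can be used for it).
 */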
2320static int
2321alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2322{
2323 int rc;
2324 size_t len;
2325
2326 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
2327
2328 len = eq->qsize * EQ_ESIZE;
2329 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
2330 &eq->ba, (void **)&eq->desc);
2331 if (rc)
2332 return (rc);
2333
2334 eq->cap = eq->qsize - spg_len / EQ_ESIZE;
2335 eq->spg = (void *)&eq->desc[eq->cap];
2336 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */
2337 eq->pidx = eq->cidx = 0;
2338 eq->doorbells = sc->doorbells;
2339
2340 switch (eq->flags & EQ_TYPEMASK) {
2341 case EQ_CTRL:
2342 rc = ctrl_eq_alloc(sc, eq);
2343 break;
2344
2345 case EQ_ETH:
2346 rc = eth_eq_alloc(sc, pi, eq);
2347 break;
2348
2349#ifdef TCP_OFFLOAD
2350 case EQ_OFLD:
2351 rc = ofld_eq_alloc(sc, pi, eq);
2352 break;
2353#endif
2354
2355 default:
2356 panic("%s: invalid eq type %d.", __func__,
2357 eq->flags & EQ_TYPEMASK);
2358 }
2359 if (rc != 0) {
2360 device_printf(sc->dev,
2361 "failed to allocate egress queue(%d): %d",
2362 eq->flags & EQ_TYPEMASK, rc);
2363 }
2364
2365 eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
2366
2367 if (isset(&eq->doorbells, DOORBELL_UDB) ||
2368 isset(&eq->doorbells, DOORBELL_UDBWC) ||
2369 isset(&eq->doorbells, DOORBELL_WCWR)) {
2370 uint32_t s_qpp = sc->sge.s_qpp;
2371 uint32_t mask = (1 << s_qpp) - 1;
2372 volatile uint8_t *udb;
2373
2374 udb = sc->udbs_base + UDBS_DB_OFFSET;
2375 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */
2376 eq->udb_qid = eq->cntxt_id & mask; /* id in page */
2377 if (eq->udb_qid > PAGE_SIZE / UDBS_SEG_SIZE)
2378 clrbit(&eq->doorbells, DOORBELL_WCWR);
2379 else {
2380 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */
2381 eq->udb_qid = 0;
2382 }
2383 eq->udb = (volatile void *)udb;
2384 }
2385
2386 return (rc);
2387}
2388
2389static int
2390free_eq(struct adapter *sc, struct sge_eq *eq)
2391{
2392 int rc;
2393
2394 if (eq->flags & EQ_ALLOCATED) {
2395 switch (eq->flags & EQ_TYPEMASK) {
2396 case EQ_CTRL:
2397 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
2398 eq->cntxt_id);
2399 break;
2400
2401 case EQ_ETH:
2402 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
2403 eq->cntxt_id);
2404 break;
2405
2406#ifdef TCP_OFFLOAD
2407 case EQ_OFLD:
2408 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
2409 eq->cntxt_id);
2410 break;
2411#endif
2412
2413 default:
2414 panic("%s: invalid eq type %d.", __func__,
2415 eq->flags & EQ_TYPEMASK);
2416 }
2417 if (rc != 0) {
2418 device_printf(sc->dev,
2419 "failed to free egress queue (%d): %d\n",
2420 eq->flags & EQ_TYPEMASK, rc);
2421 return (rc);
2422 }
2423 eq->flags &= ~EQ_ALLOCATED;
2424 }
2425
2426 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
2427
2428 if (mtx_initialized(&eq->eq_lock))
2429 mtx_destroy(&eq->eq_lock);
2430
2431 bzero(eq, sizeof(*eq));
2432 return (0);
2433}
2434
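/*
 * A work-request queue (used for the control/management and offload tx
 * queues) is an egress queue plus a list of pending work requests.  This
 * allocates the eq and attaches the queue's sysctls.
 */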
2435static int
2436alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
2437 struct sysctl_oid *oid)
2438{
2439 int rc;
2440 struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
2441 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2442
2443 rc = alloc_eq(sc, pi, &wrq->eq);
2444 if (rc)
2445 return (rc);
2446
2447 wrq->adapter = sc;
2448 STAILQ_INIT(&wrq->wr_list);
2449
2450 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2451 &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
2452 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
2453 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
2454 "consumer index");
2455 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
2456 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
2457 "producer index");
2458 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
2459 &wrq->tx_wrs, "# of work requests");
2460 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2461 &wrq->no_desc, 0,
2462 "# of times queue ran out of hardware descriptors");
2463 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2464 &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
2465
2466
2467 return (rc);
2468}
2469
2470static int
2471free_wrq(struct adapter *sc, struct sge_wrq *wrq)
2472{
2473 int rc;
2474
2475 rc = free_eq(sc, &wrq->eq);
2476 if (rc)
2477 return (rc);
2478
2479 bzero(wrq, sizeof(*wrq));
2480 return (0);
2481}
2482
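/*
 * Allocates everything a NIC tx queue needs: the egress queue itself, the
 * software descriptors, the buf_ring, the DMA tag and maps for outgoing
 * frames, and the per-queue sysctl nodes.
 */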
2483static int
2484alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
2485 struct sysctl_oid *oid)
2486{
2487 int rc;
2488 struct adapter *sc = pi->adapter;
2489 struct sge_eq *eq = &txq->eq;
2490 char name[16];
2491 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2492
2493 rc = alloc_eq(sc, pi, eq);
2494 if (rc)
2495 return (rc);
2496
2497 txq->ifp = pi->ifp;
2498
2499 txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
2500 M_ZERO | M_WAITOK);
2501 txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
2502
2503 rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
2504 BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
2505 BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
2506 if (rc != 0) {
2507 device_printf(sc->dev,
2508 "failed to create tx DMA tag: %d\n", rc);
2509 return (rc);
2510 }
2511
2512 /*
2513 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
2514	 * limit for any WR). txq->no_dmamap events shouldn't occur if the maps are
2515	 * sized for the worst case.
2516 */
2517 rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
2518 M_WAITOK);
2519 if (rc != 0) {
2520 device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
2521 return (rc);
2522 }
2523
2524 snprintf(name, sizeof(name), "%d", idx);
2525 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2526 NULL, "tx queue");
2527 children = SYSCTL_CHILDREN(oid);
2528
2529 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2530 &eq->cntxt_id, 0, "SGE context id of the queue");
2531 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2532 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
2533 "consumer index");
2534 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
2535 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
2536 "producer index");
2537
2538 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
2539 &txq->txcsum, "# of times hardware assisted with checksum");
2540 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
2541 CTLFLAG_RD, &txq->vlan_insertion,
2542 "# of times hardware inserted 802.1Q tag");
2543 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
2544 &txq->tso_wrs, "# of TSO work requests");
2545 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
2546 &txq->imm_wrs, "# of work requests with immediate data");
2547 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
2548 &txq->sgl_wrs, "# of work requests with direct SGL");
2549 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
2550 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
2551 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
2552 &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
2553 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
2554 &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
2555
2556 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
2557 &txq->br->br_drops, "# of drops in the buf_ring for this queue");
2558 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
2559 &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
2560 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2561 &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
2562 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
2563 &eq->egr_update, 0, "egress update notifications from the SGE");
2564 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2565 &eq->unstalled, 0, "# of times txq recovered after stall");
2566
2567 return (rc);
2568}
2569
2570static int
2571free_txq(struct port_info *pi, struct sge_txq *txq)
2572{
2573 int rc;
2574 struct adapter *sc = pi->adapter;
2575 struct sge_eq *eq = &txq->eq;
2576
2577 rc = free_eq(sc, eq);
2578 if (rc)
2579 return (rc);
2580
2581 free(txq->sdesc, M_CXGBE);
2582
2583 if (txq->txmaps.maps)
2584 t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
2585
2586 buf_ring_free(txq->br, M_CXGBE);
2587
2588 if (txq->tx_tag)
2589 bus_dma_tag_destroy(txq->tx_tag);
2590
2591 bzero(txq, sizeof(*txq));
2592 return (0);
2593}
2594
2595static void
2596oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2597{
2598 bus_addr_t *ba = arg;
2599
2600 KASSERT(nseg == 1,
2601 ("%s meant for single segment mappings only.", __func__));
2602
2603 *ba = error ? 0 : segs->ds_addr;
2604}
2605
2606static inline bool
2607is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
2608{
2609 *ctrl = (void *)((uintptr_t)iq->cdesc +
2610 (iq->esize - sizeof(struct rsp_ctrl)));
2611
2612 return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
2613}
2614
2615static inline void
2616iq_next(struct sge_iq *iq)
2617{
2618 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
2619 if (__predict_false(++iq->cidx == iq->qsize - 1)) {
2620 iq->cidx = 0;
2621 iq->gen ^= 1;
2622 iq->cdesc = iq->desc;
2623 }
2624}
2625
2626#define FL_HW_IDX(x) ((x) >> 3)
2627static inline void
2628ring_fl_db(struct adapter *sc, struct sge_fl *fl)
2629{
2630 int ndesc = fl->pending / 8;
2631 uint32_t v;
2632
2633 if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
2634 ndesc--; /* hold back one credit */
2635
2636 if (ndesc <= 0)
2637 return; /* nothing to do */
2638
2639 v = F_DBPRIO | V_QID(fl->cntxt_id) | V_PIDX(ndesc);
2640 if (is_t5(sc))
2641 v |= F_DBTYPE;
2642
2643 wmb();
2644
2645 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);
2646 fl->pending -= ndesc * 8;
2647}
2648
2649/*
2650 * Fill up the freelist by up to nbufs and maybe ring its doorbell.
2651 *
2652 * Returns non-zero to indicate that it should be added to the list of starving
2653 * freelists.
2654 */
2655static int
2656refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
2657{
2658 __be64 *d = &fl->desc[fl->pidx];
2659 struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
2660 bus_dma_tag_t tag;
2661 bus_addr_t pa;
2662 caddr_t cl;
2663 int rc;
2664
2665 FL_LOCK_ASSERT_OWNED(fl);
2666
2667 if (nbufs > fl->needed)
2668 nbufs = fl->needed;
2669
2670 while (nbufs--) {
2671
2672 if (sd->cl != NULL) {
2673
2674 /*
2675 * This happens when a frame small enough to fit
2676 * entirely in an mbuf was received in cl last time.
2677 * We'd held on to cl and can reuse it now. Note that
2678 * we reuse a cluster of the old size if fl->tag_idx is
2679 * no longer the same as sd->tag_idx.
2680 */
2681
2682 KASSERT(*d == sd->ba_tag,
2683			    ("%s: recycling problem at pidx %d",
2684 __func__, fl->pidx));
2685
2686 d++;
2687 goto recycled;
2688 }
2689
2690
2691 if (fl->tag_idx != sd->tag_idx) {
2692 bus_dmamap_t map;
2693 bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
2694 bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
2695
2696 /*
2697 * An MTU change can get us here. Discard the old map
2698 * which was created with the old tag, but only if
2699 * we're able to get a new one.
2700 */
2701 rc = bus_dmamap_create(newtag, 0, &map);
2702 if (rc == 0) {
2703 bus_dmamap_destroy(oldtag, sd->map);
2704 sd->map = map;
2705 sd->tag_idx = fl->tag_idx;
2706 }
2707 }
2708
2709 tag = fl->tag[sd->tag_idx];
2710
2711 cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
2712 if (cl == NULL)
2713 break;
2714
2715 rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
2716 oneseg_dma_callback, &pa, 0);
2717 if (rc != 0 || pa == 0) {
2718 fl->dmamap_failed++;
2719 uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
2720 break;
2721 }
2722
2723 sd->cl = cl;
2724 *d++ = htobe64(pa | sd->tag_idx);
2725
2726#ifdef INVARIANTS
2727 sd->ba_tag = htobe64(pa | sd->tag_idx);
2728#endif
2729
2730recycled:
2731 /* sd->m is never recycled, should always be NULL */
2732 KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));
2733
2734 sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
2735 if (sd->m == NULL)
2736 break;
2737
2738 fl->pending++;
2739 fl->needed--;
2740 sd++;
2741 if (++fl->pidx == fl->cap) {
2742 fl->pidx = 0;
2743 sd = fl->sdesc;
2744 d = fl->desc;
2745 }
2746 }
2747
2748 if (fl->pending >= 8)
2749 ring_fl_db(sc, fl);
2750
2751 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
2752}
2753
2754/*
2755 * Attempt to refill all starving freelists.
2756 */
2757static void
2758refill_sfl(void *arg)
2759{
2760 struct adapter *sc = arg;
2761 struct sge_fl *fl, *fl_temp;
2762
2763 mtx_lock(&sc->sfl_lock);
2764 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
2765 FL_LOCK(fl);
2766 refill_fl(sc, fl, 64);
2767 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
2768 TAILQ_REMOVE(&sc->sfl, fl, link);
2769 fl->flags &= ~FL_STARVING;
2770 }
2771 FL_UNLOCK(fl);
2772 }
2773
2774 if (!TAILQ_EMPTY(&sc->sfl))
2775 callout_schedule(&sc->sfl_callout, hz / 5);
2776 mtx_unlock(&sc->sfl_lock);
2777}
2778
2779static int
2780alloc_fl_sdesc(struct sge_fl *fl)
2781{
2782 struct fl_sdesc *sd;
2783 bus_dma_tag_t tag;
2784 int i, rc;
2785
2786 fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
2787 M_ZERO | M_WAITOK);
2788
2789 tag = fl->tag[fl->tag_idx];
2790 sd = fl->sdesc;
2791 for (i = 0; i < fl->cap; i++, sd++) {
2792
2793 sd->tag_idx = fl->tag_idx;
2794 rc = bus_dmamap_create(tag, 0, &sd->map);
2795 if (rc != 0)
2796 goto failed;
2797 }
2798
2799 return (0);
2800failed:
2801 while (--i >= 0) {
2802 sd--;
2803 bus_dmamap_destroy(tag, sd->map);
2804 if (sd->m) {
2805 m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
2806 m_free(sd->m);
2807 sd->m = NULL;
2808 }
2809 }
2810 KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
2811
2812 free(fl->sdesc, M_CXGBE);
2813 fl->sdesc = NULL;
2814
2815 return (rc);
2816}
2817
2818static void
2819free_fl_sdesc(struct sge_fl *fl)
2820{
2821 struct fl_sdesc *sd;
2822 int i;
2823
2824 sd = fl->sdesc;
2825 for (i = 0; i < fl->cap; i++, sd++) {
2826
2827 if (sd->m) {
2828 m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
2829 m_free(sd->m);
2830 sd->m = NULL;
2831 }
2832
2833 if (sd->cl) {
2834 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
2835 uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
2836 sd->cl = NULL;
2837 }
2838
2839 bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
2840 }
2841
2842 free(fl->sdesc, M_CXGBE);
2843 fl->sdesc = NULL;
2844}
2845
2846int
2847t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
2848 int flags)
2849{
2850 struct tx_map *txm;
2851 int i, rc;
2852
2853 txmaps->map_total = txmaps->map_avail = count;
2854 txmaps->map_cidx = txmaps->map_pidx = 0;
2855
2856 txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
2857 M_ZERO | flags);
2858
2859 txm = txmaps->maps;
2860 for (i = 0; i < count; i++, txm++) {
2861 rc = bus_dmamap_create(tx_tag, 0, &txm->map);
2862 if (rc != 0)
2863 goto failed;
2864 }
2865
2866 return (0);
2867failed:
2868 while (--i >= 0) {
2869 txm--;
2870 bus_dmamap_destroy(tx_tag, txm->map);
2871 }
2872 KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
2873
2874 free(txmaps->maps, M_CXGBE);
2875 txmaps->maps = NULL;
2876
2877 return (rc);
2878}
2879
2880void
2881t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
2882{
2883 struct tx_map *txm;
2884 int i;
2885
2886 txm = txmaps->maps;
2887 for (i = 0; i < txmaps->map_total; i++, txm++) {
2888
2889 if (txm->m) {
2890 bus_dmamap_unload(tx_tag, txm->map);
2891 m_freem(txm->m);
2892 txm->m = NULL;
2893 }
2894
2895 bus_dmamap_destroy(tx_tag, txm->map);
2896 }
2897
2898 free(txmaps->maps, M_CXGBE);
2899 txmaps->maps = NULL;
2900}
2901
2902/*
2903 * We'll do immediate data tx for non-TSO, but only when not coalescing. We're
2904 * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
2905 * of immediate data.
2906 */
2907#define IMM_LEN ( \
2908 2 * EQ_ESIZE \
2909 - sizeof(struct fw_eth_tx_pkt_wr) \
2910 - sizeof(struct cpl_tx_pkt_core))
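/*
 * With the usual 64-byte hardware descriptor and the 16-byte work request and
 * CPL headers this works out to 2 * 64 - 16 - 16 = 96 bytes, matching the
 * comment above.
 */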
2911
2912/*
2913 * Returns non-zero on failure, no need to cleanup anything in that case.
2914 *
2915 * Note 1: We always try to defrag the mbuf if required and return EFBIG only
2916 * if the resulting chain still won't fit in a tx descriptor.
2917 *
2918 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
2919 * does not have the TCP header in it.
2920 */
2921static int
2922get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
2923 int sgl_only)
2924{
2925 struct mbuf *m = *fp;
2926 struct tx_maps *txmaps;
2927 struct tx_map *txm;
2928 int rc, defragged = 0, n;
2929
2930 TXQ_LOCK_ASSERT_OWNED(txq);
2931
2932 if (m->m_pkthdr.tso_segsz)
2933 sgl_only = 1; /* Do not allow immediate data with LSO */
2934
2935start: sgl->nsegs = 0;
2936
2937 if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
2938 return (0); /* nsegs = 0 tells caller to use imm. tx */
2939
2940 txmaps = &txq->txmaps;
2941 if (txmaps->map_avail == 0) {
2942 txq->no_dmamap++;
2943 return (ENOMEM);
2944 }
2945 txm = &txmaps->maps[txmaps->map_pidx];
2946
2947 if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
2948 *fp = m_pullup(m, 50);
2949 m = *fp;
2950 if (m == NULL)
2951 return (ENOBUFS);
2952 }
2953
2954 rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
2955 &sgl->nsegs, BUS_DMA_NOWAIT);
2956 if (rc == EFBIG && defragged == 0) {
2957 m = m_defrag(m, M_NOWAIT);
2958 if (m == NULL)
2959 return (EFBIG);
2960
2961 defragged = 1;
2962 *fp = m;
2963 goto start;
2964 }
2965 if (rc != 0)
2966 return (rc);
2967
2968 txm->m = m;
2969 txmaps->map_avail--;
2970 if (++txmaps->map_pidx == txmaps->map_total)
2971 txmaps->map_pidx = 0;
2972
2973 KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
2974 ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
2975
2976 /*
2977 * Store the # of flits required to hold this frame's SGL in nflits. An
2978 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
2979 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used
2980 * then len1 must be set to 0.
2981 */
2982 n = sgl->nsegs - 1;
2983 sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
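	/*
	 * For example, nsegs == 1 gives nflits == 2 (just the header tuple),
	 * nsegs == 2 gives 4, and nsegs == 3 gives 5: each pair of segments
	 * beyond the first adds 3 flits and an odd leftover segment adds 2.
	 */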
2984
2985 return (0);
2986}
2987
2988
2989/*
2990 * Releases all the txq resources used up in the specified sgl.
2991 */
2992static int
2993free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
2994{
2995 struct tx_maps *txmaps;
2996 struct tx_map *txm;
2997
2998 TXQ_LOCK_ASSERT_OWNED(txq);
2999
3000 if (sgl->nsegs == 0)
3001 return (0); /* didn't use any map */
3002
3003 txmaps = &txq->txmaps;
3004
3005 /* 1 pkt uses exactly 1 map, back it out */
3006
3007 txmaps->map_avail++;
3008 if (txmaps->map_pidx > 0)
3009 txmaps->map_pidx--;
3010 else
3011 txmaps->map_pidx = txmaps->map_total - 1;
3012
3013 txm = &txmaps->maps[txmaps->map_pidx];
3014 bus_dmamap_unload(txq->tx_tag, txm->map);
3015 txm->m = NULL;
3016
3017 return (0);
3018}
3019
3020static int
3021write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
3022 struct sgl *sgl)
3023{
3024 struct sge_eq *eq = &txq->eq;
3025 struct fw_eth_tx_pkt_wr *wr;
3026 struct cpl_tx_pkt_core *cpl;
3027 uint32_t ctrl; /* used in many unrelated places */
3028 uint64_t ctrl1;
3029 int nflits, ndesc, pktlen;
3030 struct tx_sdesc *txsd;
3031 caddr_t dst;
3032
3033 TXQ_LOCK_ASSERT_OWNED(txq);
3034
3035 pktlen = m->m_pkthdr.len;
3036
3037 /*
3038 * Do we have enough flits to send this frame out?
3039 */
3040 ctrl = sizeof(struct cpl_tx_pkt_core);
3041 if (m->m_pkthdr.tso_segsz) {
3042 nflits = TXPKT_LSO_WR_HDR;
3043 ctrl += sizeof(struct cpl_tx_pkt_lso_core);
3044 } else
3045 nflits = TXPKT_WR_HDR;
3046 if (sgl->nsegs > 0)
3047 nflits += sgl->nflits;
3048 else {
3049 nflits += howmany(pktlen, 8);
3050 ctrl += pktlen;
3051 }
3052 ndesc = howmany(nflits, 8);
3053 if (ndesc > eq->avail)
3054 return (ENOMEM);
3055
3056 /* Firmware work request header */
3057 wr = (void *)&eq->desc[eq->pidx];
3058 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
3059 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
3060 ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
3061 if (eq->avail == ndesc) {
3062 if (!(eq->flags & EQ_CRFLUSHED)) {
3063 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3064 eq->flags |= EQ_CRFLUSHED;
3065 }
3066 eq->flags |= EQ_STALLED;
3067 }
3068
3069 wr->equiq_to_len16 = htobe32(ctrl);
3070 wr->r3 = 0;
3071
3072 if (m->m_pkthdr.tso_segsz) {
3073 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
3074 struct ether_header *eh;
3075 void *l3hdr;
3076#if defined(INET) || defined(INET6)
3077 struct tcphdr *tcp;
3078#endif
3079 uint16_t eh_type;
3080
3081 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
3082 F_LSO_LAST_SLICE;
3083
3084 eh = mtod(m, struct ether_header *);
3085 eh_type = ntohs(eh->ether_type);
3086 if (eh_type == ETHERTYPE_VLAN) {
3087 struct ether_vlan_header *evh = (void *)eh;
3088
3089 ctrl |= V_LSO_ETHHDR_LEN(1);
3090 l3hdr = evh + 1;
3091 eh_type = ntohs(evh->evl_proto);
3092 } else
3093 l3hdr = eh + 1;
3094
3095 switch (eh_type) {
3096#ifdef INET6
3097 case ETHERTYPE_IPV6:
3098 {
3099 struct ip6_hdr *ip6 = l3hdr;
3100
3101 /*
3102 * XXX-BZ For now we do not pretend to support
3103 * IPv6 extension headers.
3104 */
3105 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
3106 "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
3107 tcp = (struct tcphdr *)(ip6 + 1);
3108 ctrl |= F_LSO_IPV6;
3109 ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
3110 V_LSO_TCPHDR_LEN(tcp->th_off);
3111 break;
3112 }
3113#endif
3114#ifdef INET
3115 case ETHERTYPE_IP:
3116 {
3117 struct ip *ip = l3hdr;
3118
3119 tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
3120 ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
3121 V_LSO_TCPHDR_LEN(tcp->th_off);
3122 break;
3123 }
3124#endif
3125 default:
3126 panic("%s: CSUM_TSO but no supported IP version "
3127 "(0x%04x)", __func__, eh_type);
3128 }
3129
3130 lso->lso_ctrl = htobe32(ctrl);
3131 lso->ipid_ofst = htobe16(0);
3132 lso->mss = htobe16(m->m_pkthdr.tso_segsz);
3133 lso->seqno_offset = htobe32(0);
3134 lso->len = htobe32(pktlen);
3135
3136 cpl = (void *)(lso + 1);
3137
3138 txq->tso_wrs++;
3139 } else
3140 cpl = (void *)(wr + 1);
3141
3142 /* Checksum offload */
3143 ctrl1 = 0;
3144 if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3145 ctrl1 |= F_TXPKT_IPCSUM_DIS;
3146 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3147 CSUM_TCP_IPV6 | CSUM_TSO)))
3148 ctrl1 |= F_TXPKT_L4CSUM_DIS;
3149 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3150 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3151 txq->txcsum++; /* some hardware assistance provided */
3152
3153 /* VLAN tag insertion */
3154 if (m->m_flags & M_VLANTAG) {
3155 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3156 txq->vlan_insertion++;
3157 }
3158
3159 /* CPL header */
3160 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3161 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3162 cpl->pack = 0;
3163 cpl->len = htobe16(pktlen);
3164 cpl->ctrl1 = htobe64(ctrl1);
3165
3166 /* Software descriptor */
3167 txsd = &txq->sdesc[eq->pidx];
3168 txsd->desc_used = ndesc;
3169
3170 eq->pending += ndesc;
3171 eq->avail -= ndesc;
3172 eq->pidx += ndesc;
3173 if (eq->pidx >= eq->cap)
3174 eq->pidx -= eq->cap;
3175
3176 /* SGL */
3177 dst = (void *)(cpl + 1);
3178 if (sgl->nsegs > 0) {
3179 txsd->credits = 1;
3180 txq->sgl_wrs++;
3181 write_sgl_to_txd(eq, sgl, &dst);
3182 } else {
3183 txsd->credits = 0;
3184 txq->imm_wrs++;
3185 for (; m; m = m->m_next) {
3186 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
3187#ifdef INVARIANTS
3188 pktlen -= m->m_len;
3189#endif
3190 }
3191#ifdef INVARIANTS
3192 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
3193#endif
3194
3195 }
3196
3197 txq->txpkt_wrs++;
3198 return (0);
3199}
3200
3201/*
3202 * Returns 0 to indicate that m has been accepted into a coalesced tx work
3203 * request. It has either been folded into txpkts or txpkts was flushed and m
3204 * has started a new coalesced work request (as the first frame in a fresh
3205 * txpkts).
3206 *
3207 * Returns non-zero to indicate a failure; the caller is responsible for
3208 * transmitting m.  If there was anything in txpkts it has been flushed.
3209 */
3210static int
3211add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
3212 struct mbuf *m, struct sgl *sgl)
3213{
3214 struct sge_eq *eq = &txq->eq;
3215 int can_coalesce;
3216 struct tx_sdesc *txsd;
3217 int flits;
3218
3219 TXQ_LOCK_ASSERT_OWNED(txq);
3220
3221 KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
3222
3223 if (txpkts->npkt > 0) {
3224 flits = TXPKTS_PKT_HDR + sgl->nflits;
3225 can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3226 txpkts->nflits + flits <= TX_WR_FLITS &&
3227 txpkts->nflits + flits <= eq->avail * 8 &&
3228 txpkts->plen + m->m_pkthdr.len < 65536;
3229
3230 if (can_coalesce) {
3231 txpkts->npkt++;
3232 txpkts->nflits += flits;
3233 txpkts->plen += m->m_pkthdr.len;
3234
3235 txsd = &txq->sdesc[eq->pidx];
3236 txsd->credits++;
3237
3238 return (0);
3239 }
3240
3241 /*
3242 * Couldn't coalesce m into txpkts. The first order of business
3243 * is to send txpkts on its way. Then we'll revisit m.
3244 */
3245 write_txpkts_wr(txq, txpkts);
3246 }
3247
3248 /*
3249 * Check if we can start a new coalesced tx work request with m as
3250 * the first packet in it.
3251 */
3252
3253 KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
3254
3255 flits = TXPKTS_WR_HDR + sgl->nflits;
3256 can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3257 flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
3258
3259 if (can_coalesce == 0)
3260 return (EINVAL);
3261
3262 /*
3263 * Start a fresh coalesced tx WR with m as the first frame in it.
3264 */
3265 txpkts->npkt = 1;
3266 txpkts->nflits = flits;
3267 txpkts->flitp = &eq->desc[eq->pidx].flit[2];
3268 txpkts->plen = m->m_pkthdr.len;
3269
3270 txsd = &txq->sdesc[eq->pidx];
3271 txsd->credits = 1;
3272
3273 return (0);
3274}
3275
3276/*
3277 * Note that write_txpkts_wr can never run out of hardware descriptors (but
3278 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for
3279 * coalescing only if sufficient hardware descriptors are available.
3280 */
3281static void
3282write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
3283{
3284 struct sge_eq *eq = &txq->eq;
3285 struct fw_eth_tx_pkts_wr *wr;
3286 struct tx_sdesc *txsd;
3287 uint32_t ctrl;
3288 int ndesc;
3289
3290 TXQ_LOCK_ASSERT_OWNED(txq);
3291
3292 ndesc = howmany(txpkts->nflits, 8);
3293
3294 wr = (void *)&eq->desc[eq->pidx];
3295 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
3296 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
3297 if (eq->avail == ndesc) {
3298 if (!(eq->flags & EQ_CRFLUSHED)) {
3299 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3300 eq->flags |= EQ_CRFLUSHED;
3301 }
3302 eq->flags |= EQ_STALLED;
3303 }
3304 wr->equiq_to_len16 = htobe32(ctrl);
3305 wr->plen = htobe16(txpkts->plen);
3306 wr->npkt = txpkts->npkt;
3307 wr->r3 = wr->type = 0;
3308
3309 /* Everything else already written */
3310
3311 txsd = &txq->sdesc[eq->pidx];
3312 txsd->desc_used = ndesc;
3313
3314 KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
3315
3316 eq->pending += ndesc;
3317 eq->avail -= ndesc;
3318 eq->pidx += ndesc;
3319 if (eq->pidx >= eq->cap)
3320 eq->pidx -= eq->cap;
3321
3322 txq->txpkts_pkts += txpkts->npkt;
3323 txq->txpkts_wrs++;
3324 txpkts->npkt = 0; /* emptied */
3325}
3326
3327static inline void
3328write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
3329 struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
3330{
3331 struct ulp_txpkt *ulpmc;
3332 struct ulptx_idata *ulpsc;
3333 struct cpl_tx_pkt_core *cpl;
3334 struct sge_eq *eq = &txq->eq;
3335 uintptr_t flitp, start, end;
3336 uint64_t ctrl;
3337 caddr_t dst;
3338
3339 KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
3340
3341 start = (uintptr_t)eq->desc;
3342 end = (uintptr_t)eq->spg;
3343
3344 /* Checksum offload */
3345 ctrl = 0;
3346 if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3347 ctrl |= F_TXPKT_IPCSUM_DIS;
3348 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3349 CSUM_TCP_IPV6 | CSUM_TSO)))
3350 ctrl |= F_TXPKT_L4CSUM_DIS;
3351 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3352 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3353 txq->txcsum++; /* some hardware assistance provided */
3354
3355 /* VLAN tag insertion */
3356 if (m->m_flags & M_VLANTAG) {
3357 ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3358 txq->vlan_insertion++;
3359 }
3360
3361 /*
3362 * The previous packet's SGL must have ended at a 16 byte boundary (this
3363 * is required by the firmware/hardware). It follows that flitp cannot
3364 * wrap around between the ULPTX master command and ULPTX subcommand (8
3365 * bytes each), and that it cannot wrap around in the middle of the
3366 * cpl_tx_pkt_core either.
3367 */
3368 flitp = (uintptr_t)txpkts->flitp;
3369 KASSERT((flitp & 0xf) == 0,
3370 ("%s: last SGL did not end at 16 byte boundary: %p",
3371 __func__, txpkts->flitp));
3372
3373 /* ULP master command */
3374 ulpmc = (void *)flitp;
3375 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
3376 V_ULP_TXPKT_FID(eq->iqid));
3377 ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
3378 sizeof(*cpl) + 8 * sgl->nflits, 16));
3379
3380 /* ULP subcommand */
3381 ulpsc = (void *)(ulpmc + 1);
3382 ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
3383 F_ULP_TX_SC_MORE);
3384 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
3385
3386 flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
3387 if (flitp == end)
3388 flitp = start;
3389
3390 /* CPL_TX_PKT */
3391 cpl = (void *)flitp;
3392 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3393 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3394 cpl->pack = 0;
3395 cpl->len = htobe16(m->m_pkthdr.len);
3396 cpl->ctrl1 = htobe64(ctrl);
3397
3398 flitp += sizeof(*cpl);
3399 if (flitp == end)
3400 flitp = start;
3401
3402 /* SGL for this frame */
3403 dst = (caddr_t)flitp;
3404 txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
3405 txpkts->flitp = (void *)dst;
3406
3407 KASSERT(((uintptr_t)dst & 0xf) == 0,
3408 ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
3409}
3410
3411/*
3412 * If the SGL ends on an address that is not 16 byte aligned, this function
3413 * adds a zero-filled pad flit at the end and returns 1 in that case.
3414 */
3415static int
3416write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
3417{
3418 __be64 *flitp, *end;
3419 struct ulptx_sgl *usgl;
3420 bus_dma_segment_t *seg;
3421 int i, padded;
3422
3423 KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
3424 ("%s: bad SGL - nsegs=%d, nflits=%d",
3425 __func__, sgl->nsegs, sgl->nflits));
3426
3427 KASSERT(((uintptr_t)(*to) & 0xf) == 0,
3428 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
3429
3430 flitp = (__be64 *)(*to);
3431 end = flitp + sgl->nflits;
3432 seg = &sgl->seg[0];
3433 usgl = (void *)flitp;
3434
3435 /*
3436 * We start at a 16 byte boundary somewhere inside the tx descriptor
3437 * ring, so we're at least 16 bytes away from the status page. There is
3438 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
3439 */
3440
3441 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
3442 V_ULPTX_NSGE(sgl->nsegs));
3443 usgl->len0 = htobe32(seg->ds_len);
3444 usgl->addr0 = htobe64(seg->ds_addr);
3445 seg++;
3446
3447 if ((uintptr_t)end <= (uintptr_t)eq->spg) {
3448
3449 /* Won't wrap around at all */
3450
3451 for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
3452 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
3453 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
3454 }
3455 if (i & 1)
3456 usgl->sge[i / 2].len[1] = htobe32(0);
3457 } else {
3458
3459 /* Will wrap somewhere in the rest of the SGL */
3460
3461 /* 2 flits already written, write the rest flit by flit */
3462 flitp = (void *)(usgl + 1);
3463 for (i = 0; i < sgl->nflits - 2; i++) {
3464 if ((uintptr_t)flitp == (uintptr_t)eq->spg)
3465 flitp = (void *)eq->desc;
3466 *flitp++ = get_flit(seg, sgl->nsegs - 1, i);
3467 }
3468 end = flitp;
3469 }
3470
3471 if ((uintptr_t)end & 0xf) {
3472 *(uint64_t *)end = 0;
3473 end++;
3474 padded = 1;
3475 } else
3476 padded = 0;
3477
3478 if ((uintptr_t)end == (uintptr_t)eq->spg)
3479 *to = (void *)eq->desc;
3480 else
3481 *to = (void *)end;
3482
3483 return (padded);
3484}
3485
3486static inline void
3487copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
3488{
3489 if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
3490 bcopy(from, *to, len);
3491 (*to) += len;
3492 } else {
3493 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
3494
3495 bcopy(from, *to, portion);
3496 from += portion;
3497 portion = len - portion; /* remaining */
3498 bcopy(from, (void *)eq->desc, portion);
3499 (*to) = (caddr_t)eq->desc + portion;
3500 }
3501}
3502
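/*
 * Ring the doorbell of an egress queue.  The lowest-numbered doorbell
 * mechanism still set in eq->doorbells is used (ffs below); the write-combined
 * WR push (DOORBELL_WCWR) is skipped when more than one descriptor is pending.
 */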
3503static inline void
3504ring_eq_db(struct adapter *sc, struct sge_eq *eq)
3505{
3506 u_int db, pending;
3507
3508 db = eq->doorbells;
3509 pending = eq->pending;
3510 if (pending > 1)
3511 clrbit(&db, DOORBELL_WCWR);
3512 eq->pending = 0;
3513 wmb();
3514
3515 switch (ffs(db) - 1) {
3516 case DOORBELL_UDB:
3517 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
3518 return;
3519
3520 case DOORBELL_WCWR: {
3521 volatile uint64_t *dst, *src;
3522 int i;
3523
3524 /*
3525 * Queues whose 128B doorbell segment fits in the page do not
3526 * use relative qid (udb_qid is always 0). Only queues with
3527 * doorbell segments can do WCWR.
3528 */
3529 KASSERT(eq->udb_qid == 0 && pending == 1,
3530 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
3531 __func__, eq->doorbells, pending, eq->pidx, eq));
3532
3533 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
3534 UDBS_DB_OFFSET);
3535 i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
3536 src = (void *)&eq->desc[i];
3537 while (src != (void *)&eq->desc[i + 1])
3538 *dst++ = *src++;
3539 wmb();
3540 return;
3541 }
3542
3543 case DOORBELL_UDBWC:
3544 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
3545 wmb();
3546 return;
3547
3548 case DOORBELL_KDB:
3549 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
3550 V_QID(eq->cntxt_id) | V_PIDX(pending));
3551 return;
3552 }
3553}
3554
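/*
 * Number of tx descriptors the hardware has finished with, according to the
 * cidx in the status page, with a ring wrap taken into account.  E.g. with
 * cap == 1024, eq->cidx == 1000 and a hardware cidx of 8 this returns 32.
 */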
3555static inline int
3556reclaimable(struct sge_eq *eq)
3557{
3558 unsigned int cidx;
3559
3560 cidx = eq->spg->cidx; /* stable snapshot */
3561 cidx = be16toh(cidx);
3562
3563 if (cidx >= eq->cidx)
3564 return (cidx - eq->cidx);
3565 else
3566 return (cidx + eq->cap - eq->cidx);
3567}
3568
3569/*
3570 * There are "can_reclaim" tx descriptors ready to be reclaimed. Reclaim as
3571 * many as possible but stop when there are around "n" mbufs to free.
3572 *
3573 * The actual number reclaimed is provided as the return value.
3574 */
3575static int
3576reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
3577{
3578 struct tx_sdesc *txsd;
3579 struct tx_maps *txmaps;
3580 struct tx_map *txm;
3581 unsigned int reclaimed, maps;
3582 struct sge_eq *eq = &txq->eq;
3583
3584 TXQ_LOCK_ASSERT_OWNED(txq);
3585
3586 if (can_reclaim == 0)
3587 can_reclaim = reclaimable(eq);
3588
3589 maps = reclaimed = 0;
3590 while (can_reclaim && maps < n) {
3591 int ndesc;
3592
3593 txsd = &txq->sdesc[eq->cidx];
3594 ndesc = txsd->desc_used;
3595
3596 /* Firmware doesn't return "partial" credits. */
3597 KASSERT(can_reclaim >= ndesc,
3598 ("%s: unexpected number of credits: %d, %d",
3599 __func__, can_reclaim, ndesc));
3600
3601 maps += txsd->credits;
3602
3603 reclaimed += ndesc;
3604 can_reclaim -= ndesc;
3605
3606 eq->cidx += ndesc;
3607 if (__predict_false(eq->cidx >= eq->cap))
3608 eq->cidx -= eq->cap;
3609 }
3610
3611 txmaps = &txq->txmaps;
3612 txm = &txmaps->maps[txmaps->map_cidx];
3613 if (maps)
3614 prefetch(txm->m);
3615
3616 eq->avail += reclaimed;
3617 KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */
3618 ("%s: too many descriptors available", __func__));
3619
3620 txmaps->map_avail += maps;
3621 KASSERT(txmaps->map_avail <= txmaps->map_total,
3622 ("%s: too many maps available", __func__));
3623
3624 while (maps--) {
3625 struct tx_map *next;
3626
3627 next = txm + 1;
3628 if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
3629 next = txmaps->maps;
3630 prefetch(next->m);
3631
3632 bus_dmamap_unload(txq->tx_tag, txm->map);
3633 m_freem(txm->m);
3634 txm->m = NULL;
3635
3636 txm = next;
3637 if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
3638 txmaps->map_cidx = 0;
3639 }
3640
3641 return (reclaimed);
3642}
3643
3644static void
3645write_eqflush_wr(struct sge_eq *eq)
3646{
3647 struct fw_eq_flush_wr *wr;
3648
3649 EQ_LOCK_ASSERT_OWNED(eq);
3650 KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
3651 KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
3652
3653 wr = (void *)&eq->desc[eq->pidx];
3654 bzero(wr, sizeof(*wr));
3655 wr->opcode = FW_EQ_FLUSH_WR;
3656 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
3657 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
3658
3659 eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
3660 eq->pending++;
3661 eq->avail--;
3662 if (++eq->pidx == eq->cap)
3663 eq->pidx = 0;
3664}
3665
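/*
 * Returns flit number "idx" of the SGL that covers segments sgl[0] to
 * sgl[nsegs - 1].  Flits come in groups of three per pair of segments: the
 * lengths of segments 2k and 2k+1 packed into one flit, followed by their two
 * addresses.  write_sgl_to_txd calls this with the segment array advanced past
 * the first segment, which was already written as part of the ulptx_sgl
 * header.
 */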
3666static __be64
3667get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
3668{
3669 int i = (idx / 3) * 2;
3670
3671 switch (idx % 3) {
3672 case 0: {
3673 __be64 rc;
3674
3675 rc = htobe32(sgl[i].ds_len);
3676 if (i + 1 < nsegs)
3677 rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
3678
3679 return (rc);
3680 }
3681 case 1:
3682 return htobe64(sgl[i].ds_addr);
3683 case 2:
3684 return htobe64(sgl[i + 1].ds_addr);
3685 }
3686
3687 return (0);
3688}
3689
3690static void
3691set_fl_tag_idx(struct sge_fl *fl, int bufsize)
3692{
3693 int i;
3694
3695 for (i = 0; i < FL_BUF_SIZES - 1; i++) {
3696 if (FL_BUF_SIZE(i) >= bufsize)
3697 break;
3698 }
3699
3700 fl->tag_idx = i;
3701}
3702
3703static void
3704add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
3705{
3706 mtx_lock(&sc->sfl_lock);
3707 FL_LOCK(fl);
3708 if ((fl->flags & FL_DOOMED) == 0) {
3709 fl->flags |= FL_STARVING;
3710 TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
3711 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
3712 }
3713 FL_UNLOCK(fl);
3714 mtx_unlock(&sc->sfl_lock);
3715}
3716
3717static int
3718handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
3719 struct mbuf *m)
3720{
3721 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
3722 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
3723 struct adapter *sc = iq->adapter;
3724 struct sge *s = &sc->sge;
3725 struct sge_eq *eq;
3726
3727 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
3728 rss->opcode));
3729
3730 eq = s->eqmap[qid - s->eq_start];
3731 EQ_LOCK(eq);
3732 KASSERT(eq->flags & EQ_CRFLUSHED,
3733 ("%s: unsolicited egress update", __func__));
3734 eq->flags &= ~EQ_CRFLUSHED;
3735 eq->egr_update++;
3736
3737 if (__predict_false(eq->flags & EQ_DOOMED))
3738 wakeup_one(eq);
3739 else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
3740 taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
3741 EQ_UNLOCK(eq);
3742
3743 return (0);
3744}
3745
3746/* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */
3747CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
3748 offsetof(struct cpl_fw6_msg, data));
3749
3750static int
3751handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
3752{
3753 struct adapter *sc = iq->adapter;
3754 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
3755
3756 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
3757 rss->opcode));
3758
3759 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
3760 const struct rss_header *rss2;
3761
3762 rss2 = (const struct rss_header *)&cpl->data[0];
3763 return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
3764 }
3765
3766 return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
3767}
3768
3769static int
3770sysctl_uint16(SYSCTL_HANDLER_ARGS)
3771{
3772 uint16_t *id = arg1;
3773 int i = *id;
3774
3775 return sysctl_handle_int(oidp, &i, 0, req);
3776}
1103 break;
1104 }
1105
1106 iq_next(iq);
1107 if (++ndescs == limit) {
1108 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
1109 V_CIDXINC(ndescs) |
1110 V_INGRESSQID(iq->cntxt_id) |
1111 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
1112 ndescs = 0;
1113
1114 if (fl_bufs_used > 0) {
1115 FL_LOCK(fl);
1116 fl->needed += fl_bufs_used;
1117 refill_fl(sc, fl, fl->cap / 8);
1118 FL_UNLOCK(fl);
1119 fl_bufs_used = 0;
1120 }
1121
1122 if (budget)
1123 return (EINPROGRESS);
1124 }
1125 }
1126
1127 if (STAILQ_EMPTY(&iql))
1128 break;
1129
1130 /*
1131 * Process the head only, and send it to the back of the list if
1132 * it's still not done.
1133 */
1134 q = STAILQ_FIRST(&iql);
1135 STAILQ_REMOVE_HEAD(&iql, link);
1136 if (service_iq(q, q->qsize / 8) == 0)
1137 atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
1138 else
1139 STAILQ_INSERT_TAIL(&iql, q, link);
1140 }
1141
1142#if defined(INET) || defined(INET6)
1143 if (iq->flags & IQ_LRO_ENABLED) {
1144 struct lro_ctrl *lro = &rxq->lro;
1145 struct lro_entry *l;
1146
1147 while (!SLIST_EMPTY(&lro->lro_active)) {
1148 l = SLIST_FIRST(&lro->lro_active);
1149 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1150 tcp_lro_flush(lro, l);
1151 }
1152 }
1153#endif
1154
1155 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
1156 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
1157
1158 if (iq->flags & IQ_HAS_FL) {
1159 int starved;
1160
1161 FL_LOCK(fl);
1162 fl->needed += fl_bufs_used;
1163 starved = refill_fl(sc, fl, fl->cap / 4);
1164 FL_UNLOCK(fl);
1165 if (__predict_false(starved != 0))
1166 add_fl_to_sfl(sc, fl);
1167 }
1168
1169 return (0);
1170}
1171
1172static struct mbuf *
1173get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
1174 int *fl_bufs_used)
1175{
1176 struct mbuf *m0, *m;
1177 struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
1178 unsigned int nbuf, len;
1179
1180 /*
1181 * No assertion for the fl lock because we don't need it. This routine
1182 * is called only from the rx interrupt handler and it only updates
1183 * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be
1184 * updated in the rx interrupt handler or the starvation helper routine.
1185 * That's why code that manipulates fl->pidx/fl->needed needs the fl
1186 * lock but this routine does not).
1187 */
1188
1189 if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
1190 panic("%s: cannot handle packed frames", __func__);
1191 len = G_RSPD_LEN(len_newbuf);
1192
1193 m0 = sd->m;
1194 sd->m = NULL; /* consumed */
1195
1196 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
1197 m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
1198#ifdef T4_PKT_TIMESTAMP
1199 /* Leave room for a timestamp */
1200 m0->m_data += 8;
1201#endif
1202
1203 if (len < RX_COPY_THRESHOLD) {
1204 /* copy data to mbuf, buffer will be recycled */
1205 bcopy(sd->cl, mtod(m0, caddr_t), len);
1206 m0->m_len = len;
1207 } else {
1208 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
1209 m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
1210 sd->cl = NULL; /* consumed */
1211 m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
1212 }
1213 m0->m_pkthdr.len = len;
1214
1215 sd++;
1216 if (__predict_false(++fl->cidx == fl->cap)) {
1217 sd = fl->sdesc;
1218 fl->cidx = 0;
1219 }
1220
1221 m = m0;
1222 len -= m->m_len;
1223 nbuf = 1; /* # of fl buffers used */
1224
1225 while (len > 0) {
1226 m->m_next = sd->m;
1227 sd->m = NULL; /* consumed */
1228 m = m->m_next;
1229
1230 bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
1231 BUS_DMASYNC_POSTREAD);
1232
1233 m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1234 if (len <= MLEN) {
1235 bcopy(sd->cl, mtod(m, caddr_t), len);
1236 m->m_len = len;
1237 } else {
1238 bus_dmamap_unload(fl->tag[sd->tag_idx],
1239 sd->map);
1240 m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
1241 sd->cl = NULL; /* consumed */
1242 m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
1243 }
1244
1245 sd++;
1246 if (__predict_false(++fl->cidx == fl->cap)) {
1247 sd = fl->sdesc;
1248 fl->cidx = 0;
1249 }
1250
1251 len -= m->m_len;
1252 nbuf++;
1253 }
1254
1255 (*fl_bufs_used) += nbuf;
1256
1257 return (m0);
1258}
1259
1260static int
1261t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
1262{
1263 struct sge_rxq *rxq = iq_to_rxq(iq);
1264 struct ifnet *ifp = rxq->ifp;
1265 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
1266#if defined(INET) || defined(INET6)
1267 struct lro_ctrl *lro = &rxq->lro;
1268#endif
1269
1270 KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
1271 rss->opcode));
1272
1273 m0->m_pkthdr.len -= fl_pktshift;
1274 m0->m_len -= fl_pktshift;
1275 m0->m_data += fl_pktshift;
1276
1277 m0->m_pkthdr.rcvif = ifp;
1278 m0->m_flags |= M_FLOWID;
1279 m0->m_pkthdr.flowid = rss->hash_val;
1280
1281 if (cpl->csum_calc && !cpl->err_vec) {
1282 if (ifp->if_capenable & IFCAP_RXCSUM &&
1283 cpl->l2info & htobe32(F_RXF_IP)) {
1284 m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
1285 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1286 rxq->rxcsum++;
1287 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
1288 cpl->l2info & htobe32(F_RXF_IP6)) {
1289 m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
1290 CSUM_PSEUDO_HDR);
1291 rxq->rxcsum++;
1292 }
1293
1294 if (__predict_false(cpl->ip_frag))
1295 m0->m_pkthdr.csum_data = be16toh(cpl->csum);
1296 else
1297 m0->m_pkthdr.csum_data = 0xffff;
1298 }
1299
1300 if (cpl->vlan_ex) {
1301 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
1302 m0->m_flags |= M_VLANTAG;
1303 rxq->vlan_extraction++;
1304 }
1305
1306#if defined(INET) || defined(INET6)
1307 if (cpl->l2info & htobe32(F_RXF_LRO) &&
1308 iq->flags & IQ_LRO_ENABLED &&
1309 tcp_lro_rx(lro, m0, 0) == 0) {
1310 /* queued for LRO */
1311 } else
1312#endif
1313 ifp->if_input(ifp, m0);
1314
1315 return (0);
1316}
1317
1318/*
1319 * Doesn't fail. Holds on to work requests it can't send right away.
1320 */
1321void
1322t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
1323{
1324 struct sge_eq *eq = &wrq->eq;
1325 int can_reclaim;
1326 caddr_t dst;
1327
1328 TXQ_LOCK_ASSERT_OWNED(wrq);
1329#ifdef TCP_OFFLOAD
1330 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
1331 (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1332 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1333#else
1334 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
1335 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1336#endif
1337
1338 if (__predict_true(wr != NULL))
1339 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
1340
1341 can_reclaim = reclaimable(eq);
1342 if (__predict_false(eq->flags & EQ_STALLED)) {
1343 if (can_reclaim < tx_resume_threshold(eq))
1344 return;
1345 eq->flags &= ~EQ_STALLED;
1346 eq->unstalled++;
1347 }
1348 eq->cidx += can_reclaim;
1349 eq->avail += can_reclaim;
1350 if (__predict_false(eq->cidx >= eq->cap))
1351 eq->cidx -= eq->cap;
1352
1353 while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
1354 int ndesc;
1355
1356 if (__predict_false(wr->wr_len < 0 ||
1357 wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {
1358
1359#ifdef INVARIANTS
1360 panic("%s: work request with length %d", __func__,
1361 wr->wr_len);
1362#endif
1363#ifdef KDB
1364 kdb_backtrace();
1365#endif
1366 log(LOG_ERR, "%s: %s work request with length %d",
1367 device_get_nameunit(sc->dev), __func__, wr->wr_len);
1368 STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1369 free_wrqe(wr);
1370 continue;
1371 }
1372
1373 ndesc = howmany(wr->wr_len, EQ_ESIZE);
1374 if (eq->avail < ndesc) {
1375 wrq->no_desc++;
1376 break;
1377 }
1378
1379 dst = (void *)&eq->desc[eq->pidx];
1380 copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);
1381
1382 eq->pidx += ndesc;
1383 eq->avail -= ndesc;
1384 if (__predict_false(eq->pidx >= eq->cap))
1385 eq->pidx -= eq->cap;
1386
1387 eq->pending += ndesc;
1388 if (eq->pending >= 8)
1389 ring_eq_db(sc, eq);
1390
1391 wrq->tx_wrs++;
1392 STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
1393 free_wrqe(wr);
1394
1395 if (eq->avail < 8) {
1396 can_reclaim = reclaimable(eq);
1397 eq->cidx += can_reclaim;
1398 eq->avail += can_reclaim;
1399 if (__predict_false(eq->cidx >= eq->cap))
1400 eq->cidx -= eq->cap;
1401 }
1402 }
1403
1404 if (eq->pending)
1405 ring_eq_db(sc, eq);
1406
1407 if (wr != NULL) {
1408 eq->flags |= EQ_STALLED;
1409 if (callout_pending(&eq->tx_callout) == 0)
1410 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1411 }
1412}
1413
1414/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
1415#define TXPKTS_PKT_HDR ((\
1416 sizeof(struct ulp_txpkt) + \
1417 sizeof(struct ulptx_idata) + \
1418 sizeof(struct cpl_tx_pkt_core) \
1419 ) / 8)
1420
1421/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
1422#define TXPKTS_WR_HDR (\
1423 sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
1424 TXPKTS_PKT_HDR)
1425
1426/* Header of a tx WR, before SGL of first packet (in flits) */
1427#define TXPKT_WR_HDR ((\
1428 sizeof(struct fw_eth_tx_pkt_wr) + \
1429 sizeof(struct cpl_tx_pkt_core) \
1430 ) / 8 )
1431
1432/* Header of a tx LSO WR, before SGL of first packet (in flits) */
1433#define TXPKT_LSO_WR_HDR ((\
1434 sizeof(struct fw_eth_tx_pkt_wr) + \
1435 sizeof(struct cpl_tx_pkt_lso_core) + \
1436 sizeof(struct cpl_tx_pkt_core) \
1437 ) / 8 )
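/*
 * With the 8 byte ULPTX master command and subcommand (see write_ulp_cpl_sgl)
 * and the 16 byte cpl_tx_pkt_core, TXPKTS_PKT_HDR works out to 4 flits; the
 * TXPKT_* headers are sized the same way from their work request structures.
 */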
1438
1439int
1440t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
1441{
1442 struct port_info *pi = (void *)ifp->if_softc;
1443 struct adapter *sc = pi->adapter;
1444 struct sge_eq *eq = &txq->eq;
1445 struct buf_ring *br = txq->br;
1446 struct mbuf *next;
1447 int rc, coalescing, can_reclaim;
1448 struct txpkts txpkts;
1449 struct sgl sgl;
1450
1451 TXQ_LOCK_ASSERT_OWNED(txq);
1452 KASSERT(m, ("%s: called with nothing to do.", __func__));
1453 KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
1454 ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
1455
1456 prefetch(&eq->desc[eq->pidx]);
1457 prefetch(&txq->sdesc[eq->pidx]);
1458
1459 txpkts.npkt = 0;/* indicates there's nothing in txpkts */
1460 coalescing = 0;
1461
1462 can_reclaim = reclaimable(eq);
1463 if (__predict_false(eq->flags & EQ_STALLED)) {
1464 if (can_reclaim < tx_resume_threshold(eq)) {
1465 txq->m = m;
1466 return (0);
1467 }
1468 eq->flags &= ~EQ_STALLED;
1469 eq->unstalled++;
1470 }
1471
1472 if (__predict_false(eq->flags & EQ_DOOMED)) {
1473 m_freem(m);
1474 while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
1475 m_freem(m);
1476 return (ENETDOWN);
1477 }
1478
1479 if (eq->avail < 8 && can_reclaim)
1480 reclaim_tx_descs(txq, can_reclaim, 32);
1481
1482 for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
1483
1484 if (eq->avail < 8)
1485 break;
1486
1487 next = m->m_nextpkt;
1488 m->m_nextpkt = NULL;
1489
1490 if (next || buf_ring_peek(br))
1491 coalescing = 1;
1492
1493 rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
1494 if (rc != 0) {
1495 if (rc == ENOMEM) {
1496
1497 /* Short of resources, suspend tx */
1498
1499 m->m_nextpkt = next;
1500 break;
1501 }
1502
1503 /*
1504 * Unrecoverable error for this packet, throw it away
1505 * and move on to the next. get_pkt_sgl may already
1506 * have freed m (it will be NULL in that case and the
1507 * m_freem here is still safe).
1508 */
1509
1510 m_freem(m);
1511 continue;
1512 }
1513
1514 if (coalescing &&
1515 add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
1516
1517 /* Successfully absorbed into txpkts */
1518
1519 write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
1520 goto doorbell;
1521 }
1522
1523 /*
1524 		 * We weren't coalescing to begin with, or the current frame could
1525 * not be coalesced (add_to_txpkts flushes txpkts if a frame
1526 * given to it can't be coalesced). Either way there should be
1527 * nothing in txpkts.
1528 */
1529 KASSERT(txpkts.npkt == 0,
1530 ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
1531
1532 /* We're sending out individual packets now */
1533 coalescing = 0;
1534
1535 if (eq->avail < 8)
1536 reclaim_tx_descs(txq, 0, 8);
1537 rc = write_txpkt_wr(pi, txq, m, &sgl);
1538 if (rc != 0) {
1539
1540 /* Short of hardware descriptors, suspend tx */
1541
1542 /*
1543 * This is an unlikely but expensive failure. We've
1544 * done all the hard work (DMA mappings etc.) and now we
1545 * can't send out the packet. What's worse, we have to
1546 * spend even more time freeing up everything in sgl.
1547 */
1548 txq->no_desc++;
1549 free_pkt_sgl(txq, &sgl);
1550
1551 m->m_nextpkt = next;
1552 break;
1553 }
1554
1555 ETHER_BPF_MTAP(ifp, m);
1556 if (sgl.nsegs == 0)
1557 m_freem(m);
1558doorbell:
1559 if (eq->pending >= 8)
1560 ring_eq_db(sc, eq);
1561
1562 can_reclaim = reclaimable(eq);
1563 if (can_reclaim >= 32)
1564 reclaim_tx_descs(txq, can_reclaim, 64);
1565 }
1566
1567 if (txpkts.npkt > 0)
1568 write_txpkts_wr(txq, &txpkts);
1569
1570 /*
1571 * m not NULL means there was an error but we haven't thrown it away.
1572 * This can happen when we're short of tx descriptors (no_desc) or maybe
1573 * even DMA maps (no_dmamap). Either way, a credit flush and reclaim
1574 * will get things going again.
1575 */
1576 if (m && !(eq->flags & EQ_CRFLUSHED)) {
1577 struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
1578
1579 /*
1580 * If EQ_CRFLUSHED is not set then we know we have at least one
1581 * available descriptor because any WR that reduces eq->avail to
1582 * 0 also sets EQ_CRFLUSHED.
1583 */
1584 KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));
1585
1586 txsd->desc_used = 1;
1587 txsd->credits = 0;
1588 write_eqflush_wr(eq);
1589 }
1590 txq->m = m;
1591
1592 if (eq->pending)
1593 ring_eq_db(sc, eq);
1594
1595 reclaim_tx_descs(txq, 0, 128);
1596
1597 if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
1598 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1599
1600 return (0);
1601}
1602
1603void
1604t4_update_fl_bufsize(struct ifnet *ifp)
1605{
1606 struct port_info *pi = ifp->if_softc;
1607 struct sge_rxq *rxq;
1608#ifdef TCP_OFFLOAD
1609 struct sge_ofld_rxq *ofld_rxq;
1610#endif
1611 struct sge_fl *fl;
1612 int i, bufsize;
1613
1614 bufsize = mtu_to_bufsize(ifp->if_mtu);
1615 for_each_rxq(pi, i, rxq) {
1616 fl = &rxq->fl;
1617
1618 FL_LOCK(fl);
1619 set_fl_tag_idx(fl, bufsize);
1620 FL_UNLOCK(fl);
1621 }
1622#ifdef TCP_OFFLOAD
1623 bufsize = mtu_to_bufsize_toe(pi->adapter, ifp->if_mtu);
1624 for_each_ofld_rxq(pi, i, ofld_rxq) {
1625 fl = &ofld_rxq->fl;
1626
1627 FL_LOCK(fl);
1628 set_fl_tag_idx(fl, bufsize);
1629 FL_UNLOCK(fl);
1630 }
1631#endif
1632}
1633
1634int
1635can_resume_tx(struct sge_eq *eq)
1636{
1637 return (reclaimable(eq) >= tx_resume_threshold(eq));
1638}
1639
1640static inline void
1641init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
1642 int qsize, int esize)
1643{
1644 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
1645 ("%s: bad tmr_idx %d", __func__, tmr_idx));
1646 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */
1647 ("%s: bad pktc_idx %d", __func__, pktc_idx));
1648
1649 iq->flags = 0;
1650 iq->adapter = sc;
1651 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
1652 iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
1653 if (pktc_idx >= 0) {
1654 iq->intr_params |= F_QINTR_CNT_EN;
1655 iq->intr_pktc_idx = pktc_idx;
1656 }
1657 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */
1658 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */
1659}
1660
1661static inline void
1662init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name)
1663{
1664 fl->qsize = qsize;
1665 strlcpy(fl->lockname, name, sizeof(fl->lockname));
1666 set_fl_tag_idx(fl, bufsize);
1667}
1668
1669static inline void
1670init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
1671 uint16_t iqid, char *name)
1672{
1673 KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
1674 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));
1675
1676 eq->flags = eqtype & EQ_TYPEMASK;
1677 eq->tx_chan = tx_chan;
1678 eq->iqid = iqid;
1679 eq->qsize = qsize;
1680 strlcpy(eq->lockname, name, sizeof(eq->lockname));
1681
1682 TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
1683 callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
1684}
1685
1686static int
1687alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
1688 bus_dmamap_t *map, bus_addr_t *pa, void **va)
1689{
1690 int rc;
1691
1692 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
1693 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
1694 if (rc != 0) {
1695 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
1696 goto done;
1697 }
1698
1699 rc = bus_dmamem_alloc(*tag, va,
1700 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
1701 if (rc != 0) {
1702 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
1703 goto done;
1704 }
1705
1706 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
1707 if (rc != 0) {
1708 device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
1709 goto done;
1710 }
1711done:
1712 if (rc)
1713 free_ring(sc, *tag, *map, *pa, *va);
1714
1715 return (rc);
1716}
1717
1718static int
1719free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
1720 bus_addr_t pa, void *va)
1721{
1722 if (pa)
1723 bus_dmamap_unload(tag, map);
1724 if (va)
1725 bus_dmamem_free(tag, va, map);
1726 if (tag)
1727 bus_dma_tag_destroy(tag);
1728
1729 return (0);
1730}
1731
1732/*
1733 * Allocates the ring for an ingress queue and an optional freelist. If the
1734 * freelist is specified it will be allocated and then associated with the
1735 * ingress queue.
1736 *
1737 * Returns errno on failure. Resources allocated up to that point may still be
1738 * allocated. Caller is responsible for cleanup in case this function fails.
1739 *
1740 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
1741 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies
1742 * the abs_id of the ingress queue to which its interrupts should be forwarded.
1743 */
1744static int
1745alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1746 int intr_idx, int cong)
1747{
1748 int rc, i, cntxt_id;
1749 size_t len;
1750 struct fw_iq_cmd c;
1751 struct adapter *sc = iq->adapter;
1752 __be32 v = 0;
1753
1754 len = iq->qsize * iq->esize;
1755 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
1756 (void **)&iq->desc);
1757 if (rc != 0)
1758 return (rc);
1759
1760 bzero(&c, sizeof(c));
1761 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1762 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1763 V_FW_IQ_CMD_VFN(0));
1764
1765 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1766 FW_LEN16(c));
1767
1768 /* Special handling for firmware event queue */
1769 if (iq == &sc->sge.fwq)
1770 v |= F_FW_IQ_CMD_IQASYNCH;
1771
1772 if (iq->flags & IQ_INTR) {
1773 KASSERT(intr_idx < sc->intr_count,
1774 ("%s: invalid direct intr_idx %d", __func__, intr_idx));
1775 } else
1776 v |= F_FW_IQ_CMD_IQANDST;
1777 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1778
1779 c.type_to_iqandstindex = htobe32(v |
1780 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1781 V_FW_IQ_CMD_VIID(pi->viid) |
1782 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1783 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1784 F_FW_IQ_CMD_IQGTSMODE |
1785 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
1786 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1787 c.iqsize = htobe16(iq->qsize);
1788 c.iqaddr = htobe64(iq->ba);
1789 if (cong >= 0)
1790 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
1791
1792 if (fl) {
1793 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
1794
1795 for (i = 0; i < FL_BUF_SIZES; i++) {
1796
1797 /*
1798 * A freelist buffer must be 16 byte aligned as the SGE
1799 * uses the low 4 bits of the bus addr to figure out the
1800 * buffer size.
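			 * refill_fl depends on this too: it stores the
			 * buffer's tag index in the low bits of the bus
			 * address it writes into the descriptor
			 * (pa | sd->tag_idx).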
1801 */
1802 rc = bus_dma_tag_create(sc->dmat, 16, 0,
1803 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1804 FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
1805 NULL, NULL, &fl->tag[i]);
1806 if (rc != 0) {
1807 device_printf(sc->dev,
1808 "failed to create fl DMA tag[%d]: %d\n",
1809 i, rc);
1810 return (rc);
1811 }
1812 }
1813 len = fl->qsize * RX_FL_ESIZE;
1814 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
1815 &fl->ba, (void **)&fl->desc);
1816 if (rc)
1817 return (rc);
1818
1819 /* Allocate space for one software descriptor per buffer. */
1820 fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8;
1821 rc = alloc_fl_sdesc(fl);
1822 if (rc != 0) {
1823 device_printf(sc->dev,
1824 "failed to setup fl software descriptors: %d\n",
1825 rc);
1826 return (rc);
1827 }
1828 fl->needed = fl->cap;
1829 fl->lowat = roundup2(sc->sge.fl_starve_threshold, 8);
1830
1831 c.iqns_to_fl0congen |=
1832 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
1833 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
1834 F_FW_IQ_CMD_FL0PADEN);
1835 if (cong >= 0) {
1836 c.iqns_to_fl0congen |=
1837 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
1838 F_FW_IQ_CMD_FL0CONGCIF |
1839 F_FW_IQ_CMD_FL0CONGEN);
1840 }
1841 c.fl0dcaen_to_fl0cidxfthresh =
1842 htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
1843 V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
1844 c.fl0size = htobe16(fl->qsize);
1845 c.fl0addr = htobe64(fl->ba);
1846 }
1847
1848 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1849 if (rc != 0) {
1850 device_printf(sc->dev,
1851 "failed to create ingress queue: %d\n", rc);
1852 return (rc);
1853 }
1854
1855 iq->cdesc = iq->desc;
1856 iq->cidx = 0;
1857 iq->gen = 1;
1858 iq->intr_next = iq->intr_params;
1859 iq->cntxt_id = be16toh(c.iqid);
1860 iq->abs_id = be16toh(c.physiqid);
1861 iq->flags |= IQ_ALLOCATED;
1862
1863 cntxt_id = iq->cntxt_id - sc->sge.iq_start;
1864 if (cntxt_id >= sc->sge.niq) {
1865 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
1866 cntxt_id, sc->sge.niq - 1);
1867 }
1868 sc->sge.iqmap[cntxt_id] = iq;
1869
1870 if (fl) {
1871 fl->cntxt_id = be16toh(c.fl0id);
1872 fl->pidx = fl->cidx = 0;
1873
1874 cntxt_id = fl->cntxt_id - sc->sge.eq_start;
1875 if (cntxt_id >= sc->sge.neq) {
1876 panic("%s: fl->cntxt_id (%d) more than the max (%d)",
1877 __func__, cntxt_id, sc->sge.neq - 1);
1878 }
1879 sc->sge.eqmap[cntxt_id] = (void *)fl;
1880
1881 FL_LOCK(fl);
1882 /* Enough to make sure the SGE doesn't think it's starved */
1883 refill_fl(sc, fl, fl->lowat);
1884 FL_UNLOCK(fl);
1885
1886 iq->flags |= IQ_HAS_FL;
1887 }
1888
1889 if (is_t5(sc) && cong >= 0) {
1890 uint32_t param, val;
1891
1892 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1893 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
1894 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
1895 if (cong == 0)
1896 val = 1 << 19;
1897 else {
1898 val = 2 << 19;
1899 for (i = 0; i < 4; i++) {
1900 if (cong & (1 << i))
1901 val |= 1 << (i << 2);
1902 }
1903 }
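		/*
		 * E.g. cong == 0x5 (channels 0 and 2) yields
		 * val == (2 << 19) | (1 << 0) | (1 << 8).
		 */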
1904
1905 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
1906 if (rc != 0) {
1907 /* report error but carry on */
1908 device_printf(sc->dev,
1909 "failed to set congestion manager context for "
1910 "ingress queue %d: %d\n", iq->cntxt_id, rc);
1911 }
1912 }
1913
1914 /* Enable IQ interrupts */
1915 atomic_store_rel_int(&iq->state, IQS_IDLE);
1916 t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
1917 V_INGRESSQID(iq->cntxt_id));
1918
1919 return (0);
1920}
1921
1922static int
1923free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1924{
1925 int i, rc;
1926 struct adapter *sc = iq->adapter;
1927 device_t dev;
1928
1929 if (sc == NULL)
1930 return (0); /* nothing to do */
1931
1932 dev = pi ? pi->dev : sc->dev;
1933
1934 if (iq->flags & IQ_ALLOCATED) {
1935 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1936 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1937 fl ? fl->cntxt_id : 0xffff, 0xffff);
1938 if (rc != 0) {
1939 device_printf(dev,
1940 "failed to free queue %p: %d\n", iq, rc);
1941 return (rc);
1942 }
1943 iq->flags &= ~IQ_ALLOCATED;
1944 }
1945
1946 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
1947
1948 bzero(iq, sizeof(*iq));
1949
1950 if (fl) {
1951 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
1952 fl->desc);
1953
1954 if (fl->sdesc)
1955 free_fl_sdesc(fl);
1956
1957 if (mtx_initialized(&fl->fl_lock))
1958 mtx_destroy(&fl->fl_lock);
1959
1960 for (i = 0; i < FL_BUF_SIZES; i++) {
1961 if (fl->tag[i])
1962 bus_dma_tag_destroy(fl->tag[i]);
1963 }
1964
1965 bzero(fl, sizeof(*fl));
1966 }
1967
1968 return (0);
1969}
1970
1971static int
1972alloc_fwq(struct adapter *sc)
1973{
1974 int rc, intr_idx;
1975 struct sge_iq *fwq = &sc->sge.fwq;
1976 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
1977 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
1978
1979 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
1980 fwq->flags |= IQ_INTR; /* always */
1981 intr_idx = sc->intr_count > 1 ? 1 : 0;
1982 rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
1983 if (rc != 0) {
1984 device_printf(sc->dev,
1985 "failed to create firmware event queue: %d\n", rc);
1986 return (rc);
1987 }
1988
1989 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
1990 NULL, "firmware event queue");
1991 children = SYSCTL_CHILDREN(oid);
1992
1993 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
1994 CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
1995 "absolute id of the queue");
1996 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
1997 CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
1998 "SGE context id of the queue");
1999 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
2000 CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
2001 "consumer index");
2002
2003 return (0);
2004}
2005
2006static int
2007free_fwq(struct adapter *sc)
2008{
2009 return free_iq_fl(NULL, &sc->sge.fwq, NULL);
2010}
2011
2012static int
2013alloc_mgmtq(struct adapter *sc)
2014{
2015 int rc;
2016 struct sge_wrq *mgmtq = &sc->sge.mgmtq;
2017 char name[16];
2018 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
2019 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2020
2021 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
2022 NULL, "management queue");
2023
2024 snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
2025 init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
2026 sc->sge.fwq.cntxt_id, name);
2027 rc = alloc_wrq(sc, NULL, mgmtq, oid);
2028 if (rc != 0) {
2029 device_printf(sc->dev,
2030 "failed to create management queue: %d\n", rc);
2031 return (rc);
2032 }
2033
2034 return (0);
2035}
2036
2037static int
2038free_mgmtq(struct adapter *sc)
2039{
2040
2041 return free_wrq(sc, &sc->sge.mgmtq);
2042}
2043
2044static inline int
2045tnl_cong(struct port_info *pi)
2046{
2047
2048 if (cong_drop == -1)
2049 return (-1);
2050 else if (cong_drop == 1)
2051 return (0);
2052 else
2053 return (1 << pi->tx_chan);
2054}
2055
2056static int
2057alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
2058 struct sysctl_oid *oid)
2059{
2060 int rc;
2061 struct sysctl_oid_list *children;
2062 char name[16];
2063
2064 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
2065 if (rc != 0)
2066 return (rc);
2067
2068 FL_LOCK(&rxq->fl);
2069 refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
2070 FL_UNLOCK(&rxq->fl);
2071
2072#if defined(INET) || defined(INET6)
2073 rc = tcp_lro_init(&rxq->lro);
2074 if (rc != 0)
2075 return (rc);
2076 rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
2077
2078 if (pi->ifp->if_capenable & IFCAP_LRO)
2079 rxq->iq.flags |= IQ_LRO_ENABLED;
2080#endif
2081 rxq->ifp = pi->ifp;
2082
2083 children = SYSCTL_CHILDREN(oid);
2084
2085 snprintf(name, sizeof(name), "%d", idx);
2086 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2087 NULL, "rx queue");
2088 children = SYSCTL_CHILDREN(oid);
2089
2090 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2091 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
2092 "absolute id of the queue");
2093 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2094 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
2095 "SGE context id of the queue");
2096 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2097 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
2098 "consumer index");
2099#if defined(INET) || defined(INET6)
2100 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
2101 &rxq->lro.lro_queued, 0, NULL);
2102 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
2103 &rxq->lro.lro_flushed, 0, NULL);
2104#endif
2105 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
2106 &rxq->rxcsum, "# of times hardware assisted with checksum");
2107 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
2108 CTLFLAG_RD, &rxq->vlan_extraction,
2109 "# of times hardware extracted 802.1Q tag");
2110
2111 children = SYSCTL_CHILDREN(oid);
2112 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
2113 NULL, "freelist");
2114 children = SYSCTL_CHILDREN(oid);
2115
2116 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2117 CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I",
2118 "SGE context id of the queue");
2119 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
2120 &rxq->fl.cidx, 0, "consumer index");
2121 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
2122 &rxq->fl.pidx, 0, "producer index");
2123
2124 return (rc);
2125}
2126
2127static int
2128free_rxq(struct port_info *pi, struct sge_rxq *rxq)
2129{
2130 int rc;
2131
2132#if defined(INET) || defined(INET6)
2133 if (rxq->lro.ifp) {
2134 tcp_lro_free(&rxq->lro);
2135 rxq->lro.ifp = NULL;
2136 }
2137#endif
2138
2139 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
2140 if (rc == 0)
2141 bzero(rxq, sizeof(*rxq));
2142
2143 return (rc);
2144}
2145
2146#ifdef TCP_OFFLOAD
2147static int
2148alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
2149 int intr_idx, int idx, struct sysctl_oid *oid)
2150{
2151 int rc;
2152 struct sysctl_oid_list *children;
2153 char name[16];
2154
2155 rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
2156 1 << pi->tx_chan);
2157 if (rc != 0)
2158 return (rc);
2159
2160 children = SYSCTL_CHILDREN(oid);
2161
2162 snprintf(name, sizeof(name), "%d", idx);
2163 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2164 NULL, "rx queue");
2165 children = SYSCTL_CHILDREN(oid);
2166
2167 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
2168 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
2169 "I", "absolute id of the queue");
2170 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2171 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
2172 "I", "SGE context id of the queue");
2173 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2174 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
2175 "consumer index");
2176
2177 children = SYSCTL_CHILDREN(oid);
2178 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
2179 NULL, "freelist");
2180 children = SYSCTL_CHILDREN(oid);
2181
2182 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
2183 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16,
2184 "I", "SGE context id of the queue");
2185 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
2186 &ofld_rxq->fl.cidx, 0, "consumer index");
2187 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
2188 &ofld_rxq->fl.pidx, 0, "producer index");
2189
2190 return (rc);
2191}
2192
2193static int
2194free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
2195{
2196 int rc;
2197
2198 rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
2199 if (rc == 0)
2200 bzero(ofld_rxq, sizeof(*ofld_rxq));
2201
2202 return (rc);
2203}
2204#endif
2205
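/*
 * The three *_eq_alloc() helpers below share one shape: build a
 * FW_EQ_{CTRL,ETH,OFLD}_CMD describing the ring (fetch burst min/max, cidx
 * flush threshold, queue size, bus address), issue it with t4_wr_mbox(),
 * mark the eq EQ_ALLOCATED, and record it in sc->sge.eqmap indexed by
 * cntxt_id - eq_start so that egress updates can be routed back to the
 * right queue (see handle_sge_egr_update() at the end of this file).
 */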
2206static int
2207ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
2208{
2209 int rc, cntxt_id;
2210 struct fw_eq_ctrl_cmd c;
2211
2212 bzero(&c, sizeof(c));
2213
2214 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
2215 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
2216 V_FW_EQ_CTRL_CMD_VFN(0));
2217 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
2218 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
2219 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
2220 c.physeqid_pkd = htobe32(0);
2221 c.fetchszm_to_iqid =
2222 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2223 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
2224 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
2225 c.dcaen_to_eqsize =
2226 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2227 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2228 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2229 V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
2230 c.eqaddr = htobe64(eq->ba);
2231
2232 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2233 if (rc != 0) {
2234 device_printf(sc->dev,
2235 "failed to create control queue %d: %d\n", eq->tx_chan, rc);
2236 return (rc);
2237 }
2238 eq->flags |= EQ_ALLOCATED;
2239
2240 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
2241 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2242 if (cntxt_id >= sc->sge.neq)
2243 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2244 cntxt_id, sc->sge.neq - 1);
2245 sc->sge.eqmap[cntxt_id] = eq;
2246
2247 return (rc);
2248}
2249
2250static int
2251eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2252{
2253 int rc, cntxt_id;
2254 struct fw_eq_eth_cmd c;
2255
2256 bzero(&c, sizeof(c));
2257
2258 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
2259 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
2260 V_FW_EQ_ETH_CMD_VFN(0));
2261 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
2262 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
2263 c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
2264 c.fetchszm_to_iqid =
2265 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2266 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
2267 V_FW_EQ_ETH_CMD_IQID(eq->iqid));
2268 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2269 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2270 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2271 V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
2272 c.eqaddr = htobe64(eq->ba);
2273
2274 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2275 if (rc != 0) {
2276 device_printf(pi->dev,
2277 "failed to create Ethernet egress queue: %d\n", rc);
2278 return (rc);
2279 }
2280 eq->flags |= EQ_ALLOCATED;
2281
2282 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
2283 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2284 if (cntxt_id >= sc->sge.neq)
2285 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2286 cntxt_id, sc->sge.neq - 1);
2287 sc->sge.eqmap[cntxt_id] = eq;
2288
2289 return (rc);
2290}
2291
2292#ifdef TCP_OFFLOAD
2293static int
2294ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2295{
2296 int rc, cntxt_id;
2297 struct fw_eq_ofld_cmd c;
2298
2299 bzero(&c, sizeof(c));
2300
2301 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
2302 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
2303 V_FW_EQ_OFLD_CMD_VFN(0));
2304 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
2305 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
2306 c.fetchszm_to_iqid =
2307 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
2308 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
2309 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
2310 c.dcaen_to_eqsize =
2311 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
2312 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
2313 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
2314 V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
2315 c.eqaddr = htobe64(eq->ba);
2316
2317 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
2318 if (rc != 0) {
2319 device_printf(pi->dev,
2320 "failed to create egress queue for TCP offload: %d\n", rc);
2321 return (rc);
2322 }
2323 eq->flags |= EQ_ALLOCATED;
2324
2325 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
2326 cntxt_id = eq->cntxt_id - sc->sge.eq_start;
2327 if (cntxt_id >= sc->sge.neq)
2328 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
2329 cntxt_id, sc->sge.neq - 1);
2330 sc->sge.eqmap[cntxt_id] = eq;
2331
2332 return (rc);
2333}
2334#endif
2335
2336static int
2337alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
2338{
2339 int rc;
2340 size_t len;
2341
2342 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
2343
2344 len = eq->qsize * EQ_ESIZE;
2345 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
2346 &eq->ba, (void **)&eq->desc);
2347 if (rc)
2348 return (rc);
2349
2350 eq->cap = eq->qsize - spg_len / EQ_ESIZE;
2351 eq->spg = (void *)&eq->desc[eq->cap];
2352 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */
2353 eq->pidx = eq->cidx = 0;
2354 eq->doorbells = sc->doorbells;
2355
2356 switch (eq->flags & EQ_TYPEMASK) {
2357 case EQ_CTRL:
2358 rc = ctrl_eq_alloc(sc, eq);
2359 break;
2360
2361 case EQ_ETH:
2362 rc = eth_eq_alloc(sc, pi, eq);
2363 break;
2364
2365#ifdef TCP_OFFLOAD
2366 case EQ_OFLD:
2367 rc = ofld_eq_alloc(sc, pi, eq);
2368 break;
2369#endif
2370
2371 default:
2372 panic("%s: invalid eq type %d.", __func__,
2373 eq->flags & EQ_TYPEMASK);
2374 }
2375 if (rc != 0) {
2376 device_printf(sc->dev,
2377 "failed to allocate egress queue (%d): %d\n",
2378 eq->flags & EQ_TYPEMASK, rc);
2379 }
2380
2381 eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
2382
2383 if (isset(&eq->doorbells, DOORBELL_UDB) ||
2384 isset(&eq->doorbells, DOORBELL_UDBWC) ||
2385 isset(&eq->doorbells, DOORBELL_WCWR)) {
2386 uint32_t s_qpp = sc->sge.s_qpp;
2387 uint32_t mask = (1 << s_qpp) - 1;
2388 volatile uint8_t *udb;
2389
2390 udb = sc->udbs_base + UDBS_DB_OFFSET;
2391 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */
2392 eq->udb_qid = eq->cntxt_id & mask; /* id in page */
2393 if (eq->udb_qid > PAGE_SIZE / UDBS_SEG_SIZE)
2394 clrbit(&eq->doorbells, DOORBELL_WCWR);
2395 else {
2396 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */
2397 eq->udb_qid = 0;
2398 }
2399 eq->udb = (volatile void *)udb;
2400 }
2401
2402 return (rc);
2403}
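/*
 * A worked example of the user-doorbell math above, with illustrative
 * numbers only (s_qpp = 3, PAGE_SIZE = 4096, UDBS_SEG_SIZE = 128 are
 * assumptions for the example): for cntxt_id = 21 the doorbell page is
 * 21 >> 3 = 2 pages into the UDB region, the queue is 21 & 7 = 5 segments
 * into that page, and since 5 is not greater than 4096 / 128 the segment
 * offset is folded into eq->udb and udb_qid is reset to 0, which is what
 * later allows the WCWR path in ring_eq_db() to write a descriptor straight
 * into the doorbell segment.
 */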
2404
2405static int
2406free_eq(struct adapter *sc, struct sge_eq *eq)
2407{
2408 int rc;
2409
2410 if (eq->flags & EQ_ALLOCATED) {
2411 switch (eq->flags & EQ_TYPEMASK) {
2412 case EQ_CTRL:
2413 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
2414 eq->cntxt_id);
2415 break;
2416
2417 case EQ_ETH:
2418 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
2419 eq->cntxt_id);
2420 break;
2421
2422#ifdef TCP_OFFLOAD
2423 case EQ_OFLD:
2424 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
2425 eq->cntxt_id);
2426 break;
2427#endif
2428
2429 default:
2430 panic("%s: invalid eq type %d.", __func__,
2431 eq->flags & EQ_TYPEMASK);
2432 }
2433 if (rc != 0) {
2434 device_printf(sc->dev,
2435 "failed to free egress queue (%d): %d\n",
2436 eq->flags & EQ_TYPEMASK, rc);
2437 return (rc);
2438 }
2439 eq->flags &= ~EQ_ALLOCATED;
2440 }
2441
2442 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
2443
2444 if (mtx_initialized(&eq->eq_lock))
2445 mtx_destroy(&eq->eq_lock);
2446
2447 bzero(eq, sizeof(*eq));
2448 return (0);
2449}
2450
2451static int
2452alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
2453 struct sysctl_oid *oid)
2454{
2455 int rc;
2456 struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
2457 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2458
2459 rc = alloc_eq(sc, pi, &wrq->eq);
2460 if (rc)
2461 return (rc);
2462
2463 wrq->adapter = sc;
2464 STAILQ_INIT(&wrq->wr_list);
2465
2466 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2467 &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
2468 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
2469 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
2470 "consumer index");
2471 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
2472 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
2473 "producer index");
2474 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
2475 &wrq->tx_wrs, "# of work requests");
2476 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2477 &wrq->no_desc, 0,
2478 "# of times queue ran out of hardware descriptors");
2479 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2480 &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
2481
2482
2483 return (rc);
2484}
2485
2486static int
2487free_wrq(struct adapter *sc, struct sge_wrq *wrq)
2488{
2489 int rc;
2490
2491 rc = free_eq(sc, &wrq->eq);
2492 if (rc)
2493 return (rc);
2494
2495 bzero(wrq, sizeof(*wrq));
2496 return (0);
2497}
2498
2499static int
2500alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
2501 struct sysctl_oid *oid)
2502{
2503 int rc;
2504 struct adapter *sc = pi->adapter;
2505 struct sge_eq *eq = &txq->eq;
2506 char name[16];
2507 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
2508
2509 rc = alloc_eq(sc, pi, eq);
2510 if (rc)
2511 return (rc);
2512
2513 txq->ifp = pi->ifp;
2514
2515 txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
2516 M_ZERO | M_WAITOK);
2517 txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
2518
2519 rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
2520 BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
2521 BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
2522 if (rc != 0) {
2523 device_printf(sc->dev,
2524 "failed to create tx DMA tag: %d\n", rc);
2525 return (rc);
2526 }
2527
2528 /*
2529 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
2530 * limit for any WR). txq->no_dmamap events shouldn't occur if the maps
2531 * are sized for the worst case.
2532 */
2533 rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
2534 M_WAITOK);
2535 if (rc != 0) {
2536 device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
2537 return (rc);
2538 }
2539
2540 snprintf(name, sizeof(name), "%d", idx);
2541 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
2542 NULL, "tx queue");
2543 children = SYSCTL_CHILDREN(oid);
2544
2545 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
2546 &eq->cntxt_id, 0, "SGE context id of the queue");
2547 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
2548 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
2549 "consumer index");
2550 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
2551 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
2552 "producer index");
2553
2554 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
2555 &txq->txcsum, "# of times hardware assisted with checksum");
2556 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
2557 CTLFLAG_RD, &txq->vlan_insertion,
2558 "# of times hardware inserted 802.1Q tag");
2559 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
2560 &txq->tso_wrs, "# of TSO work requests");
2561 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
2562 &txq->imm_wrs, "# of work requests with immediate data");
2563 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
2564 &txq->sgl_wrs, "# of work requests with direct SGL");
2565 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
2566 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
2567 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
2568 &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
2569 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
2570 &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
2571
2572 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
2573 &txq->br->br_drops, "# of drops in the buf_ring for this queue");
2574 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
2575 &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
2576 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
2577 &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
2578 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
2579 &eq->egr_update, 0, "egress update notifications from the SGE");
2580 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
2581 &eq->unstalled, 0, "# of times txq recovered after stall");
2582
2583 return (rc);
2584}
2585
2586static int
2587free_txq(struct port_info *pi, struct sge_txq *txq)
2588{
2589 int rc;
2590 struct adapter *sc = pi->adapter;
2591 struct sge_eq *eq = &txq->eq;
2592
2593 rc = free_eq(sc, eq);
2594 if (rc)
2595 return (rc);
2596
2597 free(txq->sdesc, M_CXGBE);
2598
2599 if (txq->txmaps.maps)
2600 t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
2601
2602 buf_ring_free(txq->br, M_CXGBE);
2603
2604 if (txq->tx_tag)
2605 bus_dma_tag_destroy(txq->tx_tag);
2606
2607 bzero(txq, sizeof(*txq));
2608 return (0);
2609}
2610
2611static void
2612oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2613{
2614 bus_addr_t *ba = arg;
2615
2616 KASSERT(nseg == 1,
2617 ("%s meant for single segment mappings only.", __func__));
2618
2619 *ba = error ? 0 : segs->ds_addr;
2620}
2621
2622static inline bool
2623is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
2624{
2625 *ctrl = (void *)((uintptr_t)iq->cdesc +
2626 (iq->esize - sizeof(struct rsp_ctrl)));
2627
2628 return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
2629}
2630
2631static inline void
2632iq_next(struct sge_iq *iq)
2633{
2634 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
2635 if (__predict_false(++iq->cidx == iq->qsize - 1)) {
2636 iq->cidx = 0;
2637 iq->gen ^= 1;
2638 iq->cdesc = iq->desc;
2639 }
2640}
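/*
 * is_new_response() and iq_next() implement a generation-bit scheme: the
 * tail of every iq entry is a struct rsp_ctrl whose gen bit is compared
 * against iq->gen, and iq->gen is flipped each time cidx wraps at
 * qsize - 1.  A leftover entry from the previous pass never matches, so no
 * hardware-supplied head pointer is needed to spot new responses.
 */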
2641
2642#define FL_HW_IDX(x) ((x) >> 3)
2643static inline void
2644ring_fl_db(struct adapter *sc, struct sge_fl *fl)
2645{
2646 int ndesc = fl->pending / 8;
2647 uint32_t v;
2648
2649 if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
2650 ndesc--; /* hold back one credit */
2651
2652 if (ndesc <= 0)
2653 return; /* nothing to do */
2654
2655 v = F_DBPRIO | V_QID(fl->cntxt_id) | V_PIDX(ndesc);
2656 if (is_t5(sc))
2657 v |= F_DBTYPE;
2658
2659 wmb();
2660
2661 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);
2662 fl->pending -= ndesc * 8;
2663}
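/*
 * The freelist doorbell works in units of 8 descriptors (FL_HW_IDX drops
 * the low 3 bits), so ring_fl_db() only tells the hardware about complete
 * groups of 8 and leaves the remainder in fl->pending.  For example, with
 * fl->pending = 29 it normally posts ndesc = 3 units (24 descriptors) and
 * leaves 5 pending; if pidx and cidx fall in the same group of 8 it holds
 * one unit back and posts 2 units (16 descriptors), leaving 13 pending.
 */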
2664
2665/*
2666 * Fill up the freelist with up to nbufs buffers and maybe ring its doorbell.
2667 *
2668 * Returns non-zero to indicate that the freelist should be added to the list
2669 * of starving freelists.
2670 */
2671static int
2672refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
2673{
2674 __be64 *d = &fl->desc[fl->pidx];
2675 struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
2676 bus_dma_tag_t tag;
2677 bus_addr_t pa;
2678 caddr_t cl;
2679 int rc;
2680
2681 FL_LOCK_ASSERT_OWNED(fl);
2682
2683 if (nbufs > fl->needed)
2684 nbufs = fl->needed;
2685
2686 while (nbufs--) {
2687
2688 if (sd->cl != NULL) {
2689
2690 /*
2691 * This happens when a frame small enough to fit
2692 * entirely in an mbuf was received in cl last time.
2693 * We'd held on to cl and can reuse it now. Note that
2694 * we reuse a cluster of the old size if fl->tag_idx is
2695 * no longer the same as sd->tag_idx.
2696 */
2697
2698 KASSERT(*d == sd->ba_tag,
2699 ("%s: recycling problem at pidx %d",
2700 __func__, fl->pidx));
2701
2702 d++;
2703 goto recycled;
2704 }
2705
2706
2707 if (fl->tag_idx != sd->tag_idx) {
2708 bus_dmamap_t map;
2709 bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
2710 bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
2711
2712 /*
2713 * An MTU change can get us here. Discard the old map
2714 * which was created with the old tag, but only if
2715 * we're able to get a new one.
2716 */
2717 rc = bus_dmamap_create(newtag, 0, &map);
2718 if (rc == 0) {
2719 bus_dmamap_destroy(oldtag, sd->map);
2720 sd->map = map;
2721 sd->tag_idx = fl->tag_idx;
2722 }
2723 }
2724
2725 tag = fl->tag[sd->tag_idx];
2726
2727 cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
2728 if (cl == NULL)
2729 break;
2730
2731 rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
2732 oneseg_dma_callback, &pa, 0);
2733 if (rc != 0 || pa == 0) {
2734 fl->dmamap_failed++;
2735 uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
2736 break;
2737 }
2738
2739 sd->cl = cl;
2740 *d++ = htobe64(pa | sd->tag_idx);
2741
2742#ifdef INVARIANTS
2743 sd->ba_tag = htobe64(pa | sd->tag_idx);
2744#endif
2745
2746recycled:
2747 /* sd->m is never recycled, should always be NULL */
2748 KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));
2749
2750 sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
2751 if (sd->m == NULL)
2752 break;
2753
2754 fl->pending++;
2755 fl->needed--;
2756 sd++;
2757 if (++fl->pidx == fl->cap) {
2758 fl->pidx = 0;
2759 sd = fl->sdesc;
2760 d = fl->desc;
2761 }
2762 }
2763
2764 if (fl->pending >= 8)
2765 ring_fl_db(sc, fl);
2766
2767 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
2768}
2769
2770/*
2771 * Attempt to refill all starving freelists.
2772 */
2773static void
2774refill_sfl(void *arg)
2775{
2776 struct adapter *sc = arg;
2777 struct sge_fl *fl, *fl_temp;
2778
2779 mtx_lock(&sc->sfl_lock);
2780 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) {
2781 FL_LOCK(fl);
2782 refill_fl(sc, fl, 64);
2783 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) {
2784 TAILQ_REMOVE(&sc->sfl, fl, link);
2785 fl->flags &= ~FL_STARVING;
2786 }
2787 FL_UNLOCK(fl);
2788 }
2789
2790 if (!TAILQ_EMPTY(&sc->sfl))
2791 callout_schedule(&sc->sfl_callout, hz / 5);
2792 mtx_unlock(&sc->sfl_lock);
2793}
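/*
 * Together with add_fl_to_sfl() further down, this is the starvation
 * recovery path: a freelist that could not be refilled inline is flagged
 * FL_STARVING, queued on sc->sfl, and retried from this callout every
 * hz / 5 ticks until it is no longer running low or is being torn down
 * (FL_DOOMED).
 */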
2794
2795static int
2796alloc_fl_sdesc(struct sge_fl *fl)
2797{
2798 struct fl_sdesc *sd;
2799 bus_dma_tag_t tag;
2800 int i, rc;
2801
2802 fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
2803 M_ZERO | M_WAITOK);
2804
2805 tag = fl->tag[fl->tag_idx];
2806 sd = fl->sdesc;
2807 for (i = 0; i < fl->cap; i++, sd++) {
2808
2809 sd->tag_idx = fl->tag_idx;
2810 rc = bus_dmamap_create(tag, 0, &sd->map);
2811 if (rc != 0)
2812 goto failed;
2813 }
2814
2815 return (0);
2816failed:
2817 while (--i >= 0) {
2818 sd--;
2819 bus_dmamap_destroy(tag, sd->map);
2820 if (sd->m) {
2821 m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
2822 m_free(sd->m);
2823 sd->m = NULL;
2824 }
2825 }
2826 KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
2827
2828 free(fl->sdesc, M_CXGBE);
2829 fl->sdesc = NULL;
2830
2831 return (rc);
2832}
2833
2834static void
2835free_fl_sdesc(struct sge_fl *fl)
2836{
2837 struct fl_sdesc *sd;
2838 int i;
2839
2840 sd = fl->sdesc;
2841 for (i = 0; i < fl->cap; i++, sd++) {
2842
2843 if (sd->m) {
2844 m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
2845 m_free(sd->m);
2846 sd->m = NULL;
2847 }
2848
2849 if (sd->cl) {
2850 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
2851 uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
2852 sd->cl = NULL;
2853 }
2854
2855 bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
2856 }
2857
2858 free(fl->sdesc, M_CXGBE);
2859 fl->sdesc = NULL;
2860}
2861
2862int
2863t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
2864 int flags)
2865{
2866 struct tx_map *txm;
2867 int i, rc;
2868
2869 txmaps->map_total = txmaps->map_avail = count;
2870 txmaps->map_cidx = txmaps->map_pidx = 0;
2871
2872 txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
2873 M_ZERO | flags);
2874
2875 txm = txmaps->maps;
2876 for (i = 0; i < count; i++, txm++) {
2877 rc = bus_dmamap_create(tx_tag, 0, &txm->map);
2878 if (rc != 0)
2879 goto failed;
2880 }
2881
2882 return (0);
2883failed:
2884 while (--i >= 0) {
2885 txm--;
2886 bus_dmamap_destroy(tx_tag, txm->map);
2887 }
2888 KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
2889
2890 free(txmaps->maps, M_CXGBE);
2891 txmaps->maps = NULL;
2892
2893 return (rc);
2894}
2895
2896void
2897t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
2898{
2899 struct tx_map *txm;
2900 int i;
2901
2902 txm = txmaps->maps;
2903 for (i = 0; i < txmaps->map_total; i++, txm++) {
2904
2905 if (txm->m) {
2906 bus_dmamap_unload(tx_tag, txm->map);
2907 m_freem(txm->m);
2908 txm->m = NULL;
2909 }
2910
2911 bus_dmamap_destroy(tx_tag, txm->map);
2912 }
2913
2914 free(txmaps->maps, M_CXGBE);
2915 txmaps->maps = NULL;
2916}
2917
2918/*
2919 * We'll do immediate data tx for non-TSO, but only when not coalescing. We're
2920 * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
2921 * of immediate data.
2922 */
2923#define IMM_LEN ( \
2924 2 * EQ_ESIZE \
2925 - sizeof(struct fw_eth_tx_pkt_wr) \
2926 - sizeof(struct cpl_tx_pkt_core))
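/*
 * The arithmetic behind the 96 bytes quoted above, assuming the usual
 * 64-byte egress descriptor (EQ_ESIZE) and 16-byte fw_eth_tx_pkt_wr and
 * cpl_tx_pkt_core headers (the sizes that make the figure come out):
 * IMM_LEN = 2 * 64 - 16 - 16 = 96.  Frames no longer than that can be
 * copied straight into the descriptor ring by write_txpkt_wr() instead of
 * being DMA mapped.
 */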
2927
2928/*
2929 * Returns non-zero on failure, no need to cleanup anything in that case.
2930 *
2931 * Note 1: We always try to defrag the mbuf if required and return EFBIG only
2932 * if the resulting chain still won't fit in a tx descriptor.
2933 *
2934 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
2935 * does not have the TCP header in it.
2936 */
2937static int
2938get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
2939 int sgl_only)
2940{
2941 struct mbuf *m = *fp;
2942 struct tx_maps *txmaps;
2943 struct tx_map *txm;
2944 int rc, defragged = 0, n;
2945
2946 TXQ_LOCK_ASSERT_OWNED(txq);
2947
2948 if (m->m_pkthdr.tso_segsz)
2949 sgl_only = 1; /* Do not allow immediate data with LSO */
2950
2951start: sgl->nsegs = 0;
2952
2953 if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
2954 return (0); /* nsegs = 0 tells caller to use imm. tx */
2955
2956 txmaps = &txq->txmaps;
2957 if (txmaps->map_avail == 0) {
2958 txq->no_dmamap++;
2959 return (ENOMEM);
2960 }
2961 txm = &txmaps->maps[txmaps->map_pidx];
2962
2963 if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
2964 *fp = m_pullup(m, 50);
2965 m = *fp;
2966 if (m == NULL)
2967 return (ENOBUFS);
2968 }
2969
2970 rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
2971 &sgl->nsegs, BUS_DMA_NOWAIT);
2972 if (rc == EFBIG && defragged == 0) {
2973 m = m_defrag(m, M_NOWAIT);
2974 if (m == NULL)
2975 return (EFBIG);
2976
2977 defragged = 1;
2978 *fp = m;
2979 goto start;
2980 }
2981 if (rc != 0)
2982 return (rc);
2983
2984 txm->m = m;
2985 txmaps->map_avail--;
2986 if (++txmaps->map_pidx == txmaps->map_total)
2987 txmaps->map_pidx = 0;
2988
2989 KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
2990 ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
2991
2992 /*
2993 * Store the # of flits required to hold this frame's SGL in nflits. An
2994 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
2995 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used
2996 * then len1 must be set to 0.
2997 */
2998 n = sgl->nsegs - 1;
2999 sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
3000
3001 return (0);
3002}
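/*
 * Worked examples of the nflits formula above, with n = nsegs - 1: one
 * segment gives 0 + 0 + 2 = 2 flits (the ULPTX header/len0 flit plus
 * addr0); nsegs = 2 gives 1 + 1 + 2 = 4 (a length flit with len1 = 0 plus
 * addr1); nsegs = 4 gives 4 + 1 + 2 = 7.  Every extra pair of segments
 * beyond the first costs 3 flits (a shared length flit and two addresses),
 * which is where the 3 * n / 2 term comes from.
 */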
3003
3004
3005/*
3006 * Releases all the txq resources used up in the specified sgl.
3007 */
3008static int
3009free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
3010{
3011 struct tx_maps *txmaps;
3012 struct tx_map *txm;
3013
3014 TXQ_LOCK_ASSERT_OWNED(txq);
3015
3016 if (sgl->nsegs == 0)
3017 return (0); /* didn't use any map */
3018
3019 txmaps = &txq->txmaps;
3020
3021 /* 1 pkt uses exactly 1 map, back it out */
3022
3023 txmaps->map_avail++;
3024 if (txmaps->map_pidx > 0)
3025 txmaps->map_pidx--;
3026 else
3027 txmaps->map_pidx = txmaps->map_total - 1;
3028
3029 txm = &txmaps->maps[txmaps->map_pidx];
3030 bus_dmamap_unload(txq->tx_tag, txm->map);
3031 txm->m = NULL;
3032
3033 return (0);
3034}
3035
3036static int
3037write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
3038 struct sgl *sgl)
3039{
3040 struct sge_eq *eq = &txq->eq;
3041 struct fw_eth_tx_pkt_wr *wr;
3042 struct cpl_tx_pkt_core *cpl;
3043 uint32_t ctrl; /* used in many unrelated places */
3044 uint64_t ctrl1;
3045 int nflits, ndesc, pktlen;
3046 struct tx_sdesc *txsd;
3047 caddr_t dst;
3048
3049 TXQ_LOCK_ASSERT_OWNED(txq);
3050
3051 pktlen = m->m_pkthdr.len;
3052
3053 /*
3054 * Do we have enough flits to send this frame out?
3055 */
3056 ctrl = sizeof(struct cpl_tx_pkt_core);
3057 if (m->m_pkthdr.tso_segsz) {
3058 nflits = TXPKT_LSO_WR_HDR;
3059 ctrl += sizeof(struct cpl_tx_pkt_lso_core);
3060 } else
3061 nflits = TXPKT_WR_HDR;
3062 if (sgl->nsegs > 0)
3063 nflits += sgl->nflits;
3064 else {
3065 nflits += howmany(pktlen, 8);
3066 ctrl += pktlen;
3067 }
3068 ndesc = howmany(nflits, 8);
3069 if (ndesc > eq->avail)
3070 return (ENOMEM);
3071
3072 /* Firmware work request header */
3073 wr = (void *)&eq->desc[eq->pidx];
3074 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
3075 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
3076 ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
3077 if (eq->avail == ndesc) {
3078 if (!(eq->flags & EQ_CRFLUSHED)) {
3079 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3080 eq->flags |= EQ_CRFLUSHED;
3081 }
3082 eq->flags |= EQ_STALLED;
3083 }
3084
3085 wr->equiq_to_len16 = htobe32(ctrl);
3086 wr->r3 = 0;
3087
3088 if (m->m_pkthdr.tso_segsz) {
3089 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
3090 struct ether_header *eh;
3091 void *l3hdr;
3092#if defined(INET) || defined(INET6)
3093 struct tcphdr *tcp;
3094#endif
3095 uint16_t eh_type;
3096
3097 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
3098 F_LSO_LAST_SLICE;
3099
3100 eh = mtod(m, struct ether_header *);
3101 eh_type = ntohs(eh->ether_type);
3102 if (eh_type == ETHERTYPE_VLAN) {
3103 struct ether_vlan_header *evh = (void *)eh;
3104
3105 ctrl |= V_LSO_ETHHDR_LEN(1);
3106 l3hdr = evh + 1;
3107 eh_type = ntohs(evh->evl_proto);
3108 } else
3109 l3hdr = eh + 1;
3110
3111 switch (eh_type) {
3112#ifdef INET6
3113 case ETHERTYPE_IPV6:
3114 {
3115 struct ip6_hdr *ip6 = l3hdr;
3116
3117 /*
3118 * XXX-BZ For now we do not pretend to support
3119 * IPv6 extension headers.
3120 */
3121 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
3122 "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
3123 tcp = (struct tcphdr *)(ip6 + 1);
3124 ctrl |= F_LSO_IPV6;
3125 ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
3126 V_LSO_TCPHDR_LEN(tcp->th_off);
3127 break;
3128 }
3129#endif
3130#ifdef INET
3131 case ETHERTYPE_IP:
3132 {
3133 struct ip *ip = l3hdr;
3134
3135 tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
3136 ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
3137 V_LSO_TCPHDR_LEN(tcp->th_off);
3138 break;
3139 }
3140#endif
3141 default:
3142 panic("%s: CSUM_TSO but no supported IP version "
3143 "(0x%04x)", __func__, eh_type);
3144 }
3145
3146 lso->lso_ctrl = htobe32(ctrl);
3147 lso->ipid_ofst = htobe16(0);
3148 lso->mss = htobe16(m->m_pkthdr.tso_segsz);
3149 lso->seqno_offset = htobe32(0);
3150 lso->len = htobe32(pktlen);
3151
3152 cpl = (void *)(lso + 1);
3153
3154 txq->tso_wrs++;
3155 } else
3156 cpl = (void *)(wr + 1);
3157
3158 /* Checksum offload */
3159 ctrl1 = 0;
3160 if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3161 ctrl1 |= F_TXPKT_IPCSUM_DIS;
3162 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3163 CSUM_TCP_IPV6 | CSUM_TSO)))
3164 ctrl1 |= F_TXPKT_L4CSUM_DIS;
3165 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3166 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3167 txq->txcsum++; /* some hardware assistance provided */
3168
3169 /* VLAN tag insertion */
3170 if (m->m_flags & M_VLANTAG) {
3171 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3172 txq->vlan_insertion++;
3173 }
3174
3175 /* CPL header */
3176 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3177 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3178 cpl->pack = 0;
3179 cpl->len = htobe16(pktlen);
3180 cpl->ctrl1 = htobe64(ctrl1);
3181
3182 /* Software descriptor */
3183 txsd = &txq->sdesc[eq->pidx];
3184 txsd->desc_used = ndesc;
3185
3186 eq->pending += ndesc;
3187 eq->avail -= ndesc;
3188 eq->pidx += ndesc;
3189 if (eq->pidx >= eq->cap)
3190 eq->pidx -= eq->cap;
3191
3192 /* SGL */
3193 dst = (void *)(cpl + 1);
3194 if (sgl->nsegs > 0) {
3195 txsd->credits = 1;
3196 txq->sgl_wrs++;
3197 write_sgl_to_txd(eq, sgl, &dst);
3198 } else {
3199 txsd->credits = 0;
3200 txq->imm_wrs++;
3201 for (; m; m = m->m_next) {
3202 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
3203#ifdef INVARIANTS
3204 pktlen -= m->m_len;
3205#endif
3206 }
3207#ifdef INVARIANTS
3208 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
3209#endif
3210
3211 }
3212
3213 txq->txpkt_wrs++;
3214 return (0);
3215}
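/*
 * When a work request consumes the last free descriptors (eq->avail ==
 * ndesc) the EQUEQ/EQUIQ flags request an egress update from the firmware
 * and the queue is marked EQ_CRFLUSHED | EQ_STALLED; handle_sge_egr_update()
 * near the end of this file clears EQ_CRFLUSHED and reschedules the tx task
 * once credits return.  write_txpkts_wr() below follows the same convention.
 */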
3216
3217/*
3218 * Returns 0 to indicate that m has been accepted into a coalesced tx work
3219 * request. It has either been folded into txpkts or txpkts was flushed and m
3220 * has started a new coalesced work request (as the first frame in a fresh
3221 * txpkts).
3222 *
3223 * Returns non-zero to indicate a failure; the caller is responsible for
3224 * transmitting m. If there was anything in txpkts, it has been flushed.
3225 */
3226static int
3227add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
3228 struct mbuf *m, struct sgl *sgl)
3229{
3230 struct sge_eq *eq = &txq->eq;
3231 int can_coalesce;
3232 struct tx_sdesc *txsd;
3233 int flits;
3234
3235 TXQ_LOCK_ASSERT_OWNED(txq);
3236
3237 KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
3238
3239 if (txpkts->npkt > 0) {
3240 flits = TXPKTS_PKT_HDR + sgl->nflits;
3241 can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3242 txpkts->nflits + flits <= TX_WR_FLITS &&
3243 txpkts->nflits + flits <= eq->avail * 8 &&
3244 txpkts->plen + m->m_pkthdr.len < 65536;
3245
3246 if (can_coalesce) {
3247 txpkts->npkt++;
3248 txpkts->nflits += flits;
3249 txpkts->plen += m->m_pkthdr.len;
3250
3251 txsd = &txq->sdesc[eq->pidx];
3252 txsd->credits++;
3253
3254 return (0);
3255 }
3256
3257 /*
3258 * Couldn't coalesce m into txpkts. The first order of business
3259 * is to send txpkts on its way. Then we'll revisit m.
3260 */
3261 write_txpkts_wr(txq, txpkts);
3262 }
3263
3264 /*
3265 * Check if we can start a new coalesced tx work request with m as
3266 * the first packet in it.
3267 */
3268
3269 KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
3270
3271 flits = TXPKTS_WR_HDR + sgl->nflits;
3272 can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
3273 flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
3274
3275 if (can_coalesce == 0)
3276 return (EINVAL);
3277
3278 /*
3279 * Start a fresh coalesced tx WR with m as the first frame in it.
3280 */
3281 txpkts->npkt = 1;
3282 txpkts->nflits = flits;
3283 txpkts->flitp = &eq->desc[eq->pidx].flit[2];
3284 txpkts->plen = m->m_pkthdr.len;
3285
3286 txsd = &txq->sdesc[eq->pidx];
3287 txsd->credits = 1;
3288
3289 return (0);
3290}
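/*
 * The coalescing limits checked above: no TSO frames, the combined work
 * request must fit in TX_WR_FLITS flits and in the 8 flits per available
 * hardware descriptor (eq->avail * 8), and the total payload must stay
 * under 65536 because write_txpkts_wr() stores it in the 16-bit wr->plen.
 */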
3291
3292/*
3293 * Note that write_txpkts_wr can never run out of hardware descriptors (but
3294 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for
3295 * coalescing only if sufficient hardware descriptors are available.
3296 */
3297static void
3298write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
3299{
3300 struct sge_eq *eq = &txq->eq;
3301 struct fw_eth_tx_pkts_wr *wr;
3302 struct tx_sdesc *txsd;
3303 uint32_t ctrl;
3304 int ndesc;
3305
3306 TXQ_LOCK_ASSERT_OWNED(txq);
3307
3308 ndesc = howmany(txpkts->nflits, 8);
3309
3310 wr = (void *)&eq->desc[eq->pidx];
3311 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
3312 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
3313 if (eq->avail == ndesc) {
3314 if (!(eq->flags & EQ_CRFLUSHED)) {
3315 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
3316 eq->flags |= EQ_CRFLUSHED;
3317 }
3318 eq->flags |= EQ_STALLED;
3319 }
3320 wr->equiq_to_len16 = htobe32(ctrl);
3321 wr->plen = htobe16(txpkts->plen);
3322 wr->npkt = txpkts->npkt;
3323 wr->r3 = wr->type = 0;
3324
3325 /* Everything else already written */
3326
3327 txsd = &txq->sdesc[eq->pidx];
3328 txsd->desc_used = ndesc;
3329
3330 KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
3331
3332 eq->pending += ndesc;
3333 eq->avail -= ndesc;
3334 eq->pidx += ndesc;
3335 if (eq->pidx >= eq->cap)
3336 eq->pidx -= eq->cap;
3337
3338 txq->txpkts_pkts += txpkts->npkt;
3339 txq->txpkts_wrs++;
3340 txpkts->npkt = 0; /* emptied */
3341}
3342
3343static inline void
3344write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
3345 struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
3346{
3347 struct ulp_txpkt *ulpmc;
3348 struct ulptx_idata *ulpsc;
3349 struct cpl_tx_pkt_core *cpl;
3350 struct sge_eq *eq = &txq->eq;
3351 uintptr_t flitp, start, end;
3352 uint64_t ctrl;
3353 caddr_t dst;
3354
3355 KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
3356
3357 start = (uintptr_t)eq->desc;
3358 end = (uintptr_t)eq->spg;
3359
3360 /* Checksum offload */
3361 ctrl = 0;
3362 if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
3363 ctrl |= F_TXPKT_IPCSUM_DIS;
3364 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
3365 CSUM_TCP_IPV6 | CSUM_TSO)))
3366 ctrl |= F_TXPKT_L4CSUM_DIS;
3367 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
3368 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
3369 txq->txcsum++; /* some hardware assistance provided */
3370
3371 /* VLAN tag insertion */
3372 if (m->m_flags & M_VLANTAG) {
3373 ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
3374 txq->vlan_insertion++;
3375 }
3376
3377 /*
3378 * The previous packet's SGL must have ended at a 16 byte boundary (this
3379 * is required by the firmware/hardware). It follows that flitp cannot
3380 * wrap around between the ULPTX master command and ULPTX subcommand (8
3381 * bytes each), and that it cannot wrap around in the middle of the
3382 * cpl_tx_pkt_core either.
3383 */
3384 flitp = (uintptr_t)txpkts->flitp;
3385 KASSERT((flitp & 0xf) == 0,
3386 ("%s: last SGL did not end at 16 byte boundary: %p",
3387 __func__, txpkts->flitp));
3388
3389 /* ULP master command */
3390 ulpmc = (void *)flitp;
3391 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
3392 V_ULP_TXPKT_FID(eq->iqid));
3393 ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
3394 sizeof(*cpl) + 8 * sgl->nflits, 16));
3395
3396 /* ULP subcommand */
3397 ulpsc = (void *)(ulpmc + 1);
3398 ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
3399 F_ULP_TX_SC_MORE);
3400 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
3401
3402 flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
3403 if (flitp == end)
3404 flitp = start;
3405
3406 /* CPL_TX_PKT */
3407 cpl = (void *)flitp;
3408 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
3409 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
3410 cpl->pack = 0;
3411 cpl->len = htobe16(m->m_pkthdr.len);
3412 cpl->ctrl1 = htobe64(ctrl);
3413
3414 flitp += sizeof(*cpl);
3415 if (flitp == end)
3416 flitp = start;
3417
3418 /* SGL for this frame */
3419 dst = (caddr_t)flitp;
3420 txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
3421 txpkts->flitp = (void *)dst;
3422
3423 KASSERT(((uintptr_t)dst & 0xf) == 0,
3424 ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
3425}
3426
3427/*
3428 * If the SGL ends on an address that is not 16 byte aligned, this function will
3429 * add a 0 filled flit at the end. It returns 1 in that case.
3430 */
3431static int
3432write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
3433{
3434 __be64 *flitp, *end;
3435 struct ulptx_sgl *usgl;
3436 bus_dma_segment_t *seg;
3437 int i, padded;
3438
3439 KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
3440 ("%s: bad SGL - nsegs=%d, nflits=%d",
3441 __func__, sgl->nsegs, sgl->nflits));
3442
3443 KASSERT(((uintptr_t)(*to) & 0xf) == 0,
3444 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
3445
3446 flitp = (__be64 *)(*to);
3447 end = flitp + sgl->nflits;
3448 seg = &sgl->seg[0];
3449 usgl = (void *)flitp;
3450
3451 /*
3452 * We start at a 16 byte boundary somewhere inside the tx descriptor
3453 * ring, so we're at least 16 bytes away from the status page. There is
3454 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
3455 */
3456
3457 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
3458 V_ULPTX_NSGE(sgl->nsegs));
3459 usgl->len0 = htobe32(seg->ds_len);
3460 usgl->addr0 = htobe64(seg->ds_addr);
3461 seg++;
3462
3463 if ((uintptr_t)end <= (uintptr_t)eq->spg) {
3464
3465 /* Won't wrap around at all */
3466
3467 for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
3468 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
3469 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
3470 }
3471 if (i & 1)
3472 usgl->sge[i / 2].len[1] = htobe32(0);
3473 } else {
3474
3475 /* Will wrap somewhere in the rest of the SGL */
3476
3477 /* 2 flits already written, write the rest flit by flit */
3478 flitp = (void *)(usgl + 1);
3479 for (i = 0; i < sgl->nflits - 2; i++) {
3480 if ((uintptr_t)flitp == (uintptr_t)eq->spg)
3481 flitp = (void *)eq->desc;
3482 *flitp++ = get_flit(seg, sgl->nsegs - 1, i);
3483 }
3484 end = flitp;
3485 }
3486
3487 if ((uintptr_t)end & 0xf) {
3488 *(uint64_t *)end = 0;
3489 end++;
3490 padded = 1;
3491 } else
3492 padded = 0;
3493
3494 if ((uintptr_t)end == (uintptr_t)eq->spg)
3495 *to = (void *)eq->desc;
3496 else
3497 *to = (void *)end;
3498
3499 return (padded);
3500}
3501
3502static inline void
3503copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
3504{
3505 if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
3506 bcopy(from, *to, len);
3507 (*to) += len;
3508 } else {
3509 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
3510
3511 bcopy(from, *to, portion);
3512 from += portion;
3513 portion = len - portion; /* remaining */
3514 bcopy(from, (void *)eq->desc, portion);
3515 (*to) = (caddr_t)eq->desc + portion;
3516 }
3517}
3518
3519static inline void
3520ring_eq_db(struct adapter *sc, struct sge_eq *eq)
3521{
3522 u_int db, pending;
3523
3524 db = eq->doorbells;
3525 pending = eq->pending;
3526 if (pending > 1)
3527 clrbit(&db, DOORBELL_WCWR);
3528 eq->pending = 0;
3529 wmb();
3530
3531 switch (ffs(db) - 1) {
3532 case DOORBELL_UDB:
3533 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
3534 return;
3535
3536 case DOORBELL_WCWR: {
3537 volatile uint64_t *dst, *src;
3538 int i;
3539
3540 /*
3541 * Queues whose 128B doorbell segment fits in the page do not
3542 * use relative qid (udb_qid is always 0). Only queues with
3543 * doorbell segments can do WCWR.
3544 */
3545 KASSERT(eq->udb_qid == 0 && pending == 1,
3546 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
3547 __func__, eq->doorbells, pending, eq->pidx, eq));
3548
3549 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
3550 UDBS_DB_OFFSET);
3551 i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
3552 src = (void *)&eq->desc[i];
3553 while (src != (void *)&eq->desc[i + 1])
3554 *dst++ = *src++;
3555 wmb();
3556 return;
3557 }
3558
3559 case DOORBELL_UDBWC:
3560 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
3561 wmb();
3562 return;
3563
3564 case DOORBELL_KDB:
3565 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
3566 V_QID(eq->cntxt_id) | V_PIDX(pending));
3567 return;
3568 }
3569}
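/*
 * ring_eq_db() uses the first doorbell type still set in the mask (ffs),
 * after knocking WCWR out whenever more than one descriptor is pending:
 * the write-combined path copies a single descriptor straight into the
 * queue's doorbell segment, while the UDB/UDBWC/KDB paths only publish the
 * pidx increment.
 */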
3570
3571static inline int
3572reclaimable(struct sge_eq *eq)
3573{
3574 unsigned int cidx;
3575
3576 cidx = eq->spg->cidx; /* stable snapshot */
3577 cidx = be16toh(cidx);
3578
3579 if (cidx >= eq->cidx)
3580 return (cidx - eq->cidx);
3581 else
3582 return (cidx + eq->cap - eq->cidx);
3583}
3584
3585/*
3586 * There are "can_reclaim" tx descriptors ready to be reclaimed. Reclaim as
3587 * many as possible but stop when there are around "n" mbufs to free.
3588 *
3589 * The actual number reclaimed is provided as the return value.
3590 */
3591static int
3592reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
3593{
3594 struct tx_sdesc *txsd;
3595 struct tx_maps *txmaps;
3596 struct tx_map *txm;
3597 unsigned int reclaimed, maps;
3598 struct sge_eq *eq = &txq->eq;
3599
3600 TXQ_LOCK_ASSERT_OWNED(txq);
3601
3602 if (can_reclaim == 0)
3603 can_reclaim = reclaimable(eq);
3604
3605 maps = reclaimed = 0;
3606 while (can_reclaim && maps < n) {
3607 int ndesc;
3608
3609 txsd = &txq->sdesc[eq->cidx];
3610 ndesc = txsd->desc_used;
3611
3612 /* Firmware doesn't return "partial" credits. */
3613 KASSERT(can_reclaim >= ndesc,
3614 ("%s: unexpected number of credits: %d, %d",
3615 __func__, can_reclaim, ndesc));
3616
3617 maps += txsd->credits;
3618
3619 reclaimed += ndesc;
3620 can_reclaim -= ndesc;
3621
3622 eq->cidx += ndesc;
3623 if (__predict_false(eq->cidx >= eq->cap))
3624 eq->cidx -= eq->cap;
3625 }
3626
3627 txmaps = &txq->txmaps;
3628 txm = &txmaps->maps[txmaps->map_cidx];
3629 if (maps)
3630 prefetch(txm->m);
3631
3632 eq->avail += reclaimed;
3633 KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */
3634 ("%s: too many descriptors available", __func__));
3635
3636 txmaps->map_avail += maps;
3637 KASSERT(txmaps->map_avail <= txmaps->map_total,
3638 ("%s: too many maps available", __func__));
3639
3640 while (maps--) {
3641 struct tx_map *next;
3642
3643 next = txm + 1;
3644 if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
3645 next = txmaps->maps;
3646 prefetch(next->m);
3647
3648 bus_dmamap_unload(txq->tx_tag, txm->map);
3649 m_freem(txm->m);
3650 txm->m = NULL;
3651
3652 txm = next;
3653 if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
3654 txmaps->map_cidx = 0;
3655 }
3656
3657 return (reclaimed);
3658}
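/*
 * Bookkeeping note: txsd->desc_used counts hardware descriptors per work
 * request and txsd->credits counts DMA maps (one per frame that carried an
 * SGL), so reclaim_tx_descs() advances eq->cidx by whole work requests
 * using the consumer index the hardware publishes in the status page (see
 * reclaimable()), then unloads and frees that many maps and mbufs from the
 * txmaps ring.
 */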
3659
3660static void
3661write_eqflush_wr(struct sge_eq *eq)
3662{
3663 struct fw_eq_flush_wr *wr;
3664
3665 EQ_LOCK_ASSERT_OWNED(eq);
3666 KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
3667 KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
3668
3669 wr = (void *)&eq->desc[eq->pidx];
3670 bzero(wr, sizeof(*wr));
3671 wr->opcode = FW_EQ_FLUSH_WR;
3672 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
3673 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
3674
3675 eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
3676 eq->pending++;
3677 eq->avail--;
3678 if (++eq->pidx == eq->cap)
3679 eq->pidx = 0;
3680}
3681
3682static __be64
3683get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
3684{
3685 int i = (idx / 3) * 2;
3686
3687 switch (idx % 3) {
3688 case 0: {
3689 __be64 rc;
3690
3691 rc = htobe32(sgl[i].ds_len);
3692 if (i + 1 < nsegs)
3693 rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
3694
3695 return (rc);
3696 }
3697 case 1:
3698 return htobe64(sgl[i].ds_addr);
3699 case 2:
3700 return htobe64(sgl[i + 1].ds_addr);
3701 }
3702
3703 return (0);
3704}
3705
3706static void
3707set_fl_tag_idx(struct sge_fl *fl, int bufsize)
3708{
3709 int i;
3710
3711 for (i = 0; i < FL_BUF_SIZES - 1; i++) {
3712 if (FL_BUF_SIZE(i) >= bufsize)
3713 break;
3714 }
3715
3716 fl->tag_idx = i;
3717}
3718
3719static void
3720add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
3721{
3722 mtx_lock(&sc->sfl_lock);
3723 FL_LOCK(fl);
3724 if ((fl->flags & FL_DOOMED) == 0) {
3725 fl->flags |= FL_STARVING;
3726 TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
3727 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
3728 }
3729 FL_UNLOCK(fl);
3730 mtx_unlock(&sc->sfl_lock);
3731}
3732
3733static int
3734handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
3735 struct mbuf *m)
3736{
3737 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
3738 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
3739 struct adapter *sc = iq->adapter;
3740 struct sge *s = &sc->sge;
3741 struct sge_eq *eq;
3742
3743 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
3744 rss->opcode));
3745
3746 eq = s->eqmap[qid - s->eq_start];
3747 EQ_LOCK(eq);
3748 KASSERT(eq->flags & EQ_CRFLUSHED,
3749 ("%s: unsolicited egress update", __func__));
3750 eq->flags &= ~EQ_CRFLUSHED;
3751 eq->egr_update++;
3752
3753 if (__predict_false(eq->flags & EQ_DOOMED))
3754 wakeup_one(eq);
3755 else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
3756 taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
3757 EQ_UNLOCK(eq);
3758
3759 return (0);
3760}
3761
3762/* handle_fw_msg works for both fw4_msg and fw6_msg because the CTASSERT below holds */
3763CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
3764 offsetof(struct cpl_fw6_msg, data));
3765
3766static int
3767handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
3768{
3769 struct adapter *sc = iq->adapter;
3770 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
3771
3772 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
3773 rss->opcode));
3774
3775 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
3776 const struct rss_header *rss2;
3777
3778 rss2 = (const struct rss_header *)&cpl->data[0];
3779 return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
3780 }
3781
3782 return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
3783}
3784
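/*
 * sysctl_handle_int() wants an int, so the 16-bit indices (abs_id,
 * cntxt_id, cidx, pidx) registered with SYSCTL_ADD_PROC earlier in this
 * file are copied into a local int here and exported read-only.
 */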
3785static int
3786sysctl_uint16(SYSCTL_HANDLER_ARGS)
3787{
3788 uint16_t *id = arg1;
3789 int i = *id;
3790
3791 return (sysctl_handle_int(oidp, &i, 0, req));
3792}